PDF::FromHTML:锚点未被转换,且转换提前终止

PDF::FromHTML No Anchors and Early Termination

本问题是对此前一个问题的后续:PDF::FromHTML - Corrupt file and no output

现在这段代码已经"能用"了:它可以正常生成 PDF 文档,但 HTML 锚点一个都没有被转换,而且对于较大的文档,处理会停在 PDF 的第 11 页——没有任何报错,文档也被正常关闭!

编辑:为了省去点开上面链接查看前一个问题的时间,相关代码如下:

    # print "<p>".$textblob."</p>";

    # Wrap the accumulated fragment in a minimal HTML document for the converter.
    $textblob='<html><head></head><body>'.$textblob.'</body></html>';

    # $textblob = decode('UTF-8', $textblob);

    my $output;
    if(defined($query->param('PDF'))){
        my $pdf = PDF::FromHTML->new( encoding => 'utf-8' );
        # Pass a scalar reference so the markup is read from memory; a plain
        # string would be interpreted as the name of a file to open.
        $pdf->load_file(\$textblob);
        $pdf->convert(
            # With PDF::API2, font names such as 'traditional' also works
             Font        => 'Arial',
             LineHeight  => 10,
             Landscape   => 0,
        );
        # Likewise, a scalar reference makes write_file() store the PDF bytes in
        # $output instead of treating the argument as an output file name.
        $pdf->write_file(\$output);
        print $output;
    }

如果取消 print 那一行的注释并把 PDF 部分注释掉,$textblob 会完整显示包含 400 个引用段落的冒险故事,其中的链接在 HTML 里一切正常……

更新:实在没有办法了,下面贴出整个脚本(并不算太长……)

#!/usr/bin/perl
use cPanelUserConfig;
use CGI::Carp qw(fatalsToBrowser);
use CGI;
use List::Util qw(shuffle);
use PDF::FromHTML;
require "authenticate.pl";

# Request dispatcher.
#
# Reads the CGI parameters, emits the HTTP header matching the requested
# output (PDF or HTML) and hands over to the sub implementing the action:
#   doc        - raw document text; split into numbered references and shown
#   references - number of reference slots posted back by the edit form,
#                combined with one of the submit buttons (Randomise, Save,
#                Auto-HTML Tag, Auto-ABML Tag, PDF)
#   (neither)  - show the empty input form
#
# $query is deliberately left as a package global: every sub in this script
# reads it directly.
$query = CGI->new;

# Evaluated once; decides both the header and whether the HTML footer is sent.
my $want_pdf = defined($query->param('PDF'));

if($want_pdf){
    print $query->header(-type=>'application/pdf');
}
else{
    print $query->header(-charset=>'utf-8');
    html_header();
    print "\n\n\n\n<!-- -------------------------- BEGIN: ff.net Script generated text ------------------------------------------- -->";
    print "Randomise working? Let me know if you find a bug.<br />";
}

if(defined($query->param('doc'))){
    # The "EOF" sentinel gives the look-ahead below something to stop at for
    # the final reference.
    my $doc=$query->param('doc')."\nEOF";
    my %refhash = $doc =~ /^[\n\s\t\.\#]*(\d+)[\s\t\.\#\n]+(?!\n*^[\n\s\t\.\#]*\d+[\s\t\.\#\n]+)(.+?)(?=^[\s\t\.\#\n]*\d+[\s\t\.\#\n]+|EOF)/smcgi; # refhash{key}=content, where key==refnumber and content==well, ref content
    display_refhash(\%refhash);

}

elsif(defined($query->param('references'))){
    my %anchors;
    # scalar(): param() in list context returns every submitted value, which
    # would inject extra arguments into the call.
    my $refhashref=recreate_refhash(scalar $query->param('references'),\%anchors);

    if(defined($query->param('Randomise'))){
        $refhashref=randomise($refhashref,\%anchors);
        print "Your adventure looks like this: <br /><br />";
        display_refhash($refhashref);
    }
    elsif(defined($query->param('Save'))){
        save($refhashref);
    }
    elsif(defined($query->param('Auto-HTML Tag'))){
        print "Your adventure looks like this: <br /><br />";
        display_refhash($refhashref);
    }
    elsif(defined($query->param('Auto-ABML Tag'))){
        autoABML($refhashref);
        print "Your adventure looks like this: <br /><br />";
        display_refhash($refhashref);
    }
    elsif($want_pdf){
        output_pdf($refhashref);
    }
    else{
        print "undefined function call";
    }
}
else{ # output form to input doc content
    print "Please input your document text into the textarea below (copy and paste should do it):";
    print '<form method="post" action="doc_to_refs.cgi" enctype="multipart/form-data" name="doc_to_refs_form">';
    print $query->textarea(-name=>'doc',-rows=>20,-cols=>100, -style=>"font-family:arial;width:98%");
    print $query->submit('Go!');
    print '</form>';
}

# The HTML header is only sent for HTML responses, so the footer must be too;
# anything printed after the PDF bytes would corrupt the download.
html_footer() unless $want_pdf;

# print "<!-- -------------------------- END: ff.net Script generated text ------------------------------------------- -->";    

# Rebuild the reference table from the numbered fields posted by the edit form.
#
# Arguments:
#   1. number of form slots to scan (slots are numbered from 0)
#   2. hash reference that receives anchor value => slot index for every
#      slot whose "anchorN" field was submitted
#
# Slots whose "deleteN" field was submitted are skipped entirely.
# Returns a reference to a hash mapping each "referenceN" value to the
# matching "reftextN" value.
sub recreate_refhash{
    my ($slot_count, $anchor_map) = @_;
    my %rebuilt;

    my $slot = 0;
    while ($slot < $slot_count) {
        unless (defined($query->param("delete$slot"))) {
            # Scalar assignments keep param() in scalar context.
            my $number = $query->param("reference$slot");
            my $body   = $query->param("reftext$slot");
            $rebuilt{$number} = $body;

            my $anchored = $query->param("anchor$slot");
            $anchor_map->{$anchored} = $slot if defined($anchored);
        }
        $slot++;
    }

    return \%rebuilt;
}

# Shuffle which reference number each piece of content lives under, keep
# anchored entries in place, and renumber the "go to N" style links inside
# the content so they still point at the same text.
#
# Arguments:
#   1. hash reference: reference number => content
#   2. hash reference: anchored reference number => index that entry must
#      keep in the numerically sorted list of reference numbers
#
# Returns a reference to a new hash (reference number => content). Progress
# messages are printed to the currently selected output handle.
sub randomise{
    my $refhashref=shift;
    my $anchor_ref=shift;
    my %refhash=%$refhashref;
    my %anchors=%$anchor_ref;
    # Parenthesised so that both hashes are lexical; "my %a, %b" only
    # declares the first one.
    my (%randomisedrefhash, %Xrefhash);

    # randomise the list
    my @refstack=shuffle sort {$a <=> $b} keys %refhash;

    ## transpose anchors back to their required location
    for(my $x=0;$x<@refstack;$x++){
        my $target=$anchors{$refstack[$x]};
        next unless defined $target;
        # An anchor index past the end of the list (e.g. after deletions)
        # cannot be honoured; skipping it avoids growing the list with undef.
        next if $target > $#refstack;

        # Reference to the array element, so assigning through it swaps in place.
        my $anchor=\$refstack[$target];
        my $temp=$refstack[$x];
        $refstack[$x]=$$anchor;

        print "---Swapping $temp with ".$$anchor;
        $$anchor=$temp;

        # The entry just moved into slot $x may itself be anchored elsewhere;
        # if so, revisit this slot on the next iteration.
        my $displaced_target=$anchors{$refstack[$x]};
        if(defined $displaced_target and $displaced_target <= $#refstack){
            if($refstack[$displaced_target] ne $$anchor){
                $x--;
            }
        }
    }

    ## randomise the refs and the content associations, and create the cross-ref hash
    foreach my $ref (sort {$a <=> $b} keys %refhash){
        my $key=shift @refstack;
        $randomisedrefhash{$ref}=$refhash{$key};
        $Xrefhash{$key}=$ref;          # old number => new number
    }

    ## now do the content link substitutions
    foreach my $ref (keys %randomisedrefhash){
        $randomisedrefhash{$ref}=~s/(return\sto|go\sto|turn\sto)(\s+)(page|paragraph|reference|section)*(\s)*(\d+)/substitute_xref($1,$2,$3,$4,$5,\%Xrefhash)/egi;
    }

    print "You asked for the following anchors:";
    foreach my $key (keys %anchors){
        print $anchors{$key};
    }

    return \%randomisedrefhash;
}

# Build the replacement text for one "go to N" style link after renumbering.
#
# Arguments:
#   1-4. the text pieces captured before the number (verb phrase, spacing,
#        optional noun, optional spacing); undefined pieces count as empty
#   5.   the reference number as it appears in the text
#   6.   hash reference mapping old reference numbers to new ones
#
# Returns the pieces joined back together with the number replaced by its
# new value. A number with no entry in the map is kept as-is, so a link to
# an unknown reference is never silently stripped of its number.
sub substitute_xref{
    my ($pretext1, $pretext2, $pretext3, $pretext4, $link, $Xrefhashref) = @_;

    # Look the number up through the reference; copying the whole hash for
    # every substitution is unnecessary work.
    my $newlink;
    if (defined $link and ref($Xrefhashref) eq 'HASH') {
        $newlink = $Xrefhashref->{$link};
    }
    $newlink = $link unless defined $newlink;

    return join '', map { defined $_ ? $_ : '' }
        $pretext1, $pretext2, $pretext3, $pretext4, $newlink;
}

# Placeholder for the "Save" action: ignores its arguments and only prints a
# notice to the currently selected output handle.
sub save{
    return print("Will Save soon");
}

# Print the edit form for a set of references, followed by a preview of the
# whole document.
#
# Argument: hash reference mapping reference number => content.
#
# For every reference (in numeric order) the form gets a text field for the
# number, "delete" and "anchor" checkboxes and a textarea for the content;
# the fields are numbered reference0/reftext0/anchor0/delete0 and so on.
# A hidden "references" field carries the number of slots printed.
#
# In the preview, when the "Auto-HTML Tag" or "PDF" parameter is present each
# heading number becomes a named anchor and each "go to N" style phrase a
# link to it; with "Auto-ABML Tag" the phrases become escaped <tt ref="N">
# markup instead.
#
# NOTE(review): the content is echoed into the page without HTML escaping;
# this looks deliberate (the text may already contain markup) -- confirm.
sub display_refhash{
    my $refhashref=shift;
    my %refhash=%$refhashref;

    # The request parameters do not change while looping, so test them once.
    my $tag_html=(defined($query->param('Auto-HTML Tag')) or defined($query->param('PDF')));
    my $tag_abml=defined($query->param('Auto-ABML Tag'));

    print '<form method="post" action="doc_to_refs.cgi" enctype="multipart/form-data" name="doc_to_refs_form">';
    my $x=0;
    # Lexical accumulator: "my $ref,$textblob" would leave $textblob global.
    my $textblob='';
    foreach my $ref (sort {$a <=> $b} keys %refhash){
        my $reference="reference"."$x";
        my $reftext="reftext"."$x";
        my $anchor="anchor"."$x";
        my $delete="delete"."$x";
        my $default=$refhash{$ref};

        print "Reference is: ".$query->textfield(-name=>$reference,-value=>$ref, -override=>1)."<br />";
        print $query->checkbox_group(-name=>$delete,-values=>$ref,-labels=>{$ref=>'Delete Me'})."<br />";
        print $query->checkbox_group(-name=>$anchor,-values=>$ref, -labels=>{$ref=>'Anchor Me (Will NOT get Randomised)'})."<br />";
        print "Content is: ".$query->textarea(-name=>$reftext, -default=>$default, -rows=>5, -override=>1, -cols=>100, -style=>"font-family:arial;width:98%")."<br />";
        print "<br /><br />";

        # Tag a copy of the number so the form fields above keep the plain value.
        my $heading=$ref;

        if($tag_html){
            # The captures must appear in the replacement, otherwise the
            # number and the link wording are replaced by empty tags.
            $heading=~s{(\d+)}{<a id="$1">$1</a>}gi;
            $default=~s{(return\sto|go\sto|turn\sto)(\s+)(page|paragraph|reference|section)*(\s*)(\d+)}{<a href="#$5">$1$2$3$4$5</a>}gi;
        }

        if($tag_abml){
            $default=~s{(return\sto|go\sto|turn\sto)(\s+)(page|paragraph|reference|section)*(\s*)(\d+)}{&lt;tt ref="$5"&gt;$1$2$3$4$5&lt;/tt&gt;}gi;
        }

        $textblob.=$heading." ".$default."<br /><br />";

        $x++;
    }

    print $query->hidden(-name=>'references',-value=>$x,override=>1);
#   print $query->submit(-name=>'Save');
    print $query->submit('Randomise');
    print $query->submit('Auto-ABML Tag');  
    print $query->submit('Auto-HTML Tag');  
    print $query->submit('PDF');    
    print "</form><br /><br /><br />";
    $textblob=~s/\n/\<br \/\>/gi;

    print "<p>".$textblob."</p>";

}

# Render the references as one HTML document and print it as a PDF.
#
# Argument: hash reference mapping reference number => content.
#
# References are emitted in numeric order. When the "Auto-HTML Tag" or "PDF"
# parameter is present each heading number becomes a named anchor and each
# "go to N" style phrase a link to it; with "Auto-ABML Tag" the phrases
# become escaped <tt ref="N"> markup. Nothing is printed unless the "PDF"
# parameter is present.
sub output_pdf{
    my $refhashref=shift;
    my %refhash=%$refhashref;

    # The request parameters do not change while looping, so test them once.
    my $want_pdf=defined($query->param('PDF'));
    my $tag_html=(defined($query->param('Auto-HTML Tag')) or $want_pdf);
    my $tag_abml=defined($query->param('Auto-ABML Tag'));

    # Lexical accumulator: "my $ref,$textblob" would leave $textblob global.
    my $textblob='';
    foreach my $ref (sort {$a <=> $b} keys %refhash){
        my $heading=$ref;
        my $default=$refhash{$ref};

        if($tag_html){
            # The captures must appear in the replacement, otherwise the
            # number and the link wording are replaced by empty tags.
            $heading=~s{(\d+)}{<a id="$1">$1</a>}gi;
            $default=~s{(return\sto|go\sto|turn\sto)(\s+)(page|paragraph|reference|section)*(\s*)(\d+)}{<a href="#$5">$1$2$3$4$5</a>}gi;
        }

        if($tag_abml){
            $default=~s{(return\sto|go\sto|turn\sto)(\s+)(page|paragraph|reference|section)*(\s*)(\d+)}{&lt;tt ref="$5"&gt;$1$2$3$4$5&lt;/tt&gt;}gi;
        }

        $textblob.=$heading." ".$default."<br /><br />";
    }

    $textblob=~s/\n/\<br \/\>/gi;

    # print "<p>".$textblob."</p>";

    $textblob='<html><head></head><body>'.$textblob.'</body></html>';

    my $output;
    if($want_pdf){
        my $pdf = PDF::FromHTML->new( encoding => 'utf-8' );
        # Pass a scalar reference so the markup is read from memory; a plain
        # string would be interpreted as the name of a file to open.
        $pdf->load_file(\$textblob);
        $pdf->convert(
            # With PDF::API2, font names such as 'traditional' also works
             Font        => 'Arial',
             LineHeight  => 10,
             Landscape   => 0,
        );
        # A scalar reference makes write_file() store the PDF bytes in $output.
        $pdf->write_file(\$output);
        # PDF is binary data; keep output layers from translating any bytes.
        binmode(STDOUT);
        print $output;
    }
}

# Hook for emitting the page header of HTML responses; currently prints
# nothing and returns an empty list.
sub html_header{
    return;
}

# Hook for emitting the page footer of HTML responses; currently prints
# nothing and returns an empty list.
sub html_footer{
    return;
}

如果你想要示例数据,请告诉我,我会把它上传到某个地方

"...对于较大的文档,处理在 PDF 的第 11 页停止..."

这似乎是由于 PDF::FromHTML::Template::Container::PageDef 中的一个错误。注意这一行:

last if $::x++ > 10;

这意味着它永远不会创建超过 11 个页面。我已经为此提交了 bug 报告。