TCPDF + FPDI : 确保所有字体完全嵌入

TCPDF + FPDI : Ensure that all fonts are fully embedded

我正在使用 TCPDF + FPDI,并希望完全嵌入所有字体。使用 addTTFfont() 和 SetFont() 方法时,字体嵌入效果很好。但是,当我用 FPDI 打开现有的 pdf 文件时,我无法获得其中已用字体的列表,也无法确定哪些字体已被嵌入。这是我想要做的事情:

  1. 获取源pdf文件的字体信息(名称、类型等);
  2. 确定哪些字体没有完全嵌入;
  3. 嵌入这些字体。

到目前为止,我尝试把 TCPDF 类的 protected 成员 fonts 改为 public。为此,我修改了 tcpdf.php 文件:

/**
 * Array of used fonts.
 * NOTE: this member was originally declared protected; it has been made
 * public here so that calling code can inspect it (e.g. with var_dump()).
 * @public
 */
public $fonts = array();

main.php

// Import the first page of Simple.pdf as a template and place it on a new page.
$pdf = new FPDI();
$pdf->setSourceFile('Simple.pdf');
$pdf->AddPage();
$tplIdx = $pdf->importPage(1);
// Place the template at (10, 10) with a width of 200 (units as configured in the PDF object).
$pdf->useTemplate($tplIdx, 10, 10, 200);
// Presumably disables subsetting for fonts added through TCPDF itself - confirm against the TCPDF docs.
$pdf->setFontSubsetting(false);
// Dump the font table; readable from outside only because $fonts was made public in tcpdf.php.
var_dump($pdf->fonts);

输出:

array (size=2)
  'helvetica' => 
    array (size=17)
      'fontkey' => string 'helvetica' (length=9)
      'i' => int 1
      'type' => string 'core' (length=4)
      'name' => string 'Helvetica' (length=9)
      'desc' => 
        array (size=13)
          'Flags' => int 32
          'FontBBox' => string '[-166 -225 1000 931]' (length=20)
          'ItalicAngle' => int 0
          'Ascent' => int 931
          'Descent' => int -225
          'Leading' => int 0
          'CapHeight' => int 718
          'XHeight' => int 523
          'StemV' => int 88
          'StemH' => int 76
          'AvgWidth' => int 513
          'MaxWidth' => int 1015
          'MissingWidth' => int 513
      'up' => int -100
      'ut' => int 50
      'cw' => 
        array (size=256)
          0 => int 500
          1 => int 500
          2 => int 500
          ...
          ...

      'cbbox' => 
        array (size=0)
          empty
      'dw' => int 513
      'enc' => string '' (length=0)
      'cidinfo' => 
        array (size=4)
          'Registry' => string 'Adobe' (length=5)
          'Ordering' => string 'Identity' (length=8)
          'Supplement' => int 0
          'uni2cid' => 
            array (size=0)
              empty
      'file' => string '' (length=0)
      'ctg' => string '' (length=0)
      'subset' => boolean false
      'subsetchars' => 
        array (size=255)
          0 => boolean true
          1 => boolean true
          2 => boolean true
          3 => boolean true
          ...
          ...
      'n' => int 3
  'helveticaB' => 
    array (size=17)
      'fontkey' => string 'helveticaB' (length=10)
      'i' => int 2
      'type' => string 'core' (length=4)
      'name' => string 'Helvetica-Bold' (length=14)
      'desc' => 
        array (size=13)
          'Flags' => int 32
          'FontBBox' => string '[-170 -228 1003 962]' (length=20)
          'ItalicAngle' => int 0
          'Ascent' => int 962
          'Descent' => int -228
          'Leading' => int 0
          'CapHeight' => int 718
          'XHeight' => int 532
          'StemV' => int 140
          'StemH' => int 118
          'AvgWidth' => int 535
          'MaxWidth' => int 1000
          'MissingWidth' => int 535
      'up' => int -100
      'ut' => int 50
      'cw' => 
        array (size=256)
          0 => int 278
          1 => int 278
          2 => int 278
          3 => int 278
          ...
          ...
      'cbbox' => 
        array (size=0)
          empty
      'dw' => int 535
      'enc' => string '' (length=0)
      'cidinfo' => 
        array (size=4)
          'Registry' => string 'Adobe' (length=5)
          'Ordering' => string 'Identity' (length=8)
          'Supplement' => int 0
          'uni2cid' => 
            array (size=0)
              empty
      'file' => string '' (length=0)
      'ctg' => string '' (length=0)
      'subset' => boolean false
      'subsetchars' => 
        array (size=255)
          0 => boolean true
          1 => boolean true
          2 => boolean true
          3 => boolean true
          ...
          ...
      'n' => int 5

输出中只有 2 种字体的信息:“helvetica”(根据 TCPDF 文档,这是默认字体)和“helveticaB”。但是源文件还包含其他字体,它们没有被列出。如何列出它们?

提前致谢...

您需要确保被导入的文档/页面本身已经预先嵌入了所有字体。使用 FPDI 无法保证导入页面中的字体会被嵌入。

您也无法通过 FPDI 访问或列出这些字体。

在深入研究 .pdf 文件并阅读 pdf 文档的一些主题后,我设法以某种方式解决了问题。以下代码只是草稿,需要大量改进。我已经测试了几个 .pdf 文件并且它有效,但是有很多东西需要更好的方法。

在尝试解决问题时,我决定保留 TCPDF 和 FPDI 库不变。我没有使用过这些库的任何方法,因为我不知道这些库的内部功能。

P.S:不幸的是,我没有太多时间而且我的代码很长(而且不稳定):

 <?php

require_once('tcpdf.php');
require_once('fpdi.php');

        class PDF extends FPDI
        {
            var $_tplIdx;
            private $objects;
            private $font_list;
            private $raw_data;
            private $raw_data_len;
            private $font_read_offset;
            private $embedded_font_addr;

            private $is_descendant;

            /**
             * Loads the raw bytes of the source PDF and resets the parser state.
             *
             * @param string $filename Path of the PDF file whose fonts are inspected.
             */
            public function __construct($filename)
            {
                $this->objects = array();
                $this->font_list = array();
                // Initialised here so that later look-ups by font name never index into null.
                $this->embedded_font_addr = array();

                // file_get_contents() returns false on failure; fall back to an empty
                // buffer so the string functions used by the parser stay well-defined.
                $data = file_get_contents($filename);
                $this->raw_data = ($data === false) ? '' : $data;
                $this->raw_data_len = strlen($this->raw_data);
                $this->font_read_offset = 0;

                $this->is_descendant = false;

                parent::__construct();
            }

            /**
             * Walks forward through the raw PDF data, one object containing
             * 'Subtype /CIDFontType2' at a time, and records for each of them
             * BaseFont name => address of the object its FontDescriptor entry points to
             * in $this->embedded_font_addr. Stops when GetXObj() finds no further object.
             */
            private function EmbeddedFontRefs()
            {
                for(;;)
                {
                    $found = $this->GetXObj('Subtype /CIDFontType2');
                    if($found == NULL)
                        break;

                    // GetXObj() returns a single "address => content" pair; only the content is needed.
                    $body = reset($found);
                    if(!preg_match('/Subtype \/CIDFontType2/', $body))
                        continue;

                    $font_name = '';
                    $descriptor_addr = '';
                    $hit = array();

                    // 10 == strlen('BaseFont /')
                    if(preg_match('/BaseFont \/\S+/', $body, $hit))
                        $font_name = substr($hit[0], 10);

                    // 15 == strlen('FontDescriptor ')
                    if(preg_match('/FontDescriptor \d+ \d+ R+/', $body, $hit))
                        $descriptor_addr = $this->GetObjFromRef(substr($hit[0], 15));

                    $this->embedded_font_addr[$font_name] = $descriptor_addr;
                }
            }

            /**
             * Parses the next PDF object that contains $obj_search_string and records
             * what it learns about fonts.
             *
             * The object's dictionary text is split into one "name value" entry per line
             * and cached in $this->objects under the object's address. What happens next
             * depends on who asked for the object:
             *  - no referrer: the object was found while scanning; if it is a Type0 font
             *    with DescendantFonts and/or has a FontDescriptor entry it is appended to
             *    $this->font_list and those references are followed;
             *  - referrer name 'DescendantFonts': the object becomes the descendant font
             *    of the last font_list entry and its FontDescriptor is followed;
             *  - referrer name 'FontDescriptor': the Embedded / Subset flags and the
             *    descriptor itself are attached to the last font_list entry (or to its
             *    descendant font when one is being processed).
             *
             * Limitation: only dictionaries written with one entry per line are understood.
             *
             * @param string      $obj_search_string    Text to search for, or an object address such as "12 0 obj".
             * @param string|null $referrer_object_addr Address of the object that referenced this one, if any.
             * @param string|null $referrer_object_name Dictionary key through which it was referenced.
             * @return bool false when no matching object was found, true otherwise.
             */
            private function ParseObj($obj_search_string, $referrer_object_addr=NULL, $referrer_object_name=NULL)
            {
                $object_data = $this->GetObj($obj_search_string);
                if($object_data == NULL)
                    return false;

                // GetObj() returns exactly one "address => raw dictionary text" pair.
                reset($object_data);
                $object_addr = key($object_data);
                $object_raw_data = (string)current($object_data);

                $already_parsed = array_key_exists($object_addr, $this->objects);
                if(!$already_parsed)
                {
                    $this->objects[$object_addr] = array();

                    // Accept CRLF, CR and LF line endings instead of assuming a trailing "\r".
                    foreach(preg_split('/\r\n|\r|\n/', $object_raw_data) as $line)
                    {
                        $line = trim($line);
                        if($line === '')
                            continue;

                        $item = explode(' ', $line, 2);

                        $item_name = ltrim($item[0], '/');
                        if($item_name === '')
                            continue;

                        // An entry may consist of a name only; never index a missing value.
                        $item_value = isset($item[1]) ? ltrim($item[1]) : '';
                        if(($item_value !== '') && ($item_value[0] == '/'))
                            $item_value = (string)substr($item_value, 1);

                        // '#20' is the PDF name escape for a space.
                        if(($item_name == 'BaseFont') || ($item_name == 'FontName'))
                            $item_value = str_replace('#20', ' ', $item_value);

                        $this->objects[$object_addr][$item_name] = $item_value; // item_name => item_value
                    }
                }

                if($referrer_object_addr === NULL)
                {
                    // An object reached by the scan that was already parsed through a
                    // reference (e.g. a descendant font) has been recorded; do not list it twice.
                    if($already_parsed)
                        return true;

                    $font = $this->objects[$object_addr];

                    if(isset($font['Subtype'], $font['DescendantFonts']) && ($font['Subtype'] == 'Type0')) //Composite Fonts
                    {
                        $this->font_list[] = $font;
                        $this->ParseReferencedObj($font['DescendantFonts'], $object_addr, 'DescendantFonts');

                        // If the descendant had no (resolvable) descriptor the flag would stay
                        // set and misroute the descriptor of the next simple font.
                        $this->is_descendant = false;
                    }

                    if(isset($font['FontDescriptor']))
                    {
                        $this->font_list[] = $font;
                        $this->ParseReferencedObj($font['FontDescriptor'], $object_addr, 'FontDescriptor');
                    }
                }
                else if(array_key_exists($referrer_object_addr, $this->objects) && (count($this->font_list) > 0))
                {
                    $last = count($this->font_list) - 1;

                    if($referrer_object_name == 'DescendantFonts')
                    {
                        $this->font_list[$last]['DescendantFonts'] = array($this->objects[$object_addr]);
                        $this->is_descendant = true;

                        if(isset($this->objects[$object_addr]['FontDescriptor']))
                            $this->ParseReferencedObj($this->objects[$object_addr]['FontDescriptor'], $object_addr, 'FontDescriptor');
                    }
                    else if($referrer_object_name == 'FontDescriptor')
                    {
                        unset($this->objects[$object_addr]['Type']);
                        $descriptor = $this->objects[$object_addr];

                        // Both flags are derived from the cached dictionary so that a descriptor
                        // shared by several fonts reports the same values every time.
                        $embedded = array_key_exists('FontFile', $descriptor)
                                 || array_key_exists('FontFile2', $descriptor)
                                 || array_key_exists('FontFile3', $descriptor);

                        // A subset font name starts with a tag of six uppercase letters and '+'.
                        $subset = false;
                        foreach(array('BaseFont', 'FontName') as $name_key)
                        {
                            if(isset($descriptor[$name_key]) && preg_match('/^[A-Z]{6}\+/', $descriptor[$name_key]))
                                $subset = true;
                        }

                        if(!$this->is_descendant)
                        {
                            $this->font_list[$last]['Embedded'] = $embedded;
                            $this->font_list[$last]['Subset'] = $subset;
                            $this->font_list[$last]['FontDescriptor'] = $descriptor;
                        }
                        else
                        {
                            $this->font_list[$last]['DescendantFonts'][0]['Embedded'] = $embedded;
                            $this->font_list[$last]['DescendantFonts'][0]['Subset'] = $subset;
                            $this->font_list[$last]['DescendantFonts'][0]['FontDescriptor'] = $descriptor;
                            $this->is_descendant = false;
                        }
                    }
                }

                return true;
            }

            /**
             * Resolves an indirect reference and parses the object it points to.
             *
             * The referenced object may be located anywhere in the file, so the search
             * starts at offset 0; the scan cursor is restored afterwards so that the
             * sequential font scan neither skips nor repeats objects.
             *
             * @param string $obj_ref              Dictionary value holding the reference.
             * @param string $referrer_object_addr Address of the referencing object.
             * @param string $referrer_object_name Dictionary key the reference was stored under.
             * @return bool false when the reference could not be resolved or found.
             */
            private function ParseReferencedObj($obj_ref, $referrer_object_addr, $referrer_object_name)
            {
                $target_addr = $this->GetObjFromRef($obj_ref);
                if((string)$target_addr === '')
                    return false;

                $saved_offset = $this->font_read_offset;
                $this->font_read_offset = 0;
                $found = $this->ParseObj($target_addr, $referrer_object_addr, $referrer_object_name);
                $this->font_read_offset = $saved_offset;

                return $found;
            }

        /**
         * Finds the next PDF object at or after the read cursor that contains
         * $obj_search_string and returns its address with the text of its dictionary.
         *
         * The object is taken to start after the previous 'endobj' keyword (or at the
         * start of the data) and to end at the next 'endobj'. On success the read
         * cursor ($this->font_read_offset) is moved to that closing 'endobj'.
         *
         * Limitation: the dictionary text ends at the first '>>', so nested
         * dictionaries are cut short.
         *
         * @param string $obj_search_string Text to look for; a string of the form
         *                                  "12 0 obj" is matched as a whole object address.
         * @return array|null array(object address => dictionary text without the
         *                    surrounding '<<' '>>'), or NULL when nothing usable is found.
         */
        private function GetObj($obj_search_string)
        {
            $obj_search_string = (string)$obj_search_string;
            if($obj_search_string === '')
                return NULL;

            // Measure the buffer itself: raw_data may have been rewritten since raw_data_len was set.
            $data_len = strlen($this->raw_data);
            $cursor = (int)$this->font_read_offset;
            if(($cursor < 0) || ($cursor > $data_len))
                return NULL;

            if(preg_match('/^\d+ \d+ obj$/', $obj_search_string))
            {
                // Object address: make sure "2 0 obj" does not match inside "12 0 obj".
                $hit = array();
                $pattern = '/(?<![0-9])' . preg_quote($obj_search_string, '/') . '/';
                if(preg_match($pattern, $this->raw_data, $hit, PREG_OFFSET_CAPTURE, $cursor) !== 1)
                    return NULL;
                $obj_offset = $hit[0][1];
            }
            else
            {
                $obj_offset = strpos($this->raw_data, $obj_search_string, $cursor);
                if($obj_offset === false)
                    return NULL;
            }

            // A negative offset makes strrpos() search backwards from the match position.
            $prev_endobj = strrpos($this->raw_data, 'endobj', $obj_offset - $data_len);
            $obj_start = ($prev_endobj === false) ? 0 : $prev_endobj + 6; // 6 == strlen('endobj')

            $obj_end = strpos($this->raw_data, 'endobj', $obj_offset);
            if($obj_end === false)
                return NULL; // truncated object: leave the cursor alone instead of resetting it to 0

            $this->font_read_offset = $obj_end;

            $dict_open = strpos($this->raw_data, '<<', $obj_start);
            if(($dict_open === false) || ($dict_open > $obj_end))
                return NULL; // the object has no dictionary of its own

            // Whatever line-ending style separates "N G obj" from "<<", keep only the address.
            $header = (string)substr($this->raw_data, $obj_start, $dict_open - $obj_start);
            $hit = array();
            if(preg_match('/(\d+)\s+(\d+)\s+obj\s*$/', $header, $hit))
                $object_addr = $hit[1] . ' ' . $hit[2] . ' obj';
            else
                $object_addr = trim($header);

            $object_content_start = $dict_open + 2; //for '<<'
            $dict_close = strpos($this->raw_data, '>>', $object_content_start);
            if(($dict_close === false) || ($dict_close > $obj_end))
                $dict_close = $obj_end;

            // Strip the line ending in front of '>>' regardless of its length.
            $object_raw_data = rtrim((string)substr($this->raw_data, $object_content_start, $dict_close - $object_content_start));

            return array($object_addr => $object_raw_data);
        }

            /**
             * Finds the next PDF object at or after the read cursor that contains
             * $obj_search_string and returns its address with its body text.
             *
             * Unlike GetObj(), the returned text runs from just after the first '<<' to
             * one byte before the closing 'endobj' keyword, so nested dictionaries and
             * the closing '>>' are included. On success the read cursor
             * ($this->font_read_offset) is moved to that 'endobj'.
             *
             * @param string $obj_search_string Text to look for; a string of the form
             *                                  "12 0 obj" is matched as a whole object address.
             * @return array|null array(object address => body text), or NULL when nothing
             *                    usable is found.
             */
            private function GetXObj($obj_search_string)
            {
                $obj_search_string = (string)$obj_search_string;
                if($obj_search_string === '')
                    return NULL;

                // Measure the buffer itself: raw_data may have been rewritten since raw_data_len was set.
                $data_len = strlen($this->raw_data);
                $cursor = (int)$this->font_read_offset;
                if(($cursor < 0) || ($cursor > $data_len))
                    return NULL;

                if(preg_match('/^\d+ \d+ obj$/', $obj_search_string))
                {
                    // Object address: make sure "2 0 obj" does not match inside "12 0 obj".
                    $hit = array();
                    $pattern = '/(?<![0-9])' . preg_quote($obj_search_string, '/') . '/';
                    if(preg_match($pattern, $this->raw_data, $hit, PREG_OFFSET_CAPTURE, $cursor) !== 1)
                        return NULL;
                    $obj_offset = $hit[0][1];
                }
                else
                {
                    $obj_offset = strpos($this->raw_data, $obj_search_string, $cursor);
                    if($obj_offset === false)
                        return NULL;
                }

                // A negative offset makes strrpos() search backwards from the match position.
                $prev_endobj = strrpos($this->raw_data, 'endobj', $obj_offset - $data_len);
                $obj_start = ($prev_endobj === false) ? 0 : $prev_endobj + 6; // 6 == strlen('endobj')

                $obj_end = strpos($this->raw_data, 'endobj', $obj_offset);
                if($obj_end === false)
                    return NULL; // truncated object: leave the cursor alone instead of resetting it to 0

                $this->font_read_offset = $obj_end;

                $dict_open = strpos($this->raw_data, '<<', $obj_start);
                if(($dict_open === false) || ($dict_open > $obj_end))
                    return NULL; // the object has no dictionary of its own

                // Whatever line-ending style separates "N G obj" from "<<", keep only the address.
                $header = (string)substr($this->raw_data, $obj_start, $dict_open - $obj_start);
                $hit = array();
                if(preg_match('/(\d+)\s+(\d+)\s+obj\s*$/', $header, $hit))
                    $object_addr = $hit[1] . ' ' . $hit[2] . ' obj';
                else
                    $object_addr = trim($header);

                $object_content_start = $dict_open + 2; //for '<<'
                $object_content_end = $obj_end - 1;
                $object_raw_data = (string)substr($this->raw_data, $object_content_start, max(0, $object_content_end - $object_content_start));

                return array($object_addr => $object_raw_data);
            }

            /**
             * Parses the font descriptor referenced by an already parsed font object.
             *
             * Does nothing when the font object is unknown or has no FontDescriptor
             * entry. ParseObj() is called without a referrer name, so the descriptor is
             * parsed and cached in $this->objects but not attached to $this->font_list.
             *
             * @param string $font_object_addr Address (key in $this->objects) of the font object.
             */
            private function GetFontDescriptor($font_object_addr)
            {
                if(!isset($this->objects[$font_object_addr])
                    || !array_key_exists('FontDescriptor', $this->objects[$font_object_addr]))
                {
                    return;
                }

                $font_desc_ref = $this->objects[$font_object_addr]['FontDescriptor'];
                $font_desc_obj_addr = $this->GetObjFromRef($font_desc_ref);
                $this->ParseObj($font_desc_obj_addr, $font_object_addr);
            }

            /**
             * Converts an indirect reference ("12 0 R") into the header of the object
             * it points to ("12 0 obj").
             *
             * The first reference found in the string is used, so values such as
             * "[12 0 R]" or a reference followed by stray whitespace are accepted too.
             * A string whose last space-separated token is already "obj" is returned
             * (trimmed) as is.
             *
             * @param string $obj_ref Reference text taken from a PDF dictionary.
             * @return string Object header, or '' when $obj_ref holds no reference.
             */
            private function GetObjFromRef($obj_ref)
            {
                $obj_ref = trim((string)$obj_ref);

                $hit = array();
                // "R" must be a token of its own, not the first letter of a longer word.
                if(preg_match('/(\d+)\s+(\d+)\s+R(?![A-Za-z0-9])/', $obj_ref, $hit))
                    return $hit[1] . ' ' . $hit[2] . ' obj';

                $tokens = explode(' ', $obj_ref);
                if(end($tokens) == 'obj')
                    return $obj_ref;

                return '';
            }


            /**
             * Re-points the FontDescriptor reference of fonts used by XObjects to the
             * descriptor of an embedded CIDFontType2 font with the same BaseFont name,
             * and writes the modified data back to $filename.
             *
             * Steps:
             *  1. EmbeddedFontRefs() collects BaseFont => descriptor address for every
             *     CIDFontType2 object;
             *  2. the Resources reference of every '/Type /XObject' object is collected;
             *  3. each resource object yields the reference of its Font dictionary;
             *  4. each Font dictionary yields the references of the fonts it names;
             *  5. for each such font whose BaseFont is known from step 1, the
             *     FontDescriptor reference inside that object is replaced.
             *
             * NOTE(review): the replacement can change the byte length of the file and
             * the cross-reference table is not rebuilt - confirm that the consumers of
             * the file tolerate this.
             *
             * @param string $filename Path of the PDF file; it is overwritten when changed.
             */
            public function EnsureFonts($filename)
            {
                $data = file_get_contents($filename);
                if($data === false)
                    return; // unreadable file: nothing to rewrite

                $this->raw_data = $data;
                $this->raw_data_len = strlen($data);
                if(!is_array($this->embedded_font_addr))
                    $this->embedded_font_addr = array();

                // Step 1: embedded CID fonts.
                $this->font_read_offset = 0;
                $this->EmbeddedFontRefs();

                // Step 2: resource objects of all XObjects (sequential scan).
                $this->font_read_offset = 0;
                $resource_objs = array();
                $last_offset = -1;
                while(($xobj = $this->GetXObj('/Type /XObject')) !== NULL)
                {
                    $matches = array();
                    if(preg_match('/Resources (\d+ \d+ R)/', reset($xobj), $matches))
                        $resource_objs[] = $this->GetObjFromRef($matches[1]);

                    // Never spin on the same spot if the cursor failed to advance.
                    if(!is_int($this->font_read_offset) || ($this->font_read_offset <= $last_offset))
                        break;
                    $last_offset = $this->font_read_offset;
                }

                // Step 3: Font dictionary referenced by each resource object.
                $font_dict_objs = array();
                foreach(array_unique($resource_objs) as $resource_obj)
                {
                    $content = $this->GetXObjContentByAddr($resource_obj);
                    $matches = array();
                    if(($content !== NULL) && preg_match('/\/Font (\d+ \d+ R)/', $content, $matches))
                        $font_dict_objs[] = $this->GetObjFromRef($matches[1]);
                }

                // Step 4: every "/Name N G R" entry of a Font dictionary is a font object.
                $actual_font_objs = array();
                foreach(array_unique($font_dict_objs) as $font_dict_obj)
                {
                    $content = $this->GetXObjContentByAddr($font_dict_obj);
                    $matches = array();
                    if(($content !== NULL) && preg_match_all('/\/\S+\s+(\d+ \d+ R)/', $content, $matches))
                    {
                        foreach($matches[1] as $font_ref)
                            $actual_font_objs[] = $this->GetObjFromRef($font_ref);
                    }
                }

                // Step 5: rewrite the FontDescriptor reference of matching fonts.
                $changed = false;
                foreach(array_unique($actual_font_objs) as $fo)
                {
                    $content = $this->GetXObjContentByAddr($fo);
                    if(($content === NULL) || ($content === ''))
                        continue;

                    $matches = array();
                    if(!preg_match('/BaseFont \/(\S+)/', $content, $matches))
                        continue;
                    $f_name = $matches[1];

                    // Only touch fonts for which an embedded descriptor is actually known;
                    // otherwise the reference would be replaced by a bare 'R'.
                    if(!isset($this->embedded_font_addr[$f_name])
                        || !preg_match('/^\d+ \d+ obj$/', $this->embedded_font_addr[$f_name]))
                    {
                        continue;
                    }
                    $new_ref = substr($this->embedded_font_addr[$f_name], 0, -3) . 'R'; // "N G obj" -> "N G R"

                    if(!preg_match('/FontDescriptor (\d+ \d+ R)/', $content, $matches, PREG_OFFSET_CAPTURE))
                        continue;
                    if($matches[1][0] === $new_ref)
                        continue;

                    // Locate the reference inside this very object, not its first
                    // occurrence anywhere in the file.
                    $content_pos = strpos($this->raw_data, $content);
                    if($content_pos === false)
                        continue;

                    $this->raw_data = substr_replace(
                        $this->raw_data,
                        $new_ref,
                        $content_pos + $matches[1][1],
                        strlen($matches[1][0])
                    );
                    $this->raw_data_len = strlen($this->raw_data);
                    $changed = true;
                }

                if($changed)
                    file_put_contents($filename, $this->raw_data);
            }

            /**
             * Returns the body text of the object with the given address, searching
             * from the start of the data (the read cursor is reset to do so).
             *
             * @param string $obj_addr Object header such as "12 0 obj".
             * @return string|null Body text as returned by GetXObj(), or NULL when not found.
             */
            private function GetXObjContentByAddr($obj_addr)
            {
                if((string)$obj_addr === '')
                    return NULL;

                $this->font_read_offset = 0;
                $obj = $this->GetXObj($obj_addr);
                if($obj === NULL)
                    return NULL;

                return (string)reset($obj);
            }

            /**
             * Parses every object containing '/Type /Font' from the current read
             * position onwards and returns the names of the fonts that were found
             * not to be embedded.
             *
             * @return array BaseFont names as produced by GetNotEmbeddedFonts().
             */
            public function GetFontInfo()
            {
                // ParseObj() consumes one matching object per call and reports false
                // once there is nothing left to consume.
                while($this->ParseObj('/Type /Font') == true)
                {
                    continue;
                }

                return $this->GetNotEmbeddedFonts();
            }

            /**
             * Lists the BaseFont names of all fonts in $this->font_list whose
             * 'Embedded' flag is false.
             *
             * For composite fonts the descendant fonts are examined instead of the
             * top-level font. Display names used for system fonts are translated to the
             * corresponding PostScript names; each name is returned once.
             *
             * @return array List of font names.
             */
            private function GetNotEmbeddedFonts()
            {
                //Adobe Acrobat system font name list. Needs improvement..
                $system_font_names = array(
                    'Times New Roman'            => 'TimesNewRomanPSMT',
                    'Times New Roman,Bold'       => 'TimesNewRomanPS-BoldMT',
                    'Times New Roman,Italic'     => 'TimesNewRomanPS-ItalicMT',
                    'Times New Roman,BoldItalic' => 'TimesNewRomanPS-BoldItalicMT',
                    'Arial'                      => 'ArialMT',
                    'Arial,Bold'                 => 'Arial-BoldMT',
                    'Arial,Italic'               => 'Arial-ItalicMT',
                    'Arial,BoldItalic'           => 'Arial-BoldItalicMT',
                );

                $result = array();
                foreach($this->font_list as $font)
                {
                    if(array_key_exists('DescendantFonts', $font))
                    {
                        // Still the raw reference string when the descendant could not be parsed.
                        $candidates = is_array($font['DescendantFonts']) ? $font['DescendantFonts'] : array();
                    }
                    else
                    {
                        $candidates = array($font);
                    }

                    foreach($candidates as $candidate)
                    {
                        if(!is_array($candidate)
                            || !array_key_exists('Embedded', $candidate)
                            || !array_key_exists('BaseFont', $candidate))
                        {
                            continue;
                        }
                        if($candidate['Embedded'])
                            continue;

                        $name = $candidate['BaseFont'];
                        if(isset($system_font_names[$name]))
                            $name = $system_font_names[$name];

                        if(!in_array($name, $result, true))
                            $result[] = $name;
                    }
                }

                return $result;
            }
        }

        // Path of the PDF to process. Kept if the including code already defined it;
        // otherwise default to the sample file name used earlier in this document.
        if(!isset($filename))
            $filename = 'Simple.pdf';

        // Import the first page of the source document as a template.
        $pdf = new PDF($filename);
        $pdf->setSourceFile($filename);
        $pdf->AddPage();
        $tplIdx = $pdf->importPage(1);
        $pdf->useTemplate($tplIdx);

        $pdf->SetMargins(PDF_MARGIN_LEFT, 40, PDF_MARGIN_RIGHT);
        $pdf->SetAutoPageBreak(true, 40);
        $pdf->setFontSubsetting(false);

        // Names of the fonts the source file uses without embedding them.
        $not_embedded_fonts = $pdf->GetFontInfo();

        foreach($not_embedded_fonts as $f)
        {
            // Derive a font key from the PDF font name: lower case, [a-z0-9_] only,
            // style words shortened ("bold" -> "b", "italic"/"oblique" -> "i").
            $font_name = strtolower($f);
            $font_name = preg_replace('/[^a-z0-9_]/', '', $font_name);
            $search  = array('bold', 'oblique', 'italic', 'regular');
            $replace = array('b', 'i', 'i', '');
            $font_name = str_replace($search, $replace, $font_name);

            if($pdf->AddFont($font_name) !== false)
                $pdf->SetFont($font_name);
            else
                print('"' .$font_name. '" font not found!');
        }

        $pdf->Output();
        ?>