从受保护的 PDF 中读取附件

Reading attachment from a secured PDF

我正在处理一个受密码保护的 PDF 文件,该 PDF 中附带了一个 Excel 附件。

以下是我试过的代码。

    /// <summary>
    /// Program entry point: creates a <c>Program</c> instance and runs the embed step.
    /// </summary>
    /// <param name="args">Command-line arguments; not read.</param>
    static void Main(string[] args)
    {
        // The extraction step stays switched off, exactly as in the sample:
        //   program.ExtractAttachments(program.pdfFile);
        var program = new Program();
        program.EmbedAttachments();
    }

    /// <summary>
    /// Extracts the document-level attachments of a password-protected PDF and
    /// writes each one into the <c>attExtPath</c> directory.
    /// </summary>
    /// <remarks>
    /// Only the catalog's /Names -> /EmbeddedFiles -> /Names array is consulted.
    /// Failures are not propagated: any exception is printed to the console and
    /// the method then waits for a key press.
    /// </remarks>
    /// <param name="_pdfFile">Path of the PDF file to read.</param>
    private void ExtractAttachments(string _pdfFile)
    {
        PdfReader reader = null;
        try
        {
            if (!Directory.Exists(attExtPath))
                Directory.CreateDirectory(attExtPath);

            byte[] password = System.Text.ASCIIEncoding.ASCII.GetBytes("TFAER13052016");

            reader = new PdfReader(_pdfFile, password);

            PdfDictionary documentNames = (PdfDictionary)PdfReader.GetPdfObject(reader.Catalog.Get(PdfName.NAMES));
            if (documentNames == null)
                throw new InvalidOperationException("Unable to Read the document");

            PdfDictionary embeddedFiles = (PdfDictionary)PdfReader.GetPdfObject(documentNames.Get(PdfName.EMBEDDEDFILES));
            if (embeddedFiles == null)
                throw new InvalidOperationException("Unable to Read the attachment or There may be no Attachment");

            // A missing /Names array used to surface as a NullReferenceException.
            PdfArray filespecs = embeddedFiles.GetAsArray(PdfName.NAMES);
            if (filespecs == null)
                throw new InvalidOperationException("Unable to Read the attachment or There may be no Attachment");

            // The array is read as (name, file specification) pairs, so the
            // dictionaries sit at the odd indices. Stepping by two also stops
            // cleanly on an odd-length array instead of indexing past its end.
            for (int i = 1; i < filespecs.Size; i += 2)
            {
                PdfDictionary fileSpec = filespecs.GetAsDict(i);
                if (fileSpec == null)
                    continue;

                PdfDictionary embedded = fileSpec.GetAsDict(PdfName.EF);
                if (embedded == null)
                    continue;

                foreach (PdfName key in embedded.Keys)
                {
                    PRStream stream = PdfReader.GetPdfObject(embedded.GetAsIndirectObject(key)) as PRStream;
                    if (stream == null)
                        continue;

                    // The file specification normally carries the name under the
                    // same key as the stream; fall back to /F when it does not.
                    PdfString nameString = fileSpec.GetAsString(key);
                    if (nameString == null)
                        nameString = fileSpec.GetAsString(PdfName.F);
                    if (nameString == null)
                        continue;

                    // Keep only the leaf name so an embedded path such as
                    // "C:\dir\file.xls" or "../file.xls" cannot leave attExtPath.
                    string attachedFileName = nameString.ToString();
                    attachedFileName = attachedFileName.Substring(attachedFileName.LastIndexOfAny(new[] { '\\', '/' }) + 1);
                    if (attachedFileName.Length == 0)
                        continue;

                    byte[] attachedFileBytes = PdfReader.GetStreamBytes(stream);
                    System.IO.File.WriteAllBytes(Path.Combine(attExtPath, attachedFileName), attachedFileBytes);
                }
            }
        }
        catch (Exception ex)
        {
            Console.WriteLine(ex.ToString());
            Console.ReadKey();
        }
        finally
        {
            // Release the reader on both the success and the failure path.
            if (reader != null)
                reader.Close();
        }
    }

    /// <summary>
    /// Creates a one-page PDF at <c>pdfFile</c> with <c>C:\PDFReader.xls</c> attached
    /// as "11.xls", then writes an encrypted copy of it to <c>epdfFile</c>.
    /// </summary>
    /// <remarks>
    /// Failures are not propagated: any exception is printed to the console and
    /// the method then waits for a key press.
    /// </remarks>
    private void EmbedAttachments()
    {
        try
        {
            if (File.Exists(pdfFile))
                File.Delete(pdfFile);

            Document document = new Document(PageSize.LETTER);

            // The using block releases the file handle even when building the
            // document throws before document.Close() is reached.
            using (FileStream pdfStream = new FileStream(pdfFile, FileMode.Create))
            {
                PdfWriter writer = PdfWriter.GetInstance(document, pdfStream);

                document.Open();
                document.NewPage();
                document.Add(new Paragraph("This is test"));

                PdfFileSpecification pfs = PdfFileSpecification.FileEmbedded(writer, @"C:\PDFReader.xls", "11.xls", null);
                writer.AddFileAttachment(pfs);

                document.Close();
            }

            // Use this instance's paths: a freshly constructed Program may not
            // point at the file that was just written.
            using (Stream input = new FileStream(pdfFile, FileMode.Open, FileAccess.Read, FileShare.Read))
            using (Stream output = new FileStream(epdfFile, FileMode.Create, FileAccess.Write, FileShare.None))
            {
                PdfReader reader = new PdfReader(input);
                try
                {
                    // Arguments after the streams: strength flag, user password,
                    // owner password, permission flags.
                    PdfEncryptor.Encrypt(reader, output, true, "Password", "secret", PdfWriter.ALLOW_SCREENREADERS);
                }
                finally
                {
                    reader.Close();
                }
            }
        }
        catch (Exception ex)
        {
            // ToString() includes the exception type and message as well as the
            // stack trace, matching the reporting in ExtractAttachments.
            Console.WriteLine(ex.ToString());
            Console.ReadKey();
        }
    }
}

以上代码包含创建带有 excel 附件的加密 PDF 以及提取附件。

现在真正的问题在于:我手头那份作为需求文档的实际文件(我无法共享该文件)同样带有一个 Excel 附件,与我的示例类似。

上面的代码对我自己创建的受保护 PDF 有效,但对这份实际的受保护 PDF 无效。

调试的时候发现问题出在下面的代码

documentNames = (PdfDictionary)PdfReader.GetPdfObject(catalog.Get(PdfName.NAMES));

其中,

catalog.Get(PdfName.NAMES)

返回 NULL,而对我自己创建的文件则能得到预期的输出。

请就上述问题给予指导。

TIA。

正如 mkl 所指出的,该附件是以文件附件注释(file attachment annotation)的形式附加在页面上的。但参考示例中使用的 ZipFile 方法已不再受支持,因此我改用了下面的替代代码。

/// <summary>
/// Extracts every attachment stored as a page-level file-attachment annotation
/// of a password-protected PDF and writes it into <c>C:\PDFReader\Extract\</c>.
/// </summary>
/// <param name="src">Raw bytes of the PDF document.</param>
public void ExtractAttachments(byte[] src)
    {
        string attExtPath = @"C:\PDFReader\Extract\";

        if (!Directory.Exists(attExtPath))
            Directory.CreateDirectory(attExtPath);

        byte[] password = System.Text.ASCIIEncoding.ASCII.GetBytes("TFAER13052016");
        PdfReader reader = new PdfReader(src, password);
        try
        {
            for (int i = 1; i <= reader.NumberOfPages; i++)
            {
                PdfArray array = reader.GetPageN(i).GetAsArray(PdfName.ANNOTS);
                if (array == null) continue;

                for (int j = 0; j < array.Size; j++)
                {
                    PdfDictionary annot = array.GetAsDict(j);
                    if (annot == null || !PdfName.FILEATTACHMENT.Equals(annot.GetAsName(PdfName.SUBTYPE)))
                        continue;

                    // A file-attachment annotation without /FS or /EF has
                    // nothing to extract; skip it rather than dereference null.
                    PdfDictionary fs = annot.GetAsDict(PdfName.FS);
                    if (fs == null) continue;
                    PdfDictionary refs = fs.GetAsDict(PdfName.EF);
                    if (refs == null) continue;

                    foreach (PdfName name in refs.Keys)
                    {
                        PRStream stream = PdfReader.GetPdfObject(refs.GetAsIndirectObject(name)) as PRStream;
                        PdfString nameString = fs.GetAsString(name);
                        if (stream == null || nameString == null)
                            continue;

                        // The stored name may be a full path; keep only the leaf.
                        // The original Split('\') was an unterminated char literal
                        // and did not compile; '\\' is the escaped backslash.
                        string attachedFileName = nameString.ToString();
                        attachedFileName = attachedFileName.Substring(attachedFileName.LastIndexOfAny(new[] { '\\', '/' }) + 1);
                        if (attachedFileName.Length == 0)
                            continue;

                        byte[] attachedFileBytes = PdfReader.GetStreamBytes(stream);
                        System.IO.File.WriteAllBytes(Path.Combine(attExtPath, attachedFileName), attachedFileBytes);
                    }
                }
            }
        }
        finally
        {
            // Release the reader whether or not extraction succeeded.
            reader.Close();
        }
    }

如果可以通过任何其他方式实现,请告诉我。

谢谢!!!