从受保护的 PDF 中读取附件
Reading attachment from a secured PDF
我正在处理一个 PDF 文件,这是一个受保护的文件,PDF 文件中附加了一个 excel。
以下是我试过的代码。
static void Main(string[] args)
{
Program pgm = new Program();
pgm.EmbedAttachments();
//pgm.ExtractAttachments(pgm.pdfFile);
}
private void ExtractAttachments(string _pdfFile)
{
try
{
if (!Directory.Exists(attExtPath))
Directory.CreateDirectory(attExtPath);
byte[] password = System.Text.ASCIIEncoding.ASCII.GetBytes("TFAER13052016");
//byte[] password = System.Text.ASCIIEncoding.ASCII.GetBytes("Password");
PdfDictionary documentNames = null;
PdfDictionary embeddedFiles = null;
PdfDictionary fileArray = null;
PdfDictionary file = null;
PRStream stream = null;
//PdfReader reader = new PdfReader(_pdfFile);
PdfReader reader = new PdfReader(_pdfFile, password);
PdfDictionary catalog = reader.Catalog;
documentNames = (PdfDictionary)PdfReader.GetPdfObject(catalog.Get(PdfName.NAMES));
if (documentNames != null)
{
embeddedFiles = (PdfDictionary)PdfReader.GetPdfObject(documentNames.Get(PdfName.EMBEDDEDFILES));
if (embeddedFiles != null)
{
PdfArray filespecs = embeddedFiles.GetAsArray(PdfName.NAMES);
for (int i = 0; i < filespecs.Size; i++)
{
i++;
fileArray = filespecs.GetAsDict(i);
file = fileArray.GetAsDict(PdfName.EF);
foreach (PdfName key in file.Keys)
{
stream = (PRStream)PdfReader.GetPdfObject(file.GetAsIndirectObject(key));
string attachedFileName = fileArray.GetAsString(key).ToString();
byte[] attachedFileBytes = PdfReader.GetStreamBytes(stream);
System.IO.File.WriteAllBytes(attExtPath + attachedFileName, attachedFileBytes);
}
}
}
else
throw new Exception("Unable to Read the attachment or There may be no Attachment");
}
else
{
throw new Exception("Unable to Read the document");
}
}
catch (Exception ex)
{
Console.WriteLine(ex.ToString());
Console.ReadKey();
}
}
private void EmbedAttachments()
{
try
{
if (File.Exists(pdfFile))
File.Delete(pdfFile);
Document PDFD = new Document(PageSize.LETTER);
PdfWriter writer;
writer = PdfWriter.GetInstance(PDFD, new FileStream(pdfFile, FileMode.Create));
PDFD.Open();
PDFD.NewPage();
PDFD.Add(new Paragraph("This is test"));
PdfFileSpecification pfs = PdfFileSpecification.FileEmbedded(writer, @"C:\PDFReader.xls", "11.xls", null);
//PdfFileSpecification pfs = PdfFileSpecification.FileEmbedded(writer, attFile, "11", File.ReadAllBytes(attFile), true);
writer.AddFileAttachment(pfs);
//writer.AddAnnotation(PdfAnnotation.CreateFileAttachment(writer, new iTextSharp.text.Rectangle(100, 100, 100, 100), "File Attachment", PdfFileSpecification.FileExtern(writer, "C:\test.xml")));
//writer.Close();
PDFD.Close();
Program pgm=new Program();
using (Stream input = new FileStream(pgm.pdfFile, FileMode.Open, FileAccess.Read, FileShare.Read))
{
using (Stream output = new FileStream(pgm.epdfFile, FileMode.Create, FileAccess.Write, FileShare.None))
{
PdfReader reader = new PdfReader(input);
PdfEncryptor.Encrypt(reader, output, true, "Password", "secret", PdfWriter.ALLOW_SCREENREADERS);
}
}
}
catch (Exception ex)
{
Console.WriteLine(ex.StackTrace.ToString());
Console.ReadKey();
}
}
}
以上代码包含创建带有 excel 附件的加密 PDF 以及提取附件。
现在真正的问题是我已经作为需求文档的文件(我无法共享文件),它也有一个 excel 附件,就像我的例子一样。
但是上面的代码适用于我创建的受保护的 PDF,但不适用于实际的受保护的 PDF。
调试的时候发现问题出在下面的代码
documentNames = (PdfDictionary)PdfReader.GetPdfObject(catalog.Get(PdfName.NAMES));
其中,
catalog.Get(PdfName.NAMES)
返回为 NULL,而我创建的文件提供了预期的输出。
以上内容请指导。
TIA。
正如 mkl 所建议的,它已作为带注释的附件附上。但示例中使用的引用提供了 ZipFile 方法,不再支持。因此,我在下面找到了备用代码。
public void ExtractAttachments(byte[] src)
{
PRStream stream = null;
string attExtPath = @"C:\PDFReader\Extract\";
if (!Directory.Exists(attExtPath))
Directory.CreateDirectory(attExtPath);
byte[] password = System.Text.ASCIIEncoding.ASCII.GetBytes("TFAER13052016");
PdfReader reader = new PdfReader(src, password);
for (int i = 1; i <= reader.NumberOfPages; i++)
{
PdfArray array = reader.GetPageN(i).GetAsArray(PdfName.ANNOTS);
if (array == null) continue;
for (int j = 0; j < array.Size; j++)
{
PdfDictionary annot = array.GetAsDict(j);
if (PdfName.FILEATTACHMENT.Equals(
annot.GetAsName(PdfName.SUBTYPE)))
{
PdfDictionary fs = annot.GetAsDict(PdfName.FS);
PdfDictionary refs = fs.GetAsDict(PdfName.EF);
foreach (PdfName name in refs.Keys)
{
//zip.AddEntry(
// fs.GetAsString(name).ToString(),
// PdfReader.GetStreamBytes((PRStream)refs.GetAsStream(name))
//);
stream = (PRStream)PdfReader.GetPdfObject(refs.GetAsIndirectObject(name));
string attachedFileName = fs.GetAsString(name).ToString();
var splitname = attachedFileName.Split('\');
if (splitname.Length != 1)
attachedFileName = splitname[splitname.Length - 1].ToString();
byte[] attachedFileBytes = PdfReader.GetStreamBytes(stream);
System.IO.File.WriteAllBytes(attExtPath + attachedFileName, attachedFileBytes);
}
}
}
}
}
如果可以通过任何其他方式实现,请告诉我。
谢谢!!!
我正在处理一个 PDF 文件,这是一个受保护的文件,PDF 文件中附加了一个 excel。
以下是我试过的代码。
static void Main(string[] args)
{
Program pgm = new Program();
pgm.EmbedAttachments();
//pgm.ExtractAttachments(pgm.pdfFile);
}
private void ExtractAttachments(string _pdfFile)
{
try
{
if (!Directory.Exists(attExtPath))
Directory.CreateDirectory(attExtPath);
byte[] password = System.Text.ASCIIEncoding.ASCII.GetBytes("TFAER13052016");
//byte[] password = System.Text.ASCIIEncoding.ASCII.GetBytes("Password");
PdfDictionary documentNames = null;
PdfDictionary embeddedFiles = null;
PdfDictionary fileArray = null;
PdfDictionary file = null;
PRStream stream = null;
//PdfReader reader = new PdfReader(_pdfFile);
PdfReader reader = new PdfReader(_pdfFile, password);
PdfDictionary catalog = reader.Catalog;
documentNames = (PdfDictionary)PdfReader.GetPdfObject(catalog.Get(PdfName.NAMES));
if (documentNames != null)
{
embeddedFiles = (PdfDictionary)PdfReader.GetPdfObject(documentNames.Get(PdfName.EMBEDDEDFILES));
if (embeddedFiles != null)
{
PdfArray filespecs = embeddedFiles.GetAsArray(PdfName.NAMES);
for (int i = 0; i < filespecs.Size; i++)
{
i++;
fileArray = filespecs.GetAsDict(i);
file = fileArray.GetAsDict(PdfName.EF);
foreach (PdfName key in file.Keys)
{
stream = (PRStream)PdfReader.GetPdfObject(file.GetAsIndirectObject(key));
string attachedFileName = fileArray.GetAsString(key).ToString();
byte[] attachedFileBytes = PdfReader.GetStreamBytes(stream);
System.IO.File.WriteAllBytes(attExtPath + attachedFileName, attachedFileBytes);
}
}
}
else
throw new Exception("Unable to Read the attachment or There may be no Attachment");
}
else
{
throw new Exception("Unable to Read the document");
}
}
catch (Exception ex)
{
Console.WriteLine(ex.ToString());
Console.ReadKey();
}
}
private void EmbedAttachments()
{
try
{
if (File.Exists(pdfFile))
File.Delete(pdfFile);
Document PDFD = new Document(PageSize.LETTER);
PdfWriter writer;
writer = PdfWriter.GetInstance(PDFD, new FileStream(pdfFile, FileMode.Create));
PDFD.Open();
PDFD.NewPage();
PDFD.Add(new Paragraph("This is test"));
PdfFileSpecification pfs = PdfFileSpecification.FileEmbedded(writer, @"C:\PDFReader.xls", "11.xls", null);
//PdfFileSpecification pfs = PdfFileSpecification.FileEmbedded(writer, attFile, "11", File.ReadAllBytes(attFile), true);
writer.AddFileAttachment(pfs);
//writer.AddAnnotation(PdfAnnotation.CreateFileAttachment(writer, new iTextSharp.text.Rectangle(100, 100, 100, 100), "File Attachment", PdfFileSpecification.FileExtern(writer, "C:\test.xml")));
//writer.Close();
PDFD.Close();
Program pgm=new Program();
using (Stream input = new FileStream(pgm.pdfFile, FileMode.Open, FileAccess.Read, FileShare.Read))
{
using (Stream output = new FileStream(pgm.epdfFile, FileMode.Create, FileAccess.Write, FileShare.None))
{
PdfReader reader = new PdfReader(input);
PdfEncryptor.Encrypt(reader, output, true, "Password", "secret", PdfWriter.ALLOW_SCREENREADERS);
}
}
}
catch (Exception ex)
{
Console.WriteLine(ex.StackTrace.ToString());
Console.ReadKey();
}
}
}
以上代码包含创建带有 excel 附件的加密 PDF 以及提取附件。
现在真正的问题是我已经作为需求文档的文件(我无法共享文件),它也有一个 excel 附件,就像我的例子一样。
但是上面的代码适用于我创建的受保护的 PDF,但不适用于实际的受保护的 PDF。
调试的时候发现问题出在下面的代码
documentNames = (PdfDictionary)PdfReader.GetPdfObject(catalog.Get(PdfName.NAMES));
其中,
catalog.Get(PdfName.NAMES)
返回为 NULL,而我创建的文件提供了预期的输出。
以上内容请指导。
TIA。
正如 mkl 所建议的,它已作为带注释的附件附上。但示例中使用的引用提供了 ZipFile 方法,不再支持。因此,我在下面找到了备用代码。
public void ExtractAttachments(byte[] src)
{
PRStream stream = null;
string attExtPath = @"C:\PDFReader\Extract\";
if (!Directory.Exists(attExtPath))
Directory.CreateDirectory(attExtPath);
byte[] password = System.Text.ASCIIEncoding.ASCII.GetBytes("TFAER13052016");
PdfReader reader = new PdfReader(src, password);
for (int i = 1; i <= reader.NumberOfPages; i++)
{
PdfArray array = reader.GetPageN(i).GetAsArray(PdfName.ANNOTS);
if (array == null) continue;
for (int j = 0; j < array.Size; j++)
{
PdfDictionary annot = array.GetAsDict(j);
if (PdfName.FILEATTACHMENT.Equals(
annot.GetAsName(PdfName.SUBTYPE)))
{
PdfDictionary fs = annot.GetAsDict(PdfName.FS);
PdfDictionary refs = fs.GetAsDict(PdfName.EF);
foreach (PdfName name in refs.Keys)
{
//zip.AddEntry(
// fs.GetAsString(name).ToString(),
// PdfReader.GetStreamBytes((PRStream)refs.GetAsStream(name))
//);
stream = (PRStream)PdfReader.GetPdfObject(refs.GetAsIndirectObject(name));
string attachedFileName = fs.GetAsString(name).ToString();
var splitname = attachedFileName.Split('\');
if (splitname.Length != 1)
attachedFileName = splitname[splitname.Length - 1].ToString();
byte[] attachedFileBytes = PdfReader.GetStreamBytes(stream);
System.IO.File.WriteAllBytes(attExtPath + attachedFileName, attachedFileBytes);
}
}
}
}
}
如果可以通过任何其他方式实现,请告诉我。
谢谢!!!