将 PDF 页面从纵向更改为横向
Change PDF pages from portrait to landscape
给定一个包含纵向页面的现有 PDF 文件,我如何以编程方式(使用 .NET)处理该文件,从而生成一个内容相同、但页面为横向的新文件?
新页面应充分利用可用的横向宽度。
页数可能会增加,因为现有的纵向页面可能无法放入单个横向页面。
背景故事:我们使用 Google Sheets REST API 生成 pdf 文档。如果有很多列,文档可能会很宽。不幸的是,Google Drive REST API 始终以纵向模式导出并且不提供更改为横向的选项。
这是我们需要处理的示例 PDF 文件:https://drive.google.com/file/d/1dVf1GD7zmDx9wJhseGEbfPCVYTJbN-uG/view?usp=sharing
使用 iTextSharp(iText for .NET v5.5.13)以及另一个问答中的 PdfVeryDenseMergeTool 和 PageVerticalAnalyzer 类(见该问题的 "UPDATE 2" 和 "UPDATE 3",提问者在其中发布了他从 Java 方案移植到 C# 的代码),可以像这样处理:
// Gather the input documents; this example merges a single file.
var Files = new List<PdfReader> { new PdfReader(@"Example.pdf") };

// NOTE(review): 595 x 420 is presumably (width, height), i.e. a landscape page, and the
// three trailing numbers are presumably margins and a gap - confirm against the
// PdfVeryDenseMergeTool constructor.
var targetPageSize = new RectangleReadOnly(595, 420);
var tool = new PdfVeryDenseMergeTool(targetPageSize, 18, 18, 10);

// The merged document is written to an in-memory stream and then copied out as a byte array.
using (var ms = new MemoryStream())
{
    tool.Merge(ms, Files);
    byte[] bytes = ms.ToArray();
    // bytes contains the result
}
我得到一个五页的横向结果 PDF,如下所示:
您可以使用 Docotic.Pdf library 来做到这一点。最简单的解决方案是将每个源页面转换为 XObject,然后将其缩放为横向并在多个目标页面上绘制。
示例如下:
using System.Linq;
using BitMiracle.Docotic.Pdf;
namespace SplitToMultiplePages
{
    /// <summary>
    /// Sample that copies the content of a portrait PDF onto landscape pages.
    /// Each source page is converted to an XObject, scaled by the page's height/width
    /// ratio, and drawn in clipped slices on as many destination pages as needed.
    /// </summary>
    public static class SplitToMultiplePages
    {
        /// <summary>
        /// Reads <c>Example.pdf</c>, draws its pages onto landscape pages and saves the
        /// result as <c>SplitToMultiplePages.pdf</c>.
        /// </summary>
        /// <exception cref="System.InvalidOperationException">
        /// Thrown when no page of the source document contains text, because the margins
        /// are derived from word positions.
        /// </exception>
        public static void Main()
        {
            // NOTE:
            // When used in trial mode, the library imposes some restrictions.
            // Please visit http://bitmiracle.com/pdf-library/trial-restrictions.aspx
            // for more information.
            BitMiracle.Docotic.LicenseManager.AddLicenseData("temporary or permanent license key here");

            using (var src = new PdfDocument(@"Example.pdf"))
            {
                // Calculate common parameters based on the first page that contains text.
                // That makes sense when all pages have the same size, portrait orientation, and margins.
                // (Indexing words[0] on a text-free first page would throw.)
                PdfPage referencePage = null;
                PdfCollection<PdfTextData> referenceWords = null;
                for (int i = 0; i < src.PageCount && referencePage == null; ++i)
                {
                    PdfCollection<PdfTextData> candidate = src.Pages[i].GetWords();
                    if (candidate.Count > 0)
                    {
                        referencePage = src.Pages[i];
                        referenceWords = candidate;
                    }
                }

                if (referencePage == null)
                {
                    throw new System.InvalidOperationException(
                        "The source document contains no text, so page margins cannot be derived.");
                }

                double topMargin = referenceWords[0].Position.Y;
                double bottomMargin = referencePage.Height - referenceWords[referenceWords.Count - 1].Bounds.Bottom;
                double scale = referencePage.Height / referencePage.Width;

                // Added to every line's bottom offset before scaling.
                // NOTE(review): presumably leaves room for a border under each row - confirm.
                const int BorderHeight = 1;

                // This sample shows how to convert existing PDF content in portrait orientation to landscape
                // Fully qualified because the file does not import System.Diagnostics.
                System.Diagnostics.Debug.Assert(scale > 1);

                using (var dest = new PdfDocument())
                {
                    // NOTE(review): starting with addDestPage == false relies on a new PdfDocument
                    // already containing one page - confirm against the Docotic.Pdf documentation.
                    bool addDestPage = false;
                    double destPageY = topMargin;
                    for (int s = 0; s < src.PageCount; ++s)
                    {
                        PdfPage srcPage = src.Pages[s];
                        PdfCollection<PdfTextData> words = srcPage.GetWords();

                        // skip empty pages
                        if (words.Count == 0)
                        {
                            continue;
                        }

                        // Get content of the source page, scale to landscape and draw on multiple pages
                        double textStartY = words[0].Bounds.Top;

                        // Sorted ascending: the last element is used as the total content height and
                        // AdjustToNearestLine scans from the end for the largest offset that fits.
                        double[] lineBottomPositions = words
                            .Select(w => (w.Bounds.Bottom - textStartY + BorderHeight) * scale)
                            .Distinct()
                            .OrderBy(offset => offset)
                            .ToArray();
                        double contentHeight = lineBottomPositions[lineBottomPositions.Length - 1];

                        PdfXObject xobj = dest.CreateXObject(srcPage);
                        double remainingHeight = contentHeight;
                        while (true)
                        {
                            PdfPage destPage = addDestPage ? dest.AddPage() : dest.Pages[dest.PageCount - 1];

                            // Swap the source dimensions to get a landscape page.
                            destPage.Width = srcPage.Height;
                            destPage.Height = srcPage.Width;

                            double availableHeight = destPage.Height - destPageY - bottomMargin;
                            if (remainingHeight > availableHeight)
                            {
                                // NOTE(review): the offsets are measured from the start of the page content,
                                // while availableHeight is measured from the start of the current slice; for
                                // slices after the first this only lands on a line boundary when lines are
                                // evenly spaced - confirm whether that is intended.
                                availableHeight = AdjustToNearestLine(availableHeight, lineBottomPositions);
                            }

                            PdfCanvas destCanvas = destPage.Canvas;
                            destCanvas.SaveState();
                            destCanvas.TranslateTransform(0, destPageY);

                            // Clip to the slice so only the part that belongs on this page is visible.
                            destCanvas.AppendRectangle(new PdfRectangle(0, 0, destPage.Width, availableHeight), 0);
                            destCanvas.SetClip(PdfFillMode.Winding);

                            // Shift the scaled page up by the top margin and by what was already drawn.
                            double drawY = -topMargin * scale - (contentHeight - remainingHeight);
                            destCanvas.DrawXObject(xobj, 0, drawY, xobj.Width * scale, xobj.Height * scale, 0);
                            destCanvas.RestoreState();

                            if (remainingHeight <= availableHeight)
                            {
                                // Move to next source page
                                // NOTE(review): the new offset ignores the previous destPageY, which looks
                                // correct only when this last slice started at the top of a page - confirm.
                                addDestPage = false;
                                destPageY = remainingHeight + bottomMargin;
                                break;
                            }

                            // Need more pages in the resulting document
                            remainingHeight -= availableHeight;
                            addDestPage = true;
                            destPageY = topMargin;
                        }
                    }

                    // Optionally you can use Single Column layout by default
                    //dest.PageLayout = PdfPageLayout.OneColumn;
                    dest.Save("SplitToMultiplePages.pdf");
                }
            }
        }

        /// <summary>
        /// Returns the largest entry of <paramref name="lineHeights"/> that is strictly smaller
        /// than <paramref name="height"/>, or the first entry when none is smaller.
        /// </summary>
        /// <param name="height">Upper bound for the returned value.</param>
        /// <param name="lineHeights">Candidate values in ascending order; must not be empty.</param>
        /// <returns>The selected entry of <paramref name="lineHeights"/>.</returns>
        private static double AdjustToNearestLine(double height, double[] lineHeights)
        {
            // TODO: Use binary search for better performance
            for (int i = lineHeights.Length - 1; i >= 0; --i)
            {
                double lh = lineHeights[i];
                if (height > lh)
                {
                    return lh;
                }
            }

            return lineHeights[0];
        }
    }
}
示例产生以下结果:https://drive.google.com/file/d/1ITtV3Uw84wKd9mouV4kBpPoeWtsHlB9A/view?usp=sharing
根据您的要求,您还可以在除第一页以外的所有页面上跳过表头(headers)。下面是这种情况的示例:
using System.Linq;
using BitMiracle.Docotic.Pdf;
namespace SplitToMultiplePages
{
    /// <summary>
    /// Sample that copies the content of a portrait PDF onto landscape pages and keeps the
    /// header line only for the first page that has text.
    /// Each source page is converted to an XObject, scaled by the page's height/width
    /// ratio, and drawn in clipped slices on as many destination pages as needed.
    /// </summary>
    public static class SplitToMultiplePages
    {
        /// <summary>
        /// Reads <c>Example.pdf</c>, draws its pages onto landscape pages and saves the
        /// result as <c>SplitToMultiplePages.pdf</c>.
        /// </summary>
        /// <exception cref="System.InvalidOperationException">
        /// Thrown when no page of the source document contains text, because the margins
        /// are derived from word positions.
        /// </exception>
        public static void Main()
        {
            // NOTE:
            // When used in trial mode, the library imposes some restrictions.
            // Please visit http://bitmiracle.com/pdf-library/trial-restrictions.aspx
            // for more information.
            BitMiracle.Docotic.LicenseManager.AddLicenseData("temporary or permanent license key here");

            using (var src = new PdfDocument(@"Example.pdf"))
            {
                // Calculate common parameters based on the first page that contains text.
                // That makes sense when all pages have the same size, portrait orientation, and margins.
                // (Indexing words[0] on a text-free first page would throw.)
                PdfPage referencePage = null;
                PdfCollection<PdfTextData> referenceWords = null;
                for (int i = 0; i < src.PageCount && referencePage == null; ++i)
                {
                    PdfCollection<PdfTextData> candidate = src.Pages[i].GetWords();
                    if (candidate.Count > 0)
                    {
                        referencePage = src.Pages[i];
                        referenceWords = candidate;
                    }
                }

                if (referencePage == null)
                {
                    throw new System.InvalidOperationException(
                        "The source document contains no text, so page margins cannot be derived.");
                }

                double topMargin = referenceWords[0].Position.Y;
                double bottomMargin = referencePage.Height - referenceWords[referenceWords.Count - 1].Bounds.Bottom;
                double scale = referencePage.Height / referencePage.Width;

                // Added to every line's bottom offset before scaling.
                // NOTE(review): presumably leaves room for a border under each row - confirm.
                const int BorderHeight = 1;

                // This sample shows how to convert existing PDF content in portrait orientation to landscape
                // Fully qualified because the file does not import System.Diagnostics.
                System.Diagnostics.Debug.Assert(scale > 1);

                using (var dest = new PdfDocument())
                {
                    // NOTE(review): starting with addDestPage == false relies on a new PdfDocument
                    // already containing one page - confirm against the Docotic.Pdf documentation.
                    bool addDestPage = false;
                    double destPageY = topMargin;

                    // Becomes true once the first page with text (whose header is kept) has been seen.
                    bool headerKept = false;

                    for (int s = 0; s < src.PageCount; ++s)
                    {
                        PdfPage srcPage = src.Pages[s];
                        PdfCollection<PdfTextData> words = srcPage.GetWords();

                        // skip empty pages
                        if (words.Count == 0)
                        {
                            continue;
                        }

                        // Get content of the source page, scale to landscape and draw on multiple pages
                        double textStartY = words[0].Bounds.Top;

                        // Skip the header line of all pages except the first one that has text
                        if (headerKept)
                        {
                            // Project to double? so that "no match" yields null. With a plain double
                            // sequence FirstOrDefault returns 0.0 and the HasValue check never fails.
                            double? firstDataRowY = words
                                .Select(w => (double?)w.Bounds.Top)
                                .FirstOrDefault(top => top > textStartY);
                            if (!firstDataRowY.HasValue)
                            {
                                // Nothing below the header line on this page.
                                continue;
                            }

                            textStartY = firstDataRowY.Value;
                        }
                        else
                        {
                            headerKept = true;
                        }

                        // Sorted ascending: the last element is used as the total content height and
                        // AdjustToNearestLine scans from the end for the largest offset that fits.
                        double[] lineBottomPositions = words
                            .Select(w => (w.Bounds.Bottom - textStartY + BorderHeight) * scale)
                            .Distinct()
                            .OrderBy(offset => offset)
                            .ToArray();
                        double contentHeight = lineBottomPositions[lineBottomPositions.Length - 1];

                        PdfXObject xobj = dest.CreateXObject(srcPage);
                        double remainingHeight = contentHeight;
                        while (true)
                        {
                            PdfPage destPage = addDestPage ? dest.AddPage() : dest.Pages[dest.PageCount - 1];

                            // Swap the source dimensions to get a landscape page.
                            destPage.Width = srcPage.Height;
                            destPage.Height = srcPage.Width;

                            double availableHeight = destPage.Height - destPageY - bottomMargin;
                            if (remainingHeight > availableHeight)
                            {
                                // NOTE(review): the offsets are measured from textStartY, while availableHeight
                                // is measured from the start of the current slice; for slices after the first
                                // this only lands on a line boundary when lines are evenly spaced - confirm
                                // whether that is intended.
                                availableHeight = AdjustToNearestLine(availableHeight, lineBottomPositions);
                            }

                            PdfCanvas destCanvas = destPage.Canvas;
                            destCanvas.SaveState();
                            destCanvas.TranslateTransform(0, destPageY);

                            // Clip to the slice so only the part that belongs on this page is visible.
                            destCanvas.AppendRectangle(new PdfRectangle(0, 0, destPage.Width, availableHeight), 0);
                            destCanvas.SetClip(PdfFillMode.Winding);

                            // Shift the scaled page up to textStartY and past what was already drawn.
                            double drawY = -textStartY * scale - (contentHeight - remainingHeight);
                            destCanvas.DrawXObject(xobj, 0, drawY, xobj.Width * scale, xobj.Height * scale, 0);
                            destCanvas.RestoreState();

                            if (remainingHeight <= availableHeight)
                            {
                                // Move to the next source page
                                // NOTE(review): the new offset ignores the previous destPageY, which looks
                                // correct only when this last slice started at the top of a page - confirm.
                                addDestPage = false;
                                destPageY = remainingHeight + bottomMargin;
                                break;
                            }

                            // Need more pages in the resulting document
                            remainingHeight -= availableHeight;
                            addDestPage = true;
                            destPageY = topMargin;
                        }
                    }

                    // Optionally you can use Single Column layout by default
                    //dest.PageLayout = PdfPageLayout.OneColumn;
                    dest.Save("SplitToMultiplePages.pdf");
                }
            }
        }

        /// <summary>
        /// Returns the largest entry of <paramref name="lineHeights"/> that is strictly smaller
        /// than <paramref name="height"/>, or the first entry when none is smaller.
        /// </summary>
        /// <param name="height">Upper bound for the returned value.</param>
        /// <param name="lineHeights">Candidate values in ascending order; must not be empty.</param>
        /// <returns>The selected entry of <paramref name="lineHeights"/>.</returns>
        private static double AdjustToNearestLine(double height, double[] lineHeights)
        {
            // TODO: Use binary search for better performance
            for (int i = lineHeights.Length - 1; i >= 0; --i)
            {
                double lh = lineHeights[i];
                if (height > lh)
                {
                    return lh;
                }
            }

            return lineHeights[0];
        }
    }
}
“跳过 headers”样本的结果文件:https://drive.google.com/file/d/1v9lPYIposkNNgheUzz8kD3XSwMxGBJIz/view?usp=sharing
给定一个包含纵向页面的现有 PDF 文件,我如何以编程方式(使用 .NET)处理该文件,从而生成一个内容相同、但页面为横向的新文件?
新页面应充分利用可用的横向宽度。 页数可能会增加,因为现有的纵向页面可能无法放入单个横向页面。
背景故事:我们使用 Google Sheets REST API 生成 pdf 文档。如果有很多列,文档可能会很宽。不幸的是,Google Drive REST API 始终以纵向模式导出并且不提供更改为横向的选项。
这是我们需要处理的示例 PDF 文件:https://drive.google.com/file/d/1dVf1GD7zmDx9wJhseGEbfPCVYTJbN-uG/view?usp=sharing
使用 iTextSharp(iText for .NET v5.5.13)以及 PdfVeryDenseMergeTool 和 PageVerticalAnalyzer 类,可以像这样处理:
// Gather the input documents; this example merges a single file.
var Files = new List<PdfReader> { new PdfReader(@"Example.pdf") };

// NOTE(review): 595 x 420 is presumably (width, height), i.e. a landscape page, and the
// three trailing numbers are presumably margins and a gap - confirm against the
// PdfVeryDenseMergeTool constructor.
var targetPageSize = new RectangleReadOnly(595, 420);
var tool = new PdfVeryDenseMergeTool(targetPageSize, 18, 18, 10);

// The merged document is written to an in-memory stream and then copied out as a byte array.
using (var ms = new MemoryStream())
{
    tool.Merge(ms, Files);
    byte[] bytes = ms.ToArray();
    // bytes contains the result
}
我得到一个五页的横向结果 PDF,如下所示:
您可以使用 Docotic.Pdf library 来做到这一点。最简单的解决方案是将每个源页面转换为 XObject,然后将其缩放为横向并在多个目标页面上绘制。
示例如下:
using System.Linq;
using BitMiracle.Docotic.Pdf;
namespace SplitToMultiplePages
{
    /// <summary>
    /// Sample that copies the content of a portrait PDF onto landscape pages.
    /// Each source page is converted to an XObject, scaled by the page's height/width
    /// ratio, and drawn in clipped slices on as many destination pages as needed.
    /// </summary>
    public static class SplitToMultiplePages
    {
        /// <summary>
        /// Reads <c>Example.pdf</c>, draws its pages onto landscape pages and saves the
        /// result as <c>SplitToMultiplePages.pdf</c>.
        /// </summary>
        /// <exception cref="System.InvalidOperationException">
        /// Thrown when no page of the source document contains text, because the margins
        /// are derived from word positions.
        /// </exception>
        public static void Main()
        {
            // NOTE:
            // When used in trial mode, the library imposes some restrictions.
            // Please visit http://bitmiracle.com/pdf-library/trial-restrictions.aspx
            // for more information.
            BitMiracle.Docotic.LicenseManager.AddLicenseData("temporary or permanent license key here");

            using (var src = new PdfDocument(@"Example.pdf"))
            {
                // Calculate common parameters based on the first page that contains text.
                // That makes sense when all pages have the same size, portrait orientation, and margins.
                // (Indexing words[0] on a text-free first page would throw.)
                PdfPage referencePage = null;
                PdfCollection<PdfTextData> referenceWords = null;
                for (int i = 0; i < src.PageCount && referencePage == null; ++i)
                {
                    PdfCollection<PdfTextData> candidate = src.Pages[i].GetWords();
                    if (candidate.Count > 0)
                    {
                        referencePage = src.Pages[i];
                        referenceWords = candidate;
                    }
                }

                if (referencePage == null)
                {
                    throw new System.InvalidOperationException(
                        "The source document contains no text, so page margins cannot be derived.");
                }

                double topMargin = referenceWords[0].Position.Y;
                double bottomMargin = referencePage.Height - referenceWords[referenceWords.Count - 1].Bounds.Bottom;
                double scale = referencePage.Height / referencePage.Width;

                // Added to every line's bottom offset before scaling.
                // NOTE(review): presumably leaves room for a border under each row - confirm.
                const int BorderHeight = 1;

                // This sample shows how to convert existing PDF content in portrait orientation to landscape
                // Fully qualified because the file does not import System.Diagnostics.
                System.Diagnostics.Debug.Assert(scale > 1);

                using (var dest = new PdfDocument())
                {
                    // NOTE(review): starting with addDestPage == false relies on a new PdfDocument
                    // already containing one page - confirm against the Docotic.Pdf documentation.
                    bool addDestPage = false;
                    double destPageY = topMargin;
                    for (int s = 0; s < src.PageCount; ++s)
                    {
                        PdfPage srcPage = src.Pages[s];
                        PdfCollection<PdfTextData> words = srcPage.GetWords();

                        // skip empty pages
                        if (words.Count == 0)
                        {
                            continue;
                        }

                        // Get content of the source page, scale to landscape and draw on multiple pages
                        double textStartY = words[0].Bounds.Top;

                        // Sorted ascending: the last element is used as the total content height and
                        // AdjustToNearestLine scans from the end for the largest offset that fits.
                        double[] lineBottomPositions = words
                            .Select(w => (w.Bounds.Bottom - textStartY + BorderHeight) * scale)
                            .Distinct()
                            .OrderBy(offset => offset)
                            .ToArray();
                        double contentHeight = lineBottomPositions[lineBottomPositions.Length - 1];

                        PdfXObject xobj = dest.CreateXObject(srcPage);
                        double remainingHeight = contentHeight;
                        while (true)
                        {
                            PdfPage destPage = addDestPage ? dest.AddPage() : dest.Pages[dest.PageCount - 1];

                            // Swap the source dimensions to get a landscape page.
                            destPage.Width = srcPage.Height;
                            destPage.Height = srcPage.Width;

                            double availableHeight = destPage.Height - destPageY - bottomMargin;
                            if (remainingHeight > availableHeight)
                            {
                                // NOTE(review): the offsets are measured from the start of the page content,
                                // while availableHeight is measured from the start of the current slice; for
                                // slices after the first this only lands on a line boundary when lines are
                                // evenly spaced - confirm whether that is intended.
                                availableHeight = AdjustToNearestLine(availableHeight, lineBottomPositions);
                            }

                            PdfCanvas destCanvas = destPage.Canvas;
                            destCanvas.SaveState();
                            destCanvas.TranslateTransform(0, destPageY);

                            // Clip to the slice so only the part that belongs on this page is visible.
                            destCanvas.AppendRectangle(new PdfRectangle(0, 0, destPage.Width, availableHeight), 0);
                            destCanvas.SetClip(PdfFillMode.Winding);

                            // Shift the scaled page up by the top margin and by what was already drawn.
                            double drawY = -topMargin * scale - (contentHeight - remainingHeight);
                            destCanvas.DrawXObject(xobj, 0, drawY, xobj.Width * scale, xobj.Height * scale, 0);
                            destCanvas.RestoreState();

                            if (remainingHeight <= availableHeight)
                            {
                                // Move to next source page
                                // NOTE(review): the new offset ignores the previous destPageY, which looks
                                // correct only when this last slice started at the top of a page - confirm.
                                addDestPage = false;
                                destPageY = remainingHeight + bottomMargin;
                                break;
                            }

                            // Need more pages in the resulting document
                            remainingHeight -= availableHeight;
                            addDestPage = true;
                            destPageY = topMargin;
                        }
                    }

                    // Optionally you can use Single Column layout by default
                    //dest.PageLayout = PdfPageLayout.OneColumn;
                    dest.Save("SplitToMultiplePages.pdf");
                }
            }
        }

        /// <summary>
        /// Returns the largest entry of <paramref name="lineHeights"/> that is strictly smaller
        /// than <paramref name="height"/>, or the first entry when none is smaller.
        /// </summary>
        /// <param name="height">Upper bound for the returned value.</param>
        /// <param name="lineHeights">Candidate values in ascending order; must not be empty.</param>
        /// <returns>The selected entry of <paramref name="lineHeights"/>.</returns>
        private static double AdjustToNearestLine(double height, double[] lineHeights)
        {
            // TODO: Use binary search for better performance
            for (int i = lineHeights.Length - 1; i >= 0; --i)
            {
                double lh = lineHeights[i];
                if (height > lh)
                {
                    return lh;
                }
            }

            return lineHeights[0];
        }
    }
}
示例产生以下结果:https://drive.google.com/file/d/1ITtV3Uw84wKd9mouV4kBpPoeWtsHlB9A/view?usp=sharing
根据您的要求,您还可以在除第一页以外的所有页面上跳过表头(headers)。下面是这种情况的示例:
using System.Linq;
using BitMiracle.Docotic.Pdf;
namespace SplitToMultiplePages
{
    /// <summary>
    /// Sample that copies the content of a portrait PDF onto landscape pages and keeps the
    /// header line only for the first page that has text.
    /// Each source page is converted to an XObject, scaled by the page's height/width
    /// ratio, and drawn in clipped slices on as many destination pages as needed.
    /// </summary>
    public static class SplitToMultiplePages
    {
        /// <summary>
        /// Reads <c>Example.pdf</c>, draws its pages onto landscape pages and saves the
        /// result as <c>SplitToMultiplePages.pdf</c>.
        /// </summary>
        /// <exception cref="System.InvalidOperationException">
        /// Thrown when no page of the source document contains text, because the margins
        /// are derived from word positions.
        /// </exception>
        public static void Main()
        {
            // NOTE:
            // When used in trial mode, the library imposes some restrictions.
            // Please visit http://bitmiracle.com/pdf-library/trial-restrictions.aspx
            // for more information.
            BitMiracle.Docotic.LicenseManager.AddLicenseData("temporary or permanent license key here");

            using (var src = new PdfDocument(@"Example.pdf"))
            {
                // Calculate common parameters based on the first page that contains text.
                // That makes sense when all pages have the same size, portrait orientation, and margins.
                // (Indexing words[0] on a text-free first page would throw.)
                PdfPage referencePage = null;
                PdfCollection<PdfTextData> referenceWords = null;
                for (int i = 0; i < src.PageCount && referencePage == null; ++i)
                {
                    PdfCollection<PdfTextData> candidate = src.Pages[i].GetWords();
                    if (candidate.Count > 0)
                    {
                        referencePage = src.Pages[i];
                        referenceWords = candidate;
                    }
                }

                if (referencePage == null)
                {
                    throw new System.InvalidOperationException(
                        "The source document contains no text, so page margins cannot be derived.");
                }

                double topMargin = referenceWords[0].Position.Y;
                double bottomMargin = referencePage.Height - referenceWords[referenceWords.Count - 1].Bounds.Bottom;
                double scale = referencePage.Height / referencePage.Width;

                // Added to every line's bottom offset before scaling.
                // NOTE(review): presumably leaves room for a border under each row - confirm.
                const int BorderHeight = 1;

                // This sample shows how to convert existing PDF content in portrait orientation to landscape
                // Fully qualified because the file does not import System.Diagnostics.
                System.Diagnostics.Debug.Assert(scale > 1);

                using (var dest = new PdfDocument())
                {
                    // NOTE(review): starting with addDestPage == false relies on a new PdfDocument
                    // already containing one page - confirm against the Docotic.Pdf documentation.
                    bool addDestPage = false;
                    double destPageY = topMargin;

                    // Becomes true once the first page with text (whose header is kept) has been seen.
                    bool headerKept = false;

                    for (int s = 0; s < src.PageCount; ++s)
                    {
                        PdfPage srcPage = src.Pages[s];
                        PdfCollection<PdfTextData> words = srcPage.GetWords();

                        // skip empty pages
                        if (words.Count == 0)
                        {
                            continue;
                        }

                        // Get content of the source page, scale to landscape and draw on multiple pages
                        double textStartY = words[0].Bounds.Top;

                        // Skip the header line of all pages except the first one that has text
                        if (headerKept)
                        {
                            // Project to double? so that "no match" yields null. With a plain double
                            // sequence FirstOrDefault returns 0.0 and the HasValue check never fails.
                            double? firstDataRowY = words
                                .Select(w => (double?)w.Bounds.Top)
                                .FirstOrDefault(top => top > textStartY);
                            if (!firstDataRowY.HasValue)
                            {
                                // Nothing below the header line on this page.
                                continue;
                            }

                            textStartY = firstDataRowY.Value;
                        }
                        else
                        {
                            headerKept = true;
                        }

                        // Sorted ascending: the last element is used as the total content height and
                        // AdjustToNearestLine scans from the end for the largest offset that fits.
                        double[] lineBottomPositions = words
                            .Select(w => (w.Bounds.Bottom - textStartY + BorderHeight) * scale)
                            .Distinct()
                            .OrderBy(offset => offset)
                            .ToArray();
                        double contentHeight = lineBottomPositions[lineBottomPositions.Length - 1];

                        PdfXObject xobj = dest.CreateXObject(srcPage);
                        double remainingHeight = contentHeight;
                        while (true)
                        {
                            PdfPage destPage = addDestPage ? dest.AddPage() : dest.Pages[dest.PageCount - 1];

                            // Swap the source dimensions to get a landscape page.
                            destPage.Width = srcPage.Height;
                            destPage.Height = srcPage.Width;

                            double availableHeight = destPage.Height - destPageY - bottomMargin;
                            if (remainingHeight > availableHeight)
                            {
                                // NOTE(review): the offsets are measured from textStartY, while availableHeight
                                // is measured from the start of the current slice; for slices after the first
                                // this only lands on a line boundary when lines are evenly spaced - confirm
                                // whether that is intended.
                                availableHeight = AdjustToNearestLine(availableHeight, lineBottomPositions);
                            }

                            PdfCanvas destCanvas = destPage.Canvas;
                            destCanvas.SaveState();
                            destCanvas.TranslateTransform(0, destPageY);

                            // Clip to the slice so only the part that belongs on this page is visible.
                            destCanvas.AppendRectangle(new PdfRectangle(0, 0, destPage.Width, availableHeight), 0);
                            destCanvas.SetClip(PdfFillMode.Winding);

                            // Shift the scaled page up to textStartY and past what was already drawn.
                            double drawY = -textStartY * scale - (contentHeight - remainingHeight);
                            destCanvas.DrawXObject(xobj, 0, drawY, xobj.Width * scale, xobj.Height * scale, 0);
                            destCanvas.RestoreState();

                            if (remainingHeight <= availableHeight)
                            {
                                // Move to the next source page
                                // NOTE(review): the new offset ignores the previous destPageY, which looks
                                // correct only when this last slice started at the top of a page - confirm.
                                addDestPage = false;
                                destPageY = remainingHeight + bottomMargin;
                                break;
                            }

                            // Need more pages in the resulting document
                            remainingHeight -= availableHeight;
                            addDestPage = true;
                            destPageY = topMargin;
                        }
                    }

                    // Optionally you can use Single Column layout by default
                    //dest.PageLayout = PdfPageLayout.OneColumn;
                    dest.Save("SplitToMultiplePages.pdf");
                }
            }
        }

        /// <summary>
        /// Returns the largest entry of <paramref name="lineHeights"/> that is strictly smaller
        /// than <paramref name="height"/>, or the first entry when none is smaller.
        /// </summary>
        /// <param name="height">Upper bound for the returned value.</param>
        /// <param name="lineHeights">Candidate values in ascending order; must not be empty.</param>
        /// <returns>The selected entry of <paramref name="lineHeights"/>.</returns>
        private static double AdjustToNearestLine(double height, double[] lineHeights)
        {
            // TODO: Use binary search for better performance
            for (int i = lineHeights.Length - 1; i >= 0; --i)
            {
                double lh = lineHeights[i];
                if (height > lh)
                {
                    return lh;
                }
            }

            return lineHeights[0];
        }
    }
}
“跳过 headers”样本的结果文件:https://drive.google.com/file/d/1v9lPYIposkNNgheUzz8kD3XSwMxGBJIz/view?usp=sharing