使用 javascript(使用 pdf.js)将 pdf 转换为 png 数组
Turn pdf into array of png's using javascript (with pdf.js)
我正在尝试开发一段前端代码,要求用户提供一个 pdf,然后在内部(在用户的浏览器中)生成一个 png 数组(通过 canvas 的 toDataURL 得到 data URL),其中数组中的每个条目对应 pdf 中的一页:
dat[0] = 第 1 页的 png
dat[1] = 第 2 页的 png
...
当我测试下面的代码时,页面以某种方式呈现在彼此之上并旋转。
<script src="http://cdnjs.cloudflare.com/ajax/libs/processing.js/1.4.1/processing-api.min.js"></script><html>
<!--
Created using jsbin.com
Source can be edited via http://jsbin.com/pdfjs-helloworld-v2/8598/edit
-->
<body>
<canvas id="the-canvas" style="border:1px solid black"></canvas>
<input id='pdf' type='file'/>
<!-- Use latest PDF.js build from Github -->
<script src="https://ajax.googleapis.com/ajax/libs/jquery/2.2.4/jquery.min.js"></script>
<script src="pdf.js"></script>
<script src="pdf.worker.js"></script>
<script type="text/javascript">
//
// Asynchronous download PDF as an ArrayBuffer
//
// Collected PNG data URLs. Entries are pushed when a page finishes rendering,
// so their order follows render completion. `dat` is assigned without a
// declaration keyword here (as are `file`, `fileReader`, `number_of_pages`
// and `i` below).
dat = [];
var pdf = document.getElementById('pdf');
// Runs when the value of the <input id='pdf'> file picker changes.
pdf.onchange = function(ev) {
// Only proceed when a file was actually selected.
if (file = document.getElementById('pdf').files[0]) {
fileReader = new FileReader();
// Once the file has been read, hand its contents to PDF.js.
fileReader.onload = function(ev) {
//console.log(ev);
PDFJS.getDocument(fileReader.result).then(function getPdfHelloWorld(pdf) {
//
// Request every page of the document (page numbers run from 1 to numPages).
// The loop only schedules the requests; it does not wait for one page to
// finish rendering before asking for the next.
//
number_of_pages = pdf.numPages;
for(i = 1; i < number_of_pages+1; ++i) {
pdf.getPage(i).then(function getPageHelloWorld(page) {
var scale = 1;
var viewport = page.getViewport(scale);
//
// Prepare canvas using PDF page dimensions.
// Every page callback looks up the same 'the-canvas' element, so all
// pages are drawn onto one shared canvas.
// NOTE(review): sharing one canvas between concurrent renders is the
// likely cause of pages appearing on top of each other - confirm.
//
var canvas = document.getElementById('the-canvas');
var context = canvas.getContext('2d');
canvas.height = viewport.height;
canvas.width = viewport.width;
//
// Render PDF page into canvas context
//
var renderContext = {
canvasContext: context,
viewport: viewport};
// When rendering completes, snapshot the shared canvas as a PNG data URL.
page.render(renderContext).then(function() {
dat.push(canvas.toDataURL('image/png'));
});
});
}
//console.log(pdf.numPages);
//console.log(pdf)
}, function(error){
// Loading the document failed; log the error.
console.log(error);
});
};
// Start reading the selected file as an ArrayBuffer; onload fires when done.
fileReader.readAsArrayBuffer(file);
}
}
</script>
<style id="jsbin-css">
</style>
<script>
</script>
</body>
</html>
我只对数组数据感兴趣。当我渲染数组中的图像时,我看到了
dat[0] = 第 1 页的 png(正确)
dat[1] = 第 1 页的 png 和第 2 页的 png 相互叠加,并且相对旋转了 180°
...
如何确保正确呈现数组的每个条目中的单个页面?
尝试在不同的 canvas 上呈现每个页面。您可以使用下面的代码创建一个 canvas 并将其附加到容器:
// Look up the container element with id 'canvas', create a new <canvas>
// element, and append it to that container.
var canvasdiv = document.getElementById('canvas');
var canvas = document.createElement('canvas');
canvasdiv.appendChild(canvas);
// Demo: load a PDF with PDF.js, render every page onto its own <canvas>,
// and collect one PNG data URL per page.
var url = 'https://raw.githubusercontent.com/mozilla/pdf.js/ba2edeae/web/compressed.tracemonkey-pldi-09.pdf';
var PDFJS = window['pdfjs-dist/build/pdf'];
PDFJS.GlobalWorkerOptions.workerSrc = '//mozilla.github.io/pdf.js/build/pdf.worker.js';
var loadingTask = PDFJS.getDocument(url);
loadingTask.promise.then(function(pdf) {
  var canvasdiv = document.getElementById('canvas');
  var totalPages = pdf.numPages;
  // data[n - 1] holds the PNG of page n. Pages render concurrently and may
  // finish in any order, so results are stored by page number instead of
  // being pushed in completion order.
  var data = new Array(totalPages);
  // Number of pages stored so far (data.length is fixed at totalPages).
  var loadedPages = 0;
  for (let pageNumber = 1; pageNumber <= totalPages; pageNumber++) {
    pdf.getPage(pageNumber).then(function(page) {
      var scale = 1.5;
      var viewport = page.getViewport({ scale: scale });
      // Each page gets its own canvas so concurrent renders cannot overlap.
      var canvas = document.createElement('canvas');
      canvasdiv.appendChild(canvas);
      // Prepare canvas using PDF page dimensions
      var context = canvas.getContext('2d');
      canvas.height = viewport.height;
      canvas.width = viewport.width;
      // Render PDF page into canvas context
      var renderContext = { canvasContext: context, viewport: viewport };
      var renderTask = page.render(renderContext);
      // Return the render promise so a render failure reaches the catch below.
      return renderTask.promise.then(function() {
        data[pageNumber - 1] = canvas.toDataURL('image/png');
        loadedPages += 1;
        console.log(loadedPages + ' page(s) loaded in data');
      });
    }).catch(function(reason) {
      // Page loading or rendering error
      console.error(reason);
    });
  }
}, function(reason) {
  // PDF loading error
  console.error(reason);
});
/* Show each rendered page canvas as a bordered thumbnail. */
canvas {
  width: 25%;
  margin: 5px;
  border: 1px solid black;
}
<script src="//mozilla.github.io/pdf.js/build/pdf.js"></script>
<div id="canvas"></div>
对于那些从 Google 来到这里寻求 Angular 解决方案的人来说,这里是一个在不同的 canvas 上呈现每个页面的实现。
pdf-viewer.component.html
<!-- Per page: a hidden canvas used as the render target and an image showing the resulting PNG. -->
<div *ngFor="let page of pages">
  <canvas #canvas hidden></canvas>
  <img [src]="page">
</div>
pdf-viewer.component.ts
import * as pdfjsLib from 'pdfjs-dist';
pdfjsLib.GlobalWorkerOptions.workerSrc = 'pdf.worker.js';
/**
 * Renders every page of a PDF to a PNG data URL and exposes the results in
 * `pages`, indexed by zero-based page index.
 *
 * Each page is drawn on its own `#canvas` element from the template, so
 * concurrent renders never share a canvas.
 */
@Component({
  selector: 'app-pdf-viewer',
  templateUrl: './pdf-viewer.component.html',
  styleUrls: ['./pdf-viewer.component.scss'],
})
export class PdfViewerComponent implements OnInit {
  /** Canvas elements matched by the template reference `#canvas`. */
  @ViewChildren('canvas') canvas!: QueryList<ElementRef<HTMLCanvasElement>>;

  /** Value passed to pdf.js as the document `url`. */
  @Input() pdfBase64!: string;

  /** One PNG data URL per page; an entry stays empty until its page has rendered. */
  pages: string[] = [];

  ngOnInit(): void {
    // setPages() is asynchronous; report failures instead of leaving the
    // rejection unhandled.
    this.setPages().catch((error: unknown) => console.error(error));
  }

  /**
   * Loads the document and renders all of its pages.
   *
   * @returns A promise that resolves once every page has been stored in
   *   `pages`, and rejects if loading the document or rendering any page fails.
   */
  async setPages(): Promise<void> {
    const pdfDoc = await pdfjsLib.getDocument({ url: this.pdfBase64 }).promise;
    const totalPages = pdfDoc.numPages;
    // Sizing the array up front gives the template one entry (and therefore
    // one canvas) per page.
    this.pages = new Array(totalPages);

    const renderJobs: Promise<void>[] = [];
    for (let i = 0; i < totalPages; i++) {
      // pdf.js page numbers start at 1.
      renderJobs.push(this.renderPageToDataUrl(pdfDoc, i + 1));
    }
    await Promise.all(renderJobs);
  }

  /** Renders one page on its own canvas and stores the resulting PNG in `pages`. */
  private async renderPageToDataUrl(pdfDoc: pdfjsLib.PDFDocumentProxy, pageNumber: number): Promise<void> {
    const page = await pdfDoc.getPage(pageNumber);
    const canvasRef = this.canvas.toArray()[page.pageIndex];
    if (!canvasRef) {
      throw new Error(`No canvas element available for PDF page ${pageNumber}`);
    }
    const canvas = canvasRef.nativeElement;
    await this.renderPdfPageToCanvas(page, canvas);
    this.pages[page.pageIndex] = canvas.toDataURL('image/png');
  }

  /**
   * Resizes `canvas` to the page's viewport at scale 1.0 and starts rendering
   * the page into it.
   *
   * @param page Page to render.
   * @param canvas Canvas that receives the rendered page.
   * @returns The render task's promise.
   * @throws Error if the canvas cannot provide a 2D rendering context.
   */
  renderPdfPageToCanvas(page: pdfjsLib.PDFPageProxy, canvas: HTMLCanvasElement): pdfjsLib.PDFPromise<pdfjsLib.PDFPageProxy> {
    const viewport = page.getViewport({ scale: 1.0 });
    canvas.height = viewport.height;
    canvas.width = viewport.width;

    // getContext() returns null when a 2D context is not available.
    const canvasContext = canvas.getContext('2d');
    if (canvasContext === null) {
      throw new Error('Could not obtain a 2D rendering context for the canvas');
    }
    return page.render({ canvasContext, viewport }).promise;
  }
}
package.json
{
...
"dependencies": {
...
"@angular/core": "^9.1.11",
"pdfjs-dist": "2.3.200"
},
"devDependencies": {
...
"@types/pdfjs-dist": "2.1.3"
}
}
我正在尝试开发一段前端代码,要求用户提供一个 pdf,然后在内部(在用户的浏览器中)生成一个 png 数组(通过 canvas 的 toDataURL 得到 data URL),其中数组中的每个条目对应 pdf 中的一页:
dat[0] = 第 1 页的 png
dat[1] = 第 2 页的 png
...
当我测试下面的代码时,页面以某种方式呈现在彼此之上并旋转。
<script src="http://cdnjs.cloudflare.com/ajax/libs/processing.js/1.4.1/processing-api.min.js"></script><html>
<!--
Created using jsbin.com
Source can be edited via http://jsbin.com/pdfjs-helloworld-v2/8598/edit
-->
<body>
<canvas id="the-canvas" style="border:1px solid black"></canvas>
<input id='pdf' type='file'/>
<!-- Use latest PDF.js build from Github -->
<script src="https://ajax.googleapis.com/ajax/libs/jquery/2.2.4/jquery.min.js"></script>
<script src="pdf.js"></script>
<script src="pdf.worker.js"></script>
<script type="text/javascript">
//
// Asynchronous download PDF as an ArrayBuffer
//
// Collected PNG data URLs. Entries are pushed when a page finishes rendering,
// so their order follows render completion. `dat` is assigned without a
// declaration keyword here (as are `file`, `fileReader`, `number_of_pages`
// and `i` below).
dat = [];
var pdf = document.getElementById('pdf');
// Runs when the value of the <input id='pdf'> file picker changes.
pdf.onchange = function(ev) {
// Only proceed when a file was actually selected.
if (file = document.getElementById('pdf').files[0]) {
fileReader = new FileReader();
// Once the file has been read, hand its contents to PDF.js.
fileReader.onload = function(ev) {
//console.log(ev);
PDFJS.getDocument(fileReader.result).then(function getPdfHelloWorld(pdf) {
//
// Request every page of the document (page numbers run from 1 to numPages).
// The loop only schedules the requests; it does not wait for one page to
// finish rendering before asking for the next.
//
number_of_pages = pdf.numPages;
for(i = 1; i < number_of_pages+1; ++i) {
pdf.getPage(i).then(function getPageHelloWorld(page) {
var scale = 1;
var viewport = page.getViewport(scale);
//
// Prepare canvas using PDF page dimensions.
// Every page callback looks up the same 'the-canvas' element, so all
// pages are drawn onto one shared canvas.
// NOTE(review): sharing one canvas between concurrent renders is the
// likely cause of pages appearing on top of each other - confirm.
//
var canvas = document.getElementById('the-canvas');
var context = canvas.getContext('2d');
canvas.height = viewport.height;
canvas.width = viewport.width;
//
// Render PDF page into canvas context
//
var renderContext = {
canvasContext: context,
viewport: viewport};
// When rendering completes, snapshot the shared canvas as a PNG data URL.
page.render(renderContext).then(function() {
dat.push(canvas.toDataURL('image/png'));
});
});
}
//console.log(pdf.numPages);
//console.log(pdf)
}, function(error){
// Loading the document failed; log the error.
console.log(error);
});
};
// Start reading the selected file as an ArrayBuffer; onload fires when done.
fileReader.readAsArrayBuffer(file);
}
}
</script>
<style id="jsbin-css">
</style>
<script>
</script>
</body>
</html>
我只对数组数据感兴趣。当我渲染数组中的图像时,我看到了
dat[0] = 第 1 页的 png(正确)
dat[1] = 第 1 页的 png 和第 2 页的 png 相互叠加,并且相对旋转了 180°
...
如何确保正确呈现数组的每个条目中的单个页面?
尝试在不同的 canvas 上呈现每个页面。您可以使用下面的代码创建一个 canvas 并将其附加到容器:
// Look up the container element with id 'canvas', create a new <canvas>
// element, and append it to that container.
var canvasdiv = document.getElementById('canvas');
var canvas = document.createElement('canvas');
canvasdiv.appendChild(canvas);
// Demo: load a PDF with PDF.js, render every page onto its own <canvas>,
// and collect one PNG data URL per page.
var url = 'https://raw.githubusercontent.com/mozilla/pdf.js/ba2edeae/web/compressed.tracemonkey-pldi-09.pdf';
var PDFJS = window['pdfjs-dist/build/pdf'];
PDFJS.GlobalWorkerOptions.workerSrc = '//mozilla.github.io/pdf.js/build/pdf.worker.js';
var loadingTask = PDFJS.getDocument(url);
loadingTask.promise.then(function(pdf) {
  var canvasdiv = document.getElementById('canvas');
  var totalPages = pdf.numPages;
  // data[n - 1] holds the PNG of page n. Pages render concurrently and may
  // finish in any order, so results are stored by page number instead of
  // being pushed in completion order.
  var data = new Array(totalPages);
  // Number of pages stored so far (data.length is fixed at totalPages).
  var loadedPages = 0;
  for (let pageNumber = 1; pageNumber <= totalPages; pageNumber++) {
    pdf.getPage(pageNumber).then(function(page) {
      var scale = 1.5;
      var viewport = page.getViewport({ scale: scale });
      // Each page gets its own canvas so concurrent renders cannot overlap.
      var canvas = document.createElement('canvas');
      canvasdiv.appendChild(canvas);
      // Prepare canvas using PDF page dimensions
      var context = canvas.getContext('2d');
      canvas.height = viewport.height;
      canvas.width = viewport.width;
      // Render PDF page into canvas context
      var renderContext = { canvasContext: context, viewport: viewport };
      var renderTask = page.render(renderContext);
      // Return the render promise so a render failure reaches the catch below.
      return renderTask.promise.then(function() {
        data[pageNumber - 1] = canvas.toDataURL('image/png');
        loadedPages += 1;
        console.log(loadedPages + ' page(s) loaded in data');
      });
    }).catch(function(reason) {
      // Page loading or rendering error
      console.error(reason);
    });
  }
}, function(reason) {
  // PDF loading error
  console.error(reason);
});
/* Show each rendered page canvas as a bordered thumbnail. */
canvas {
  width: 25%;
  margin: 5px;
  border: 1px solid black;
}
<script src="//mozilla.github.io/pdf.js/build/pdf.js"></script>
<div id="canvas"></div>
对于那些从 Google 来到这里寻求 Angular 解决方案的人来说,这里是一个在不同的 canvas 上呈现每个页面的实现。
pdf-viewer.component.html
<!-- Per page: a hidden canvas used as the render target and an image showing the resulting PNG. -->
<div *ngFor="let page of pages">
  <canvas #canvas hidden></canvas>
  <img [src]="page">
</div>
pdf-viewer.component.ts
import * as pdfjsLib from 'pdfjs-dist';
pdfjsLib.GlobalWorkerOptions.workerSrc = 'pdf.worker.js';
/**
 * Renders every page of a PDF to a PNG data URL and exposes the results in
 * `pages`, indexed by zero-based page index.
 *
 * Each page is drawn on its own `#canvas` element from the template, so
 * concurrent renders never share a canvas.
 */
@Component({
  selector: 'app-pdf-viewer',
  templateUrl: './pdf-viewer.component.html',
  styleUrls: ['./pdf-viewer.component.scss'],
})
export class PdfViewerComponent implements OnInit {
  /** Canvas elements matched by the template reference `#canvas`. */
  @ViewChildren('canvas') canvas!: QueryList<ElementRef<HTMLCanvasElement>>;

  /** Value passed to pdf.js as the document `url`. */
  @Input() pdfBase64!: string;

  /** One PNG data URL per page; an entry stays empty until its page has rendered. */
  pages: string[] = [];

  ngOnInit(): void {
    // setPages() is asynchronous; report failures instead of leaving the
    // rejection unhandled.
    this.setPages().catch((error: unknown) => console.error(error));
  }

  /**
   * Loads the document and renders all of its pages.
   *
   * @returns A promise that resolves once every page has been stored in
   *   `pages`, and rejects if loading the document or rendering any page fails.
   */
  async setPages(): Promise<void> {
    const pdfDoc = await pdfjsLib.getDocument({ url: this.pdfBase64 }).promise;
    const totalPages = pdfDoc.numPages;
    // Sizing the array up front gives the template one entry (and therefore
    // one canvas) per page.
    this.pages = new Array(totalPages);

    const renderJobs: Promise<void>[] = [];
    for (let i = 0; i < totalPages; i++) {
      // pdf.js page numbers start at 1.
      renderJobs.push(this.renderPageToDataUrl(pdfDoc, i + 1));
    }
    await Promise.all(renderJobs);
  }

  /** Renders one page on its own canvas and stores the resulting PNG in `pages`. */
  private async renderPageToDataUrl(pdfDoc: pdfjsLib.PDFDocumentProxy, pageNumber: number): Promise<void> {
    const page = await pdfDoc.getPage(pageNumber);
    const canvasRef = this.canvas.toArray()[page.pageIndex];
    if (!canvasRef) {
      throw new Error(`No canvas element available for PDF page ${pageNumber}`);
    }
    const canvas = canvasRef.nativeElement;
    await this.renderPdfPageToCanvas(page, canvas);
    this.pages[page.pageIndex] = canvas.toDataURL('image/png');
  }

  /**
   * Resizes `canvas` to the page's viewport at scale 1.0 and starts rendering
   * the page into it.
   *
   * @param page Page to render.
   * @param canvas Canvas that receives the rendered page.
   * @returns The render task's promise.
   * @throws Error if the canvas cannot provide a 2D rendering context.
   */
  renderPdfPageToCanvas(page: pdfjsLib.PDFPageProxy, canvas: HTMLCanvasElement): pdfjsLib.PDFPromise<pdfjsLib.PDFPageProxy> {
    const viewport = page.getViewport({ scale: 1.0 });
    canvas.height = viewport.height;
    canvas.width = viewport.width;

    // getContext() returns null when a 2D context is not available.
    const canvasContext = canvas.getContext('2d');
    if (canvasContext === null) {
      throw new Error('Could not obtain a 2D rendering context for the canvas');
    }
    return page.render({ canvasContext, viewport }).promise;
  }
}
package.json
{
...
"dependencies": {
...
"@angular/core": "^9.1.11",
"pdfjs-dist": "2.3.200"
},
"devDependencies": {
...
"@types/pdfjs-dist": "2.1.3"
}
}