pdf.js 带有文本选择
pdf.js with text selection
如何使 PDF 中的文本可选择?
我按下面的方式试过了:PDF 可以正常渲染,但无法选择文本。
https://github.com/mozilla/pdf.js
https://github.com/mozilla/pdf.js/blob/master/web/text_layer_builder.css
https://github.com/mozilla/pdf.js/blob/master/web/text_layer_builder.js
'use strict';
// Draw page 1 of file.pdf onto #the-canvas, then build a text layer inside
// #text-layer, positioned over the canvas.
PDFJS.getDocument('file.pdf').then(function (pdfDocument) {
  var pageNumber = 1;

  pdfDocument.getPage(pageNumber).then(function (pdfPage) {
    var zoom = 1.5;
    var viewport = pdfPage.getViewport(zoom);

    // Size the canvas to the scaled page.
    var canvasEl = document.getElementById('the-canvas');
    var drawingContext = canvasEl.getContext('2d');
    canvasEl.height = viewport.height;
    canvasEl.width = viewport.width;

    // Give the text layer the canvas's offset and the page's dimensions.
    var offset = $(canvasEl).offset();
    var layerStyles = {
      height: viewport.height + 'px',
      width: viewport.width + 'px',
      top: offset.top,
      left: offset.left
    };
    var $layer = $('#text-layer').css(layerStyles);

    pdfPage.render({ canvasContext: drawingContext, viewport: viewport });

    // Hand the page's text content to the text layer builder.
    pdfPage.getTextContent().then(function (content) {
      var builderOptions = {
        textLayerDiv: $layer.get(0),
        pageIndex: pageNumber - 1,
        viewport: viewport
      };
      var builder = new TextLayerBuilder(builderOptions);
      builder.setTextContent(content);
      builder.render();
    });
  });
});
<body>
<div>
<canvas id="the-canvas" style="border:1px solid black;"></canvas>
<div id="text-layer" class="textLayer"></div>
</div>
</body>
您好,您在 HTML 内容中创建了 canvas。
Canvas 将不支持文本选择,因此您需要将 canvas 更改为其他方式。
您的 javascript 代码非常完美。
您只需要包含 Text Layer Builder 所依赖的 UI 实用程序:
https://github.com/mozilla/pdf.js/blob/master/web/ui_utils.js
或 HTML:
<script src="https://raw.githubusercontent.com/mozilla/pdf.js/master/web/ui_utils.js"></script>
如果您在没有引入 ui_utils 的情况下运行代码并查看调试控制台,
会看到 ReferenceError: CustomStyle is not defined。
在 PDF.js 的代码仓库中快速搜索一下,就会发现它定义在 ui_utils.js 中。
这是我的最小但完整的代码供您参考。
我使用的是 PDF.js 的演示 PDF。
请注意,在生产环境中不应直接链接到 raw.githubusercontent.com 上的文件。
<!DOCTYPE html><meta charset="utf-8">
<link rel="stylesheet" href="https://raw.githubusercontent.com/mozilla/pdf.js/master/web/text_layer_builder.css" />
<script src="https://ajax.googleapis.com/ajax/libs/jquery/2.1.4/jquery.min.js"></script>
<script src="https://raw.githubusercontent.com/mozilla/pdf.js/master/web/ui_utils.js"></script>
<script src="https://raw.githubusercontent.com/mozilla/pdf.js/master/web/text_layer_builder.js"></script>
<script src="https://mozilla.github.io/pdf.js/build/pdf.js"></script>
<body>
<div>
<canvas id="the-canvas" style="border:1px solid black;"></canvas>
<div id="text-layer" class="textLayer"></div>
</div>
<script>
'use strict';
// Draw page 1 of file.pdf onto #the-canvas, then build a text layer inside
// #text-layer, positioned over the canvas.
PDFJS.getDocument('file.pdf').then(function (pdfDocument) {
  var pageNumber = 1;

  pdfDocument.getPage(pageNumber).then(function (pdfPage) {
    var zoom = 1.5;
    var viewport = pdfPage.getViewport(zoom);

    // Size the canvas to the scaled page.
    var canvasEl = $('#the-canvas')[0];
    var drawingContext = canvasEl.getContext('2d');
    canvasEl.height = viewport.height;
    canvasEl.width = viewport.width;

    // Give the text layer the canvas's offset and the page's dimensions.
    var offset = $(canvasEl).offset();
    var layerStyles = {
      height: viewport.height + 'px',
      width: viewport.width + 'px',
      top: offset.top,
      left: offset.left
    };
    var $layer = $('#text-layer').css(layerStyles);

    pdfPage.render({ canvasContext: drawingContext, viewport: viewport });

    // Log the page's text content, then hand it to the text layer builder.
    pdfPage.getTextContent().then(function (content) {
      console.log(content);
      var builderOptions = {
        textLayerDiv: $layer.get(0),
        pageIndex: pageNumber - 1,
        viewport: viewport
      };
      var builder = new TextLayerBuilder(builderOptions);
      builder.setTextContent(content);
      builder.render();
    });
  });
});
</script>
经过几个小时的摸索,我发现下面这篇文章对于在不使用 Node.js 的情况下用 pdf.js 实现文本选择非常有帮助:
Custom PDF Rendering in JavaScript with Mozilla’s PDF.Js
在 pdf.js 2.8.61 版本上,已采纳的答案不再适用,因为 renderTextLayer() 已集成到 pdf.js 中,不再需要外部脚本,也不需要 jQuery。
以下代码将使 PDF 文本可选择。以加载以下PDF文件为例,请自行替换:
https://raw.githubusercontent.com/mozilla/pdf.js/ba2edeae/web/compressed.tracemonkey-pldi-09.pdf
它主要使用两个html元素:
<canvas id="the-canvas"></canvas>
<div class="textLayer"></div>
canvas 用于显示非可选文档,.textLayer div 用于可选文本。 .textLayer div上的文字全透明,不可见,只提供选区效果。
<!DOCTYPE html>
<html>
<head>
<meta http-equiv="Content-Type" content="text/html; charset=UTF-8" />
<meta name="viewport" content="width=device-width,initial-scale=1.0,maximum-scale=1.0,user-scalable=no">
<script src="//mozilla.github.io/pdf.js/build/pdf.js" crossorigin="anonymous"></script>
<link href="//mozilla.github.io/pdf.js/web/viewer.css" rel="stylesheet" type="text/css" />
<style type="text/css">
#the-canvas {
border: 1px solid black;
direction: ltr;
}
</style>
</head>
<body>
<h1>PDF.js Previous/Next example</h1>
<div>
<button id="prev">Previous</button>
<button id="next">Next</button>
<span>Page: <span id="page_num"></span> / <span id="page_count"></span></span>
</div>
<canvas id="the-canvas"></canvas>
<div class="textLayer"></div>
<script>
// PDF to display. If an absolute URL on a remote server is used, that server
// has to send the CORS header.
var url = '//raw.githubusercontent.com/mozilla/pdf.js/ba2edeae/web/compressed.tracemonkey-pldi-09.pdf';

// pdf.js was loaded through a <script> tag; take its exports from window.
var pdfjsLib = window['pdfjs-dist/build/pdf'];

// The workerSrc property shall be specified.
pdfjsLib.GlobalWorkerOptions.workerSrc = '//mozilla.github.io/pdf.js/build/pdf.worker.js';

// Viewer state shared by the functions below.
var pdfDoc = null;          // loaded document; assigned once getDocument resolves
var pageNum = 1;            // page number currently selected
var pageRendering = false;  // true while renderPage is working
var pageNumPending = null;  // page requested while another render was running
var scale = 1;              // scale passed to page.getViewport
var canvas = document.getElementById('the-canvas');
var ctx = canvas.getContext('2d');
/**
 * Get page info from document, resize canvas accordingly, render the page,
 * then rebuild the selectable text layer on top of the canvas.
 *
 * Spans left in the text layer by the previously shown page are removed
 * before the new page is drawn. `pageRendering` stays true until both the
 * canvas and the text layer are finished (or the render failed); only then is
 * a page queued in `pageNumPending` started, so two pages never write into
 * the text layer at the same time.
 *
 * @param num Page number.
 */
function renderPage(num) {
  pageRendering = true;
  // Using promise to fetch the page
  pdfDoc.getPage(num).then(function(page) {
    var viewport = page.getViewport({scale: scale});
    canvas.height = viewport.height;
    canvas.width = viewport.width;

    // renderTextLayer() adds spans to the container without removing old
    // ones, so drop whatever the previous page left behind.
    var textLayer = document.querySelector(".textLayer");
    textLayer.textContent = '';

    // Render PDF page into canvas context
    var renderContext = {
      canvasContext: ctx,
      viewport: viewport
    };
    return page.render(renderContext).promise.then(function() {
      // Resolves with the text contents of the page
      return page.getTextContent();
    }).then(function(textContent) {
      // Align the text layer with the (already resized) canvas
      textLayer.style.left = canvas.offsetLeft + 'px';
      textLayer.style.top = canvas.offsetTop + 'px';
      textLayer.style.height = canvas.offsetHeight + 'px';
      textLayer.style.width = canvas.offsetWidth + 'px';
      // Render the text over the pdf canvas and wait until it is done.
      return pdfjsLib.renderTextLayer({
        textContent: textContent,
        container: textLayer,
        viewport: viewport,
        textDivs: []
      }).promise;
    });
  }).catch(function(reason) {
    // Report the failure; the handler below still runs so that
    // pageRendering cannot stay stuck at true and block navigation.
    console.error('Failed to render page ' + num, reason);
  }).then(function() {
    pageRendering = false;
    if (pageNumPending !== null) {
      // New page rendering is pending
      var pendingNum = pageNumPending;
      pageNumPending = null;
      renderPage(pendingNum);
    }
  });
  // Update page counters
  document.getElementById('page_num').textContent = num;
}
/**
 * Renders the requested page right away when no render is in progress;
 * otherwise remembers it in pageNumPending so it is rendered once the
 * current render has finished.
 */
function queueRenderPage(num) {
  if (!pageRendering) {
    renderPage(num);
    return;
  }
  pageNumPending = num;
}
/**
 * Steps back one page and queues it for rendering; does nothing when the
 * first page is already selected.
 */
function onPrevPage() {
  var onFirstPage = pageNum <= 1;
  if (!onFirstPage) {
    pageNum -= 1;
    queueRenderPage(pageNum);
  }
}
// Wire the "Previous" button to the handler.
document.getElementById('prev').addEventListener('click', onPrevPage);
/**
 * Displays next page.
 *
 * Does nothing when the last page is already shown, or when the document
 * has not finished loading yet (pdfDoc is still null at that point, so
 * reading pdfDoc.numPages would throw a TypeError).
 */
function onNextPage() {
  if (!pdfDoc || pageNum >= pdfDoc.numPages) {
    return;
  }
  pageNum++;
  queueRenderPage(pageNum);
}
// Wire the "Next" button to the handler.
document.getElementById('next').addEventListener('click', onNextPage);
/**
 * Fetches the PDF asynchronously. Once it is available the document is
 * stored in pdfDoc, the page count is shown and the first page is rendered.
 */
pdfjsLib.getDocument(url).promise.then(function(loadedDoc) {
  pdfDoc = loadedDoc;
  var pageCountLabel = document.getElementById('page_count');
  pageCountLabel.textContent = loadedDoc.numPages;
  // Initial/first page rendering
  renderPage(pageNum);
});
</script>
</body>
</html>
如何使 PDF 中的文本可选择?
我按下面的方式试过了:PDF 可以正常渲染,但无法选择文本。
https://github.com/mozilla/pdf.js
https://github.com/mozilla/pdf.js/blob/master/web/text_layer_builder.css
https://github.com/mozilla/pdf.js/blob/master/web/text_layer_builder.js
'use strict';
// Draw page 1 of file.pdf onto #the-canvas, then build a text layer inside
// #text-layer, positioned over the canvas.
PDFJS.getDocument('file.pdf').then(function (pdfDocument) {
  var pageNumber = 1;

  pdfDocument.getPage(pageNumber).then(function (pdfPage) {
    var zoom = 1.5;
    var viewport = pdfPage.getViewport(zoom);

    // Size the canvas to the scaled page.
    var canvasEl = document.getElementById('the-canvas');
    var drawingContext = canvasEl.getContext('2d');
    canvasEl.height = viewport.height;
    canvasEl.width = viewport.width;

    // Give the text layer the canvas's offset and the page's dimensions.
    var offset = $(canvasEl).offset();
    var layerStyles = {
      height: viewport.height + 'px',
      width: viewport.width + 'px',
      top: offset.top,
      left: offset.left
    };
    var $layer = $('#text-layer').css(layerStyles);

    pdfPage.render({ canvasContext: drawingContext, viewport: viewport });

    // Hand the page's text content to the text layer builder.
    pdfPage.getTextContent().then(function (content) {
      var builderOptions = {
        textLayerDiv: $layer.get(0),
        pageIndex: pageNumber - 1,
        viewport: viewport
      };
      var builder = new TextLayerBuilder(builderOptions);
      builder.setTextContent(content);
      builder.render();
    });
  });
});
<body>
<div>
<canvas id="the-canvas" style="border:1px solid black;"></canvas>
<div id="text-layer" class="textLayer"></div>
</div>
</body>
您好,您在 HTML 内容中创建了 canvas。
Canvas 将不支持文本选择,因此您需要将 canvas 更改为其他方式。
您的 javascript 代码非常完美。 您只需要包含 Text Layer Builder 所依赖的 UI 实用程序:
https://github.com/mozilla/pdf.js/blob/master/web/ui_utils.js
或 HTML:
<script src="https://raw.githubusercontent.com/mozilla/pdf.js/master/web/ui_utils.js"></script>
如果您在没有引入 ui_utils 的情况下运行代码并查看调试控制台,
会看到 ReferenceError: CustomStyle is not defined。
在 PDF.js 的代码仓库中快速搜索一下,就会发现它定义在 ui_utils.js 中。
这是我的最小但完整的代码,供您参考。我使用的是 PDF.js 的演示 PDF。请注意,在生产环境中不应直接链接到 raw.githubusercontent.com 上的文件。
<!DOCTYPE html><meta charset="utf-8">
<link rel="stylesheet" href="https://raw.githubusercontent.com/mozilla/pdf.js/master/web/text_layer_builder.css" />
<script src="https://ajax.googleapis.com/ajax/libs/jquery/2.1.4/jquery.min.js"></script>
<script src="https://raw.githubusercontent.com/mozilla/pdf.js/master/web/ui_utils.js"></script>
<script src="https://raw.githubusercontent.com/mozilla/pdf.js/master/web/text_layer_builder.js"></script>
<script src="https://mozilla.github.io/pdf.js/build/pdf.js"></script>
<body>
<div>
<canvas id="the-canvas" style="border:1px solid black;"></canvas>
<div id="text-layer" class="textLayer"></div>
</div>
<script>
'use strict';
// Draw page 1 of file.pdf onto #the-canvas, then build a text layer inside
// #text-layer, positioned over the canvas.
PDFJS.getDocument('file.pdf').then(function (pdfDocument) {
  var pageNumber = 1;

  pdfDocument.getPage(pageNumber).then(function (pdfPage) {
    var zoom = 1.5;
    var viewport = pdfPage.getViewport(zoom);

    // Size the canvas to the scaled page.
    var canvasEl = $('#the-canvas')[0];
    var drawingContext = canvasEl.getContext('2d');
    canvasEl.height = viewport.height;
    canvasEl.width = viewport.width;

    // Give the text layer the canvas's offset and the page's dimensions.
    var offset = $(canvasEl).offset();
    var layerStyles = {
      height: viewport.height + 'px',
      width: viewport.width + 'px',
      top: offset.top,
      left: offset.left
    };
    var $layer = $('#text-layer').css(layerStyles);

    pdfPage.render({ canvasContext: drawingContext, viewport: viewport });

    // Log the page's text content, then hand it to the text layer builder.
    pdfPage.getTextContent().then(function (content) {
      console.log(content);
      var builderOptions = {
        textLayerDiv: $layer.get(0),
        pageIndex: pageNumber - 1,
        viewport: viewport
      };
      var builder = new TextLayerBuilder(builderOptions);
      builder.setTextContent(content);
      builder.render();
    });
  });
});
</script>
经过几个小时的摸索,我发现下面这篇文章对于在不使用 Node.js 的情况下用 pdf.js 实现文本选择非常有帮助:Custom PDF Rendering in JavaScript with Mozilla’s PDF.Js
在 pdf.js 2.8.61 版本上,已采纳的答案不再适用,因为 renderTextLayer() 已集成到 pdf.js 中,不再需要外部脚本,也不需要 jQuery。
以下代码将使 PDF 文本可选择。以加载以下PDF文件为例,请自行替换:
https://raw.githubusercontent.com/mozilla/pdf.js/ba2edeae/web/compressed.tracemonkey-pldi-09.pdf
它主要使用两个html元素:
<canvas id="the-canvas"></canvas>
<div class="textLayer"></div>
canvas 用于显示非可选文档,.textLayer div 用于可选文本。 .textLayer div上的文字全透明,不可见,只提供选区效果。
<!DOCTYPE html>
<html>
<head>
<meta http-equiv="Content-Type" content="text/html; charset=UTF-8" />
<meta name="viewport" content="width=device-width,initial-scale=1.0,maximum-scale=1.0,user-scalable=no">
<script src="//mozilla.github.io/pdf.js/build/pdf.js" crossorigin="anonymous"></script>
<link href="//mozilla.github.io/pdf.js/web/viewer.css" rel="stylesheet" type="text/css" />
<style type="text/css">
#the-canvas {
border: 1px solid black;
direction: ltr;
}
</style>
</head>
<body>
<h1>PDF.js Previous/Next example</h1>
<div>
<button id="prev">Previous</button>
<button id="next">Next</button>
<span>Page: <span id="page_num"></span> / <span id="page_count"></span></span>
</div>
<canvas id="the-canvas"></canvas>
<div class="textLayer"></div>
<script>
// PDF to display. If an absolute URL on a remote server is used, that server
// has to send the CORS header.
var url = '//raw.githubusercontent.com/mozilla/pdf.js/ba2edeae/web/compressed.tracemonkey-pldi-09.pdf';

// pdf.js was loaded through a <script> tag; take its exports from window.
var pdfjsLib = window['pdfjs-dist/build/pdf'];

// The workerSrc property shall be specified.
pdfjsLib.GlobalWorkerOptions.workerSrc = '//mozilla.github.io/pdf.js/build/pdf.worker.js';

// Viewer state shared by the functions below.
var pdfDoc = null;          // loaded document; assigned once getDocument resolves
var pageNum = 1;            // page number currently selected
var pageRendering = false;  // true while renderPage is working
var pageNumPending = null;  // page requested while another render was running
var scale = 1;              // scale passed to page.getViewport
var canvas = document.getElementById('the-canvas');
var ctx = canvas.getContext('2d');
/**
 * Get page info from document, resize canvas accordingly, render the page,
 * then rebuild the selectable text layer on top of the canvas.
 *
 * Spans left in the text layer by the previously shown page are removed
 * before the new page is drawn. `pageRendering` stays true until both the
 * canvas and the text layer are finished (or the render failed); only then is
 * a page queued in `pageNumPending` started, so two pages never write into
 * the text layer at the same time.
 *
 * @param num Page number.
 */
function renderPage(num) {
  pageRendering = true;
  // Using promise to fetch the page
  pdfDoc.getPage(num).then(function(page) {
    var viewport = page.getViewport({scale: scale});
    canvas.height = viewport.height;
    canvas.width = viewport.width;

    // renderTextLayer() adds spans to the container without removing old
    // ones, so drop whatever the previous page left behind.
    var textLayer = document.querySelector(".textLayer");
    textLayer.textContent = '';

    // Render PDF page into canvas context
    var renderContext = {
      canvasContext: ctx,
      viewport: viewport
    };
    return page.render(renderContext).promise.then(function() {
      // Resolves with the text contents of the page
      return page.getTextContent();
    }).then(function(textContent) {
      // Align the text layer with the (already resized) canvas
      textLayer.style.left = canvas.offsetLeft + 'px';
      textLayer.style.top = canvas.offsetTop + 'px';
      textLayer.style.height = canvas.offsetHeight + 'px';
      textLayer.style.width = canvas.offsetWidth + 'px';
      // Render the text over the pdf canvas and wait until it is done.
      return pdfjsLib.renderTextLayer({
        textContent: textContent,
        container: textLayer,
        viewport: viewport,
        textDivs: []
      }).promise;
    });
  }).catch(function(reason) {
    // Report the failure; the handler below still runs so that
    // pageRendering cannot stay stuck at true and block navigation.
    console.error('Failed to render page ' + num, reason);
  }).then(function() {
    pageRendering = false;
    if (pageNumPending !== null) {
      // New page rendering is pending
      var pendingNum = pageNumPending;
      pageNumPending = null;
      renderPage(pendingNum);
    }
  });
  // Update page counters
  document.getElementById('page_num').textContent = num;
}
/**
 * Renders the requested page right away when no render is in progress;
 * otherwise remembers it in pageNumPending so it is rendered once the
 * current render has finished.
 */
function queueRenderPage(num) {
  if (!pageRendering) {
    renderPage(num);
    return;
  }
  pageNumPending = num;
}
/**
 * Steps back one page and queues it for rendering; does nothing when the
 * first page is already selected.
 */
function onPrevPage() {
  var onFirstPage = pageNum <= 1;
  if (!onFirstPage) {
    pageNum -= 1;
    queueRenderPage(pageNum);
  }
}
// Wire the "Previous" button to the handler.
document.getElementById('prev').addEventListener('click', onPrevPage);
/**
 * Displays next page.
 *
 * Does nothing when the last page is already shown, or when the document
 * has not finished loading yet (pdfDoc is still null at that point, so
 * reading pdfDoc.numPages would throw a TypeError).
 */
function onNextPage() {
  if (!pdfDoc || pageNum >= pdfDoc.numPages) {
    return;
  }
  pageNum++;
  queueRenderPage(pageNum);
}
// Wire the "Next" button to the handler.
document.getElementById('next').addEventListener('click', onNextPage);
/**
 * Fetches the PDF asynchronously. Once it is available the document is
 * stored in pdfDoc, the page count is shown and the first page is rendered.
 */
pdfjsLib.getDocument(url).promise.then(function(loadedDoc) {
  pdfDoc = loadedDoc;
  var pageCountLabel = document.getElementById('page_count');
  pageCountLabel.textContent = loadedDoc.numPages;
  // Initial/first page rendering
  renderPage(pageNum);
});
</script>
</body>
</html>