有没有办法添加脚本,以便在 chrome+puppeteer 的 evaluate() 上下文中添加新函数?
Is there a way to add a script that adds new functions to the evaluate() context of chrome+puppeteer?
基于这个,有没有办法(就像 casperjs/phantomjs 那样)在 page.evaluate()
上下文中添加我们的自定义函数?
例如,包含一个带有辅助函数 x
的文件以调用 Xpath 函数:x('//a/@href')
您可以在单独的 page.evaluate()
调用中注册辅助函数,使其在浏览器上下文中运行。page.exposeFunction()
看起来很诱人,但它无法访问浏览器上下文(您需要 document
对象)。
下面是在浏览器上下文中注册辅助函数的示例,如 $x()
:
const puppeteer = require('puppeteer');
/**
 * Installs XPath helper functions on the page's `window` object.
 *
 * Meant to be handed to `page.evaluate()` so that it executes inside the
 * browser context, where `window`, `document` and `XPathResult` are defined.
 */
const addHelperFunctions = () => {
  // $x(xPath): evaluate the expression against `document` and hand back the
  // result's `singleNodeValue`.
  window.$x = (xPath) => {
    const lookup = document.evaluate(
      xPath,
      document,
      null,
      XPathResult.FIRST_ORDERED_NODE_TYPE,
      null
    );
    return lookup.singleNodeValue;
  };
};
// Demo: register the helpers with one page.evaluate() call, then use $x()
// from a later page.evaluate() call on the same page.
(async () => {
  const browser = await puppeteer.launch();
  try {
    const page = await browser.newPage();
    await page.goto('https://en.wikipedia.org', { waitUntil: 'networkidle2' });
    // Executes addHelperFunctions inside the page so window.$x exists there.
    await page.evaluate(addHelperFunctions);
    const text = await page.evaluate(() => {
      // $x() is now available
      const featureArticle = $x('//*[@id="mp-tfa"]');
      // Return null instead of throwing inside the page when nothing matches.
      return featureArticle ? featureArticle.textContent : null;
    });
    console.log(text);
  } finally {
    // Close the browser even when navigation or evaluation fails, so no
    // Chrome process is left behind.
    await browser.close();
  }
})().catch((error) => {
  // Surface failures instead of leaving an unhandled promise rejection.
  console.error(error);
  process.exitCode = 1;
});
您还可以将辅助函数保存在单独的文件中,然后使用 page.addScriptTag()
将它们注入到浏览器上下文中。
这是一个例子:
helperFunctions.js
// XPath helper for the browser context: $x(xPath) evaluates the expression
// against `document` and returns the result's `singleNodeValue`.
window.$x = (xPath) => {
  const lookup = document.evaluate(
    xPath,
    document,
    null,
    XPathResult.FIRST_ORDERED_NODE_TYPE,
    null
  );
  return lookup.singleNodeValue;
};
并使用它:
const puppeteer = require('puppeteer');
// Demo: inject ./helperFunctions.js into the page with page.addScriptTag(),
// then use $x() from page.evaluate().
(async () => {
  const browser = await puppeteer.launch();
  try {
    const page = await browser.newPage();
    await page.goto('https://en.wikipedia.org', { waitUntil: 'networkidle2' });
    await page.addScriptTag({ path: './helperFunctions.js' });
    const text = await page.evaluate(() => {
      // $x() is now available
      const featureArticle = $x('//*[@id="mp-tfa"]');
      // Return null instead of throwing inside the page when nothing matches.
      return featureArticle ? featureArticle.textContent : null;
    });
    console.log(text);
  } finally {
    // Close the browser even when navigation, injection or evaluation fails.
    await browser.close();
  }
})().catch((error) => {
  // Surface failures instead of leaving an unhandled promise rejection.
  console.error(error);
  process.exitCode = 1;
});
基于 casperjs 的 getElementByXPath()
和 getElementsByXPath()
的另一种解决方案。优点是我们可以针对特定节点(第二个参数)使用 XPath 表达式。
// XPath helper for the browser context: $x(xPath) evaluates the expression
// against `document` and returns the result's `singleNodeValue`.
window.$x = (xPath) => {
  const lookup = document.evaluate(
    xPath,
    document,
    null,
    XPathResult.FIRST_ORDERED_NODE_TYPE,
    null
  );
  return lookup.singleNodeValue;
};
/**
 * Returns the first snapshot item matching an XPath expression, or
 * `undefined` when the expression matches nothing.
 *
 * @param {string} expression - XPath expression to evaluate.
 * @param {Node} [scope] - Context node for the expression; any falsy value
 *   falls back to `document`.
 * @returns {Node|undefined} The first matching node, if any.
 */
window.getElementByXPath = function getElementByXPath(expression, scope) {
  const contextNode = scope || document;
  const snapshot = document.evaluate(
    expression,
    contextNode,
    null,
    XPathResult.ORDERED_NODE_SNAPSHOT_TYPE,
    null
  );
  return snapshot.snapshotLength > 0 ? snapshot.snapshotItem(0) : undefined;
};
/**
 * Collects every snapshot item matching an XPath expression into an array.
 *
 * @param {string} expression - XPath expression to evaluate.
 * @param {Node} [scope] - Context node for the expression; any falsy value
 *   falls back to `document`.
 * @returns {Node[]} Matching nodes in snapshot order (empty when none match).
 */
window.getElementsByXPath = function getElementsByXPath(expression, scope) {
  const contextNode = scope || document;
  const snapshot = document.evaluate(
    expression,
    contextNode,
    null,
    XPathResult.ORDERED_NODE_SNAPSHOT_TYPE,
    null
  );
  return Array.from(
    { length: snapshot.snapshotLength },
    (_, index) => snapshot.snapshotItem(index)
  );
};
现实生活中的代码示例:
const puppeteer = require('puppeteer');
// Real-life example: scrape up to 100 rows of the rich-list table using the
// helpers injected from ./helperFunctions.js.
(async () => {
  const browser = await puppeteer.launch();
  try {
    const page = await browser.newPage();
    await page.goto('https://99bitcoins.com/bitcoin-rich-list-top100/#addresses', { waitUntil: 'networkidle2' });
    await page.addScriptTag({ path: './helperFunctions.js' });
    const result = await page.evaluate(() => {
      const obj = {};
      const data = getElementsByXPath('//table[@class="t99btc-rich-list"]//tr');
      // Row 0 is skipped, as in the original loop (presumably a header row;
      // confirm against the page). Never read past the rows actually present:
      // an undefined row would make the helper fall back to `document`.
      const lastRow = Math.min(100, data.length - 1);
      for (let i = 1; i <= lastRow; i++) {
        const hashCell = getElementByXPath('./td/a', data[i]);
        const balanceCell = getElementByXPath('./td[3]', data[i]);
        obj[i] = {
          // Missing cells become null rather than throwing inside the page.
          "hash": hashCell ? hashCell.innerText : null,
          "balance": balanceCell ? balanceCell.innerText : null
        };
      }
      return obj;
    });
    console.log(JSON.stringify(result, null, 4));
  } finally {
    // Close the browser even when navigation, injection or evaluation fails.
    await browser.close();
  }
})().catch((error) => {
  // Surface failures instead of leaving an unhandled promise rejection.
  console.error(error);
  process.exitCode = 1;
});
基于这个,有没有办法(就像 casperjs/phantomjs 那样)在 page.evaluate()
上下文中添加我们的自定义函数?
例如,包含一个带有辅助函数 x
的文件以调用 Xpath 函数:x('//a/@href')
您可以在单独的 page.evaluate()
调用中注册辅助函数,使其在浏览器上下文中运行。page.exposeFunction()
看起来很诱人,但它无法访问浏览器上下文(您需要 document
对象)。
下面是在浏览器上下文中注册辅助函数的示例,如 $x()
:
const puppeteer = require('puppeteer');
/**
 * Installs XPath helper functions on the page's `window` object.
 *
 * Meant to be handed to `page.evaluate()` so that it executes inside the
 * browser context, where `window`, `document` and `XPathResult` are defined.
 */
const addHelperFunctions = () => {
  // $x(xPath): evaluate the expression against `document` and hand back the
  // result's `singleNodeValue`.
  window.$x = (xPath) => {
    const lookup = document.evaluate(
      xPath,
      document,
      null,
      XPathResult.FIRST_ORDERED_NODE_TYPE,
      null
    );
    return lookup.singleNodeValue;
  };
};
// Demo: register the helpers with one page.evaluate() call, then use $x()
// from a later page.evaluate() call on the same page.
(async () => {
  const browser = await puppeteer.launch();
  try {
    const page = await browser.newPage();
    await page.goto('https://en.wikipedia.org', { waitUntil: 'networkidle2' });
    // Executes addHelperFunctions inside the page so window.$x exists there.
    await page.evaluate(addHelperFunctions);
    const text = await page.evaluate(() => {
      // $x() is now available
      const featureArticle = $x('//*[@id="mp-tfa"]');
      // Return null instead of throwing inside the page when nothing matches.
      return featureArticle ? featureArticle.textContent : null;
    });
    console.log(text);
  } finally {
    // Close the browser even when navigation or evaluation fails, so no
    // Chrome process is left behind.
    await browser.close();
  }
})().catch((error) => {
  // Surface failures instead of leaving an unhandled promise rejection.
  console.error(error);
  process.exitCode = 1;
});
您还可以将辅助函数保存在单独的文件中,然后使用 page.addScriptTag()
将它们注入到浏览器上下文中。
这是一个例子:
helperFunctions.js
// XPath helper for the browser context: $x(xPath) evaluates the expression
// against `document` and returns the result's `singleNodeValue`.
window.$x = (xPath) => {
  const lookup = document.evaluate(
    xPath,
    document,
    null,
    XPathResult.FIRST_ORDERED_NODE_TYPE,
    null
  );
  return lookup.singleNodeValue;
};
并使用它:
const puppeteer = require('puppeteer');
// Demo: inject ./helperFunctions.js into the page with page.addScriptTag(),
// then use $x() from page.evaluate().
(async () => {
  const browser = await puppeteer.launch();
  try {
    const page = await browser.newPage();
    await page.goto('https://en.wikipedia.org', { waitUntil: 'networkidle2' });
    await page.addScriptTag({ path: './helperFunctions.js' });
    const text = await page.evaluate(() => {
      // $x() is now available
      const featureArticle = $x('//*[@id="mp-tfa"]');
      // Return null instead of throwing inside the page when nothing matches.
      return featureArticle ? featureArticle.textContent : null;
    });
    console.log(text);
  } finally {
    // Close the browser even when navigation, injection or evaluation fails.
    await browser.close();
  }
})().catch((error) => {
  // Surface failures instead of leaving an unhandled promise rejection.
  console.error(error);
  process.exitCode = 1;
});
基于 casperjs 的 getElementByXPath()
和 getElementsByXPath()
的另一种解决方案。优点是我们可以针对特定节点(第二个参数)使用 XPath 表达式。
// XPath helper for the browser context: $x(xPath) evaluates the expression
// against `document` and returns the result's `singleNodeValue`.
window.$x = (xPath) => {
  const lookup = document.evaluate(
    xPath,
    document,
    null,
    XPathResult.FIRST_ORDERED_NODE_TYPE,
    null
  );
  return lookup.singleNodeValue;
};
/**
 * Returns the first snapshot item matching an XPath expression, or
 * `undefined` when the expression matches nothing.
 *
 * @param {string} expression - XPath expression to evaluate.
 * @param {Node} [scope] - Context node for the expression; any falsy value
 *   falls back to `document`.
 * @returns {Node|undefined} The first matching node, if any.
 */
window.getElementByXPath = function getElementByXPath(expression, scope) {
  const contextNode = scope || document;
  const snapshot = document.evaluate(
    expression,
    contextNode,
    null,
    XPathResult.ORDERED_NODE_SNAPSHOT_TYPE,
    null
  );
  return snapshot.snapshotLength > 0 ? snapshot.snapshotItem(0) : undefined;
};
/**
 * Collects every snapshot item matching an XPath expression into an array.
 *
 * @param {string} expression - XPath expression to evaluate.
 * @param {Node} [scope] - Context node for the expression; any falsy value
 *   falls back to `document`.
 * @returns {Node[]} Matching nodes in snapshot order (empty when none match).
 */
window.getElementsByXPath = function getElementsByXPath(expression, scope) {
  const contextNode = scope || document;
  const snapshot = document.evaluate(
    expression,
    contextNode,
    null,
    XPathResult.ORDERED_NODE_SNAPSHOT_TYPE,
    null
  );
  return Array.from(
    { length: snapshot.snapshotLength },
    (_, index) => snapshot.snapshotItem(index)
  );
};
现实生活中的代码示例:
const puppeteer = require('puppeteer');
// Real-life example: scrape up to 100 rows of the rich-list table using the
// helpers injected from ./helperFunctions.js.
(async () => {
  const browser = await puppeteer.launch();
  try {
    const page = await browser.newPage();
    await page.goto('https://99bitcoins.com/bitcoin-rich-list-top100/#addresses', { waitUntil: 'networkidle2' });
    await page.addScriptTag({ path: './helperFunctions.js' });
    const result = await page.evaluate(() => {
      const obj = {};
      const data = getElementsByXPath('//table[@class="t99btc-rich-list"]//tr');
      // Row 0 is skipped, as in the original loop (presumably a header row;
      // confirm against the page). Never read past the rows actually present:
      // an undefined row would make the helper fall back to `document`.
      const lastRow = Math.min(100, data.length - 1);
      for (let i = 1; i <= lastRow; i++) {
        const hashCell = getElementByXPath('./td/a', data[i]);
        const balanceCell = getElementByXPath('./td[3]', data[i]);
        obj[i] = {
          // Missing cells become null rather than throwing inside the page.
          "hash": hashCell ? hashCell.innerText : null,
          "balance": balanceCell ? balanceCell.innerText : null
        };
      }
      return obj;
    });
    console.log(JSON.stringify(result, null, 4));
  } finally {
    // Close the browser even when navigation, injection or evaluation fails.
    await browser.close();
  }
})().catch((error) => {
  // Surface failures instead of leaving an unhandled promise rejection.
  console.error(error);
  process.exitCode = 1;
});