PhantomJS 将所有 onNavigationRequested 回调推送到数组
PhantomJS push all onNavigationRequested callbacks to array
我有一个 PhantomJS 脚本,它检查每一次重定向,并通过 page.onNavigationRequested
回调方法在控制台中显示对应的 URL。
但是,当我想把 page.onNavigationRequested
回调方法返回的所有 URL 推送到一个数组中,并在脚本最后一次性显示所有 URL 时,它只显示第一个重定向 URL。
能否请您检查一下脚本并给出建议?
// Script as posted in the question. It registers an onNavigationRequested
// handler that pushes every requested URL into arrayOfResponses and prints
// `response` as JSON once, at the bottom of the script.
var page = require('webpage').create();
// `sys` and `fs` are loaded but never used in this script.
var sys = require('system');
var fs = require('fs');
// Report object that is serialised at the end of the script.
var response = {};
// URLs pushed by the onNavigationRequested handler below.
var arrayOfResponses = [];
var pageUrl = 'http://example.com/r1.php';
// Error hook: exits with code 1; `msg` and `trace` are not printed.
phantom.onError = function (msg, trace) {
phantom.exit(1);
};
// Exits with code 0; scheduled via setTimeout near the end of the script.
function forceExit(){
phantom.exit(0);
}
// Appends the requested URL to arrayOfResponses; the remaining arguments
// (type, willNavigate, main) are ignored.
page.onNavigationRequested = function(url, type, willNavigate, main) {
arrayOfResponses.push(url) ;
}
// response.content refers to the same array object, so later pushes are
// visible through response.content as well.
response.content = arrayOfResponses;
// The callback exits PhantomJS for every status: code 1 when status is not
// 'success', code 0 otherwise.
// NOTE(review): the purpose of the extra third argument (100) is unclear from
// this script - confirm against the page.open documentation.
page.open(pageUrl, function(status) {
if ( status !== 'success' ) {
phantom.exit( 1 );
} else {
phantom.exit( 0 );
}
}, 100);
// Schedules forceExit (exit code 0) 2000 ms from now.
setTimeout(forceExit,2000);
// Executes during the initial top-to-bottom run of the script, before the
// timer above can fire; it does not wait for later onNavigationRequested
// calls, so it prints only what has been pushed up to this point.
console.log(JSON.stringify(response));
提前谢谢你。
您的脚本有两个问题:
1. 您让 PhantomJS 在第一个 URL 打开后就过早退出,它没有时间去跟随后续的重定向。
2. 您按从上到下的顺序编写脚本,就好像程序流程是线性/同步的(linear/synchronous),而在 JavaScript 中并非如此——回调是异步触发的,onNavigationRequested
可以被多次调用。
考虑到这一点,让我们重写脚本以收集所有重定向并在 2 秒内没有进行新重定向时退出。
// URL whose chain of navigation requests is being traced.
var pageUrl = 'http://admin.weeqo.com/redirect/r1.php';
// PhantomJS page object used to load pageUrl.
var page = require('webpage').create();
// Every URL handed to onNavigationRequested, in the order it was pushed.
var arrayOfResponses = [];
// Report object; forceExit stores arrayOfResponses in its `content` property
// before printing it.
var response = {};
// Handle of the pending shutdown timer; undefined until one is scheduled.
var exitTimeout;
// Shutdown routine, scheduled through setTimeout by the onNavigationRequested
// handler: prints the page's current URL and the collected URL report, then
// exits PhantomJS with code 0.
function forceExit() {
  // Read the final location from inside the page context.
  var finalUrl = page.evaluate(function() {
    return document.location.href;
  });
  console.log("Final URL is " + finalUrl);

  // Attach the collected URLs to the report object and print it as JSON.
  response.content = arrayOfResponses;
  var report = JSON.stringify(response);
  console.log("List of all requested URLs: " + report);

  // Everything has been written out, so the process can end.
  phantom.exit(0);
}
// Navigation hook: records the requested URL and restarts the two-second
// shutdown countdown, so the script only exits after 2 seconds without any
// new navigation request.
page.onNavigationRequested = function(url, type, willNavigate, main) {
  // Remember and report the URL that is about to be requested.
  arrayOfResponses.push(url);
  console.log("Navigation requested: " + url);

  // A new request arrived, so cancel the shutdown scheduled by the
  // previous call (if there was one)...
  if (exitTimeout) {
    clearTimeout(exitTimeout);
  }
  // ...and schedule a fresh one that fires forceExit in 2000 ms.
  exitTimeout = setTimeout(forceExit, 2000);
};
// Kick off the navigation chain by loading the first URL.
page.open(pageUrl, function(status) {
  // A successful load needs no action here: the exit is driven by the
  // timer that onNavigationRequested keeps re-arming, because it is
  // unknown how long it takes to reach the last redirect.
  if (status === 'success') {
    return;
  }
  // Any other status ends the script with exit code 1.
  phantom.exit(1);
});
我有一个 PhantomJS 脚本,它检查每一次重定向,并通过 page.onNavigationRequested
回调方法在控制台中显示对应的 URL。
但是,当我想把 page.onNavigationRequested
回调方法返回的所有 URL 推送到一个数组中,并在脚本最后一次性显示所有 URL 时,它只显示第一个重定向 URL。
能否请您检查一下脚本并给出建议?
// Script as posted in the question. It registers an onNavigationRequested
// handler that pushes every requested URL into arrayOfResponses and prints
// `response` as JSON once, at the bottom of the script.
var page = require('webpage').create();
// `sys` and `fs` are loaded but never used in this script.
var sys = require('system');
var fs = require('fs');
// Report object that is serialised at the end of the script.
var response = {};
// URLs pushed by the onNavigationRequested handler below.
var arrayOfResponses = [];
var pageUrl = 'http://example.com/r1.php';
// Error hook: exits with code 1; `msg` and `trace` are not printed.
phantom.onError = function (msg, trace) {
phantom.exit(1);
};
// Exits with code 0; scheduled via setTimeout near the end of the script.
function forceExit(){
phantom.exit(0);
}
// Appends the requested URL to arrayOfResponses; the remaining arguments
// (type, willNavigate, main) are ignored.
page.onNavigationRequested = function(url, type, willNavigate, main) {
arrayOfResponses.push(url) ;
}
// response.content refers to the same array object, so later pushes are
// visible through response.content as well.
response.content = arrayOfResponses;
// The callback exits PhantomJS for every status: code 1 when status is not
// 'success', code 0 otherwise.
// NOTE(review): the purpose of the extra third argument (100) is unclear from
// this script - confirm against the page.open documentation.
page.open(pageUrl, function(status) {
if ( status !== 'success' ) {
phantom.exit( 1 );
} else {
phantom.exit( 0 );
}
}, 100);
// Schedules forceExit (exit code 0) 2000 ms from now.
setTimeout(forceExit,2000);
// Executes during the initial top-to-bottom run of the script, before the
// timer above can fire; it does not wait for later onNavigationRequested
// calls, so it prints only what has been pushed up to this point.
console.log(JSON.stringify(response));
提前谢谢你。
您的脚本有两个问题:
1. 您让 PhantomJS 在第一个 URL 打开后就过早退出,它没有时间去跟随后续的重定向。
2. 您按从上到下的顺序编写脚本,就好像程序流程是线性/同步的(linear/synchronous),而在 JavaScript 中并非如此——回调是异步触发的,
onNavigationRequested
可以被多次调用。
考虑到这一点,让我们重写脚本以收集所有重定向并在 2 秒内没有进行新重定向时退出。
// URL whose chain of navigation requests is being traced.
var pageUrl = 'http://admin.weeqo.com/redirect/r1.php';
// PhantomJS page object used to load pageUrl.
var page = require('webpage').create();
// Every URL handed to onNavigationRequested, in the order it was pushed.
var arrayOfResponses = [];
// Report object; forceExit stores arrayOfResponses in its `content` property
// before printing it.
var response = {};
// Handle of the pending shutdown timer; undefined until one is scheduled.
var exitTimeout;
// Shutdown routine, scheduled through setTimeout by the onNavigationRequested
// handler: prints the page's current URL and the collected URL report, then
// exits PhantomJS with code 0.
function forceExit() {
  // Read the final location from inside the page context.
  var finalUrl = page.evaluate(function() {
    return document.location.href;
  });
  console.log("Final URL is " + finalUrl);

  // Attach the collected URLs to the report object and print it as JSON.
  response.content = arrayOfResponses;
  var report = JSON.stringify(response);
  console.log("List of all requested URLs: " + report);

  // Everything has been written out, so the process can end.
  phantom.exit(0);
}
// Navigation hook: records the requested URL and restarts the two-second
// shutdown countdown, so the script only exits after 2 seconds without any
// new navigation request.
page.onNavigationRequested = function(url, type, willNavigate, main) {
  // Remember and report the URL that is about to be requested.
  arrayOfResponses.push(url);
  console.log("Navigation requested: " + url);

  // A new request arrived, so cancel the shutdown scheduled by the
  // previous call (if there was one)...
  if (exitTimeout) {
    clearTimeout(exitTimeout);
  }
  // ...and schedule a fresh one that fires forceExit in 2000 ms.
  exitTimeout = setTimeout(forceExit, 2000);
};
// Kick off the navigation chain by loading the first URL.
page.open(pageUrl, function(status) {
  // A successful load needs no action here: the exit is driven by the
  // timer that onNavigationRequested keeps re-arming, because it is
  // unknown how long it takes to reach the last redirect.
  if (status === 'success') {
    return;
  }
  // Any other status ends the script with exit code 1.
  phantom.exit(1);
});