使用 phantom.js 生成多个 HAR 文件

Using phantom.js to generate multiple HAR files

我正在使用 netsniff.js 中的代码生成 har 文件,我想改进它,以便为数组(在我下面的代码中名为 links)中给出的多个 link 分别生成 har 文件。

这里还有一个问题「Using Multiple page.open in Single Script」可能对我有帮助,但我不知道如何在我的代码中实现其中给出的解决方案。

下面是我的代码(如果 links 数组包含不止一项,它会在输出文件中记录 FAIL to load the address):

"use strict";
/**
 * Fallback implementation of Date.prototype.toISOString.
 *
 * Must be invoked with a Date as `this`. Produces
 * "YYYY-MM-DDTHH:mm:ss.sssZ". The UTC getters are used on purpose: the
 * trailing "Z" marks the timestamp as UTC, so formatting the local-time
 * fields would yield a wrong instant in every non-UTC time zone.
 *
 * @returns {string} The date formatted as an ISO-8601 UTC timestamp.
 */
function legacyToISOString() {
    // Left-pad to two digits (month, day, hours, minutes, seconds).
    function pad(n) { return n < 10 ? '0' + n : '' + n; }
    // Left-pad to three digits (milliseconds).
    function ms(n) { return n < 10 ? '00' + n : n < 100 ? '0' + n : '' + n; }

    return this.getUTCFullYear() + '-' +
        pad(this.getUTCMonth() + 1) + '-' +   // getUTCMonth() is zero-based
        pad(this.getUTCDate()) + 'T' +
        pad(this.getUTCHours()) + ':' +
        pad(this.getUTCMinutes()) + ':' +
        pad(this.getUTCSeconds()) + '.' +
        ms(this.getUTCMilliseconds()) + 'Z';
}

// Install the fallback only when the runtime lacks a native implementation.
if (!Date.prototype.toISOString) {
    Date.prototype.toISOString = legacyToISOString;
}
// Script-level array kept so code that assigns to `entries` (processSites
// does so under strict mode) keeps working. createHAR does not accumulate
// into it.
var entries = [];

/**
 * Builds a HAR 1.2 log object for one page load.
 *
 * Entries are collected in an array local to each call, so the HAR of one
 * page never contains requests recorded for a previously processed page.
 *
 * Reads two names from the enclosing script: `phantom.version` for the
 * creator field and `page.startTime` / `page.endTime` for the onLoad timing.
 *
 * @param {string} address   Page address; used as the page id and as every entry's pageref.
 * @param {string} title     Page title.
 * @param {Date}   startTime Start of the page load (toISOString() is called on it).
 * @param {Array}  resources Records of the form { request, startReply, endReply };
 *                           may be sparse, holes are skipped by forEach.
 * @returns {Object} An object with a single `log` property holding the HAR data.
 */
function createHAR(address, title, startTime, resources)
{
    var pageEntries = [];

    resources.forEach(function (resource) {
        var request = resource.request,
            startReply = resource.startReply,
            endReply = resource.endReply;

        // Skip resources whose request/response cycle is incomplete.
        if (!request || !startReply || !endReply) {
            return;
        }

        // Exclude Data URI from HAR file because
        // they aren't included in specification
        if (request.url.match(/(^data:image\/.*)/i)) {
            return;
        }

        pageEntries.push({
            startedDateTime: request.time.toISOString(),
            time: endReply.time - request.time,
            request: {
                method: request.method,
                url: request.url,
                httpVersion: "HTTP/1.1",
                cookies: [],
                headers: request.headers,
                queryString: [],
                headersSize: -1,
                bodySize: -1
            },
            response: {
                status: endReply.status,
                statusText: endReply.statusText,
                httpVersion: "HTTP/1.1",
                cookies: [],
                headers: endReply.headers,
                redirectURL: "",
                headersSize: -1,
                bodySize: startReply.bodySize,
                content: {
                    size: startReply.bodySize,
                    mimeType: endReply.contentType
                }
            },
            cache: {},
            timings: {
                blocked: 0,
                dns: -1,
                connect: -1,
                send: 0,
                wait: startReply.time - request.time,
                receive: endReply.time - startReply.time,
                ssl: -1
            },
            pageref: address
        });
    });

    return {
        log: {
            version: '1.2',
            creator: {
                name: "PhantomJS",
                version: phantom.version.major + '.' + phantom.version.minor +
                    '.' + phantom.version.patch
            },
            pages: [{
                startedDateTime: startTime.toISOString(),
                id: address,
                title: title,
                pageTimings: {
                    // Taken from the shared `page` object, not from the arguments.
                    onLoad: page.endTime - page.startTime
                }
            }],
            entries: pageEntries
        }
    };
}
// Script-level state shared by createHAR and processSites.
var page = require('webpage').create(),  // one WebPage instance, reused for every URL
    fs = require('fs'),                   // file-system module used to write the .har files
    count = 0;                            // sites started so far; used to number output files
/**
 * Loads the URLs in `links` one at a time and writes one HAR file per page.
 *
 * The queue is consumed from the end (`links.pop()`), so the caller's array
 * is emptied and the last URL is loaded first. Each page is written to
 * 'file<count>.har'. The next URL is only opened after the current one has
 * been written, because page.open is asynchronous. When the queue is
 * exhausted the script exits; if a page fails to load the script exits with
 * code 1 and the remaining URLs are not processed.
 *
 * @param {Array} links Queue of URLs to capture; mutated by this function.
 */
function processSites(links)
{
    // Nothing (left) to load: finish cleanly instead of opening `undefined`.
    if (!links || links.length === 0) {
        phantom.exit();
        return;
    }

    page.address = links.pop();
    var path = 'file' + count + '.har';
    page.resources = [];
    console.log("page resources:", page.resources);
    count = count + 1;

    page.onLoadStarted = function () {
        page.startTime = new Date();
    };

    // Resources are indexed by request id, so the array may be sparse.
    page.onResourceRequested = function (req) {
        page.resources[req.id] = {
            request: req,
            startReply: null,
            endReply: null
        };
    };

    page.onResourceReceived = function (res) {
        var record = page.resources[res.id];

        // A response whose request was never recorded for this page
        // has no slot to fill; ignore it instead of throwing.
        if (!record) {
            return;
        }
        if (res.stage === 'start') {
            record.startReply = res;
        }
        if (res.stage === 'end') {
            record.endReply = res;
        }
    };

    page.open(page.address, function (status) {
        var har;
        // Wait 10 s after the load callback before building the HAR.
        setTimeout(function () {
            if (status !== 'success') {
                console.log('FAIL to load the address');
                phantom.exit(1);
            } else {
                page.endTime = new Date();
                page.title = page.evaluate(function () {
                    return document.title;
                });
                // Reset the shared entry list so this HAR cannot pick up
                // entries recorded for a previously processed page.
                entries = [];
                har = createHAR(page.address, page.title, page.startTime, page.resources);
                fs.write(path, JSON.stringify(har), 'w');

                if (links.length > 0) {
                    processSites(links);
                } else {
                    phantom.exit();
                }
            }
        }, 10000);
    });
}

// Sites to capture. processSites() pops from the end of this array,
// so the last URL listed is the first one loaded.
var links = [
    "http://whosebug.com",
    "http://marvel.com"
];

// Kick off the sequential capture.
processSites(links);

更新:
上面的代码生成了两个 har 文件 file1.har 和 file2.har,但是第二个 har 文件同时包含了由两个 link 生成的 har 内容,而它本应只包含第一个 link 的 har 内容……

通过设置 var har = " " 解决了这个问题。

您不能在简单的循环中重复打开 PhantomJS 中的页面,因为 page.open 方法是异步的。它不会等待处理第一个站点,立即打开第二个站点。

我已经重写了您的脚本以使用递归:下一个站点只有在处理当前站点后才会打开。 (注意:如果队列中的任何站点加载失败,整个过程将停止,但您可以轻松重写脚本来避免这种情况)。

// Polyfill guard: Date.prototype.toISOString is assigned only when the
// runtime does not already provide it. The function body is elided in
// this excerpt.
if (!Date.prototype.toISOString) {
    Date.prototype.toISOString = function () {
        // ...
    }
}

// Script-level array of HAR entries.
var entries = [];

// HAR builder taking the page address, title, load start time and the
// recorded resources. Its body is elided in this excerpt.
function createHAR(address, title, startTime, resources)
{
    // ...
}

// Single WebPage instance; processSites reuses it for every URL.
var page = require('webpage').create()

/**
 * Loads the URLs in `links` one at a time and prints one HAR per page.
 *
 * page.open is asynchronous, so the next URL is opened recursively only
 * after the current page has been handled. The queue is consumed from the
 * end (`links.pop()`), so the caller's array is emptied and the last URL is
 * loaded first. When the queue is exhausted the script exits; if a page
 * fails to load the script exits with code 1 and the remaining URLs are not
 * processed.
 *
 * @param {Array} links Queue of URLs to capture; mutated by this function.
 */
function processSites(links)
{
    // Nothing (left) to load: finish cleanly instead of opening `undefined`.
    if (!links || links.length === 0) {
        phantom.exit();
        return;
    }

    page.address = links.pop();

    console.log("PAGE ADDRESS: ", page.address);
    page.resources = [];

    page.onLoadStarted = function () {
        page.startTime = new Date();
    };

    // Resources are indexed by request id, so the array may be sparse.
    page.onResourceRequested = function (req) {
        page.resources[req.id] = {
            request: req,
            startReply: null,
            endReply: null
        };
    };

    page.onResourceReceived = function (res) {
        var record = page.resources[res.id];

        // A response whose request was never recorded for this page
        // has no slot to fill; ignore it instead of throwing.
        if (!record) {
            return;
        }
        if (res.stage === 'start') {
            record.startReply = res;
        }
        if (res.stage === 'end') {
            record.endReply = res;
        }
    };

    page.open(page.address, function (status) {
        var har;
        // Wait 10 s after the load callback before building the HAR.
        setTimeout(function () {
            if (status !== 'success') {
                console.log('FAIL to load the address');
                phantom.exit(1);
            } else {
                page.endTime = new Date();
                page.title = page.evaluate(function () {
                    return document.title;
                });
                // Reset the shared entry list so this page's HAR does not
                // repeat entries recorded for a previously processed page.
                entries = [];
                har = createHAR(page.address, page.title, page.startTime, page.resources);
                console.log(JSON.stringify(har, undefined, 4));

                if (links.length > 0) {
                    processSites(links);
                } else {
                    phantom.exit();
                }
            }
        }, 10000);
    });
}

// Sites to capture. processSites() pops from the end of this array,
// so the last URL listed is the first one loaded.
var links = [
    "http://edition.cnn.com",
    "http://whosebug.com"
];

// Kick off the sequential capture.
processSites(links);