HtmlUnitDriver 中的黑名单和白名单 URL
Blacklist and whitelist URLs in HtmlUnitDriver
在 PhantomJS 和 GhostDriver 中将 URL 列入黑名单非常简单。首先使用处理程序初始化驱动程序:
// Create the PhantomJS-backed driver.
PhantomJSDriver driver = new PhantomJSDriver();
// Hand the contents of handlers.js to PhantomJS for execution. loadFile is
// not shown in this snippet; presumably it returns the script source as a
// String -- confirm against its definition.
driver.executePhantomJS(loadFile("/phantomjs/handlers.js"));
并配置处理程序:
this.onResourceRequested = function (requestData, networkRequest) {
var allowedUrls = [
/https?:\/\/localhost.*/,
/https?:\/\/.*\.example.com\/?.*/
];
var disallowedUrls = [
/https?:\/\/nonono.com.*/
];
function isUrlAllowed(url) {
function matches(url) {
return function(re) {
return re.test(url);
};
}
return allowedUrls.some(matches(url)) && !disallowedUrls.some(matches(url));
}
if (!isUrlAllowed(requestData.url)) {
console.log("Aborting disallowed request (# " + requestData.id + ") to url: '" + requestData.url + "'");
networkRequest.abort();
}
};
我还没有找到用 HtmlUnitDriver 实现同样功能的好方法。《How to filter javascript from specific urls in HtmlUnit》这个问题中提到了 ScriptPreProcessor,但那里用的是 WebClient,而不是 HtmlUnitDriver。有什么想法吗?
扩展 HtmlUnitDriver,并提供一个 ScriptPreProcessor(用于编辑内容)和一个 HttpWebConnection(用于允许/阻止 URL):
public class FilteringHtmlUnitDriver extends HtmlUnitDriver {
private static final String[] ALLOWED_URLS = {
"https?://localhost.*",
"https?://.*\.yes.yes/?.*",
};
private static final String[] DISALLOWED_URLS = {
"https?://spam.nono.*"
};
public FilteringHtmlUnitDriver(DesiredCapabilities capabilities) {
super(capabilities);
}
@Override
protected WebClient modifyWebClient(WebClient client) {
WebConnection connection = filteringWebConnection(client);
ScriptPreProcessor preProcessor = filteringPreProcessor();
client.setWebConnection(connection);
client.setScriptPreProcessor(preProcessor);
return client;
}
private ScriptPreProcessor filteringPreProcessor() {
return (htmlPage, sourceCode, sourceName, lineNumber, htmlElement) -> editContent(sourceCode);
}
private String editContent(String sourceCode) {
return sourceCode.replaceAll("foo", "bar"); }
private WebConnection filteringWebConnection(WebClient client) {
return new HttpWebConnection(client) {
@Override
public WebResponse getResponse(WebRequest request) throws IOException {
String url = request.getUrl().toString();
WebResponse emptyResponse = new WebResponse(
new WebResponseData("".getBytes(), SC_OK, "", new ArrayList<>()), request, 0);
for (String disallowed : DISALLOWED_URLS) {
if (url.matches(disallowed)) {
return emptyResponse;
}
}
for (String allowed : ALLOWED_URLS) {
if (url.matches(allowed)) {
return super.getResponse(request);
}
}
return emptyResponse;
}
};
}
}
这样既可以编辑内容,也可以阻止 URL。
在 PhantomJS 和 GhostDriver 中将 URL 列入黑名单非常简单。首先使用处理程序初始化驱动程序:
// Create the PhantomJS-backed driver.
PhantomJSDriver driver = new PhantomJSDriver();
// Hand the contents of handlers.js to PhantomJS for execution. loadFile is
// not shown in this snippet; presumably it returns the script source as a
// String -- confirm against its definition.
driver.executePhantomJS(loadFile("/phantomjs/handlers.js"));
并配置处理程序:
this.onResourceRequested = function (requestData, networkRequest) {
var allowedUrls = [
/https?:\/\/localhost.*/,
/https?:\/\/.*\.example.com\/?.*/
];
var disallowedUrls = [
/https?:\/\/nonono.com.*/
];
function isUrlAllowed(url) {
function matches(url) {
return function(re) {
return re.test(url);
};
}
return allowedUrls.some(matches(url)) && !disallowedUrls.some(matches(url));
}
if (!isUrlAllowed(requestData.url)) {
console.log("Aborting disallowed request (# " + requestData.id + ") to url: '" + requestData.url + "'");
networkRequest.abort();
}
};
我还没有找到用 HtmlUnitDriver 实现同样功能的好方法。《How to filter javascript from specific urls in HtmlUnit》这个问题中提到了 ScriptPreProcessor,但那里用的是 WebClient,而不是 HtmlUnitDriver。有什么想法吗?
扩展 HtmlUnitDriver,并提供一个 ScriptPreProcessor(用于编辑内容)和一个 HttpWebConnection(用于允许/阻止 URL):
public class FilteringHtmlUnitDriver extends HtmlUnitDriver {
private static final String[] ALLOWED_URLS = {
"https?://localhost.*",
"https?://.*\.yes.yes/?.*",
};
private static final String[] DISALLOWED_URLS = {
"https?://spam.nono.*"
};
public FilteringHtmlUnitDriver(DesiredCapabilities capabilities) {
super(capabilities);
}
@Override
protected WebClient modifyWebClient(WebClient client) {
WebConnection connection = filteringWebConnection(client);
ScriptPreProcessor preProcessor = filteringPreProcessor();
client.setWebConnection(connection);
client.setScriptPreProcessor(preProcessor);
return client;
}
private ScriptPreProcessor filteringPreProcessor() {
return (htmlPage, sourceCode, sourceName, lineNumber, htmlElement) -> editContent(sourceCode);
}
private String editContent(String sourceCode) {
return sourceCode.replaceAll("foo", "bar"); }
private WebConnection filteringWebConnection(WebClient client) {
return new HttpWebConnection(client) {
@Override
public WebResponse getResponse(WebRequest request) throws IOException {
String url = request.getUrl().toString();
WebResponse emptyResponse = new WebResponse(
new WebResponseData("".getBytes(), SC_OK, "", new ArrayList<>()), request, 0);
for (String disallowed : DISALLOWED_URLS) {
if (url.matches(disallowed)) {
return emptyResponse;
}
}
for (String allowed : ALLOWED_URLS) {
if (url.matches(allowed)) {
return super.getResponse(request);
}
}
return emptyResponse;
}
};
}
}
这样既可以编辑内容,也可以阻止 URL。