HtmlUnit WebClient 会话持续时间
HtmlUnit WebClient Session Duration
我正在制作一个命令行工具,用来多次访问某个网站。我同时使用多个线程访问页面,每个线程通过循环重复访问该网站。工具工作正常,能够按要求访问网站,但唯一的问题是:它打开网站后几秒钟就将其关闭,因此每次访问的会话持续时间只有 3 到 4 秒。我需要将会话持续时间增加到至少 60 秒。下面是我的代码。
package directUrl;
import java.io.IOException;
import java.net.MalformedURLException;
import java.net.URL;
import com.gargoylesoftware.htmlunit.BrowserVersion;
import com.gargoylesoftware.htmlunit.FailingHttpStatusCodeException;
import com.gargoylesoftware.htmlunit.WebClient;
/**
 * Worker thread that visits one URL a fixed number of times using HtmlUnit.
 *
 * <p>Each visit creates a fresh {@link WebClient}, loads the page once, keeps the
 * client open for {@link #VISIT_DURATION_MILLIS}, and then closes it. Any failure
 * (bad URL, I/O error, interruption) ends the remaining visits of this thread.
 */
public class ThreadDirectUrl extends Thread {

    /** Time each visit keeps its client open after the page load, in milliseconds. */
    private static final long VISIT_DURATION_MILLIS = 60000L;

    private final String url;
    private final String paramUserAgent;
    private final String paramReferer;
    private final int loopSize;

    /**
     * @param url            address to visit; must be parseable by {@link URL}
     * @param paramUserAgent user-agent selector, {@code "1"} to {@code "4"}; any other
     *                       value (including {@code null}) yields an empty user-agent string
     * @param paramReferer   value sent in the {@code Referer} request header
     * @param loopSize       number of visits this thread performs
     */
    public ThreadDirectUrl(String url, String paramUserAgent, String paramReferer, int loopSize) {
        this.url = url;
        this.paramUserAgent = paramUserAgent;
        this.paramReferer = paramReferer;
        this.loopSize = loopSize;
    }

    /**
     * Performs {@code loopSize} visits in sequence and reports progress on standard output.
     */
    @Override
    public void run() {
        String userAgent = resolveUserAgent(paramUserAgent);
        BrowserVersion bv = new BrowserVersion("Netscape", "Version", userAgent, 0);
        try {
            URL openUrl = new URL(url);
            for (int i = 1; i <= loopSize; i++) {
                // try-with-resources: the client is closed even when getPage() throws
                // or the sleep is interrupted, so a failed visit no longer leaks it.
                try (WebClient webClient = new WebClient(bv)) {
                    webClient.addRequestHeader("Accept-Encoding", "compress, gzip");
                    webClient.addRequestHeader("Referer", paramReferer);
                    webClient.getOptions().setPrintContentOnFailingStatusCode(true);
                    webClient.getOptions().setThrowExceptionOnFailingStatusCode(false);
                    webClient.getOptions().setThrowExceptionOnScriptError(false);
                    webClient.getOptions().setJavaScriptEnabled(true);
                    webClient.getOptions().setCssEnabled(false);
                    webClient.getOptions().setPopupBlockerEnabled(true);
                    webClient.getOptions().setMaxInMemory(3);
                    webClient.getPage(openUrl);
                    System.out.println(Thread.currentThread().getName() + "----" + i + "----\nSuccess!\nUser Agent: "
                            + bv.getUserAgent() + "\n\n");
                    // Sleeping only keeps the client object alive locally; this thread
                    // issues no further request of its own during the pause.
                    Thread.sleep(VISIT_DURATION_MILLIS);
                    // Drop any JavaScript jobs still registered for the window before closing.
                    webClient.getCurrentWindow().getJobManager().removeAllJobs();
                }
            }
            System.out.println(Thread.currentThread().getName() + "COMPLETED");
        } catch (MalformedURLException e) {
            System.out.println("Error - Use URL with \"http://\" or \"https://\"!");
        } catch (FailingHttpStatusCodeException | IOException | ArrayIndexOutOfBoundsException e) {
            System.out.println("Error!");
            // Surface the cause so a failed run can be diagnosed.
            System.err.println(Thread.currentThread().getName() + ": " + e);
        } catch (InterruptedException e) {
            // Restore the interrupt flag so code further up the stack can observe it.
            Thread.currentThread().interrupt();
            System.out.println(Thread.currentThread().getName() + "Interrupted");
        }
    }

    /**
     * Maps the command-line user-agent selector to a user-agent header value.
     *
     * @param choice selector string, may be {@code null}
     * @return the matching user-agent string, or an empty string for an unknown selector
     */
    private static String resolveUserAgent(String choice) {
        if (choice == null) {
            return "";
        }
        switch (choice) {
            case "1":
                return "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:17.0) Gecko/17.0 Firefox/17.0";
            case "2":
                return "Mozilla/5.0 (Linux; U; Android 2.2; en-us; Nexus One Build/FRF91) AppleWebKit/533.1 (KHTML, like Gecko) Version/4.0 Mobile Safari/533.1";
            case "3":
                return "Mozilla/5.0 (iPhone; U; CPU iPhone OS 4_0 like Mac OS X; en-us) AppleWebKit/532.9 (KHTML, like Gecko) Version/4.0.5 Mobile/8A293 Safari/6531.22.7";
            case "4":
                return "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_8_0) AppleWebKit/537.1 (KHTML, like Gecko) Chrome/21.0.1180.79 Safari/537.1";
            default:
                return "";
        }
    }
}
主类如下:
package directUrl;
import java.util.logging.Level;
import java.util.logging.Logger;
/**
 * Command-line entry point: starts a number of {@code ThreadDirectUrl} workers that
 * each visit the given URL repeatedly.
 */
public class DirectUrl {

    private static final String USAGE =
            "Usage: DirectUrl <url> <userAgentChoice> <refererUrl> <loopSize> <threadCount>";

    /**
     * Parses the five positional arguments and starts the worker threads.
     *
     * @param args {@code args[0]} URL, {@code args[1]} user-agent choice,
     *             {@code args[2]} referrer URL, {@code args[3]} visits per thread,
     *             {@code args[4]} number of threads
     */
    public static void main(String[] args) {
        // Fail with a usage message instead of an ArrayIndexOutOfBoundsException.
        if (args == null || args.length < 5) {
            System.err.println(USAGE);
            System.exit(1);
            return;
        }

        // Production Variables
        String url = args[0]; // URL
        String paramUserAgent = args[1]; // User Agent Choice
        String paramReferer = args[2]; // Referrer URL
        int loopSize; // Loop Size
        int threadSize; // Counts of threads
        try {
            loopSize = Integer.parseInt(args[3]);
            threadSize = Integer.parseInt(args[4]);
        } catch (NumberFormatException e) {
            System.err.println("loopSize and threadCount must be integers: " + e.getMessage());
            System.err.println(USAGE);
            System.exit(1);
            return;
        }

        // Turn off all java.util.logging output via the root logger.
        Logger logger = Logger.getLogger("");
        logger.setLevel(Level.OFF);

        // Create Multiple Threads
        for (int i = 1; i <= threadSize; i++) {
            ThreadDirectUrl aThread = new ThreadDirectUrl(url, paramUserAgent, paramReferer, loopSize);
            aThread.setName("thread" + i);
            aThread.start();
        }
    }
}
在 ThreadDirectUrl 类中,我使用了:
Thread.sleep(60000);
在 getPage() 方法之后,但它不起作用。请提出建议。
如果想让服务器看到会话存活得更久,就需要让客户端在这段时间内继续与服务器交互;仅在本地调用 Thread.sleep() 不会发出任何请求,服务器无从得知客户端仍然在线。
例如再次加载同一页面:
webClient.getPage(openUrl);
Thread.sleep(60000);
// then get the same page again
webClient.getPage(openUrl);
我正在制作一个命令行工具,用来多次访问某个网站。我同时使用多个线程访问页面,每个线程通过循环重复访问该网站。工具工作正常,能够按要求访问网站,但唯一的问题是:它打开网站后几秒钟就将其关闭,因此每次访问的会话持续时间只有 3 到 4 秒。我需要将会话持续时间增加到至少 60 秒。下面是我的代码。
package directUrl;
import java.io.IOException;
import java.net.MalformedURLException;
import java.net.URL;
import com.gargoylesoftware.htmlunit.BrowserVersion;
import com.gargoylesoftware.htmlunit.FailingHttpStatusCodeException;
import com.gargoylesoftware.htmlunit.WebClient;
/**
 * Worker thread that visits one URL a fixed number of times using HtmlUnit.
 *
 * <p>Each visit creates a fresh {@link WebClient}, loads the page once, keeps the
 * client open for {@link #VISIT_DURATION_MILLIS}, and then closes it. Any failure
 * (bad URL, I/O error, interruption) ends the remaining visits of this thread.
 */
public class ThreadDirectUrl extends Thread {

    /** Time each visit keeps its client open after the page load, in milliseconds. */
    private static final long VISIT_DURATION_MILLIS = 60000L;

    private final String url;
    private final String paramUserAgent;
    private final String paramReferer;
    private final int loopSize;

    /**
     * @param url            address to visit; must be parseable by {@link URL}
     * @param paramUserAgent user-agent selector, {@code "1"} to {@code "4"}; any other
     *                       value (including {@code null}) yields an empty user-agent string
     * @param paramReferer   value sent in the {@code Referer} request header
     * @param loopSize       number of visits this thread performs
     */
    public ThreadDirectUrl(String url, String paramUserAgent, String paramReferer, int loopSize) {
        this.url = url;
        this.paramUserAgent = paramUserAgent;
        this.paramReferer = paramReferer;
        this.loopSize = loopSize;
    }

    /**
     * Performs {@code loopSize} visits in sequence and reports progress on standard output.
     */
    @Override
    public void run() {
        String userAgent = resolveUserAgent(paramUserAgent);
        BrowserVersion bv = new BrowserVersion("Netscape", "Version", userAgent, 0);
        try {
            URL openUrl = new URL(url);
            for (int i = 1; i <= loopSize; i++) {
                // try-with-resources: the client is closed even when getPage() throws
                // or the sleep is interrupted, so a failed visit no longer leaks it.
                try (WebClient webClient = new WebClient(bv)) {
                    webClient.addRequestHeader("Accept-Encoding", "compress, gzip");
                    webClient.addRequestHeader("Referer", paramReferer);
                    webClient.getOptions().setPrintContentOnFailingStatusCode(true);
                    webClient.getOptions().setThrowExceptionOnFailingStatusCode(false);
                    webClient.getOptions().setThrowExceptionOnScriptError(false);
                    webClient.getOptions().setJavaScriptEnabled(true);
                    webClient.getOptions().setCssEnabled(false);
                    webClient.getOptions().setPopupBlockerEnabled(true);
                    webClient.getOptions().setMaxInMemory(3);
                    webClient.getPage(openUrl);
                    System.out.println(Thread.currentThread().getName() + "----" + i + "----\nSuccess!\nUser Agent: "
                            + bv.getUserAgent() + "\n\n");
                    // Sleeping only keeps the client object alive locally; this thread
                    // issues no further request of its own during the pause.
                    Thread.sleep(VISIT_DURATION_MILLIS);
                    // Drop any JavaScript jobs still registered for the window before closing.
                    webClient.getCurrentWindow().getJobManager().removeAllJobs();
                }
            }
            System.out.println(Thread.currentThread().getName() + "COMPLETED");
        } catch (MalformedURLException e) {
            System.out.println("Error - Use URL with \"http://\" or \"https://\"!");
        } catch (FailingHttpStatusCodeException | IOException | ArrayIndexOutOfBoundsException e) {
            System.out.println("Error!");
            // Surface the cause so a failed run can be diagnosed.
            System.err.println(Thread.currentThread().getName() + ": " + e);
        } catch (InterruptedException e) {
            // Restore the interrupt flag so code further up the stack can observe it.
            Thread.currentThread().interrupt();
            System.out.println(Thread.currentThread().getName() + "Interrupted");
        }
    }

    /**
     * Maps the command-line user-agent selector to a user-agent header value.
     *
     * @param choice selector string, may be {@code null}
     * @return the matching user-agent string, or an empty string for an unknown selector
     */
    private static String resolveUserAgent(String choice) {
        if (choice == null) {
            return "";
        }
        switch (choice) {
            case "1":
                return "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:17.0) Gecko/17.0 Firefox/17.0";
            case "2":
                return "Mozilla/5.0 (Linux; U; Android 2.2; en-us; Nexus One Build/FRF91) AppleWebKit/533.1 (KHTML, like Gecko) Version/4.0 Mobile Safari/533.1";
            case "3":
                return "Mozilla/5.0 (iPhone; U; CPU iPhone OS 4_0 like Mac OS X; en-us) AppleWebKit/532.9 (KHTML, like Gecko) Version/4.0.5 Mobile/8A293 Safari/6531.22.7";
            case "4":
                return "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_8_0) AppleWebKit/537.1 (KHTML, like Gecko) Chrome/21.0.1180.79 Safari/537.1";
            default:
                return "";
        }
    }
}
主类如下:
package directUrl;
import java.util.logging.Level;
import java.util.logging.Logger;
/**
 * Command-line entry point: starts a number of {@code ThreadDirectUrl} workers that
 * each visit the given URL repeatedly.
 */
public class DirectUrl {

    private static final String USAGE =
            "Usage: DirectUrl <url> <userAgentChoice> <refererUrl> <loopSize> <threadCount>";

    /**
     * Parses the five positional arguments and starts the worker threads.
     *
     * @param args {@code args[0]} URL, {@code args[1]} user-agent choice,
     *             {@code args[2]} referrer URL, {@code args[3]} visits per thread,
     *             {@code args[4]} number of threads
     */
    public static void main(String[] args) {
        // Fail with a usage message instead of an ArrayIndexOutOfBoundsException.
        if (args == null || args.length < 5) {
            System.err.println(USAGE);
            System.exit(1);
            return;
        }

        // Production Variables
        String url = args[0]; // URL
        String paramUserAgent = args[1]; // User Agent Choice
        String paramReferer = args[2]; // Referrer URL
        int loopSize; // Loop Size
        int threadSize; // Counts of threads
        try {
            loopSize = Integer.parseInt(args[3]);
            threadSize = Integer.parseInt(args[4]);
        } catch (NumberFormatException e) {
            System.err.println("loopSize and threadCount must be integers: " + e.getMessage());
            System.err.println(USAGE);
            System.exit(1);
            return;
        }

        // Turn off all java.util.logging output via the root logger.
        Logger logger = Logger.getLogger("");
        logger.setLevel(Level.OFF);

        // Create Multiple Threads
        for (int i = 1; i <= threadSize; i++) {
            ThreadDirectUrl aThread = new ThreadDirectUrl(url, paramUserAgent, paramReferer, loopSize);
            aThread.setName("thread" + i);
            aThread.start();
        }
    }
}
在 ThreadDirectUrl 类中,我使用了:
Thread.sleep(60000);
在 getPage() 方法之后,但它不起作用。请提出建议。
如果想让服务器看到会话存活得更久,就需要让客户端在这段时间内继续与服务器交互;仅在本地调用 Thread.sleep() 不会发出任何请求,服务器无从得知客户端仍然在线。
例如再次加载同一页面:
webClient.getPage(openUrl);
Thread.sleep(60000);
// then get the same page again
webClient.getPage(openUrl);