结合 Jsoup 递归使用 Executor
Executors recursive with Jsoup
我想使用 ExecutorService,按给定的格式从某个 URL 开始递归读取链接路径,但在递归调用下面这个方法时遇到了困难:
getRecursive(element.attr("abs:href"), level);
/**
 * Crawls links starting from a root URL, applying one CSS selector per depth level
 * and scheduling the fetch of each discovered link on a thread pool.
 *
 * NOTE(review): this version does not compile; see the comments on {@code level}
 * inside {@code getRecursive}.
 */
public class NewClass {
// CSS selectors indexed by recursion depth: levels[0] is applied to the start page,
// levels[1] to the pages reached from it.
static String levels[] = {"div.col-md-9 li a", "div#sidebar ul li a"};
/**
 * Fetches {@code href}, selects the anchors matching {@code levels[level]}, and
 * submits one task per anchor that prints the link and recurses one level deeper.
 *
 * @param href  URL of the page to fetch
 * @param level index into {@code levels}; the method returns immediately once it
 *              is past the last index
 */
private void getRecursive(String href,int level) {
// Depth limit: there is no selector for this level, so stop recursing.
if (level > levels.length - 1) {
return;
}
Document doc;
try {
doc = Jsoup.connect(href).get();
Elements elements = doc.select(levels[level]);
// Reassigning the parameter here makes it neither final nor effectively final,
// which is what breaks the capture inside the anonymous Runnable below.
level++;
// A new 5-thread pool is created on every call, so the total number of threads
// is not bounded by 5. The pool is never shut down in this method.
ExecutorService executor = Executors.newFixedThreadPool(5);
for (final Element element : elements) {
executor.execute(new Runnable() {
@Override
public void run() {
// Skip anchors that have no href attribute.
if (!element.attr("href").isEmpty()) {
// links is local to the task, so each task prints only its own link.
String links = "";
// "abs:href" resolves the href to an absolute URL.
links += element.attr("abs:href") + "\n";
System.out.println(links);
// Compile error: an anonymous inner class can only capture local variables
// and parameters that are final (or effectively final since Java 8).
getRecursive(element.attr("abs:href"), level);
}
}
});
}
} catch (IOException e1) {
// A failed fetch is reported and this branch of the crawl is abandoned.
e1.printStackTrace();
}
}
// Entry point: starts the crawl at depth 0 from the site root.
public static void main(String[] args) {
new NewClass().getRecursive("http://www.java2s.com/", 0);
}
}
level 变量必须声明为 final,才能在 run() 方法中访问它。另外,您并没有正确使用 ExecutorService:由于每次调用 getRecursive(...) 方法时都会创建一个新的 ExecutorService,您的程序实际上并不会被限制在 5 个线程以内。
要让每一层递归都使用同一个 ExecutorService,您可以这样做:
/**
 * Crawls a site to a fixed depth, applying one CSS selector per level, and runs
 * every page fetch on a single shared {@link ExecutorService}.
 *
 * <p>The number of submitted-but-unfinished tasks is tracked so that the pool can be
 * shut down as soon as the crawl is complete. Without that, the pool's non-daemon
 * worker threads would keep the JVM alive indefinitely.
 */
public class NewClass {
    /**
     * CSS selectors indexed by recursion depth: {@code levels[0]} is applied to the
     * start page, {@code levels[1]} to the pages reached from it.
     */
    static String levels[] = { "div.col-md-9 li a", "div#sidebar ul li a" };

    /** Newline-separated list of every link found so far. Guarded by {@code LINKS_LOCK}. */
    static String links = "";

    /** Serializes updates to {@link #links}, which worker threads modify concurrently. */
    private static final Object LINKS_LOCK = new Object();

    /** Units of work (the root call plus every submitted task) that have not finished yet. */
    private final java.util.concurrent.atomic.AtomicInteger pending =
            new java.util.concurrent.atomic.AtomicInteger();

    /**
     * Fetches {@code href}, selects the anchors matching {@code levels[level]}, and
     * submits one task per anchor that records the link and recurses one level deeper.
     *
     * @param href     URL of the page to fetch
     * @param level    index into {@code levels}; the method returns immediately once it
     *                 is past the last index
     * @param executor pool shared by every level of the recursion
     */
    private void getRecursive(String href, int level, final ExecutorService executor) {
        if (level > levels.length - 1) {
            return;
        }
        Document doc;
        try {
            doc = Jsoup.connect(href).get();
        } catch (IOException e1) {
            // Report which page failed and abandon only this branch of the crawl.
            System.err.println("Failed to fetch " + href + ": " + e1);
            return;
        }
        Elements elements = doc.select(levels[level]);
        final int flevel = level + 1;
        for (final Element element : elements) {
            if (element.attr("href").isEmpty()) {
                continue;
            }
            // Resolve the absolute URL here so the task captures only an immutable String.
            final String target = element.attr("abs:href");
            // Count the task before submitting it: the parent is still counted at this
            // point, so the total cannot reach zero while work remains.
            pending.incrementAndGet();
            executor.execute(new Runnable() {
                @Override
                public void run() {
                    try {
                        // String += is a read-modify-write; without the lock concurrent
                        // tasks could lose updates. Prints the full accumulated list.
                        synchronized (LINKS_LOCK) {
                            links += target + "\n";
                            System.out.println(links);
                        }
                        getRecursive(target, flevel, executor);
                    } finally {
                        taskFinished(executor);
                    }
                }
            });
        }
    }

    /** Marks one unit of work as done and shuts the pool down when none remain. */
    private void taskFinished(ExecutorService executor) {
        if (pending.decrementAndGet() == 0) {
            executor.shutdown();
        }
    }

    /** Entry point: crawls from the site root on a pool of five threads. */
    public static void main(String[] args) {
        ExecutorService executor = Executors.newFixedThreadPool(5);
        NewClass crawler = new NewClass();
        // The root fetch runs on the main thread but counts as in-flight work, so the
        // pool is shut down even when the start page yields no links.
        crawler.pending.incrementAndGet();
        try {
            crawler.getRecursive("http://www.java2s.com/", 0, executor);
        } finally {
            crawler.taskFinished(executor);
        }
    }
}
我想使用 ExecutorService,按给定的格式从某个 URL 开始递归读取链接路径,但在递归调用 getRecursive(element.attr("abs:href"), level); 时遇到了困难。
/**
 * Crawls links starting from a root URL, applying one CSS selector per depth level
 * and scheduling the fetch of each discovered link on a thread pool.
 *
 * NOTE(review): this version does not compile; see the comments on {@code level}
 * inside {@code getRecursive}.
 */
public class NewClass {
// CSS selectors indexed by recursion depth: levels[0] is applied to the start page,
// levels[1] to the pages reached from it.
static String levels[] = {"div.col-md-9 li a", "div#sidebar ul li a"};
/**
 * Fetches {@code href}, selects the anchors matching {@code levels[level]}, and
 * submits one task per anchor that prints the link and recurses one level deeper.
 *
 * @param href  URL of the page to fetch
 * @param level index into {@code levels}; the method returns immediately once it
 *              is past the last index
 */
private void getRecursive(String href,int level) {
// Depth limit: there is no selector for this level, so stop recursing.
if (level > levels.length - 1) {
return;
}
Document doc;
try {
doc = Jsoup.connect(href).get();
Elements elements = doc.select(levels[level]);
// Reassigning the parameter here makes it neither final nor effectively final,
// which is what breaks the capture inside the anonymous Runnable below.
level++;
// A new 5-thread pool is created on every call, so the total number of threads
// is not bounded by 5. The pool is never shut down in this method.
ExecutorService executor = Executors.newFixedThreadPool(5);
for (final Element element : elements) {
executor.execute(new Runnable() {
@Override
public void run() {
// Skip anchors that have no href attribute.
if (!element.attr("href").isEmpty()) {
// links is local to the task, so each task prints only its own link.
String links = "";
// "abs:href" resolves the href to an absolute URL.
links += element.attr("abs:href") + "\n";
System.out.println(links);
// Compile error: an anonymous inner class can only capture local variables
// and parameters that are final (or effectively final since Java 8).
getRecursive(element.attr("abs:href"), level);
}
}
});
}
} catch (IOException e1) {
// A failed fetch is reported and this branch of the crawl is abandoned.
e1.printStackTrace();
}
}
// Entry point: starts the crawl at depth 0 from the site root.
public static void main(String[] args) {
new NewClass().getRecursive("http://www.java2s.com/", 0);
}
}
level 变量必须声明为 final,才能在 run() 方法中访问它。另外,您并没有正确使用 ExecutorService:由于每次调用 getRecursive(...) 方法时都会创建一个新的 ExecutorService,您的程序实际上并不会被限制在 5 个线程以内。
要让每一层递归都使用同一个 ExecutorService,您可以这样做:
/**
 * Crawls a site to a fixed depth, applying one CSS selector per level, and runs
 * every page fetch on a single shared {@link ExecutorService}.
 *
 * <p>The number of submitted-but-unfinished tasks is tracked so that the pool can be
 * shut down as soon as the crawl is complete. Without that, the pool's non-daemon
 * worker threads would keep the JVM alive indefinitely.
 */
public class NewClass {
    /**
     * CSS selectors indexed by recursion depth: {@code levels[0]} is applied to the
     * start page, {@code levels[1]} to the pages reached from it.
     */
    static String levels[] = { "div.col-md-9 li a", "div#sidebar ul li a" };

    /** Newline-separated list of every link found so far. Guarded by {@code LINKS_LOCK}. */
    static String links = "";

    /** Serializes updates to {@link #links}, which worker threads modify concurrently. */
    private static final Object LINKS_LOCK = new Object();

    /** Units of work (the root call plus every submitted task) that have not finished yet. */
    private final java.util.concurrent.atomic.AtomicInteger pending =
            new java.util.concurrent.atomic.AtomicInteger();

    /**
     * Fetches {@code href}, selects the anchors matching {@code levels[level]}, and
     * submits one task per anchor that records the link and recurses one level deeper.
     *
     * @param href     URL of the page to fetch
     * @param level    index into {@code levels}; the method returns immediately once it
     *                 is past the last index
     * @param executor pool shared by every level of the recursion
     */
    private void getRecursive(String href, int level, final ExecutorService executor) {
        if (level > levels.length - 1) {
            return;
        }
        Document doc;
        try {
            doc = Jsoup.connect(href).get();
        } catch (IOException e1) {
            // Report which page failed and abandon only this branch of the crawl.
            System.err.println("Failed to fetch " + href + ": " + e1);
            return;
        }
        Elements elements = doc.select(levels[level]);
        final int flevel = level + 1;
        for (final Element element : elements) {
            if (element.attr("href").isEmpty()) {
                continue;
            }
            // Resolve the absolute URL here so the task captures only an immutable String.
            final String target = element.attr("abs:href");
            // Count the task before submitting it: the parent is still counted at this
            // point, so the total cannot reach zero while work remains.
            pending.incrementAndGet();
            executor.execute(new Runnable() {
                @Override
                public void run() {
                    try {
                        // String += is a read-modify-write; without the lock concurrent
                        // tasks could lose updates. Prints the full accumulated list.
                        synchronized (LINKS_LOCK) {
                            links += target + "\n";
                            System.out.println(links);
                        }
                        getRecursive(target, flevel, executor);
                    } finally {
                        taskFinished(executor);
                    }
                }
            });
        }
    }

    /** Marks one unit of work as done and shuts the pool down when none remain. */
    private void taskFinished(ExecutorService executor) {
        if (pending.decrementAndGet() == 0) {
            executor.shutdown();
        }
    }

    /** Entry point: crawls from the site root on a pool of five threads. */
    public static void main(String[] args) {
        ExecutorService executor = Executors.newFixedThreadPool(5);
        NewClass crawler = new NewClass();
        // The root fetch runs on the main thread but counts as in-flight work, so the
        // pool is shut down even when the start page yields no links.
        crawler.pending.incrementAndGet();
        try {
            crawler.getRecursive("http://www.java2s.com/", 0, executor);
        } finally {
            crawler.taskFinished(executor);
        }
    }
}