在 Jsoup 递归抓取中使用 Executors(线程池)

Executors recursive with Jsoup

我想使用 ExecutorService,按照给定的选择器层级,从一个 URL 开始递归读取链接路径。但我在递归调用这一步遇到了困难:getRecursive(element.attr("abs:href"), level);

/**
 * Crawls a site to a fixed depth: at each depth the page is fetched with Jsoup,
 * the links matching that depth's CSS selector are printed, and each link is
 * followed on a thread pool.
 *
 * NOTE(review): as written this class does not compile -- see the comment on the
 * recursive call inside run().
 */
public class NewClass {

    // One CSS selector per recursion depth; the array length is the maximum depth.
    static String levels[] = {"div.col-md-9 li a", "div#sidebar ul li a"};

    /**
     * Fetches {@code href}, selects the links for the current depth and schedules
     * a task per link that prints it and recurses one level deeper.
     *
     * @param href  URL of the page to fetch
     * @param level index into {@code levels}; the call returns immediately once
     *              it is past the last index
     */
    private void getRecursive(String href,int level) {

        // Stop once there is no selector left for this depth.
        if (level > levels.length - 1) {
            return;
        }

        Document doc;
        try {
            doc = Jsoup.connect(href).get();
            Elements elements = doc.select(levels[level]);

            // Reassigning the parameter here means it is neither final nor
            // effectively final from this point on.
            level++;
            // A new 5-thread pool is created on every call (so the total number of
            // threads is not bounded by 5), and no pool is ever shut down.
            ExecutorService executor = Executors.newFixedThreadPool(5);
            for (final Element element : elements) {
                executor.execute(new Runnable() {

                    @Override
                    public void run() {
                        // Skip anchors that have no href attribute value.
                        if (!element.attr("href").isEmpty()) {
                            String links = "";
                            // "abs:href" asks Jsoup for the href resolved to an absolute URL.
                            links += element.attr("abs:href") + "\n";
                            System.out.println(links);
                            // Compile error: `level` is captured by this anonymous class
                            // but was modified above, so it is not (effectively) final.
                            getRecursive(element.attr("abs:href"), level);
                        }
                    }
                });
            }
        } catch (IOException e1) {
            // A failed fetch is reported and this branch of the crawl is abandoned.
            e1.printStackTrace();
        }
    }

    /** Starts the crawl at the site root with depth 0. */
    public static void main(String[] args) {
        new NewClass().getRecursive("http://www.java2s.com/", 0);
    }
}

level 变量必须是 final(Java 8 起也可以是"等效 final",即赋值后不再被修改),才能在匿名内部类的 run() 方法中访问;而您的代码在捕获它之前执行了 level++,所以无法编译。此外,您也没有正确使用 ExecutorService:程序的线程数并不会被限制在 5 个以内,因为每次调用 getRecursive(...) 方法都会创建一个新的 ExecutorService(即一个新的线程池)。

要让每一层递归都共用同一个 ExecutorService,可以把它作为参数传递下去,例如:

/**
 * Crawls a site to a fixed depth using one shared thread pool.
 *
 * <p>At each depth the page is fetched with Jsoup, the links matching that
 * depth's CSS selector are recorded and printed, and each link is followed by a
 * task submitted to the shared {@link ExecutorService}. The pool is shut down
 * automatically once the last outstanding task has finished, so the JVM can exit.
 */
public class NewClass {

    /** One CSS selector per recursion depth; the array length is the maximum depth. */
    static String levels[] = { "div.col-md-9 li a", "div#sidebar ul li a" };

    /**
     * All links found so far, each terminated by a newline. Worker threads append
     * to it concurrently, so it is only written while holding {@code LINKS_LOCK}.
     */
    static String links = "";

    /** Guards read-modify-write updates of {@link #links}. */
    private static final Object LINKS_LOCK = new Object();

    /**
     * Number of units of work (the root call plus every submitted task) that have
     * not finished yet. Whoever brings it to zero shuts the pool down. A child
     * task is always counted before its parent is released, so the count cannot
     * reach zero while work is still being scheduled.
     */
    private final java.util.concurrent.atomic.AtomicInteger pending =
            new java.util.concurrent.atomic.AtomicInteger();

    /**
     * Fetches {@code href}, selects the links for the current depth and submits
     * one task per link that records the link and recurses one level deeper.
     *
     * @param href     URL of the page to fetch
     * @param level    index into {@code levels}; the call returns immediately once
     *                 it is past the last index
     * @param executor pool shared by every level of the recursion
     */
    private void getRecursive(String href, int level, final ExecutorService executor) {

        if (level > levels.length - 1) {
            return;
        }

        final Elements elements;
        try {
            Document doc = Jsoup.connect(href).get();
            elements = doc.select(levels[level]);
        } catch (IOException e1) {
            // A failed fetch is reported and this branch of the crawl is abandoned.
            e1.printStackTrace();
            return;
        }

        final int nextLevel = level + 1;
        for (Element element : elements) {
            if (element.attr("href").isEmpty()) {
                continue;
            }
            // Resolve the URL here so tasks carry a plain String instead of sharing
            // DOM nodes across threads; an empty result means Jsoup could not build
            // an absolute URL, which Jsoup.connect would reject.
            final String target = element.attr("abs:href");
            if (target.isEmpty()) {
                continue;
            }

            pending.incrementAndGet();
            try {
                executor.execute(new Runnable() {
                    @Override
                    public void run() {
                        try {
                            record(target);
                            getRecursive(target, nextLevel, executor);
                        } finally {
                            finishOne(executor);
                        }
                    }
                });
            } catch (RuntimeException e) {
                // The task was never accepted, so it will never release its own count.
                pending.decrementAndGet();
                throw e;
            }
        }
    }

    /** Appends one link to {@link #links} atomically and prints just that link. */
    private static void record(String link) {
        String line = link + "\n";
        synchronized (LINKS_LOCK) {
            links += line;
        }
        System.out.println(line);
    }

    /** Releases one unit of work and shuts the pool down when none remain. */
    private void finishOne(ExecutorService executor) {
        if (pending.decrementAndGet() == 0) {
            // shutdown() does not block, so it is safe to call from a worker thread.
            executor.shutdown();
        }
    }

    /** Starts the crawl at the site root with depth 0 on a 5-thread pool. */
    public static void main(String[] args) {
        ExecutorService executor = Executors.newFixedThreadPool(5);
        NewClass crawler = new NewClass();
        // Count the root call itself so the pool cannot be shut down while the
        // first level of tasks is still being submitted.
        crawler.pending.incrementAndGet();
        try {
            crawler.getRecursive("http://www.java2s.com/", 0, executor);
        } finally {
            crawler.finishOne(executor);
        }
    }
}