JSoup:如何列出列表中的链接?

JSoup: how to list links from a list?

如何列出链接,但仅限于某个 div 标签内的链接?更具体地说,只列出该 div 中某个列表里的链接?能否以某种方式把选择范围限定到特定元素?

代码:

package my.books;

import java.io.File;
import java.net.URI;
import java.util.Properties;
import java.util.logging.Logger;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;

/**
 * Small jsoup example: parses a local HTML file and logs the text of every
 * {@code li} found under {@code div.side_categories > ul}.
 *
 * <p>The location of the HTML file is read from the {@code html_input} entry of
 * the {@code /properties.xml} classpath resource.
 */
public class App {

    private static final Logger LOG = Logger.getLogger(App.class.getName());
    private final Properties properties = new Properties();

    /**
     * Entry point; runs the example once.
     *
     * @param args ignored
     * @throws Exception if the properties or the HTML input cannot be loaded
     */
    public static void main(String[] args) throws Exception {
        new App().basicJSoup();
    }

    /**
     * Loads the configuration, parses the configured HTML file and logs each
     * list item of the side-categories list together with its index.
     *
     * @throws IllegalStateException if the {@code html_input} property is missing
     * @throws Exception if the properties resource or the HTML file cannot be read,
     *         or the configured value is not a valid URI
     */
    private void basicJSoup() throws Exception {
        properties.loadFromXML(App.class.getResourceAsStream("/properties.xml"));
        LOG.fine(properties.toString());

        // Fail with a descriptive message instead of a bare NPE from new URI(null).
        String htmlInput = properties.getProperty("html_input");
        if (htmlInput == null) {
            throw new IllegalStateException("Missing required property: html_input");
        }

        File input = new File(new URI(htmlInput));
        Document doc = Jsoup.parse(input, "UTF-8");

        // first() yields null when nothing matches; without this guard the
        // outerHtml() call below would throw a NullPointerException.
        Element sideCategories = doc.select("div.side_categories").first();
        if (sideCategories == null) {
            LOG.warning("No div.side_categories element found in " + input);
            return;
        }
        LOG.fine(sideCategories.outerHtml());

        // Only <ul> elements that are direct children of the div, then every <li> below them.
        Elements ul = doc.select("div.side_categories > ul");
        Elements li = ul.select("li");

        for (int i = 0; i < li.size(); i++) {
            LOG.info(li.get(i).text());
            LOG.info("i\t\t" + i);
        }
    }

}

如果我没有理解错你的问题,你只需要编写完整、具体的 CSS 选择器,例如 `div.side_categories ul li a`。

例如:

import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;

/**
 * Demonstrates restricting a jsoup selection to the anchors inside the list
 * of a specific {@code div} by using one fully qualified CSS selector.
 */
public class JSoupTest {

    /**
     * Assembles the sample HTML fragment: a {@code div.side_categories}
     * wrapping a {@code ul} with two list items, each holding one link.
     *
     * @return the HTML fragment as a single string
     */
    private static String sampleMarkup() {
        StringBuilder html = new StringBuilder();
        html.append("<div class=\"side_categories\">");
        html.append("<ul>");
        html.append("<li>");
        html.append("<a href=\"#\">Link 1</a>");
        html.append("</li>");
        html.append("<li>");
        html.append("<a href=\"#\">Link 2</a>");
        html.append("</li>");
        html.append("</ul>");
        html.append("</div>");
        return html.toString();
    }

    /**
     * Parses the sample fragment and prints every matching anchor element
     * to standard output, one per line.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        Document parsed = Jsoup.parse(sampleMarkup());
        Elements anchors = parsed.select("div.side_categories ul li a");

        for (int idx = 0; idx < anchors.size(); idx++) {
            Element anchor = anchors.get(idx);
            System.out.println(anchor);
        }
    }
}

结果:

<a href="#">Link 1</a>
<a href="#">Link 2</a>