JSoup:如何列出列表中的链接?
JSoup: how to list links from a list?
如何列出链接,但只列出某个 div 标签内的链接?更具体地说,只列出该 div 中某个列表里的链接?能否以某种方式把选择范围限制到特定元素?
代码:
package my.books;
import java.io.File;
import java.net.URI;
import java.util.Properties;
import java.util.logging.Logger;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
/**
 * Parses a local HTML file with jsoup and logs the text of every {@code li}
 * found under {@code div.side_categories > ul}.
 *
 * <p>The input location is read from the {@code html_input} entry of
 * {@code /properties.xml} on the classpath.
 */
public class App {
    private static final Logger LOG = Logger.getLogger(App.class.getName());
    private final Properties properties = new Properties();

    /**
     * Runs the demo once.
     *
     * @param args ignored
     * @throws Exception if the properties or the HTML input cannot be loaded
     */
    public static void main(String[] args) throws Exception {
        new App().basicJSoup();
    }

    /**
     * Loads the configuration, parses the configured HTML file and logs each
     * list item's text followed by its index.
     *
     * @throws Exception if the properties cannot be read, a configured URI is
     *     invalid, or the HTML file cannot be parsed
     */
    private void basicJSoup() throws Exception {
        properties.loadFromXML(App.class.getResourceAsStream("/properties.xml"));
        LOG.fine(properties.toString());
        URI inputURI = new URI(properties.getProperty("html_input"));
        // Not used further in this method; kept so that an invalid "output"
        // entry still fails here exactly as it did before.
        URI outputURI = new URI(properties.getProperty("output"));
        File input = new File(inputURI);
        Document doc = Jsoup.parse(input, "UTF-8");

        // first() yields null when the selector matches nothing, so guard
        // before dereferencing instead of failing with a NullPointerException.
        Element sideCategories = doc.select("div.side_categories").first();
        if (sideCategories == null) {
            LOG.warning("No div.side_categories element found in " + input);
            return;
        }
        LOG.fine(sideCategories.outerHtml());

        Elements ul = doc.select("div.side_categories > ul");
        Elements li = ul.select("li");
        for (int i = 0; i < li.size(); i++) {
            LOG.info(li.get(i).text());
            LOG.info("i\t\t" + i);
        }
    }
}
如果我没有理解错你的问题,你只需要编写完整、具体的 CSS 选择器,例如 div.side_categories ul li a。
例如:
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
/**
 * Small self-contained demo: parses an inline HTML fragment and prints every
 * anchor matched by the selector {@code div.side_categories ul li a}.
 */
public class JSoupTest {
    /**
     * Builds the sample markup, runs the selector and prints each match on
     * its own line.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        // Same fragments as a single concatenated literal, joined with no separator.
        String html = String.join("",
            "<div class=\"side_categories\">",
            "<ul>",
            "<li>",
            "<a href=\"#\">Link 1</a>",
            "</li>",
            "<li>",
            "<a href=\"#\">Link 2</a>",
            "</li>",
            "</ul>",
            "</div>");

        Document parsed = Jsoup.parse(html);
        Elements anchors = parsed.select("div.side_categories ul li a");
        for (int idx = 0; idx < anchors.size(); idx++) {
            System.out.println(anchors.get(idx));
        }
    }
}
结果:
<a href="#">Link 1</a>
<a href="#">Link 2</a>
如何列出链接,但只列出某个 div 标签内的链接?更具体地说,只列出该 div 中某个列表里的链接?能否以某种方式把选择范围限制到特定元素?
代码:
package my.books;
import java.io.File;
import java.net.URI;
import java.util.Properties;
import java.util.logging.Logger;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
/**
 * Parses a local HTML file with jsoup and logs the text of every {@code li}
 * found under {@code div.side_categories > ul}.
 *
 * <p>The input location is read from the {@code html_input} entry of
 * {@code /properties.xml} on the classpath.
 */
public class App {
    private static final Logger LOG = Logger.getLogger(App.class.getName());
    private final Properties properties = new Properties();

    /**
     * Runs the demo once.
     *
     * @param args ignored
     * @throws Exception if the properties or the HTML input cannot be loaded
     */
    public static void main(String[] args) throws Exception {
        new App().basicJSoup();
    }

    /**
     * Loads the configuration, parses the configured HTML file and logs each
     * list item's text followed by its index.
     *
     * @throws Exception if the properties cannot be read, a configured URI is
     *     invalid, or the HTML file cannot be parsed
     */
    private void basicJSoup() throws Exception {
        properties.loadFromXML(App.class.getResourceAsStream("/properties.xml"));
        LOG.fine(properties.toString());
        URI inputURI = new URI(properties.getProperty("html_input"));
        // Not used further in this method; kept so that an invalid "output"
        // entry still fails here exactly as it did before.
        URI outputURI = new URI(properties.getProperty("output"));
        File input = new File(inputURI);
        Document doc = Jsoup.parse(input, "UTF-8");

        // first() yields null when the selector matches nothing, so guard
        // before dereferencing instead of failing with a NullPointerException.
        Element sideCategories = doc.select("div.side_categories").first();
        if (sideCategories == null) {
            LOG.warning("No div.side_categories element found in " + input);
            return;
        }
        LOG.fine(sideCategories.outerHtml());

        Elements ul = doc.select("div.side_categories > ul");
        Elements li = ul.select("li");
        for (int i = 0; i < li.size(); i++) {
            LOG.info(li.get(i).text());
            LOG.info("i\t\t" + i);
        }
    }
}
如果我没有理解错你的问题,你只需要编写完整、具体的 CSS 选择器,例如 div.side_categories ul li a。
例如:
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
/**
 * Small self-contained demo: parses an inline HTML fragment and prints every
 * anchor matched by the selector {@code div.side_categories ul li a}.
 */
public class JSoupTest {
    /**
     * Builds the sample markup, runs the selector and prints each match on
     * its own line.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        // Same fragments as a single concatenated literal, joined with no separator.
        String html = String.join("",
            "<div class=\"side_categories\">",
            "<ul>",
            "<li>",
            "<a href=\"#\">Link 1</a>",
            "</li>",
            "<li>",
            "<a href=\"#\">Link 2</a>",
            "</li>",
            "</ul>",
            "</div>");

        Document parsed = Jsoup.parse(html);
        Elements anchors = parsed.select("div.side_categories ul li a");
        for (int idx = 0; idx < anchors.size(); idx++) {
            System.out.println(anchors.get(idx));
        }
    }
}
结果:
<a href="#">Link 1</a>
<a href="#">Link 2</a>