如何使用 Jsoup 从 html 页面检索代码片段?
How to retrieve a code snippet from html page with Jsoup?
我想用 Jsoup 解析 Html 页面。
html.page
<html>
<head></head>
<body>
<div id="1">SomeText</div>
<script>(function(a, b)){var fjs = a.getElementsByTagNames(b)[0]; … }
</script>
<div class="class1">SomeText</div>
<div class="class2">SomeText</div>
<script>(function(c, d)){var fjs = c.getElementsByTagNames(d)[0]; … }
</script>
<div class="class3">SomeText</div>
<div class="class4">SomeText</div>
</body>
</html>
为了检索一些信息,我编写了代码:
// Writes the outer HTML of a few selected elements to the file at filePath,
// one selection per line.
// NOTE(review): `document` is presumably the Jsoup Document parsed earlier; it
// is defined outside this snippet.
File input = new File(filePath);
// try-with-resources closes (and thereby flushes) the writer even when a lookup
// or write below throws, e.g. if getElementById("1") returns null.
// PrintWriter(File, String) truncates an existing file to zero size.
try (PrintWriter writer = new PrintWriter(input, "UTF-8")) {
    writer.write(document.getElementById("1").outerHtml() + "\n");
    // Only the <div> elements are selected here, so no <script> element is written.
    writer.write(document.getElementsByClass("class1").outerHtml() + "\n");
    writer.write(document.getElementsByClass("class2").outerHtml() + "\n");
}
文件输出内容为:
<div id="1">SomeText</div>
<div class="class1">SomeText</div>
<div class="class2">SomeText</div>
怎样才能让输出文件的内容如下所示?
<div id="1">SomeText</div>
<script>(function(a, b)){var fjs = a.getElementsByTagNames(b)[0]; … }
</script>
<div class="class1">SomeText</div>
<div class="class2">SomeText</div>
尝试使用 getElementsByTag 获取 script 元素,并在文件中所需的位置写入结果:
// Writes the element with id "1", then the matching <script>, then the
// class1/class2 elements to the file at filePath, in that order.
// NOTE(review): `document` is presumably the Jsoup Document parsed earlier; it
// is defined outside this snippet.
File input = new File(filePath);
// try-with-resources closes (and thereby flushes) the writer even when a lookup
// or write below throws, e.g. if getElementById("1") returns null.
// PrintWriter(File, String) truncates an existing file to zero size.
try (PrintWriter writer = new PrintWriter(input, "UTF-8")) {
    writer.write(document.getElementById("1").outerHtml() + "\n");
    Elements scripts = document.getElementsByTag("script");
    for (Element script : scripts) {
        // Keep only the script whose data begins with the wanted function
        // header; other <script> elements are skipped.
        if (script.data().startsWith("(function(a, b)")) {
            writer.write(script.outerHtml() + "\n");
        }
    }
    writer.write(document.getElementsByClass("class1").outerHtml() + "\n");
    writer.write(document.getElementsByClass("class2").outerHtml() + "\n");
}
我想用 Jsoup 解析 Html 页面。
html.page
<html>
<head></head>
<body>
<div id="1">SomeText</div>
<script>(function(a, b)){var fjs = a.getElementsByTagNames(b)[0]; … }
</script>
<div class="class1">SomeText</div>
<div class="class2">SomeText</div>
<script>(function(c, d)){var fjs = c.getElementsByTagNames(d)[0]; … }
</script>
<div class="class3">SomeText</div>
<div class="class4">SomeText</div>
</body>
</html>
为了检索一些信息,我编写了代码:
// Writes the outer HTML of a few selected elements to the file at filePath,
// one selection per line.
// NOTE(review): `document` is presumably the Jsoup Document parsed earlier; it
// is defined outside this snippet.
File input = new File(filePath);
// try-with-resources closes (and thereby flushes) the writer even when a lookup
// or write below throws, e.g. if getElementById("1") returns null.
// PrintWriter(File, String) truncates an existing file to zero size.
try (PrintWriter writer = new PrintWriter(input, "UTF-8")) {
    writer.write(document.getElementById("1").outerHtml() + "\n");
    // Only the <div> elements are selected here, so no <script> element is written.
    writer.write(document.getElementsByClass("class1").outerHtml() + "\n");
    writer.write(document.getElementsByClass("class2").outerHtml() + "\n");
}
文件输出内容为:
<div id="1">SomeText</div>
<div class="class1">SomeText</div>
<div class="class2">SomeText</div>
怎样才能让输出文件的内容如下所示?
<div id="1">SomeText</div>
<script>(function(a, b)){var fjs = a.getElementsByTagNames(b)[0]; … }
</script>
<div class="class1">SomeText</div>
<div class="class2">SomeText</div>
尝试使用 getElementsByTag 获取 script 元素,并在文件中所需的位置写入结果:
// Writes the element with id "1", then the matching <script>, then the
// class1/class2 elements to the file at filePath, in that order.
// NOTE(review): `document` is presumably the Jsoup Document parsed earlier; it
// is defined outside this snippet.
File input = new File(filePath);
// try-with-resources closes (and thereby flushes) the writer even when a lookup
// or write below throws, e.g. if getElementById("1") returns null.
// PrintWriter(File, String) truncates an existing file to zero size.
try (PrintWriter writer = new PrintWriter(input, "UTF-8")) {
    writer.write(document.getElementById("1").outerHtml() + "\n");
    Elements scripts = document.getElementsByTag("script");
    for (Element script : scripts) {
        // Keep only the script whose data begins with the wanted function
        // header; other <script> elements are skipped.
        if (script.data().startsWith("(function(a, b)")) {
            writer.write(script.outerHtml() + "\n");
        }
    }
    writer.write(document.getElementsByClass("class1").outerHtml() + "\n");
    writer.write(document.getElementsByClass("class2").outerHtml() + "\n");
}