如何使用 Jsoup 进行多线程处理
How can I use Jsoup for multithreading
我正在尝试使用 Jsoup 从 URl 下载信息作为文本,然后计算我想使用 multi threading.I 实现的每个唯一单词的频率我正在使用文本文件每个都有一个 url
如果我只给出一个 url,newline.I 会得到输出,但是当我给出更多 url 时,我会得到错误。
import java.io.*;
import java.net.*;
import java.util.HashMap;
import java.util.Map;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.parser.*;
import org.jsoup.*;
public class JavaGetUrl {
public void downloadweb(String url) throws IOException {
//System.out.println(url);
Document doc=Jsoup.connect(url).get();
//System.out.println(doc.text());
wordCounter(doc.text());
}
public static void wordCounter(String s)
{
String a[] = s.split(" ");
Map<String, Integer> words = new HashMap<>();
for (String str : a) {
if (words.containsKey(str)) {
words.put(str, 1 + words.get(str));
} else {
words.put(str, 1);
}
}
System.out.println(words);
}
public static void main (String[] args) throws IOException {
Mythread mt=new Mythread();
Thread t=new Thread(mt);
t.start();
}
}
class Mythread extends JavaGetUrl implements Runnable
{
public void run()
{
try
{
File file=new File("Demo.txt"); //creates a new file instance
FileReader fr=new FileReader(file); //reads the file
BufferedReader br=new BufferedReader(fr); //creates a buffering character input stream
//constructs a string buffer with no characters
String line;
StringBuffer sb=new StringBuffer();
while((line=br.readLine())!=null)
{
sb.append(line); //appends line to string buffer
sb.append("\n"); //line feed
}
fr.close(); //closes the stream and release the resources
System.out.println("Contents of File: ");
System.out.println(sb.toString());
JavaGetUrl jg=new JavaGetUrl();
jg.downloadweb(sb.toString());
//System.out.println(sb.toString());
}
catch(IOException e)
{
e.printStackTrace();
}
}
}
我遇到了这些错误
有没有办法使用 Jsoup
实现多线程
您的链接包含 \n
符号,因此它们不正确。
我认为您可以只使用字符串列表,如下所示:
List<String> urls = new ArrayList<>();
while((line=br.readLine())!=null)
{
urls.add(line); // adds line to the List of Strings
}
请注意代码格式。
import java.io.*;
import java.net.*;
import java.util.HashMap;
import java.util.Map;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.parser.*;
import org.jsoup.*;
public class JavaGetUrl {
public void downloadweb(String url) throws IOException {
//System.out.println(url);
Document doc=Jsoup.connect(url).get();
//System.out.println(doc.text());
wordCounter(doc.text());
}
public static void wordCounter(String s)
{
String a[] = s.split(" ");
Map<String, Integer> words = new HashMap<>();
for (String str : a) {
if (words.containsKey(str)) {
words.put(str, 1 + words.get(str));
} else {
words.put(str, 1);
}
}
System.out.println(words);
}
public static void main (String[] args) throws IOException {
Mythread mt=new Mythread();
Thread t=new Thread(mt);
t.start();
}
}
class Mythread extends JavaGetUrl implements Runnable
{
public void run()
{
try
{
File file=new File("Demo.txt"); //creates a new file instance
FileReader fr=new FileReader(file); //reads the file
BufferedReader br=new BufferedReader(fr); //creates a buffering character input stream
//constructs a string buffer with no characters
String line;
StringBuffer sb=new StringBuffer();
while((line=br.readLine())!=null)
{
sb.append(line); //appends line to string buffer
sb.append("\n"); //line feed
}
fr.close(); //closes the stream and release the resources
System.out.println("Contents of File: ");
System.out.println(sb.toString());
JavaGetUrl jg=new JavaGetUrl();
jg.downloadweb(sb.toString());
//System.out.println(sb.toString());
}
catch(IOException e)
{
e.printStackTrace();
}
}
}
我遇到了这些错误 有没有办法使用 Jsoup
实现多线程您的链接包含 \n
符号,因此它们不正确。
我认为您可以只使用字符串列表,如下所示:
List<String> urls = new ArrayList<>();
while((line=br.readLine())!=null)
{
urls.add(line); // adds line to the List of Strings
}
请注意代码格式。