JSoup HTML 按顺序解析结果并将结果写入 CSV
JSoup HTML Parse and Write Results to CSV In Order
我正在寻找把 Jsoup 从 HTML 文档中解析出的数据保存到 CSV 的最佳方法。我的问题出在使用 [CSVWriter][1](https://mvnrepository.com/artifact/com.opencsv/opencsv/4.6)写入数据这一步。请查看下面的代码片段。数据结构如下所示:信息框(.info)是主要的列表记录,其中包含后续的各个字段。CSVWriter 似乎需要接收字符串数组,但我在把从元素中取出的数据以字符串数组的形式写入 CSV 时遇到了问题。
Jsoup 选择器返回的是所有匹配项的集合。例如,当我选择姓名时,如果页面上有 9 条记录,它会返回全部 9 个姓名。我需要把属于同一条记录的数据组合在一起,以便把每条记录作为一行写入 CSV。
信息框 >
名称|
电邮|
Phone|
网站
问题出在下面这一行,我尝试用它来写入数据:
writer.writeAll((Iterable<String[]>) infoArray);
这段代码无法正常工作并且会报错,但我想借此说明我想要实现的效果,也想请教是否有人熟悉如何把 Jsoup Elements 中的数据写入 CSV。谢谢
// Question snippet: selects every ".info" element from `doc`, collects six text fields per
// element, and tries to write them to a CSV file through CSVWriter.
// NOTE(review): `doc` is not defined in this snippet — presumably an already-parsed Jsoup
// Document; confirm against the surrounding code.
String filePath ="c:/results.csv";
// first create file object for file placed at location
// specified by filepath
File file = new File(filePath);
try {
// create FileWriter object with file as parameter
FileWriter outputfile = new FileWriter(file);
// create CSVWriter object filewriter object as parameter
CSVWriter writer = new CSVWriter(outputfile);
String[] header = { "Name", "Phone", "Street","State","City","Zipcode" };
Elements infobox = doc.select(".info");
// Flat list of strings: the six fields of every record are appended one after another,
// so nothing in this structure marks where one record ends and the next begins.
List<String> infoArray = new ArrayList<>();
for(int i = 0; i < infobox.size(); i++){
// The return value of this select call is not used.
infobox.get(i).select(".business-name > span");
infoArray.add(infobox.get(i).select(".business-name > span").text());
infoArray.add(infobox.get(i).select(".phones.phone.primary").text());
infoArray.add(infobox.get(i).select(".street-address").text());
infoArray.add(infobox.get(i).select(".state").text());
infoArray.add(infobox.get(i).select(".city").text());
infoArray.add(infobox.get(i).select(".zip").text());
}
writer.writeNext(header);
//How to write data in order to match each record accordingly?
//Data should be written to CSV like the following example under each header into each corresponding row
//name, phone, street
// PROBLEM LINE: infoArray is declared as List<String>, not Iterable<String[]>, so this cast
// is invalid. The cast target shows that one String[] per row is what is being asked for.
writer.writeAll((Iterable<String[]>) infoArray);
// Empty loop body: iterates the collected values without using them.
for(String ia : infoArray){
}
// closing writer connection
// This close is inside the try body, so it is skipped if an earlier statement throws.
writer.close();
}
catch (IOException e) {
// TODO Auto-generated catch block
e.printStackTrace();
}
这是最终对我有用的做法。问题在于我之前没有把字符串放进字符串数组(String[])再传给 CSVWriter。下面是我的例子。
// Writes one CSV row per ".info" element, with the columns in the same order as the header.
// `doc`, `writer` and `count` come from the enclosing code, which is not part of this
// snippet; the catch/finally belonging to this try is not shown either.
try {
    String[] header = { "Name", "Phone", "Street","State","City","Zipcode" };
    Elements infobox = doc.select(".info");
    // Write the header only when count is 0.
    // NOTE(review): presumably count tracks how many pages were already written — confirm.
    if (count == 0) {
        writer.writeNext(header);
    }
    for (int i = 0; i < infobox.size(); i++) {
        String businessName = infobox.get(i).select(".business-name > span").text();
        String phone = infobox.get(i).select(".phones.phone.primary").text();
        String street = infobox.get(i).select(".street-address").text();
        String state = infobox.get(i).select(".state").text();
        String city = infobox.get(i).select(".city").text();
        String zipcode = infobox.get(i).select(".zip").text();
        // TODO: the address seems to be displayed another way too (under ".adr");
        // normalizing that variant, e.g. with a regular expression, is still open.

        // Every row carries all six values so that it lines up with the six header columns.
        String[] columns = new String[] {
            businessName, phone, street, state, city, zipcode
        };
        writer.writeNext(columns);
    }
    writer.close();
}
这里是如何使用 OpenCSV 的小例子。也许会对你有所帮助。
HeaderNames.java
/**
 * CSV column header names.
 *
 * <p>Constants-only holder: the class is final and cannot be instantiated.
 */
public final class HeaderNames
{
    public static final String NAME = "Name";
    public static final String PHONE = "Phone";
    public static final String STREET = "Street";
    public static final String STATE = "State";
    public static final String CITY = "City";
    public static final String ZIPCODE = "Zipcode";

    /** Prevents instantiation; this class only exposes constants. */
    private HeaderNames()
    {
    }
}
DemoDTO.java
import java.io.Serializable;
import com.opencsv.bean.CsvBindByName;
/**
 * Bean holding one CSV record. Each field is bound by name to the column whose header is
 * the matching {@link HeaderNames} constant.
 */
public class DemoDTO implements Serializable {

    private static final long serialVersionUID = 1L;

    /** Bound to the {@code HeaderNames.NAME} column. */
    @CsvBindByName(column = HeaderNames.NAME)
    private String name;

    /** Bound to the {@code HeaderNames.PHONE} column. */
    @CsvBindByName(column = HeaderNames.PHONE)
    private String phone;

    /** Bound to the {@code HeaderNames.STREET} column. */
    @CsvBindByName(column = HeaderNames.STREET)
    private String street;

    /** Bound to the {@code HeaderNames.STATE} column. */
    @CsvBindByName(column = HeaderNames.STATE)
    private String state;

    /** Bound to the {@code HeaderNames.CITY} column. */
    @CsvBindByName(column = HeaderNames.CITY)
    private String city;

    /** Bound to the {@code HeaderNames.ZIPCODE} column. */
    @CsvBindByName(column = HeaderNames.ZIPCODE)
    private String zipcode;

    /** @return the name value */
    public String getName() {
        return this.name;
    }

    /** @param name the name value to store */
    public void setName(String name) {
        this.name = name;
    }

    /** @return the phone value */
    public String getPhone() {
        return this.phone;
    }

    /** @param phone the phone value to store */
    public void setPhone(String phone) {
        this.phone = phone;
    }

    /** @return the street value */
    public String getStreet() {
        return this.street;
    }

    /** @param street the street value to store */
    public void setStreet(String street) {
        this.street = street;
    }

    /** @return the state value */
    public String getState() {
        return this.state;
    }

    /** @param state the state value to store */
    public void setState(String state) {
        this.state = state;
    }

    /** @return the city value */
    public String getCity() {
        return this.city;
    }

    /** @param city the city value to store */
    public void setCity(String city) {
        this.city = city;
    }

    /** @return the zipcode value */
    public String getZipcode() {
        return this.zipcode;
    }

    /** @param zipcode the zipcode value to store */
    public void setZipcode(String zipcode) {
        this.zipcode = zipcode;
    }
}
Main.java
import java.io.File;
import java.io.FileWriter;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import com.opencsv.CSVWriter;
import com.opencsv.bean.HeaderColumnNameMappingStrategy;
import com.opencsv.bean.StatefulBeanToCsv;
import com.opencsv.bean.StatefulBeanToCsvBuilder;
import com.opencsv.exceptions.CsvDataTypeMismatchException;
import com.opencsv.exceptions.CsvRequiredFieldEmptyException;
/**
 * Demo entry point: builds sample {@link DemoDTO} beans and writes them to
 * {@code results.csv} in the directory named by the {@code user.dir} system property,
 * using OpenCSV's bean-to-CSV writer.
 */
public class Main
{
    /** Number of sample rows written by the demo. */
    private static final int SAMPLE_ROW_COUNT = 10;

    /**
     * Writes the sample beans to the CSV file.
     *
     * @param args unused
     * @throws IOException if the file cannot be opened or written
     * @throws CsvDataTypeMismatchException propagated from the bean writer
     * @throws CsvRequiredFieldEmptyException propagated from the bean writer
     */
    public static void main(String[] args) throws IOException, CsvDataTypeMismatchException, CsvRequiredFieldEmptyException
    {
        File file = new File(System.getProperty("user.dir"), "results.csv");
        List<DemoDTO> beans = buildSampleBeans(SAMPLE_ROW_COUNT);

        HeaderColumnNameMappingStrategy<DemoDTO> strategy = new HeaderColumnNameMappingStrategy<>();
        strategy.setType(DemoDTO.class);

        // try-with-resources closes the writer even when bean writing throws.
        try (FileWriter writer = new FileWriter(file))
        {
            StatefulBeanToCsv<DemoDTO> beanToCsv = new StatefulBeanToCsvBuilder<DemoDTO>(writer)
                    .withSeparator(';')
                    .withEscapechar(CSVWriter.NO_ESCAPE_CHARACTER)
                    .withLineEnd(CSVWriter.DEFAULT_LINE_END)
                    .withQuotechar(CSVWriter.DEFAULT_QUOTE_CHARACTER)
                    .withMappingStrategy(strategy)
                    .withThrowExceptions(true)
                    .build();
            beanToCsv.write(beans);
            writer.flush();
        }
    }

    /** Builds {@code count} beans whose fields are "<field> <index>" placeholder strings. */
    private static List<DemoDTO> buildSampleBeans(int count)
    {
        List<DemoDTO> beans = new ArrayList<DemoDTO>();
        for (int i = 0; i < count; i++)
        {
            DemoDTO demoDTO = new DemoDTO();
            demoDTO.setCity("city " + i);
            demoDTO.setName("name " + i);
            demoDTO.setPhone("phone " + i);
            demoDTO.setState("state " + i);
            demoDTO.setStreet("street " + i);
            demoDTO.setZipcode("zipcode " + i);
            beans.add(demoDTO);
        }
        return beans;
    }
}
我正在寻找把 Jsoup 从 HTML 文档中解析出的数据保存到 CSV 的最佳方法。我的问题出在使用 [CSVWriter][1](https://mvnrepository.com/artifact/com.opencsv/opencsv/4.6)写入数据这一步。请查看下面的代码片段。数据结构如下所示:信息框(.info)是主要的列表记录,其中包含后续的各个字段。CSVWriter 似乎需要接收字符串数组,但我在把从元素中取出的数据以字符串数组的形式写入 CSV 时遇到了问题。
Jsoup 选择器返回的是所有匹配项的集合。例如,当我选择姓名时,如果页面上有 9 条记录,它会返回全部 9 个姓名。我需要把属于同一条记录的数据组合在一起,以便把每条记录作为一行写入 CSV。
信息框 > 名称| 电邮| Phone| 网站
问题出在下面这一行,我尝试用它来写入数据:
writer.writeAll((Iterable<String[]>) infoArray);
这段代码无法正常工作并且会报错,但我想借此说明我想要实现的效果,也想请教是否有人熟悉如何把 Jsoup Elements 中的数据写入 CSV。谢谢
// Question snippet: selects every ".info" element from `doc`, collects six text fields per
// element, and tries to write them to a CSV file through CSVWriter.
// NOTE(review): `doc` is not defined in this snippet — presumably an already-parsed Jsoup
// Document; confirm against the surrounding code.
String filePath ="c:/results.csv";
// first create file object for file placed at location
// specified by filepath
File file = new File(filePath);
try {
// create FileWriter object with file as parameter
FileWriter outputfile = new FileWriter(file);
// create CSVWriter object filewriter object as parameter
CSVWriter writer = new CSVWriter(outputfile);
String[] header = { "Name", "Phone", "Street","State","City","Zipcode" };
Elements infobox = doc.select(".info");
// Flat list of strings: the six fields of every record are appended one after another,
// so nothing in this structure marks where one record ends and the next begins.
List<String> infoArray = new ArrayList<>();
for(int i = 0; i < infobox.size(); i++){
// The return value of this select call is not used.
infobox.get(i).select(".business-name > span");
infoArray.add(infobox.get(i).select(".business-name > span").text());
infoArray.add(infobox.get(i).select(".phones.phone.primary").text());
infoArray.add(infobox.get(i).select(".street-address").text());
infoArray.add(infobox.get(i).select(".state").text());
infoArray.add(infobox.get(i).select(".city").text());
infoArray.add(infobox.get(i).select(".zip").text());
}
writer.writeNext(header);
//How to write data in order to match each record accordingly?
//Data should be written to CSV like the following example under each header into each corresponding row
//name, phone, street
// PROBLEM LINE: infoArray is declared as List<String>, not Iterable<String[]>, so this cast
// is invalid. The cast target shows that one String[] per row is what is being asked for.
writer.writeAll((Iterable<String[]>) infoArray);
// Empty loop body: iterates the collected values without using them.
for(String ia : infoArray){
}
// closing writer connection
// This close is inside the try body, so it is skipped if an earlier statement throws.
writer.close();
}
catch (IOException e) {
// TODO Auto-generated catch block
e.printStackTrace();
}
这是最终对我有用的做法。问题在于我之前没有把字符串放进字符串数组(String[])再传给 CSVWriter。下面是我的例子。
// Writes one CSV row per ".info" element, with the columns in the same order as the header.
// `doc`, `writer` and `count` come from the enclosing code, which is not part of this
// snippet; the catch/finally belonging to this try is not shown either.
try {
    String[] header = { "Name", "Phone", "Street","State","City","Zipcode" };
    Elements infobox = doc.select(".info");
    // Write the header only when count is 0.
    // NOTE(review): presumably count tracks how many pages were already written — confirm.
    if (count == 0) {
        writer.writeNext(header);
    }
    for (int i = 0; i < infobox.size(); i++) {
        String businessName = infobox.get(i).select(".business-name > span").text();
        String phone = infobox.get(i).select(".phones.phone.primary").text();
        String street = infobox.get(i).select(".street-address").text();
        String state = infobox.get(i).select(".state").text();
        String city = infobox.get(i).select(".city").text();
        String zipcode = infobox.get(i).select(".zip").text();
        // TODO: the address seems to be displayed another way too (under ".adr");
        // normalizing that variant, e.g. with a regular expression, is still open.

        // Every row carries all six values so that it lines up with the six header columns.
        String[] columns = new String[] {
            businessName, phone, street, state, city, zipcode
        };
        writer.writeNext(columns);
    }
    writer.close();
}
这里是如何使用 OpenCSV 的小例子。也许会对你有所帮助。
HeaderNames.java
/**
 * CSV column header names.
 *
 * <p>Constants-only holder: the class is final and cannot be instantiated.
 */
public final class HeaderNames
{
    public static final String NAME = "Name";
    public static final String PHONE = "Phone";
    public static final String STREET = "Street";
    public static final String STATE = "State";
    public static final String CITY = "City";
    public static final String ZIPCODE = "Zipcode";

    /** Prevents instantiation; this class only exposes constants. */
    private HeaderNames()
    {
    }
}
DemoDTO.java
import java.io.Serializable;
import com.opencsv.bean.CsvBindByName;
/**
 * Bean holding one CSV record. Each field is bound by name to the column whose header is
 * the matching {@link HeaderNames} constant.
 */
public class DemoDTO implements Serializable {

    private static final long serialVersionUID = 1L;

    /** Bound to the {@code HeaderNames.NAME} column. */
    @CsvBindByName(column = HeaderNames.NAME)
    private String name;

    /** Bound to the {@code HeaderNames.PHONE} column. */
    @CsvBindByName(column = HeaderNames.PHONE)
    private String phone;

    /** Bound to the {@code HeaderNames.STREET} column. */
    @CsvBindByName(column = HeaderNames.STREET)
    private String street;

    /** Bound to the {@code HeaderNames.STATE} column. */
    @CsvBindByName(column = HeaderNames.STATE)
    private String state;

    /** Bound to the {@code HeaderNames.CITY} column. */
    @CsvBindByName(column = HeaderNames.CITY)
    private String city;

    /** Bound to the {@code HeaderNames.ZIPCODE} column. */
    @CsvBindByName(column = HeaderNames.ZIPCODE)
    private String zipcode;

    /** @return the name value */
    public String getName() {
        return this.name;
    }

    /** @param name the name value to store */
    public void setName(String name) {
        this.name = name;
    }

    /** @return the phone value */
    public String getPhone() {
        return this.phone;
    }

    /** @param phone the phone value to store */
    public void setPhone(String phone) {
        this.phone = phone;
    }

    /** @return the street value */
    public String getStreet() {
        return this.street;
    }

    /** @param street the street value to store */
    public void setStreet(String street) {
        this.street = street;
    }

    /** @return the state value */
    public String getState() {
        return this.state;
    }

    /** @param state the state value to store */
    public void setState(String state) {
        this.state = state;
    }

    /** @return the city value */
    public String getCity() {
        return this.city;
    }

    /** @param city the city value to store */
    public void setCity(String city) {
        this.city = city;
    }

    /** @return the zipcode value */
    public String getZipcode() {
        return this.zipcode;
    }

    /** @param zipcode the zipcode value to store */
    public void setZipcode(String zipcode) {
        this.zipcode = zipcode;
    }
}
Main.java
import java.io.File;
import java.io.FileWriter;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import com.opencsv.CSVWriter;
import com.opencsv.bean.HeaderColumnNameMappingStrategy;
import com.opencsv.bean.StatefulBeanToCsv;
import com.opencsv.bean.StatefulBeanToCsvBuilder;
import com.opencsv.exceptions.CsvDataTypeMismatchException;
import com.opencsv.exceptions.CsvRequiredFieldEmptyException;
/**
 * Demo entry point: builds sample {@link DemoDTO} beans and writes them to
 * {@code results.csv} in the directory named by the {@code user.dir} system property,
 * using OpenCSV's bean-to-CSV writer.
 */
public class Main
{
    /** Number of sample rows written by the demo. */
    private static final int SAMPLE_ROW_COUNT = 10;

    /**
     * Writes the sample beans to the CSV file.
     *
     * @param args unused
     * @throws IOException if the file cannot be opened or written
     * @throws CsvDataTypeMismatchException propagated from the bean writer
     * @throws CsvRequiredFieldEmptyException propagated from the bean writer
     */
    public static void main(String[] args) throws IOException, CsvDataTypeMismatchException, CsvRequiredFieldEmptyException
    {
        File file = new File(System.getProperty("user.dir"), "results.csv");
        List<DemoDTO> beans = buildSampleBeans(SAMPLE_ROW_COUNT);

        HeaderColumnNameMappingStrategy<DemoDTO> strategy = new HeaderColumnNameMappingStrategy<>();
        strategy.setType(DemoDTO.class);

        // try-with-resources closes the writer even when bean writing throws.
        try (FileWriter writer = new FileWriter(file))
        {
            StatefulBeanToCsv<DemoDTO> beanToCsv = new StatefulBeanToCsvBuilder<DemoDTO>(writer)
                    .withSeparator(';')
                    .withEscapechar(CSVWriter.NO_ESCAPE_CHARACTER)
                    .withLineEnd(CSVWriter.DEFAULT_LINE_END)
                    .withQuotechar(CSVWriter.DEFAULT_QUOTE_CHARACTER)
                    .withMappingStrategy(strategy)
                    .withThrowExceptions(true)
                    .build();
            beanToCsv.write(beans);
            writer.flush();
        }
    }

    /** Builds {@code count} beans whose fields are "<field> <index>" placeholder strings. */
    private static List<DemoDTO> buildSampleBeans(int count)
    {
        List<DemoDTO> beans = new ArrayList<DemoDTO>();
        for (int i = 0; i < count; i++)
        {
            DemoDTO demoDTO = new DemoDTO();
            demoDTO.setCity("city " + i);
            demoDTO.setName("name " + i);
            demoDTO.setPhone("phone " + i);
            demoDTO.setState("state " + i);
            demoDTO.setStreet("street " + i);
            demoDTO.setZipcode("zipcode " + i);
            beans.add(demoDTO);
        }
        return beans;
    }
}