使用 JSoup 解析 HTML 并按顺序将结果写入 CSV

JSoup HTML Parse and Write Results to CSV In Order

我正在尝试找到一种最佳方法,把使用 Jsoup 从 HTML 文档中解析出的数据保存到 CSV 中。我遇到的问题出在使用 [CSVWriter][1](https://mvnrepository.com/artifact/com.opencsv/opencsv/4.6)写入数据上。请查看下面的代码片段。数据结构如下所示:信息框(infobox)是主记录列表,每条记录包含其后列出的各个字段。CSVWriter 接受的似乎是字符串数组,但我在把从元素中取得的字符串组成数组并交给 CSVWriter 写入时遇到了问题。

Jsoup 选择器返回的是所有匹配项的集合。例如,当我选择姓名时,如果页面上有 9 条记录,它会返回全部 9 个姓名。我需要把同一条记录的各项数据放在一起,以便将每条记录作为一行写入 CSV。

信息框 > 名称 | 电邮 | 电话 | 网站

我遇到的问题出在下面这一行写入数据的方式上:

writer.writeAll((Iterable<String[]>) infoArray);

这行代码无法正常工作并且会报错,但我想用它说明我想要实现的效果,同时也想请教是否有人熟悉如何将 Jsoup Elements 中的数据写入 CSV。谢谢。

// Question snippet: reads text from every ".info" element of doc (declared
// outside this snippet) and attempts to write it to c:/results.csv with CSVWriter.
String filePath ="c:/results.csv";
                // first create file object for file placed at location
                // specified by filepath
                File file = new File(filePath);
                try {
                    // create FileWriter object with file as parameter
                    FileWriter outputfile = new FileWriter(file);

                    // create CSVWriter object with the FileWriter object as parameter
                    CSVWriter writer = new CSVWriter(outputfile);

                    String[] header = { "Name", "Phone", "Street","State","City","Zipcode" };
                    Elements infobox = doc.select(".info");
                    // Flat list: the fields of every record are appended one after
                    // another, so the boundary between records is not kept.
                    List<String> infoArray = new ArrayList<>();

                    for(int i = 0; i < infobox.size(); i++){

                        // The result of this select is discarded.
                        infobox.get(i).select(".business-name > span");

                        // Six values are appended per ".info" element, in header order.
                        infoArray.add(infobox.get(i).select(".business-name > span").text());
                        infoArray.add(infobox.get(i).select(".phones.phone.primary").text());
                        infoArray.add(infobox.get(i).select(".street-address").text());
                        infoArray.add(infobox.get(i).select(".state").text());
                        infoArray.add(infobox.get(i).select(".city").text());
                        infoArray.add(infobox.get(i).select(".zip").text());


                    }


                    writer.writeNext(header);
                    //How to write data in order to match each record accordingly?
                    //Data should be written to CSV like the following example under each header into each corresponding row
                    //name, phone, street
                    // NOTE(review): infoArray is declared as List<String>, not as a
                    // collection of String[] rows; this cast is the line the
                    // surrounding text reports as failing.
                    writer.writeAll((Iterable<String[]>) infoArray);
                    // Empty loop body: nothing is done with the individual values here.
                    for(String ia : infoArray){


                    }

                    // closing writer connection
                    // (inside the try block, so it is skipped if an earlier statement throws)
                    writer.close();
                }
                catch (IOException e) {
                    // TODO Auto-generated catch block
                    e.printStackTrace();
                }

这是最终对我有效的做法。问题在于我之前没有把字符串放进字符串数组(String[])再传给 CSVWriter。下面是我的示例。

   // Writes one CSV row per ".info" element found in doc. The header row is
   // written only when count == 0. The names doc, count and writer are
   // declared outside this snippet.
   try {
       String[] header = { "Name", "Phone", "Street","State","City","Zipcode" };
       Elements infobox = doc.select(".info");

       if (count == 0) {
           writer.writeNext(header);
       }

       for (int i = 0; i < infobox.size(); i++) {
           String businessName = infobox.get(i).select(".business-name > span").text();
           String phone = infobox.get(i).select(".phones.phone.primary").text();
           String address = infobox.get(i).select(".street-address").text();
           // NOTE(review): the address seems to be displayed another way too
           // (under ".adr"); normalizing it, e.g. with a regular expression,
           // is still to be done.

           // NOTE(review): the header declares six columns but each row carries
           // only three values (name, phone, street) - confirm this is intended.

           // Each record is passed as its own String[] so that it is written
           // as one CSV row.
           String[] columns = new String[] { businessName, phone, address };
           writer.writeNext(columns);
       }

       writer.close();
   }

这里是如何使用 OpenCSV 的小例子。也许会对你有所帮助。

HeaderNames.java

/**
 * Column header names used for the CSV output.
 *
 * <p>This class only holds constants; it cannot be instantiated or extended.
 */
public final class HeaderNames
{

    public static final String NAME = "Name";
    public static final String PHONE = "Phone";
    public static final String STREET = "Street";
    public static final String STATE = "State";
    public static final String CITY = "City";
    public static final String ZIPCODE = "Zipcode";

    /** Not instantiable: the class exists only for its constants. */
    private HeaderNames()
    {
    }
}

DemoDTO.java

import java.io.Serializable;
import com.opencsv.bean.CsvBindByName;


/**
 * Plain data holder for one CSV record.
 *
 * <p>Each field is annotated with {@code @CsvBindByName}, using the matching
 * constant from {@code HeaderNames} as its column name.
 */
public class DemoDTO implements Serializable {

    private static final long serialVersionUID = 1L;

    @CsvBindByName(column = HeaderNames.NAME)
    private String name;

    @CsvBindByName(column = HeaderNames.PHONE)
    private String phone;

    @CsvBindByName(column = HeaderNames.STREET)
    private String street;

    @CsvBindByName(column = HeaderNames.STATE)
    private String state;

    @CsvBindByName(column = HeaderNames.CITY)
    private String city;

    @CsvBindByName(column = HeaderNames.ZIPCODE)
    private String zipcode;

    /** @return the value bound to the name column */
    public String getName() {
        return name;
    }

    /** @param name the value to store for the name column */
    public void setName(String name) {
        this.name = name;
    }

    /** @return the value bound to the phone column */
    public String getPhone() {
        return phone;
    }

    /** @param phone the value to store for the phone column */
    public void setPhone(String phone) {
        this.phone = phone;
    }

    /** @return the value bound to the street column */
    public String getStreet() {
        return street;
    }

    /** @param street the value to store for the street column */
    public void setStreet(String street) {
        this.street = street;
    }

    /** @return the value bound to the state column */
    public String getState() {
        return state;
    }

    /** @param state the value to store for the state column */
    public void setState(String state) {
        this.state = state;
    }

    /** @return the value bound to the city column */
    public String getCity() {
        return city;
    }

    /** @param city the value to store for the city column */
    public void setCity(String city) {
        this.city = city;
    }

    /** @return the value bound to the zipcode column */
    public String getZipcode() {
        return zipcode;
    }

    /** @param zipcode the value to store for the zipcode column */
    public void setZipcode(String zipcode) {
        this.zipcode = zipcode;
    }
}

Main.java

import java.io.File;
import java.io.FileWriter;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import com.opencsv.CSVWriter;
import com.opencsv.bean.HeaderColumnNameMappingStrategy;
import com.opencsv.bean.StatefulBeanToCsv;
import com.opencsv.bean.StatefulBeanToCsvBuilder;
import com.opencsv.exceptions.CsvDataTypeMismatchException;
import com.opencsv.exceptions.CsvRequiredFieldEmptyException;


/**
 * Small OpenCSV demo: builds a list of sample {@code DemoDTO} beans and writes
 * them to {@code results.csv} in the current working directory.
 */
public class Main
{

    /** Number of sample records written by {@link #main(String[])}. */
    private static final int SAMPLE_ROW_COUNT = 10;

    /**
     * Writes the sample beans to {@code results.csv} under the directory named
     * by the {@code user.dir} system property.
     *
     * @param args command-line arguments (not used)
     * @throws IOException if the file cannot be opened, written or closed
     * @throws CsvDataTypeMismatchException declared by {@code StatefulBeanToCsv.write}
     * @throws CsvRequiredFieldEmptyException declared by {@code StatefulBeanToCsv.write}
     */
    public static void main(String[] args) throws IOException, CsvDataTypeMismatchException, CsvRequiredFieldEmptyException
    {
        File file = new File(System.getProperty("user.dir"), "results.csv");

        HeaderColumnNameMappingStrategy<DemoDTO> strategy = new HeaderColumnNameMappingStrategy<>();
        strategy.setType(DemoDTO.class);

        // try-with-resources closes (and thereby flushes) the writer even when
        // writing the beans throws, so the file handle is never leaked.
        try (FileWriter writer = new FileWriter(file))
        {
            StatefulBeanToCsv<DemoDTO> beanToCsv = new StatefulBeanToCsvBuilder<DemoDTO>(writer)
                .withSeparator(';')
                .withEscapechar(CSVWriter.NO_ESCAPE_CHARACTER)
                .withLineEnd(CSVWriter.DEFAULT_LINE_END)
                .withQuotechar(CSVWriter.DEFAULT_QUOTE_CHARACTER)
                .withMappingStrategy(strategy)
                .withThrowExceptions(true)
                .build();

            beanToCsv.write(createSampleBeans(SAMPLE_ROW_COUNT));
        }
    }

    /** Builds {@code count} beans whose fields are "&lt;field&gt; &lt;index&gt;" placeholders. */
    private static List<DemoDTO> createSampleBeans(int count)
    {
        List<DemoDTO> beans = new ArrayList<>();

        for (int i = 0; i < count; i++)
        {
            DemoDTO demoDTO = new DemoDTO();

            demoDTO.setCity("city " + i);
            demoDTO.setName("name " + i);
            demoDTO.setPhone("phone " + i);
            demoDTO.setState("state " + i);
            demoDTO.setStreet("street " + i);
            demoDTO.setZipcode("zipcode " + i);

            beans.add(demoDTO);
        }

        return beans;
    }

}