如何从 CSV 文件中获取特定数据
How to get specific data from a CSV file
我有一个非常大的 CSV 文件,我已经设法使用 Scanner 将所有这些放入 ArrayList
Path filepath = Paths.get("./data.csv");
try{
Scanner InputStream = new Scanner(filepath);
while (InputStream.hasNext()){
wholefile.add(String.valueOf(InputStream.next()));
} InputStream.close();
System.out.println(wholefile);
} catch (IOException e) {
e.printStackTrace();
}
}
我的数组如下所示:
wholefile = [id,属性, 地址,first_name,last_name,email,Owner, contact, address,Price,Date, sold, 1,94032, Mockingbird, Alley,Brander,Verillo,bverillo0@sogou.com,435587.57,2,293, Haas, Lane,Maxy,Reynalds......]
这里是 excel 中 csv 文件的截图
https://plus.google.com/photos/photo/115135191238195349859/6559552907258825106?authkey=CIu-hovf5pj29gE
我想用这些数据做一些事情,但我不知道我需要写什么方法:
- 通过 ID 获取一条房产(property)记录
- 获取 n 个价格最高的房产列表
- 一个月的总销售额。
任何帮助或指导将不胜感激,我不确定我是否以正确的方式解决这个问题
https://plus.google.com/photos/photo/115135191238195349859/6559637333893665186
使用字符串 ArrayList 在执行您想要的操作时性能会很差。
首先创建一个与您的 CSV 表头(Header)相匹配的 Object。然后在读取文件时,把每一行转换成该 Object 并添加到对应的 ArrayList 中;至于排序、搜索和总销售额,只需在这个 ArrayList 上创建一个流(stream)即可。
不要浪费时间重新发明轮子。
我建议使用 Apache Commons CSV 库 来操作 .csv 文件。
你可以找到官方文档here.
还有一些例子here.
我曾经为了做一些概念验证(proof of concept)而自己编写过一个自定义 CSV 解析器,我认为你可以在这里直接借用并改造它:
CSVReader.java
/**
 * Minimal line-oriented CSV reader.
 *
 * <p>All lines are held in memory and each one is handed out as a {@link CSVRow}
 * when iterated. Every call to {@link #iterator()} returns a fresh iterator with
 * its own cursor, so nested or repeated loops over the same reader do not
 * interfere with each other.
 *
 * <p>Limitations: one line is one record; quoted fields containing commas or line
 * breaks are not supported.
 */
public class CSVReader implements Iterable<CSVRow> {
    private final List<String> _data;
    private final boolean _hasTrailingComma;
    // Number of leading lines that new iterators skip (e.g. 1 to skip the header).
    private int _skip = 0;

    /**
     * Reads every line of {@code path} into memory.
     *
     * @param path             file to read
     * @param hasTrailingComma whether each line is expected to end with a comma
     * @throws IOException if the file cannot be read
     */
    public CSVReader(Path path, boolean hasTrailingComma) throws IOException {
        this(Files.readAllLines(path), hasTrailingComma);
    }

    /**
     * Reads every line of {@code path}, assuming lines do not end with a comma.
     *
     * @param path file to read
     * @throws IOException if the file cannot be read
     */
    public CSVReader(Path path) throws IOException {
        this(path, false);
    }

    /**
     * Wraps already-loaded lines.
     *
     * @param data             raw CSV lines, one record per element
     * @param hasTrailingComma whether each line is expected to end with a comma
     */
    public CSVReader(List<String> data, boolean hasTrailingComma) {
        _data = data;
        _hasTrailingComma = hasTrailingComma;
    }

    /**
     * Wraps already-loaded lines, assuming they do not end with a comma.
     *
     * @param data raw CSV lines, one record per element
     */
    public CSVReader(List<String> data) {
        this(data, false);
    }

    /**
     * Returns the first line parsed as a row, regardless of the current skip setting.
     *
     * @return the first line as a {@link CSVRow}
     * @throws IndexOutOfBoundsException if there are no lines at all
     */
    public CSVRow getHeaders() {
        return new CSVRow(_data.get(0), _hasTrailingComma);
    }

    /**
     * Sets how many leading lines subsequent iterators skip.
     *
     * @param rows number of lines to skip; must not be negative
     * @throws IllegalArgumentException if {@code rows} is negative
     */
    public void skip(int rows) {
        if (rows < 0) {
            throw new IllegalArgumentException("rows must be >= 0, got " + rows);
        }
        _skip = rows;
    }

    /**
     * Returns a new, independent iterator positioned after the skipped lines.
     */
    @Override
    public Iterator<CSVRow> iterator() {
        return new FileIterator(_skip);
    }

    /** Iterator with a private cursor so that several can coexist on one reader. */
    private class FileIterator implements Iterator<CSVRow> {
        private int _pos;

        FileIterator(int start) {
            _pos = start;
        }

        @Override
        public boolean hasNext() {
            return _pos < _data.size();
        }

        @Override
        public CSVRow next() {
            // Reuse hasNext() so a start position beyond the end is also reported
            // as NoSuchElementException rather than an index error.
            if (!hasNext()) {
                throw new NoSuchElementException();
            }
            return new CSVRow(_data.get(_pos++), _hasTrailingComma);
        }
    }
}
CSVRow.java
/**
 * One CSV record split into cells.
 *
 * <p>Cells are separated by commas; whitespace directly around each comma is
 * discarded. Empty cells are preserved. Quoted fields that themselves contain
 * commas are not supported.
 *
 * <p>Every call to {@link #iterator()} returns a fresh iterator with its own
 * cursor, so several iterations over the same row do not interfere.
 */
public class CSVRow implements Iterable<String> {
    // Compiled once; the backslashes must be doubled inside a Java string literal,
    // otherwise the whitespace class is not what reaches the regex engine.
    private static final java.util.regex.Pattern SEPARATOR =
            java.util.regex.Pattern.compile("\\s*,\\s*");

    private final String[] _data;
    // Number of real cells (excludes the empty cell produced by a trailing comma).
    private final int _actualLength;
    // Number of leading cells that new iterators skip.
    private int _skip = 0;

    /**
     * Splits {@code row} into cells.
     *
     * @param row           raw CSV line
     * @param trailingComma whether the line format ends every record with a comma;
     *                      when true the empty cell after that comma is not exposed
     */
    public CSVRow(String row, boolean trailingComma) {
        // Minor hack: in case the data doesn't end in a comma although it should,
        // append one so the length computation below is uniform.
        // Ideally, the input file should be fixed.
        if (trailingComma && !row.endsWith(",")) {
            row += ",";
        }
        // Limit -1 keeps trailing empty cells instead of silently dropping them.
        _data = SEPARATOR.split(row, -1);
        _actualLength = trailingComma ? _data.length - 1 : _data.length;
    }

    /**
     * Splits {@code row} into cells, assuming it does not end with a comma.
     *
     * @param row raw CSV line
     */
    public CSVRow(String row) {
        this(row, false);
    }

    /**
     * Sets how many leading cells subsequent iterators skip.
     *
     * @param cells number of cells to skip; must not be negative
     * @throws IllegalArgumentException if {@code cells} is negative
     */
    public void skip(int cells) {
        if (cells < 0) {
            throw new IllegalArgumentException("cells must be >= 0, got " + cells);
        }
        _skip = cells;
    }

    /**
     * Returns a new, independent iterator positioned after the skipped cells.
     */
    @Override
    public Iterator<String> iterator() {
        return new RowIterator(_skip);
    }

    /**
     * Returns a copy of all cells, ignoring the skip setting.
     *
     * @return a new array holding the cells of this row
     */
    public String[] toArray() {
        return Arrays.copyOf(_data, _actualLength);
    }

    /** Iterator with a private cursor so that several can coexist on one row. */
    private class RowIterator implements Iterator<String> {
        private int _pos;

        RowIterator(int start) {
            _pos = start;
        }

        @Override
        public boolean hasNext() {
            return _pos < _actualLength;
        }

        @Override
        public String next() {
            // Reuse hasNext() so a start position beyond the end is also reported
            // as NoSuchElementException rather than an index error.
            if (!hasNext()) {
                throw new NoSuchElementException();
            }
            return _data[_pos++];
        }
    }
}
用法
/**
 * Usage example: prints every cell of {@code ./data.csv}, one CSV line per
 * output line, cells separated by a space.
 *
 * @param args unused
 * @throws IOException if the file cannot be read (declared because the
 *                     {@code CSVReader(Path)} constructor throws it)
 */
public static void main(String[] args) throws IOException {
    Path filepath = Paths.get("./data.csv");
    CSVReader reader = new CSVReader(filepath);
    for (CSVRow row : reader) {
        for (String str : row) {
            System.out.printf("%s ", str);
        }
        System.out.println();
    }
}
现在将每一行建模为一个对象将会很有用,这样您就可以在 Java 中使用它来做一些事情。您可以定义一个 class Property
来模拟每一行
/**
 * One property (real-estate) record, modelling a single data row of the CSV file.
 *
 * <p>The setters for id, price and sale date accept the raw text of a CSV cell
 * and convert it to the typed field.
 */
public class Property {
    // long (not int) so that it matches getId() and Long.parseLong in setId().
    private long id;
    private String address;
    private String firstName;
    private String lastName;
    private String email;
    private String ownerContactAddress;
    private BigDecimal price;
    private java.sql.Date dateSold;

    public Property() {
    }

    // Setters and getters

    public long getId() {
        return this.id;
    }

    /**
     * Parses and stores the record id.
     *
     * @param id decimal text of the id; surrounding whitespace is ignored
     * @throws NumberFormatException if {@code id} is not a valid long
     */
    public void setId(String id) {
        this.id = Long.parseLong(id.trim());
    }

    public String getAddress() {
        return this.address;
    }

    public void setAddress(String address) {
        this.address = address;
    }

    public String getFirstName() {
        return this.firstName;
    }

    public void setFirstName(String firstName) {
        this.firstName = firstName;
    }

    public String getLastName() {
        return this.lastName;
    }

    public void setLastName(String lastName) {
        this.lastName = lastName;
    }

    public String getEmail() {
        return this.email;
    }

    public void setEmail(String email) {
        this.email = email;
    }

    public String getOwnerContactAddress() {
        return this.ownerContactAddress;
    }

    public void setOwnerContactAddress(String ownerContactAddress) {
        this.ownerContactAddress = ownerContactAddress;
    }

    public BigDecimal getPrice() {
        return this.price;
    }

    /**
     * Parses and stores the price.
     *
     * <p>Everything except digits, '.' and ',' is removed first (currency symbols,
     * spaces; note that this also removes a minus sign), then the remainder is
     * parsed with the number format of {@code locale}.
     *
     * @param price  raw price text, e.g. {@code "$435,587.57"}
     * @param locale locale that defines the grouping and decimal separators
     * @throws ParseException if no number can be parsed from the cleaned text
     */
    public void setPrice(String price, Locale locale) throws ParseException {
        NumberFormat format = NumberFormat.getNumberInstance(locale);
        if (format instanceof DecimalFormat) {
            ((DecimalFormat) format).setParseBigDecimal(true);
        }
        Number parsed = format.parse(price.replaceAll("[^\\d.,]", ""));
        // A format that is not a DecimalFormat may hand back Long/Double;
        // convert through the string form instead of a failing cast.
        this.price = (parsed instanceof BigDecimal)
                ? (BigDecimal) parsed
                : new BigDecimal(parsed.toString());
    }

    public java.sql.Date getDateSold() {
        return this.dateSold;
    }

    /**
     * Parses and stores the sale date.
     *
     * @param date   raw date text
     * @param format {@link SimpleDateFormat} pattern describing {@code date},
     *               e.g. {@code "MM/dd/yyyy"}
     * @throws ParseException if {@code date} does not match {@code format} or is
     *                        not a real calendar date
     */
    public void setDateSold(String date, String format) throws ParseException {
        SimpleDateFormat sdf = new SimpleDateFormat(format);
        // Reject impossible dates such as 13/45/2018 instead of rolling them over.
        sdf.setLenient(false);
        this.dateSold = new java.sql.Date(sdf.parse(date).getTime());
    }
}
将所有东西放在一起(未测试)
/**
 * End-to-end example: reads {@code ./data.csv}, converts each data row into a
 * {@code Property} and indexes the results by id.
 *
 * @param args unused
 * @throws IOException    if the file cannot be read
 * @throws ParseException if a price or sale-date cell cannot be parsed
 */
public static void main(String[] args) throws IOException, ParseException {
    // Collection to store properties
    // You could also write a class to wrap this
    // map along with the methods you need to implement
    // Say PropertyTable {
    //    private Map<Long, Property> properties ...
    //    Property getPropertyById(long id);
    //    getHighestPriced() // sort the map by price
    // }
    Map<Long, Property> properties = new HashMap<>();
    Path filepath = Paths.get("./data.csv");
    CSVReader reader = new CSVReader(filepath);
    // The first line holds the column names, not a record; feeding it to
    // setId() would fail with NumberFormatException.
    reader.skip(1);
    for (CSVRow row : reader) {
        Iterator<String> it = row.iterator();
        Property p = new Property();
        p.setId(it.next());
        p.setAddress(it.next());
        // ... set the remaining properties
        // NOTE(review): every column that sits between address and price in the
        // file must be consumed with its own it.next() call here, otherwise
        // setPrice() below receives the wrong cell - confirm against the header.
        p.setPrice(it.next(), Locale.UK);
        p.setDateSold(it.next(), "MM/dd/yyyy");
        properties.put(p.getId(), p);
    }
    // At this point, you should have all the properties read
    // let's try to get property with id 5 (null when no such id was read)
    Property prop = properties.get(5L);
}
希望对您有所帮助。
我有一个非常大的 CSV 文件,我已经设法使用 Scanner 将所有这些放入 ArrayList
Path filepath = Paths.get("./data.csv");
try{
Scanner InputStream = new Scanner(filepath);
while (InputStream.hasNext()){
wholefile.add(String.valueOf(InputStream.next()));
} InputStream.close();
System.out.println(wholefile);
} catch (IOException e) {
e.printStackTrace();
}
}
我的数组如下所示:
wholefile = [id,属性, 地址,first_name,last_name,email,Owner, contact, address,Price,Date, sold, 1,94032, Mockingbird, Alley,Brander,Verillo,bverillo0@sogou.com,435587.57,2,293, Haas, Lane,Maxy,Reynalds......]
这里是 excel 中 csv 文件的截图 https://plus.google.com/photos/photo/115135191238195349859/6559552907258825106?authkey=CIu-hovf5pj29gE
我想用这些数据做一些事情,但我不知道我需要写什么方法:
- 通过 ID 获取一条 属性 记录
- 获取 n 个价格最高的房产列表
- 一个月的总销售额。
任何帮助或指导将不胜感激,我不确定我是否以正确的方式解决这个问题
https://plus.google.com/photos/photo/115135191238195349859/6559637333893665186
使用字符串 ArrayList 在执行您想要的操作时性能会很差。 首先创建一个与您的 CSV 表头(Header)相匹配的 Object。然后在读取文件时,把每一行转换成该 Object 并添加到对应的 ArrayList 中;至于排序、搜索和总销售额,只需在这个 ArrayList 上创建一个流(stream)即可。
不要浪费时间重新发明轮子。
我建议使用 Apache Commons CSV 库 来操作 .csv 文件。
你可以找到官方文档here.
还有一些例子here.
我曾经为了做一些概念验证(proof of concept)而自己编写过一个自定义 CSV 解析器,我认为你可以在这里直接借用并改造它:
CSVReader.java
/**
 * Minimal line-oriented CSV reader.
 *
 * <p>All lines are held in memory and each one is handed out as a {@link CSVRow}
 * when iterated. Every call to {@link #iterator()} returns a fresh iterator with
 * its own cursor, so nested or repeated loops over the same reader do not
 * interfere with each other.
 *
 * <p>Limitations: one line is one record; quoted fields containing commas or line
 * breaks are not supported.
 */
public class CSVReader implements Iterable<CSVRow> {
    private final List<String> _data;
    private final boolean _hasTrailingComma;
    // Number of leading lines that new iterators skip (e.g. 1 to skip the header).
    private int _skip = 0;

    /**
     * Reads every line of {@code path} into memory.
     *
     * @param path             file to read
     * @param hasTrailingComma whether each line is expected to end with a comma
     * @throws IOException if the file cannot be read
     */
    public CSVReader(Path path, boolean hasTrailingComma) throws IOException {
        this(Files.readAllLines(path), hasTrailingComma);
    }

    /**
     * Reads every line of {@code path}, assuming lines do not end with a comma.
     *
     * @param path file to read
     * @throws IOException if the file cannot be read
     */
    public CSVReader(Path path) throws IOException {
        this(path, false);
    }

    /**
     * Wraps already-loaded lines.
     *
     * @param data             raw CSV lines, one record per element
     * @param hasTrailingComma whether each line is expected to end with a comma
     */
    public CSVReader(List<String> data, boolean hasTrailingComma) {
        _data = data;
        _hasTrailingComma = hasTrailingComma;
    }

    /**
     * Wraps already-loaded lines, assuming they do not end with a comma.
     *
     * @param data raw CSV lines, one record per element
     */
    public CSVReader(List<String> data) {
        this(data, false);
    }

    /**
     * Returns the first line parsed as a row, regardless of the current skip setting.
     *
     * @return the first line as a {@link CSVRow}
     * @throws IndexOutOfBoundsException if there are no lines at all
     */
    public CSVRow getHeaders() {
        return new CSVRow(_data.get(0), _hasTrailingComma);
    }

    /**
     * Sets how many leading lines subsequent iterators skip.
     *
     * @param rows number of lines to skip; must not be negative
     * @throws IllegalArgumentException if {@code rows} is negative
     */
    public void skip(int rows) {
        if (rows < 0) {
            throw new IllegalArgumentException("rows must be >= 0, got " + rows);
        }
        _skip = rows;
    }

    /**
     * Returns a new, independent iterator positioned after the skipped lines.
     */
    @Override
    public Iterator<CSVRow> iterator() {
        return new FileIterator(_skip);
    }

    /** Iterator with a private cursor so that several can coexist on one reader. */
    private class FileIterator implements Iterator<CSVRow> {
        private int _pos;

        FileIterator(int start) {
            _pos = start;
        }

        @Override
        public boolean hasNext() {
            return _pos < _data.size();
        }

        @Override
        public CSVRow next() {
            // Reuse hasNext() so a start position beyond the end is also reported
            // as NoSuchElementException rather than an index error.
            if (!hasNext()) {
                throw new NoSuchElementException();
            }
            return new CSVRow(_data.get(_pos++), _hasTrailingComma);
        }
    }
}
CSVRow.java
/**
 * One CSV record split into cells.
 *
 * <p>Cells are separated by commas; whitespace directly around each comma is
 * discarded. Empty cells are preserved. Quoted fields that themselves contain
 * commas are not supported.
 *
 * <p>Every call to {@link #iterator()} returns a fresh iterator with its own
 * cursor, so several iterations over the same row do not interfere.
 */
public class CSVRow implements Iterable<String> {
    // Compiled once; the backslashes must be doubled inside a Java string literal,
    // otherwise the whitespace class is not what reaches the regex engine.
    private static final java.util.regex.Pattern SEPARATOR =
            java.util.regex.Pattern.compile("\\s*,\\s*");

    private final String[] _data;
    // Number of real cells (excludes the empty cell produced by a trailing comma).
    private final int _actualLength;
    // Number of leading cells that new iterators skip.
    private int _skip = 0;

    /**
     * Splits {@code row} into cells.
     *
     * @param row           raw CSV line
     * @param trailingComma whether the line format ends every record with a comma;
     *                      when true the empty cell after that comma is not exposed
     */
    public CSVRow(String row, boolean trailingComma) {
        // Minor hack: in case the data doesn't end in a comma although it should,
        // append one so the length computation below is uniform.
        // Ideally, the input file should be fixed.
        if (trailingComma && !row.endsWith(",")) {
            row += ",";
        }
        // Limit -1 keeps trailing empty cells instead of silently dropping them.
        _data = SEPARATOR.split(row, -1);
        _actualLength = trailingComma ? _data.length - 1 : _data.length;
    }

    /**
     * Splits {@code row} into cells, assuming it does not end with a comma.
     *
     * @param row raw CSV line
     */
    public CSVRow(String row) {
        this(row, false);
    }

    /**
     * Sets how many leading cells subsequent iterators skip.
     *
     * @param cells number of cells to skip; must not be negative
     * @throws IllegalArgumentException if {@code cells} is negative
     */
    public void skip(int cells) {
        if (cells < 0) {
            throw new IllegalArgumentException("cells must be >= 0, got " + cells);
        }
        _skip = cells;
    }

    /**
     * Returns a new, independent iterator positioned after the skipped cells.
     */
    @Override
    public Iterator<String> iterator() {
        return new RowIterator(_skip);
    }

    /**
     * Returns a copy of all cells, ignoring the skip setting.
     *
     * @return a new array holding the cells of this row
     */
    public String[] toArray() {
        return Arrays.copyOf(_data, _actualLength);
    }

    /** Iterator with a private cursor so that several can coexist on one row. */
    private class RowIterator implements Iterator<String> {
        private int _pos;

        RowIterator(int start) {
            _pos = start;
        }

        @Override
        public boolean hasNext() {
            return _pos < _actualLength;
        }

        @Override
        public String next() {
            // Reuse hasNext() so a start position beyond the end is also reported
            // as NoSuchElementException rather than an index error.
            if (!hasNext()) {
                throw new NoSuchElementException();
            }
            return _data[_pos++];
        }
    }
}
用法
/**
 * Usage example: prints every cell of {@code ./data.csv}, one CSV line per
 * output line, cells separated by a space.
 *
 * @param args unused
 * @throws IOException if the file cannot be read (declared because the
 *                     {@code CSVReader(Path)} constructor throws it)
 */
public static void main(String[] args) throws IOException {
    Path filepath = Paths.get("./data.csv");
    CSVReader reader = new CSVReader(filepath);
    for (CSVRow row : reader) {
        for (String str : row) {
            System.out.printf("%s ", str);
        }
        System.out.println();
    }
}
现在将每一行建模为一个对象将会很有用,这样您就可以在 Java 中使用它来做一些事情。您可以定义一个 class Property
来模拟每一行
/**
 * One property (real-estate) record, modelling a single data row of the CSV file.
 *
 * <p>The setters for id, price and sale date accept the raw text of a CSV cell
 * and convert it to the typed field.
 */
public class Property {
    // long (not int) so that it matches getId() and Long.parseLong in setId().
    private long id;
    private String address;
    private String firstName;
    private String lastName;
    private String email;
    private String ownerContactAddress;
    private BigDecimal price;
    private java.sql.Date dateSold;

    public Property() {
    }

    // Setters and getters

    public long getId() {
        return this.id;
    }

    /**
     * Parses and stores the record id.
     *
     * @param id decimal text of the id; surrounding whitespace is ignored
     * @throws NumberFormatException if {@code id} is not a valid long
     */
    public void setId(String id) {
        this.id = Long.parseLong(id.trim());
    }

    public String getAddress() {
        return this.address;
    }

    public void setAddress(String address) {
        this.address = address;
    }

    public String getFirstName() {
        return this.firstName;
    }

    public void setFirstName(String firstName) {
        this.firstName = firstName;
    }

    public String getLastName() {
        return this.lastName;
    }

    public void setLastName(String lastName) {
        this.lastName = lastName;
    }

    public String getEmail() {
        return this.email;
    }

    public void setEmail(String email) {
        this.email = email;
    }

    public String getOwnerContactAddress() {
        return this.ownerContactAddress;
    }

    public void setOwnerContactAddress(String ownerContactAddress) {
        this.ownerContactAddress = ownerContactAddress;
    }

    public BigDecimal getPrice() {
        return this.price;
    }

    /**
     * Parses and stores the price.
     *
     * <p>Everything except digits, '.' and ',' is removed first (currency symbols,
     * spaces; note that this also removes a minus sign), then the remainder is
     * parsed with the number format of {@code locale}.
     *
     * @param price  raw price text, e.g. {@code "$435,587.57"}
     * @param locale locale that defines the grouping and decimal separators
     * @throws ParseException if no number can be parsed from the cleaned text
     */
    public void setPrice(String price, Locale locale) throws ParseException {
        NumberFormat format = NumberFormat.getNumberInstance(locale);
        if (format instanceof DecimalFormat) {
            ((DecimalFormat) format).setParseBigDecimal(true);
        }
        Number parsed = format.parse(price.replaceAll("[^\\d.,]", ""));
        // A format that is not a DecimalFormat may hand back Long/Double;
        // convert through the string form instead of a failing cast.
        this.price = (parsed instanceof BigDecimal)
                ? (BigDecimal) parsed
                : new BigDecimal(parsed.toString());
    }

    public java.sql.Date getDateSold() {
        return this.dateSold;
    }

    /**
     * Parses and stores the sale date.
     *
     * @param date   raw date text
     * @param format {@link SimpleDateFormat} pattern describing {@code date},
     *               e.g. {@code "MM/dd/yyyy"}
     * @throws ParseException if {@code date} does not match {@code format} or is
     *                        not a real calendar date
     */
    public void setDateSold(String date, String format) throws ParseException {
        SimpleDateFormat sdf = new SimpleDateFormat(format);
        // Reject impossible dates such as 13/45/2018 instead of rolling them over.
        sdf.setLenient(false);
        this.dateSold = new java.sql.Date(sdf.parse(date).getTime());
    }
}
将所有东西放在一起(未测试)
/**
 * End-to-end example: reads {@code ./data.csv}, converts each data row into a
 * {@code Property} and indexes the results by id.
 *
 * @param args unused
 * @throws IOException    if the file cannot be read
 * @throws ParseException if a price or sale-date cell cannot be parsed
 */
public static void main(String[] args) throws IOException, ParseException {
    // Collection to store properties
    // You could also write a class to wrap this
    // map along with the methods you need to implement
    // Say PropertyTable {
    //    private Map<Long, Property> properties ...
    //    Property getPropertyById(long id);
    //    getHighestPriced() // sort the map by price
    // }
    Map<Long, Property> properties = new HashMap<>();
    Path filepath = Paths.get("./data.csv");
    CSVReader reader = new CSVReader(filepath);
    // The first line holds the column names, not a record; feeding it to
    // setId() would fail with NumberFormatException.
    reader.skip(1);
    for (CSVRow row : reader) {
        Iterator<String> it = row.iterator();
        Property p = new Property();
        p.setId(it.next());
        p.setAddress(it.next());
        // ... set the remaining properties
        // NOTE(review): every column that sits between address and price in the
        // file must be consumed with its own it.next() call here, otherwise
        // setPrice() below receives the wrong cell - confirm against the header.
        p.setPrice(it.next(), Locale.UK);
        p.setDateSold(it.next(), "MM/dd/yyyy");
        properties.put(p.getId(), p);
    }
    // At this point, you should have all the properties read
    // let's try to get property with id 5 (null when no such id was read)
    Property prop = properties.get(5L);
}
希望对您有所帮助。