如何使用 ExcelDataReader 从文件中删除空单元格和行?
How to remove empty cells and rows from file using ExcelDataReader?
我正在使用 ExcelDataReader 读取 Excel 文件,但要处理的文件有 16k 列和 1200 条记录。其中只有 52 列填有数据,其余列都是空的,所以我想把空列连同空行一起删除。
尝试过:
// Attempted configuration: the row filter rejects only the row whose Depth is 0,
// so it does not remove empty rows or empty columns.
var dataset = reader.AsDataSet(
    new ExcelDataSetConfiguration
    {
        ConfigureDataTable = tableReader =>
            new ExcelDataTableConfiguration
            {
                FilterRow = row => row.Depth != 0
            }
    });
如能提供帮助,将不胜感激,谢谢!
据我所知,没有任何简单的方法可以过滤空列或行,我最好的尝试是:
// ExcelDataTableConfiguration.FilterRow: keep a row only when at least one of its
// cells holds a non-null, non-empty value.
FilterRow = rowReader =>
{
    for (var cell = 0; cell < rowReader.FieldCount; cell++)
    {
        var value = rowReader[cell];
        if (value != null && !string.IsNullOrEmpty(value.ToString()))
        {
            return true;
        }
    }

    return false;
},
// ExcelDataTableConfiguration.FilterColumn: keep a column only when some row after
// the first one holds a non-null, non-empty value at that column index.
FilterColumn = (rowReader, colIndex) =>
{
    // Rewind, then step onto the first row, which is treated as the column names
    // and therefore is not inspected.
    rowReader.Reset();
    rowReader.Read();

    var found = false;
    while (!found && rowReader.Read())
    {
        var value = rowReader[colIndex];
        found = value != null && !string.IsNullOrEmpty(value.ToString());
    }

    // NOTE(review): the reader passed in appears to be the same object the data set
    // is being built from, so it is rewound and put back on the first row before
    // returning - confirm against the ExcelDataReader implementation.
    rowReader.Reset();
    rowReader.Read();
    return found;
}
在 FilterRow 或 FilterColumn 中直接使用 IExcelDataReader 参数是很危险的!
我认为主 reader 和每个 rowReader 引用的是同一个对象!所以你必须始终清楚自己对它们做了什么。正如你所看到的,我加了一个小技巧(先 Reset 再 Read 一次),让 reader 在使用之后恢复到可以继续读取的状态。 - HTH ;)
受 ShA.t 的回答启发,我做到了让表头行和各列从它们在 Excel 文档中的实际起始位置开始读取。只要知道数据行的起始位置,剩下的就只需要过滤空列。这是示例代码:
/// <summary>
/// Sample that loads a workbook with ExcelDataReader, uses the first non-blank row of
/// each sheet as the header row, and drops every column that holds no values.
/// </summary>
public class TrimmedTableSample
{
    #region Properties

    /// <summary>
    /// Number of blank rows skipped by the ReadHeaderRow callback before the header row
    /// was found; the FilterColumn callback uses it to reposition the reader.
    /// </summary>
    int HeaderRowIndex { get; set; }

    #endregion

    #region Methods

    /// <summary>
    /// Opens the document read-only and converts it to a data set using the
    /// header-detection and empty-column filtering callbacks below.
    /// </summary>
    /// <param name="documentPath">Path of the Excel document to read.</param>
    public void Read(string documentPath)
    {
        using (var stream = File.Open(documentPath, FileMode.Open, FileAccess.Read))
        using (var reader = ExcelReaderFactory.CreateReader(stream))
        {
            var dataSet = reader.AsDataSet(new ExcelDataSetConfiguration()
            {
                UseColumnDataType = true,
                ConfigureDataTable = (tableReader) => new ExcelDataTableConfiguration()
                {
                    EmptyColumnNamePrefix = "Column ",
                    UseHeaderRow = true,
                    // The callback parameters are deliberately not named "reader":
                    // shadowing the enclosing local is a compile error before C# 8.
                    ReadHeaderRow = (headerReader) =>
                    {
                        bool empty = true;
                        HeaderRowIndex = 0;
                        while (empty)
                        {
                            for (var i = 0; i < headerReader.FieldCount && empty; i++)
                            {
                                // GetValue instead of GetString: GetString fails on
                                // cells that are not strings (numbers, dates, ...).
                                var value = headerReader.GetValue(i);
                                empty = value == null || string.IsNullOrWhiteSpace(value.ToString());
                            }

                            if (empty)
                            {
                                empty = headerReader.Read(); // Only continue if more content is available
                                HeaderRowIndex++; // Keep track of the first row position.
                            }
                        }
                    },
                    FilterColumn = (columnReader, index) =>
                    {
                        bool hasData = false;
                        string sheet = columnReader.Name;

                        // Start reading from the beginning of the workbook, return to
                        // the current sheet and skip the blank rows above the header.
                        columnReader.Reset();
                        SeekToSheet(columnReader, sheet);
                        SkipRows(columnReader, HeaderRowIndex);

                        // Scan the remaining rows (starting with the header row itself)
                        // until a non-empty value is found in this column.
                        while (columnReader.Read())
                        {
                            if (!IsNullOrEmptyValue(columnReader[index]))
                            {
                                hasData = true;
                                break;
                            }
                        }

                        // Start over again and put the reader back on the header row so
                        // the rest of the content can be read as usual afterwards.
                        columnReader.Reset();
                        SeekToSheet(columnReader, sheet);
                        columnReader.Read();
                        SkipRows(columnReader, HeaderRowIndex);

                        // true keeps the column, false drops it.
                        return hasData;
                    }
                }
            });
        }
    }

    /// <summary>Advances through the result sets until the named sheet is current or none are left.</summary>
    private static void SeekToSheet(IExcelDataReader sheetReader, string sheet)
    {
        while (sheetReader.Name != sheet)
        {
            if (!sheetReader.NextResult())
            {
                break;
            }
        }
    }

    /// <summary>Calls Read the given number of times, ignoring the result.</summary>
    private static void SkipRows(IExcelDataReader rowReader, int count)
    {
        for (var skipped = 0; skipped < count; skipped++)
        {
            rowReader.Read();
        }
    }

    /// <summary>Returns true when the cell value is null or its text form is empty.</summary>
    private static bool IsNullOrEmptyValue(object value)
    {
        return value == null || string.IsNullOrEmpty(value.ToString());
    }

    #endregion
}
祝你好运!
我通过调用 DataRow.Delete() 方法来使用此解决方案:
// Deletes every row in which all columns are null; other rows are kept for processing.
// NOTE(review): `rows` is presumably a sequence of DataRow and `columns` the table's
// column collection - neither is declared in this snippet, confirm against the caller.
foreach (var row in rows)
{
    var emptyColumnsCount = 0;
    for (var i = 0; i < columns.Count; i++)
    {
        if (!row.IsNull(columns[i]))
        {
            // Do some logic to read row.
        }
        else
        {
            // Was misspelled "emptColumnsCount", an undeclared identifier.
            emptyColumnsCount++;
        }
    }

    if (emptyColumnsCount == columns.Count)
    {
        row.Delete();
    }
    else
    {
        // Do some logic to use row
    }
}
我正在使用 ExcelDataReader 读取 Excel 文件,但要处理的文件有 16k 列和 1200 条记录。其中只有 52 列填有数据,其余列都是空的,所以我想把空列连同空行一起删除。
尝试过:
// Attempted configuration: the row filter rejects only the row whose Depth is 0,
// so it does not remove empty rows or empty columns.
var dataset = reader.AsDataSet(
    new ExcelDataSetConfiguration
    {
        ConfigureDataTable = tableReader =>
            new ExcelDataTableConfiguration
            {
                FilterRow = row => row.Depth != 0
            }
    });
如能提供帮助,将不胜感激,谢谢!
据我所知,没有任何简单的方法可以过滤空列或行,我最好的尝试是:
// ExcelDataTableConfiguration.FilterRow: keep a row only when at least one of its
// cells holds a non-null, non-empty value.
FilterRow = rowReader =>
{
    for (var cell = 0; cell < rowReader.FieldCount; cell++)
    {
        var value = rowReader[cell];
        if (value != null && !string.IsNullOrEmpty(value.ToString()))
        {
            return true;
        }
    }

    return false;
},
// ExcelDataTableConfiguration.FilterColumn: keep a column only when some row after
// the first one holds a non-null, non-empty value at that column index.
FilterColumn = (rowReader, colIndex) =>
{
    // Rewind, then step onto the first row, which is treated as the column names
    // and therefore is not inspected.
    rowReader.Reset();
    rowReader.Read();

    var found = false;
    while (!found && rowReader.Read())
    {
        var value = rowReader[colIndex];
        found = value != null && !string.IsNullOrEmpty(value.ToString());
    }

    // NOTE(review): the reader passed in appears to be the same object the data set
    // is being built from, so it is rewound and put back on the first row before
    // returning - confirm against the ExcelDataReader implementation.
    rowReader.Reset();
    rowReader.Read();
    return found;
}
在 FilterRow 或 FilterColumn 中直接使用 IExcelDataReader 参数是很危险的!
我认为主 reader 和每个 rowReader 引用的是同一个对象!所以你必须始终清楚自己对它们做了什么。正如你所看到的,我加了一个小技巧(先 Reset 再 Read 一次),让 reader 在使用之后恢复到可以继续读取的状态。 - HTH ;)
受 ShA.t 的回答启发,我做到了让表头行和各列从它们在 Excel 文档中的实际起始位置开始读取。只要知道数据行的起始位置,剩下的就只需要过滤空列。这是示例代码:
/// <summary>
/// Sample that loads a workbook with ExcelDataReader, uses the first non-blank row of
/// each sheet as the header row, and drops every column that holds no values.
/// </summary>
public class TrimmedTableSample
{
    #region Properties

    /// <summary>
    /// Number of blank rows skipped by the ReadHeaderRow callback before the header row
    /// was found; the FilterColumn callback uses it to reposition the reader.
    /// </summary>
    int HeaderRowIndex { get; set; }

    #endregion

    #region Methods

    /// <summary>
    /// Opens the document read-only and converts it to a data set using the
    /// header-detection and empty-column filtering callbacks below.
    /// </summary>
    /// <param name="documentPath">Path of the Excel document to read.</param>
    public void Read(string documentPath)
    {
        using (var stream = File.Open(documentPath, FileMode.Open, FileAccess.Read))
        using (var reader = ExcelReaderFactory.CreateReader(stream))
        {
            var dataSet = reader.AsDataSet(new ExcelDataSetConfiguration()
            {
                UseColumnDataType = true,
                ConfigureDataTable = (tableReader) => new ExcelDataTableConfiguration()
                {
                    EmptyColumnNamePrefix = "Column ",
                    UseHeaderRow = true,
                    // The callback parameters are deliberately not named "reader":
                    // shadowing the enclosing local is a compile error before C# 8.
                    ReadHeaderRow = (headerReader) =>
                    {
                        bool empty = true;
                        HeaderRowIndex = 0;
                        while (empty)
                        {
                            for (var i = 0; i < headerReader.FieldCount && empty; i++)
                            {
                                // GetValue instead of GetString: GetString fails on
                                // cells that are not strings (numbers, dates, ...).
                                var value = headerReader.GetValue(i);
                                empty = value == null || string.IsNullOrWhiteSpace(value.ToString());
                            }

                            if (empty)
                            {
                                empty = headerReader.Read(); // Only continue if more content is available
                                HeaderRowIndex++; // Keep track of the first row position.
                            }
                        }
                    },
                    FilterColumn = (columnReader, index) =>
                    {
                        bool hasData = false;
                        string sheet = columnReader.Name;

                        // Start reading from the beginning of the workbook, return to
                        // the current sheet and skip the blank rows above the header.
                        columnReader.Reset();
                        SeekToSheet(columnReader, sheet);
                        SkipRows(columnReader, HeaderRowIndex);

                        // Scan the remaining rows (starting with the header row itself)
                        // until a non-empty value is found in this column.
                        while (columnReader.Read())
                        {
                            if (!IsNullOrEmptyValue(columnReader[index]))
                            {
                                hasData = true;
                                break;
                            }
                        }

                        // Start over again and put the reader back on the header row so
                        // the rest of the content can be read as usual afterwards.
                        columnReader.Reset();
                        SeekToSheet(columnReader, sheet);
                        columnReader.Read();
                        SkipRows(columnReader, HeaderRowIndex);

                        // true keeps the column, false drops it.
                        return hasData;
                    }
                }
            });
        }
    }

    /// <summary>Advances through the result sets until the named sheet is current or none are left.</summary>
    private static void SeekToSheet(IExcelDataReader sheetReader, string sheet)
    {
        while (sheetReader.Name != sheet)
        {
            if (!sheetReader.NextResult())
            {
                break;
            }
        }
    }

    /// <summary>Calls Read the given number of times, ignoring the result.</summary>
    private static void SkipRows(IExcelDataReader rowReader, int count)
    {
        for (var skipped = 0; skipped < count; skipped++)
        {
            rowReader.Read();
        }
    }

    /// <summary>Returns true when the cell value is null or its text form is empty.</summary>
    private static bool IsNullOrEmptyValue(object value)
    {
        return value == null || string.IsNullOrEmpty(value.ToString());
    }

    #endregion
}
祝你好运!
我通过调用 DataRow.Delete() 方法来使用此解决方案:
// Deletes every row in which all columns are null; other rows are kept for processing.
// NOTE(review): `rows` is presumably a sequence of DataRow and `columns` the table's
// column collection - neither is declared in this snippet, confirm against the caller.
foreach (var row in rows)
{
    var emptyColumnsCount = 0;
    for (var i = 0; i < columns.Count; i++)
    {
        if (!row.IsNull(columns[i]))
        {
            // Do some logic to read row.
        }
        else
        {
            // Was misspelled "emptColumnsCount", an undeclared identifier.
            emptyColumnsCount++;
        }
    }

    if (emptyColumnsCount == columns.Count)
    {
        row.Delete();
    }
    else
    {
        // Do some logic to use row
    }
}