使用 OpenXML 读取 Excel 个文件
Reading Excel files using OpenXML
我正在尝试通过 OpenXML 读取 Excel 文件并希望输出为 CSV。目前,它读取单独一行(由于 writeline)或单行(使用 write 时)中的每个单元格。以 Excel 中的表格格式读取和输出的最佳方式是什么?我可以利用 OpenXML 中的内置功能吗?
static void Main(string[] args)
{
String xlDocName = @"C:\Users\xlp111\source.xlsx";
using (SpreadsheetDocument spreadsheetDocument = SpreadsheetDocument.Open(xlDocName, false))
{
WorkbookPart workbookPart = spreadsheetDocument.WorkbookPart;
string cellValue = string.Empty;
foreach(WorksheetPart worksheetPart in workbookPart.WorksheetParts)
{
OpenXmlReader reader = OpenXmlReader.Create(worksheetPart);
while (reader.Read())
{
if (reader.ElementType == typeof(Row))
{
reader.ReadFirstChild();
do
{
if (reader.ElementType == typeof(Cell))
{
Cell c = (Cell)reader.LoadCurrentElement();
if (c.DataType != null && c.DataType == CellValues.SharedString)
{
SharedStringItem ssi = workbookPart.SharedStringTablePart.SharedStringTable.Elements<SharedStringItem>().ElementAt(int.Parse(c.CellValue.InnerText));
cellValue = ssi.Text.Text;
Console.WriteLine(cellValue);
}
}
}
while (reader.ReadNextSibling());
}
}
}
Console.ReadLine();
}
}
}
编辑:
使用 Open XML Microsoft Office SDK
从 :https://www.microsoft.com/en-eg/download/details.aspx?id=5124&wa=wsignin1.0
安装 V2
(或V2.5)
以下 class 将 excel sheet 转换为带分隔符的 CSV 文件
//reference library
using DocumentFormat.OpenXml.Packaging;
using DocumentFormat.OpenXml.Spreadsheet;
public class OpenXmlExcel
{
public void ExcelToCsv(string source, string target, string delimiter = ";", bool firstRowIsHeade = true)
{
var dt = ReadExcelSheet(source, firstRowIsHeade);
DatatableToCsv(dt, target, delimiter);
}
private void DatatableToCsv(DataTable dt, string fname, string delimiter = ";")
{
using (StreamWriter writer = new StreamWriter(fname))
{
foreach (DataRow row in dt.AsEnumerable())
{
writer.WriteLine(string.Join(delimiter, row.ItemArray.Select(x => x.ToString())) + delimiter);
}
}
}
List<string> Headers = new List<string>();
private DataTable ReadExcelSheet(string fname, bool firstRowIsHeade)
{
DataTable dt = new DataTable();
using (SpreadsheetDocument doc = SpreadsheetDocument.Open(fname, false))
{
//Read the first Sheets
Sheet sheet = doc.WorkbookPart.Workbook.Sheets.GetFirstChild<Sheet>();
Worksheet worksheet = (doc.WorkbookPart.GetPartById(sheet.Id.Value) as WorksheetPart).Worksheet;
IEnumerable<Row> rows = worksheet.GetFirstChild<SheetData>().Descendants<Row>();
foreach (Row row in rows)
{
//Read the first row as header
if (row.RowIndex.Value == 1)
{
var j = 1;
foreach (Cell cell in row.Descendants<Cell>())
{
var colunmName = firstRowIsHeade ? GetCellValue(doc, cell) : "Field" + j++;
Console.WriteLine(colunmName);
Headers.Add(colunmName);
dt.Columns.Add(colunmName);
}
}
else
{
dt.Rows.Add();
int i = 0;
foreach (Cell cell in row.Descendants<Cell>())
{
dt.Rows[dt.Rows.Count - 1][i] = GetCellValue(doc, cell);
i++;
}
}
}
}
return dt;
}
private string GetCellValue(SpreadsheetDocument doc, Cell cell)
{
string value = cell.CellValue.InnerText;
if (cell.DataType != null && cell.DataType.Value == CellValues.SharedString)
{
return doc.WorkbookPart.SharedStringTablePart.SharedStringTable.ChildElements.GetItem(int.Parse(value)).InnerText;
}
return value;
}
}
使用方法:
new OpenXmlExcel().ExcelToCsv("f1.xlsx","f1.csv",";",true);
or
//use default: separator=";" ,first row is header
new OpenXmlExcel().ExcelToCsv("f1.xlsx","f1.csv");
我正在尝试通过 OpenXML 读取 Excel 文件并希望输出为 CSV。目前,它读取单独一行(由于 writeline)或单行(使用 write 时)中的每个单元格。以 Excel 中的表格格式读取和输出的最佳方式是什么?我可以利用 OpenXML 中的内置功能吗?
static void Main(string[] args) { String xlDocName = @"C:\Users\xlp111\source.xlsx"; using (SpreadsheetDocument spreadsheetDocument = SpreadsheetDocument.Open(xlDocName, false)) { WorkbookPart workbookPart = spreadsheetDocument.WorkbookPart; string cellValue = string.Empty; foreach(WorksheetPart worksheetPart in workbookPart.WorksheetParts) { OpenXmlReader reader = OpenXmlReader.Create(worksheetPart); while (reader.Read()) { if (reader.ElementType == typeof(Row)) { reader.ReadFirstChild(); do { if (reader.ElementType == typeof(Cell)) { Cell c = (Cell)reader.LoadCurrentElement(); if (c.DataType != null && c.DataType == CellValues.SharedString) { SharedStringItem ssi = workbookPart.SharedStringTablePart.SharedStringTable.Elements<SharedStringItem>().ElementAt(int.Parse(c.CellValue.InnerText)); cellValue = ssi.Text.Text; Console.WriteLine(cellValue); } } } while (reader.ReadNextSibling()); } } } Console.ReadLine(); } } }
编辑:
使用 Open XML Microsoft Office SDK
从 :https://www.microsoft.com/en-eg/download/details.aspx?id=5124&wa=wsignin1.0
安装 V2(或V2.5)
以下 class 将 excel sheet 转换为带分隔符的 CSV 文件
//reference library
using DocumentFormat.OpenXml.Packaging;
using DocumentFormat.OpenXml.Spreadsheet;
public class OpenXmlExcel
{
public void ExcelToCsv(string source, string target, string delimiter = ";", bool firstRowIsHeade = true)
{
var dt = ReadExcelSheet(source, firstRowIsHeade);
DatatableToCsv(dt, target, delimiter);
}
private void DatatableToCsv(DataTable dt, string fname, string delimiter = ";")
{
using (StreamWriter writer = new StreamWriter(fname))
{
foreach (DataRow row in dt.AsEnumerable())
{
writer.WriteLine(string.Join(delimiter, row.ItemArray.Select(x => x.ToString())) + delimiter);
}
}
}
List<string> Headers = new List<string>();
private DataTable ReadExcelSheet(string fname, bool firstRowIsHeade)
{
DataTable dt = new DataTable();
using (SpreadsheetDocument doc = SpreadsheetDocument.Open(fname, false))
{
//Read the first Sheets
Sheet sheet = doc.WorkbookPart.Workbook.Sheets.GetFirstChild<Sheet>();
Worksheet worksheet = (doc.WorkbookPart.GetPartById(sheet.Id.Value) as WorksheetPart).Worksheet;
IEnumerable<Row> rows = worksheet.GetFirstChild<SheetData>().Descendants<Row>();
foreach (Row row in rows)
{
//Read the first row as header
if (row.RowIndex.Value == 1)
{
var j = 1;
foreach (Cell cell in row.Descendants<Cell>())
{
var colunmName = firstRowIsHeade ? GetCellValue(doc, cell) : "Field" + j++;
Console.WriteLine(colunmName);
Headers.Add(colunmName);
dt.Columns.Add(colunmName);
}
}
else
{
dt.Rows.Add();
int i = 0;
foreach (Cell cell in row.Descendants<Cell>())
{
dt.Rows[dt.Rows.Count - 1][i] = GetCellValue(doc, cell);
i++;
}
}
}
}
return dt;
}
private string GetCellValue(SpreadsheetDocument doc, Cell cell)
{
string value = cell.CellValue.InnerText;
if (cell.DataType != null && cell.DataType.Value == CellValues.SharedString)
{
return doc.WorkbookPart.SharedStringTablePart.SharedStringTable.ChildElements.GetItem(int.Parse(value)).InnerText;
}
return value;
}
}
使用方法:
new OpenXmlExcel().ExcelToCsv("f1.xlsx","f1.csv",";",true);
or
//use default: separator=";" ,first row is header
new OpenXmlExcel().ExcelToCsv("f1.xlsx","f1.csv");