使用 OLEDB 从 Excel 导入时数据被截断,有什么替代方法?
Data truncated while importing from Excel using OLEDB, what are the alternatives?
我有一个 WPF 应用程序,它具有从 Excel 读取数据的功能。
我正在使用 OLEDB 执行此操作并且效果很好,直到我发现列有 255 个限制并且数据将被截断,除非前八行中不存在大于 255 个字符的数据。此问题的解决方法是更新注册表,这意味着更新所有用户的注册表。所以我不想采用这种方法。
OLEDB代码:
string strSQL = "SELECT * FROM [Sheet1$]";
OleDbCommand cmd = new OleDbCommand(strSQL, conn);
DataSet ds1 = new DataSet();
OleDbDataAdapter da = new OleDbDataAdapter(cmd);
da.Fill(ds1);
作为替代方案,我尝试了 Interop.Excel 。但是,OLEDB 似乎更慢。使用 Interop.Excel.
加载需要 2 秒的 Excel sheet 大约需要 15 秒
System.Data.DataTable tempTable = new System.Data.DataTable();
tempTable.TableName = "ResultData";
Excel.Application app = new Excel.Application();
Excel.Workbook book = null;
Excel.Range range = null;
try
{
app.Visible = false;
app.ScreenUpdating = false;
app.DisplayAlerts = false;
book = app.Workbooks.Open(inputFilePath, Missing.Value, Missing.Value, Missing.Value
, Missing.Value, Missing.Value, Missing.Value, Missing.Value
, Missing.Value, Missing.Value, Missing.Value, Missing.Value
, Missing.Value, Missing.Value, Missing.Value);
foreach (Excel.Worksheet sheet in book.Worksheets)
{
Logger.LogException("Values for Sheet " + sheet.Index, System.Reflection.MethodBase.GetCurrentMethod().ToString());
// get a range to work with
range = sheet.get_Range("A1", Missing.Value);
// get the end of values to the right (will stop at the first empty cell)
range = range.get_End(Excel.XlDirection.xlToRight);
// get the end of values toward the bottom, looking in the last column (will stop at first empty cell)
range = range.get_End(Excel.XlDirection.xlDown);
// get the address of the bottom, right cell
string downAddress = range.get_Address(
false, false, Excel.XlReferenceStyle.xlA1,
Type.Missing, Type.Missing);
// Get the range, then values from a1
range = sheet.get_Range("A1", downAddress);
object[,] values = (object[,])range.Value2;
//Get the Column Names
for (int k = 0; k < values.GetLength(1); )
{
tempTable.Columns.Add(Convert.ToString(values[1, ++k]).Trim());
}
for (int i = 2; i <= values.GetLength(0); i++)//first row contains the column names, so start from the next row.
{
System.Data.DataRow dr = tempTable.NewRow();
for (int j = 1; j <= values.GetLength(1); j++)//columns
{
dr[j - 1] = values[i, j];
}
tempTable.Rows.Add(dr);
}
}
有没有和OLEDB一样快的替代方案?
Excelsheet中的列和行不固定。
如果您正在使用 xlsx 文件,我建议切换到 Open XML SDK for Office,它的性能比 OLEDB 或 Interop 连接方法好得多。
不过,有些人认为SDK难用,所以有3rd party packages将SDK包装成更友好的界面,但我个人觉得SDK不太难做。
谢谢你 response.Here 是我使用的最终代码:
参考 link : http://www.prowareness.com/blog/reading-data-from-excel-document-using-openxml/
public static DataSet ExtractExcelSheetValuesToDataTable(string xlsxFilePath, string sheetName)
{
DataTable dt = new DataTable();
DataSet ds = new DataSet();
using (SpreadsheetDocument myWorkbook = SpreadsheetDocument.Open(xlsxFilePath, true))
{
//Access the main Workbook part, which contains data
WorkbookPart workbookPart = myWorkbook.WorkbookPart;
WorksheetPart worksheetPart = null;
if (!string.IsNullOrEmpty(sheetName))
{
Sheet ss = workbookPart.Workbook.Descendants<Sheet>().Where(s => s.Name == sheetName).SingleOrDefault<Sheet>();
worksheetPart = (WorksheetPart)workbookPart.GetPartById(ss.Id);
}
else
{
worksheetPart = workbookPart.WorksheetParts.FirstOrDefault();
}
SharedStringTablePart stringTablePart = workbookPart.SharedStringTablePart;
if (worksheetPart != null)
{
Row lastRow = worksheetPart.Worksheet.Descendants<Row>().LastOrDefault();
Row firstRow = worksheetPart.Worksheet.Descendants<Row>().FirstOrDefault();
if (firstRow != null)
{
foreach (Cell c in firstRow.ChildElements)
{
string value = GetValue(c, stringTablePart);
dt.Columns.Add(value);
}
}
if (lastRow != null)
{
for (int i = 2; i <= lastRow.RowIndex; i++)
{
DataRow dr = dt.NewRow();
bool empty = true;
Row row = worksheetPart.Worksheet.Descendants<Row>().Where(r => i == r.RowIndex).FirstOrDefault();
int j = 0;
if (row != null)
{
foreach (Cell c in row.Descendants<Cell>())
{
int? colIndex = GetColumnIndex(((DocumentFormat.OpenXml.Spreadsheet.CellType)(c)).CellReference);
if (colIndex > j)
{
dr[j] = "";
j++;
}
//Get cell value
string value = GetValue(c, stringTablePart);
//if (!string.IsNullOrEmpty(value))
// empty = false;
dr[j] = value;
j++;
if (j == dt.Columns.Count)
break;
}
//foreach (Cell c in row.ChildElements)
//{
// //Get cell value
// string value = GetValue(c, stringTablePart);
// //if (!string.IsNullOrEmpty(value))
// // empty = false;
// dr[j] = value;
// j++;
// if (j == dt.Columns.Count)
// break;
//}
//if (empty)
// break;
dt.Rows.Add(dr);
}
}
}
}
}
ds.Tables.Add(dt);
return ds;
}
public static string GetValue(Cell cell, SharedStringTablePart stringTablePart)
{
if (cell.ChildElements.Count == 0) return null;
//get cell value
string value = cell.ElementAt(0).InnerText;//CellValue.InnerText;
//Look up real value from shared string table
if ((cell.DataType != null) && (cell.DataType == CellValues.SharedString))
value = stringTablePart.SharedStringTable.ChildElements[Int32.Parse(value)].InnerText;
return value;
}
private static int? GetColumnIndex(string cellReference)
{
if (string.IsNullOrEmpty(cellReference))
{
return null;
}
//remove digits
string columnReference = Regex.Replace(cellReference.ToUpper(), @"[\d]", string.Empty);
int columnNumber = -1;
int mulitplier = 1;
//working from the end of the letters take the ASCII code less 64 (so A = 1, B =2...etc)
//then multiply that number by our multiplier (which starts at 1)
//multiply our multiplier by 26 as there are 26 letters
foreach (char c in columnReference.ToCharArray().Reverse())
{
columnNumber += mulitplier * ((int)c - 64);
mulitplier = mulitplier * 26;
}
//the result is zero based so return columnnumber + 1 for a 1 based answer
//this will match Excel's COLUMN function
return columnNumber;
}
我有一个 WPF 应用程序,它具有从 Excel 读取数据的功能。 我正在使用 OLEDB 执行此操作并且效果很好,直到我发现列有 255 个限制并且数据将被截断,除非前八行中不存在大于 255 个字符的数据。此问题的解决方法是更新注册表,这意味着更新所有用户的注册表。所以我不想采用这种方法。
OLEDB代码:
string strSQL = "SELECT * FROM [Sheet1$]";
OleDbCommand cmd = new OleDbCommand(strSQL, conn);
DataSet ds1 = new DataSet();
OleDbDataAdapter da = new OleDbDataAdapter(cmd);
da.Fill(ds1);
作为替代方案,我尝试了 Interop.Excel 。但是,OLEDB 似乎更慢。使用 Interop.Excel.
加载需要 2 秒的 Excel sheet 大约需要 15 秒System.Data.DataTable tempTable = new System.Data.DataTable();
tempTable.TableName = "ResultData";
Excel.Application app = new Excel.Application();
Excel.Workbook book = null;
Excel.Range range = null;
try
{
app.Visible = false;
app.ScreenUpdating = false;
app.DisplayAlerts = false;
book = app.Workbooks.Open(inputFilePath, Missing.Value, Missing.Value, Missing.Value
, Missing.Value, Missing.Value, Missing.Value, Missing.Value
, Missing.Value, Missing.Value, Missing.Value, Missing.Value
, Missing.Value, Missing.Value, Missing.Value);
foreach (Excel.Worksheet sheet in book.Worksheets)
{
Logger.LogException("Values for Sheet " + sheet.Index, System.Reflection.MethodBase.GetCurrentMethod().ToString());
// get a range to work with
range = sheet.get_Range("A1", Missing.Value);
// get the end of values to the right (will stop at the first empty cell)
range = range.get_End(Excel.XlDirection.xlToRight);
// get the end of values toward the bottom, looking in the last column (will stop at first empty cell)
range = range.get_End(Excel.XlDirection.xlDown);
// get the address of the bottom, right cell
string downAddress = range.get_Address(
false, false, Excel.XlReferenceStyle.xlA1,
Type.Missing, Type.Missing);
// Get the range, then values from a1
range = sheet.get_Range("A1", downAddress);
object[,] values = (object[,])range.Value2;
//Get the Column Names
for (int k = 0; k < values.GetLength(1); )
{
tempTable.Columns.Add(Convert.ToString(values[1, ++k]).Trim());
}
for (int i = 2; i <= values.GetLength(0); i++)//first row contains the column names, so start from the next row.
{
System.Data.DataRow dr = tempTable.NewRow();
for (int j = 1; j <= values.GetLength(1); j++)//columns
{
dr[j - 1] = values[i, j];
}
tempTable.Rows.Add(dr);
}
}
有没有和OLEDB一样快的替代方案?
Excelsheet中的列和行不固定。
如果您正在使用 xlsx 文件,我建议切换到 Open XML SDK for Office,它的性能比 OLEDB 或 Interop 连接方法好得多。
不过,有些人认为SDK难用,所以有3rd party packages将SDK包装成更友好的界面,但我个人觉得SDK不太难做。
谢谢你 response.Here 是我使用的最终代码: 参考 link : http://www.prowareness.com/blog/reading-data-from-excel-document-using-openxml/
public static DataSet ExtractExcelSheetValuesToDataTable(string xlsxFilePath, string sheetName)
{
DataTable dt = new DataTable();
DataSet ds = new DataSet();
using (SpreadsheetDocument myWorkbook = SpreadsheetDocument.Open(xlsxFilePath, true))
{
//Access the main Workbook part, which contains data
WorkbookPart workbookPart = myWorkbook.WorkbookPart;
WorksheetPart worksheetPart = null;
if (!string.IsNullOrEmpty(sheetName))
{
Sheet ss = workbookPart.Workbook.Descendants<Sheet>().Where(s => s.Name == sheetName).SingleOrDefault<Sheet>();
worksheetPart = (WorksheetPart)workbookPart.GetPartById(ss.Id);
}
else
{
worksheetPart = workbookPart.WorksheetParts.FirstOrDefault();
}
SharedStringTablePart stringTablePart = workbookPart.SharedStringTablePart;
if (worksheetPart != null)
{
Row lastRow = worksheetPart.Worksheet.Descendants<Row>().LastOrDefault();
Row firstRow = worksheetPart.Worksheet.Descendants<Row>().FirstOrDefault();
if (firstRow != null)
{
foreach (Cell c in firstRow.ChildElements)
{
string value = GetValue(c, stringTablePart);
dt.Columns.Add(value);
}
}
if (lastRow != null)
{
for (int i = 2; i <= lastRow.RowIndex; i++)
{
DataRow dr = dt.NewRow();
bool empty = true;
Row row = worksheetPart.Worksheet.Descendants<Row>().Where(r => i == r.RowIndex).FirstOrDefault();
int j = 0;
if (row != null)
{
foreach (Cell c in row.Descendants<Cell>())
{
int? colIndex = GetColumnIndex(((DocumentFormat.OpenXml.Spreadsheet.CellType)(c)).CellReference);
if (colIndex > j)
{
dr[j] = "";
j++;
}
//Get cell value
string value = GetValue(c, stringTablePart);
//if (!string.IsNullOrEmpty(value))
// empty = false;
dr[j] = value;
j++;
if (j == dt.Columns.Count)
break;
}
//foreach (Cell c in row.ChildElements)
//{
// //Get cell value
// string value = GetValue(c, stringTablePart);
// //if (!string.IsNullOrEmpty(value))
// // empty = false;
// dr[j] = value;
// j++;
// if (j == dt.Columns.Count)
// break;
//}
//if (empty)
// break;
dt.Rows.Add(dr);
}
}
}
}
}
ds.Tables.Add(dt);
return ds;
}
public static string GetValue(Cell cell, SharedStringTablePart stringTablePart)
{
if (cell.ChildElements.Count == 0) return null;
//get cell value
string value = cell.ElementAt(0).InnerText;//CellValue.InnerText;
//Look up real value from shared string table
if ((cell.DataType != null) && (cell.DataType == CellValues.SharedString))
value = stringTablePart.SharedStringTable.ChildElements[Int32.Parse(value)].InnerText;
return value;
}
private static int? GetColumnIndex(string cellReference)
{
if (string.IsNullOrEmpty(cellReference))
{
return null;
}
//remove digits
string columnReference = Regex.Replace(cellReference.ToUpper(), @"[\d]", string.Empty);
int columnNumber = -1;
int mulitplier = 1;
//working from the end of the letters take the ASCII code less 64 (so A = 1, B =2...etc)
//then multiply that number by our multiplier (which starts at 1)
//multiply our multiplier by 26 as there are 26 letters
foreach (char c in columnReference.ToCharArray().Reverse())
{
columnNumber += mulitplier * ((int)c - 64);
mulitplier = mulitplier * 26;
}
//the result is zero based so return columnnumber + 1 for a 1 based answer
//this will match Excel's COLUMN function
return columnNumber;
}