如何使用 EPPlus 将 excel 行解析回类型
How to parse excel rows back to types using EPPlus
EPPlus 有一个方便的 LoadFromCollection<T>
方法可以将我自己类型的数据获取到工作表中。
例如,如果我有 class:
/// <summary>
/// Sample type used in the question to show a collection being written to a worksheet.
/// </summary>
public class Customer
{
/// <summary>Numeric identifier of the customer.</summary>
public int Id { get; set; }
/// <summary>First name of the customer.</summary>
public string Firstname { get; set; }
/// <summary>Surname of the customer.</summary>
public string Surname { get; set; }
/// <summary>Date of birth of the customer.</summary>
public DateTime Birthdate { get; set; }
}
然后是下面的代码:
// Create a package and add a worksheet named "Customers".
// NOTE(review): the package is created without a file or stream here — confirm where Save() is expected to write.
var package = new ExcelPackage();
var sheet = package.Workbook.Worksheets.Add("Customers");
// Two sample customers that will be written to the sheet.
var customers = new List<Customer>{
new Customer{
Id = 1,
Firstname = "John",
Surname = "Doe",
Birthdate = new DateTime(2000, 1, 1)
},
new Customer{
Id = 2,
Firstname = "Mary",
Surname = "Moe",
Birthdate = new DateTime(2001, 2, 2)
}
};
// Write the collection into the sheet, anchored at row 1, column 1.
sheet.Cells[1, 1].LoadFromCollection(customers);
// Persist the package.
package.Save();
...将向名为 "Customers" 的工作表中添加 2 行。
我的问题是是否有一个方便的对应方可以将 excel 中的行(例如在进行了一些修改之后)提取回我的类型。
类似于:
// Hypothetical usage only: SaveToCollection<T> is the counterpart the asker wishes existed;
// the trailing ?? marks it as a question, not real API.
var package = new ExcelPackage(inputStream);
var customers = sheet.Dimension.SaveToCollection<Customer>() ??
我有
...但没有发现如何简单地将行解析为我的类型。
不幸的是,EPPlus 本身没有这样的方法。这是一个棘手的问题,因为如果你真的希望它是通用的,你将不得不使用反射。由于 Excel 将所有数字和日期存储为双精度,因此您必须处理大量的拆箱和类型检查。
这是我一直在做的东西。它是一个借助泛型(Generics)实现的扩展方法。它可以工作,但只经过了有限的测试,因此请务必自行验证。我不能保证它是最优的(至少目前还不是),但就目前的状态而言它已经相当不错。你可以像这样使用它:
// Call the extension method on a worksheet to read its rows back as TestObject instances.
IEnumerable<TestObject> newcollection = worksheet.ConvertSheetToObjects<TestObject>();
扩展方法:
/// <summary>
/// Converts the populated rows of a worksheet into instances of <typeparamref name="T"/>.
/// The first populated row is treated as the header: each header cell whose text equals
/// the name of a public property of <typeparamref name="T"/> maps that column to the property.
/// Every later populated row produces one object.
/// </summary>
/// <typeparam name="T">Type to create for each data row; must have a parameterless constructor.</typeparam>
/// <param name="worksheet">Worksheet to read from.</param>
/// <returns>
/// A lazily evaluated sequence with one object per data row. Empty when the sheet has
/// no data rows below the header.
/// </returns>
/// <exception cref="NotImplementedException">
/// Thrown during enumeration when a numeric cell maps to a property whose type is not
/// int, double or DateTime.
/// </exception>
/// <exception cref="ArgumentException">
/// Thrown during enumeration when a numeric cell below 1 maps to a DateTime property.
/// </exception>
public static IEnumerable<T> ConvertSheetToObjects<T>(this ExcelWorksheet worksheet) where T:new()
{
    //DateTime Conversion: serial 1 maps to 1900-01-01. Serials above 60 are shifted by one
    //extra day because Excel's serial 60 is the non-existent 1900-02-29.
    var convertDateTime = new Func<double, DateTime>(excelDate =>
    {
        if (excelDate < 1)
        {
            throw new ArgumentException("Excel dates cannot be smaller than 1.");
        }

        var dateOfReference = new DateTime(1900, 1, 1);

        if (excelDate > 60d)
        {
            excelDate = excelDate - 2;
        }
        else
        {
            excelDate = excelDate - 1;
        }

        return dateOfReference.AddDays(excelDate);
    });

    //Get the properties of T
    var tprops = typeof(T)
        .GetProperties()
        .ToList();

    //Cells only contains references to cells with actual data, so group them per row
    //and sort by row number so the header is reliably the first group.
    var groups = worksheet.Cells
        .GroupBy(cell => cell.Start.Row)
        .OrderBy(rowGroup => rowGroup.Key)
        .ToList();

    //Nothing to convert without a header row and at least one data row
    if (groups.Count < 2)
    {
        return Enumerable.Empty<T>();
    }

    //Assume first row has the column names. Remember the real worksheet column number
    //(not the position among non-empty cells) so blank cells cannot shift the mapping.
    var columns = groups
        .First()
        .Where(hcell => hcell.Value != null)
        .Select(hcell =>
        {
            var name = hcell.Value.ToString();
            return new
            {
                Column = hcell.Start.Column,
                Property = tprops.FirstOrDefault(p => p.Name == name)
            };
        })
        .Where(o => o.Property != null)
        .ToList();

    //Everything after the header is data
    var collection = groups
        .Skip(1)
        .Select(cg =>
        {
            //Index this row's populated cells by their column number
            var values = cg.ToDictionary(c => c.Start.Column, c => c.Value);
            var tnew = new T();

            columns.ForEach(col =>
            {
                object val;

                //A blank cell leaves the property at its default value
                if (!values.TryGetValue(col.Column, out val) || val == null)
                {
                    return;
                }

                var prop = col.Property;

                //Check the runtime type of this cell rather than assuming every row
                //matches the types found in the first data row.
                if (val is double)
                {
                    //Unbox it
                    var unboxedVal = (double) val;

                    //FAR FROM A COMPLETE LIST!!!
                    if (prop.PropertyType == typeof (Int32))
                    {
                        prop.SetValue(tnew, (int) unboxedVal);
                    }
                    else if (prop.PropertyType == typeof (double))
                    {
                        prop.SetValue(tnew, unboxedVal);
                    }
                    else if (prop.PropertyType == typeof (DateTime))
                    {
                        prop.SetValue(tnew, convertDateTime(unboxedVal));
                    }
                    else
                    {
                        throw new NotImplementedException(String.Format("Type '{0}' not implemented yet!", prop.PropertyType.Name));
                    }
                }
                else
                {
                    //Not a number: assign the value as-is (typically a string)
                    prop.SetValue(tnew, val);
                }
            });

            return tnew;
        });

    //Send it back
    return collection;
}
一个完整的例子:
/// <summary>
/// Round-trip demonstration: writes ten generated <c>TestObject</c> items to an in-memory
/// workbook, reads them back with <c>ConvertSheetToObjects</c> and prints each one.
/// </summary>
[TestMethod]
public void Read_To_Collection_Test()
{
    // Ten sample objects to push through the round trip
    var samples = Enumerable.Range(0, 10)
        .Select(n => new TestObject
        {
            Col1 = n,
            Col2 = n*10,
            Col3 = Path.GetRandomFileName(),
            Col4 = DateTime.Now.AddDays(n)
        })
        .ToList();

    // Serialize the samples (with a header row) into a workbook held in memory
    byte[] workbookBytes;
    using (var writePackage = new ExcelPackage())
    {
        var sheet = writePackage.Workbook.Worksheets.Add("data");
        sheet.Cells.LoadFromCollection(samples, true);
        workbookBytes = writePackage.GetAsByteArray();
    }

    // Re-open the workbook from the bytes and convert the sheet back to objects
    using (var readPackage = new ExcelPackage(new MemoryStream(workbookBytes)))
    {
        var sheet = readPackage.Workbook.Worksheets["data"];
        var restored = sheet.ConvertSheetToObjects<TestObject>().ToList();

        foreach (var item in restored)
        {
            Console.WriteLine("{{ Col1:{0}, Col2: {1}, Col3: \"{2}\", Col4: {3} }}", item.Col1, item.Col2, item.Col3, item.Col4.ToShortDateString());
        }
    }
}
//test object class
/// <summary>
/// Test type for the round-trip example; its property names are the column names
/// the example expects in the worksheet header.
/// </summary>
public class TestObject
{
/// <summary>Integer column.</summary>
public int Col1 { get; set; }
/// <summary>Second integer column.</summary>
public int Col2 { get; set; }
/// <summary>Text column.</summary>
public string Col3 { get; set; }
/// <summary>Date column.</summary>
public DateTime Col4 { get; set; }
}
控制台输出:
{ Col1:0, Col2: 0, Col3: "wrulvxbx.wdv", Col4: 10/30/2015 }
{ Col1:1, Col2: 10, Col3: "wflh34yu.0pu", Col4: 10/31/2015 }
{ Col1:2, Col2: 20, Col3: "ps0f1jg0.121", Col4: 11/1/2015 }
{ Col1:3, Col2: 30, Col3: "skoc2gx1.2xs", Col4: 11/2/2015 }
{ Col1:4, Col2: 40, Col3: "urs3jnbb.ob1", Col4: 11/3/2015 }
{ Col1:5, Col2: 50, Col3: "m4l2fese.4yz", Col4: 11/4/2015 }
{ Col1:6, Col2: 60, Col3: "v3dselpn.rqq", Col4: 11/5/2015 }
{ Col1:7, Col2: 70, Col3: "v2ggbaar.r31", Col4: 11/6/2015 }
{ Col1:8, Col2: 80, Col3: "da4vd35p.msl", Col4: 11/7/2015 }
{ Col1:9, Col2: 90, Col3: "v5dtpuad.2ao", Col4: 11/8/2015 }
受上述启发,我采取了稍微不同的路线。
- 我创建了一个特性(Attribute),并用它将每个属性(Property)映射到一个列。
- 我使用 DTO 类型来定义我期望每一列的内容
- 允许某些列不是必需的
- 使用 EPPlus 转换类型
通过这样做,我可以使用传统的模型验证,并且能够容忍列标题(headers)的更改。
--
用法:
// Open the workbook file and read the resources worksheet into DTOs.
// Both the stream and the ExcelPackage are disposable, so both are wrapped in using.
using (FileStream fileStream = new FileStream(_fileName, FileMode.Open))
using (ExcelPackage excel = new ExcelPackage(fileStream))
{
    var workSheet = excel.Workbook.Worksheets[RESOURCES_WORKSHEET];
    IEnumerable<ExcelResourceDto> newcollection = workSheet.ConvertSheetToObjects<ExcelResourceDto>();
    // ToList() forces the conversion to run while the package is still open
    newcollection.ToList().ForEach(x => Console.WriteLine(x.Title));
}
Dto 映射到 excel
/// <summary>
/// DTO describing one worksheet row; each property is tied to a worksheet column
/// through the <c>Column</c> attribute.
/// </summary>
public class ExcelResourceDto
{
/// <summary>Value read from column 1.</summary>
// NOTE(review): [Required] is presumably the DataAnnotations validation attribute — confirm.
[Column(1)]
[Required]
public string Title { get; set; }
/// <summary>Value read from column 2.</summary>
[Column(2)]
[Required]
public string SearchTags { get; set; }
}
这是特性(Attribute)的定义
/// <summary>
/// Marks a member with the index of the worksheet column it is mapped to.
/// </summary>
[AttributeUsage(AttributeTargets.All)]
public class Column : Attribute
{
    /// <summary>
    /// Creates the attribute for the given column index.
    /// </summary>
    /// <param name="column">Index of the worksheet column.</param>
    public Column(int column)
    {
        this.ColumnIndex = column;
    }

    /// <summary>Index of the worksheet column the decorated member maps to.</summary>
    public int ColumnIndex { get; set; }
}
处理将行映射到 DTO 的扩展类(class)
/// <summary>
/// Extension methods that map worksheet rows onto DTOs whose properties carry a
/// <c>Column</c> attribute.
/// </summary>
public static class EPPLusExtensions
{
    /// <summary>
    /// Builds one <typeparamref name="T"/> per populated row after the first populated row,
    /// filling each property decorated with <c>Column</c> from the cell at that column index.
    /// </summary>
    /// <typeparam name="T">DTO type with a parameterless constructor.</typeparam>
    /// <param name="worksheet">Worksheet to read.</param>
    /// <returns>A lazily evaluated sequence of populated objects.</returns>
    public static IEnumerable<T> ConvertSheetToObjects<T>(this ExcelWorksheet worksheet) where T : new()
    {
        // Pair every property that carries a Column attribute with its column index
        var mappings = (from property in typeof(T).GetProperties()
                        where property.CustomAttributes.Any(data => data.AttributeType == typeof(Column))
                        select new
                        {
                            Target = property,
                            Index = property.GetCustomAttributes<Column>().First().ColumnIndex
                        }).ToList();

        // Distinct row numbers that contain at least one cell, in ascending order
        var rowNumbers = worksheet.Cells
            .Select(cell => cell.Start.Row)
            .Distinct()
            .OrderBy(number => number);

        // The first populated row is skipped (header); every other row becomes an object
        return rowNumbers.Skip(1).Select(rowNumber =>
        {
            var item = new T();

            foreach (var mapping in mappings)
            {
                var cell = worksheet.Cells[rowNumber, mapping.Index];
                var targetType = mapping.Target.PropertyType;
                object converted;

                if (cell.Value == null)
                {
                    // Empty cell: the property is assigned null
                    converted = null;
                }
                else if (targetType == typeof(Int32))
                {
                    converted = cell.GetValue<int>();
                }
                else if (targetType == typeof(double))
                {
                    converted = cell.GetValue<double>();
                }
                else if (targetType == typeof(DateTime))
                {
                    converted = cell.GetValue<DateTime>();
                }
                else
                {
                    // Any other property type is read as a string
                    converted = cell.GetValue<string>();
                }

                mapping.Target.SetValue(item, converted);
            }

            return item;
        });
    }
}
EPPlus 有一个方便的 LoadFromCollection<T>
方法可以将我自己类型的数据获取到工作表中。
例如,如果我有 class:
/// <summary>
/// Sample type used in the question to show a collection being written to a worksheet.
/// </summary>
public class Customer
{
/// <summary>Numeric identifier of the customer.</summary>
public int Id { get; set; }
/// <summary>First name of the customer.</summary>
public string Firstname { get; set; }
/// <summary>Surname of the customer.</summary>
public string Surname { get; set; }
/// <summary>Date of birth of the customer.</summary>
public DateTime Birthdate { get; set; }
}
然后是下面的代码:
// Create a package and add a worksheet named "Customers".
// NOTE(review): the package is created without a file or stream here — confirm where Save() is expected to write.
var package = new ExcelPackage();
var sheet = package.Workbook.Worksheets.Add("Customers");
// Two sample customers that will be written to the sheet.
var customers = new List<Customer>{
new Customer{
Id = 1,
Firstname = "John",
Surname = "Doe",
Birthdate = new DateTime(2000, 1, 1)
},
new Customer{
Id = 2,
Firstname = "Mary",
Surname = "Moe",
Birthdate = new DateTime(2001, 2, 2)
}
};
// Write the collection into the sheet, anchored at row 1, column 1.
sheet.Cells[1, 1].LoadFromCollection(customers);
// Persist the package.
package.Save();
...将向名为 "Customers" 的工作表中添加 2 行。
我的问题是:是否有一个方便的对应方法,可以将 excel 中的行(例如在进行了一些修改之后)提取回我的类型。
类似于:
// Hypothetical usage only: SaveToCollection<T> is the counterpart the asker wishes existed;
// the trailing ?? marks it as a question, not real API.
var package = new ExcelPackage(inputStream);
var customers = sheet.Dimension.SaveToCollection<Customer>() ??
我有
...但没有发现如何简单地将行解析为我的类型。
不幸的是,EPPlus 本身没有这样的方法。这是一个棘手的问题,因为如果你真的希望它是通用的,你将不得不使用反射。由于 Excel 将所有数字和日期存储为双精度,因此您必须处理大量的拆箱和类型检查。
这是我一直在做的东西。它是一个借助泛型(Generics)实现的扩展方法。它可以工作,但只经过了有限的测试,因此请务必自行验证。我不能保证它是最优的(至少目前还不是),但就目前的状态而言它已经相当不错。你可以像这样使用它:
// Call the extension method on a worksheet to read its rows back as TestObject instances.
IEnumerable<TestObject> newcollection = worksheet.ConvertSheetToObjects<TestObject>();
扩展方法:
/// <summary>
/// Converts the populated rows of a worksheet into instances of <typeparamref name="T"/>.
/// The first populated row is treated as the header: each header cell whose text equals
/// the name of a public property of <typeparamref name="T"/> maps that column to the property.
/// Every later populated row produces one object.
/// </summary>
/// <typeparam name="T">Type to create for each data row; must have a parameterless constructor.</typeparam>
/// <param name="worksheet">Worksheet to read from.</param>
/// <returns>
/// A lazily evaluated sequence with one object per data row. Empty when the sheet has
/// no data rows below the header.
/// </returns>
/// <exception cref="NotImplementedException">
/// Thrown during enumeration when a numeric cell maps to a property whose type is not
/// int, double or DateTime.
/// </exception>
/// <exception cref="ArgumentException">
/// Thrown during enumeration when a numeric cell below 1 maps to a DateTime property.
/// </exception>
public static IEnumerable<T> ConvertSheetToObjects<T>(this ExcelWorksheet worksheet) where T:new()
{
    //DateTime Conversion: serial 1 maps to 1900-01-01. Serials above 60 are shifted by one
    //extra day because Excel's serial 60 is the non-existent 1900-02-29.
    var convertDateTime = new Func<double, DateTime>(excelDate =>
    {
        if (excelDate < 1)
        {
            throw new ArgumentException("Excel dates cannot be smaller than 1.");
        }

        var dateOfReference = new DateTime(1900, 1, 1);

        if (excelDate > 60d)
        {
            excelDate = excelDate - 2;
        }
        else
        {
            excelDate = excelDate - 1;
        }

        return dateOfReference.AddDays(excelDate);
    });

    //Get the properties of T
    var tprops = typeof(T)
        .GetProperties()
        .ToList();

    //Cells only contains references to cells with actual data, so group them per row
    //and sort by row number so the header is reliably the first group.
    var groups = worksheet.Cells
        .GroupBy(cell => cell.Start.Row)
        .OrderBy(rowGroup => rowGroup.Key)
        .ToList();

    //Nothing to convert without a header row and at least one data row
    if (groups.Count < 2)
    {
        return Enumerable.Empty<T>();
    }

    //Assume first row has the column names. Remember the real worksheet column number
    //(not the position among non-empty cells) so blank cells cannot shift the mapping.
    var columns = groups
        .First()
        .Where(hcell => hcell.Value != null)
        .Select(hcell =>
        {
            var name = hcell.Value.ToString();
            return new
            {
                Column = hcell.Start.Column,
                Property = tprops.FirstOrDefault(p => p.Name == name)
            };
        })
        .Where(o => o.Property != null)
        .ToList();

    //Everything after the header is data
    var collection = groups
        .Skip(1)
        .Select(cg =>
        {
            //Index this row's populated cells by their column number
            var values = cg.ToDictionary(c => c.Start.Column, c => c.Value);
            var tnew = new T();

            columns.ForEach(col =>
            {
                object val;

                //A blank cell leaves the property at its default value
                if (!values.TryGetValue(col.Column, out val) || val == null)
                {
                    return;
                }

                var prop = col.Property;

                //Check the runtime type of this cell rather than assuming every row
                //matches the types found in the first data row.
                if (val is double)
                {
                    //Unbox it
                    var unboxedVal = (double) val;

                    //FAR FROM A COMPLETE LIST!!!
                    if (prop.PropertyType == typeof (Int32))
                    {
                        prop.SetValue(tnew, (int) unboxedVal);
                    }
                    else if (prop.PropertyType == typeof (double))
                    {
                        prop.SetValue(tnew, unboxedVal);
                    }
                    else if (prop.PropertyType == typeof (DateTime))
                    {
                        prop.SetValue(tnew, convertDateTime(unboxedVal));
                    }
                    else
                    {
                        throw new NotImplementedException(String.Format("Type '{0}' not implemented yet!", prop.PropertyType.Name));
                    }
                }
                else
                {
                    //Not a number: assign the value as-is (typically a string)
                    prop.SetValue(tnew, val);
                }
            });

            return tnew;
        });

    //Send it back
    return collection;
}
一个完整的例子:
/// <summary>
/// Round-trip demonstration: writes ten generated <c>TestObject</c> items to an in-memory
/// workbook, reads them back with <c>ConvertSheetToObjects</c> and prints each one.
/// </summary>
[TestMethod]
public void Read_To_Collection_Test()
{
    // Ten sample objects to push through the round trip
    var samples = Enumerable.Range(0, 10)
        .Select(n => new TestObject
        {
            Col1 = n,
            Col2 = n*10,
            Col3 = Path.GetRandomFileName(),
            Col4 = DateTime.Now.AddDays(n)
        })
        .ToList();

    // Serialize the samples (with a header row) into a workbook held in memory
    byte[] workbookBytes;
    using (var writePackage = new ExcelPackage())
    {
        var sheet = writePackage.Workbook.Worksheets.Add("data");
        sheet.Cells.LoadFromCollection(samples, true);
        workbookBytes = writePackage.GetAsByteArray();
    }

    // Re-open the workbook from the bytes and convert the sheet back to objects
    using (var readPackage = new ExcelPackage(new MemoryStream(workbookBytes)))
    {
        var sheet = readPackage.Workbook.Worksheets["data"];
        var restored = sheet.ConvertSheetToObjects<TestObject>().ToList();

        foreach (var item in restored)
        {
            Console.WriteLine("{{ Col1:{0}, Col2: {1}, Col3: \"{2}\", Col4: {3} }}", item.Col1, item.Col2, item.Col3, item.Col4.ToShortDateString());
        }
    }
}
//test object class
/// <summary>
/// Test type for the round-trip example; its property names are the column names
/// the example expects in the worksheet header.
/// </summary>
public class TestObject
{
/// <summary>Integer column.</summary>
public int Col1 { get; set; }
/// <summary>Second integer column.</summary>
public int Col2 { get; set; }
/// <summary>Text column.</summary>
public string Col3 { get; set; }
/// <summary>Date column.</summary>
public DateTime Col4 { get; set; }
}
控制台输出:
{ Col1:0, Col2: 0, Col3: "wrulvxbx.wdv", Col4: 10/30/2015 }
{ Col1:1, Col2: 10, Col3: "wflh34yu.0pu", Col4: 10/31/2015 }
{ Col1:2, Col2: 20, Col3: "ps0f1jg0.121", Col4: 11/1/2015 }
{ Col1:3, Col2: 30, Col3: "skoc2gx1.2xs", Col4: 11/2/2015 }
{ Col1:4, Col2: 40, Col3: "urs3jnbb.ob1", Col4: 11/3/2015 }
{ Col1:5, Col2: 50, Col3: "m4l2fese.4yz", Col4: 11/4/2015 }
{ Col1:6, Col2: 60, Col3: "v3dselpn.rqq", Col4: 11/5/2015 }
{ Col1:7, Col2: 70, Col3: "v2ggbaar.r31", Col4: 11/6/2015 }
{ Col1:8, Col2: 80, Col3: "da4vd35p.msl", Col4: 11/7/2015 }
{ Col1:9, Col2: 90, Col3: "v5dtpuad.2ao", Col4: 11/8/2015 }
受上述启发,我采取了稍微不同的路线。
- 我创建了一个特性(Attribute),并用它将每个属性(Property)映射到一个列。
- 我使用 DTO 类型来定义我期望每一列的内容
- 允许某些列不是必需的
- 使用 EPPlus 转换类型
通过这样做,我可以使用传统的模型验证,并且能够容忍列标题(headers)的更改。
--
用法:
// Open the workbook file and read the resources worksheet into DTOs.
// Both the stream and the ExcelPackage are disposable, so both are wrapped in using.
using (FileStream fileStream = new FileStream(_fileName, FileMode.Open))
using (ExcelPackage excel = new ExcelPackage(fileStream))
{
    var workSheet = excel.Workbook.Worksheets[RESOURCES_WORKSHEET];
    IEnumerable<ExcelResourceDto> newcollection = workSheet.ConvertSheetToObjects<ExcelResourceDto>();
    // ToList() forces the conversion to run while the package is still open
    newcollection.ToList().ForEach(x => Console.WriteLine(x.Title));
}
Dto 映射到 excel
/// <summary>
/// DTO describing one worksheet row; each property is tied to a worksheet column
/// through the <c>Column</c> attribute.
/// </summary>
public class ExcelResourceDto
{
/// <summary>Value read from column 1.</summary>
// NOTE(review): [Required] is presumably the DataAnnotations validation attribute — confirm.
[Column(1)]
[Required]
public string Title { get; set; }
/// <summary>Value read from column 2.</summary>
[Column(2)]
[Required]
public string SearchTags { get; set; }
}
这是特性(Attribute)的定义
/// <summary>
/// Marks a member with the index of the worksheet column it is mapped to.
/// </summary>
[AttributeUsage(AttributeTargets.All)]
public class Column : Attribute
{
    /// <summary>
    /// Creates the attribute for the given column index.
    /// </summary>
    /// <param name="column">Index of the worksheet column.</param>
    public Column(int column)
    {
        this.ColumnIndex = column;
    }

    /// <summary>Index of the worksheet column the decorated member maps to.</summary>
    public int ColumnIndex { get; set; }
}
处理将行映射到 DTO 的扩展类(class)
/// <summary>
/// Extension methods that map worksheet rows onto DTOs whose properties carry a
/// <c>Column</c> attribute.
/// </summary>
public static class EPPLusExtensions
{
    /// <summary>
    /// Builds one <typeparamref name="T"/> per populated row after the first populated row,
    /// filling each property decorated with <c>Column</c> from the cell at that column index.
    /// </summary>
    /// <typeparam name="T">DTO type with a parameterless constructor.</typeparam>
    /// <param name="worksheet">Worksheet to read.</param>
    /// <returns>A lazily evaluated sequence of populated objects.</returns>
    public static IEnumerable<T> ConvertSheetToObjects<T>(this ExcelWorksheet worksheet) where T : new()
    {
        // Pair every property that carries a Column attribute with its column index
        var mappings = (from property in typeof(T).GetProperties()
                        where property.CustomAttributes.Any(data => data.AttributeType == typeof(Column))
                        select new
                        {
                            Target = property,
                            Index = property.GetCustomAttributes<Column>().First().ColumnIndex
                        }).ToList();

        // Distinct row numbers that contain at least one cell, in ascending order
        var rowNumbers = worksheet.Cells
            .Select(cell => cell.Start.Row)
            .Distinct()
            .OrderBy(number => number);

        // The first populated row is skipped (header); every other row becomes an object
        return rowNumbers.Skip(1).Select(rowNumber =>
        {
            var item = new T();

            foreach (var mapping in mappings)
            {
                var cell = worksheet.Cells[rowNumber, mapping.Index];
                var targetType = mapping.Target.PropertyType;
                object converted;

                if (cell.Value == null)
                {
                    // Empty cell: the property is assigned null
                    converted = null;
                }
                else if (targetType == typeof(Int32))
                {
                    converted = cell.GetValue<int>();
                }
                else if (targetType == typeof(double))
                {
                    converted = cell.GetValue<double>();
                }
                else if (targetType == typeof(DateTime))
                {
                    converted = cell.GetValue<DateTime>();
                }
                else
                {
                    // Any other property type is read as a string
                    converted = cell.GetValue<string>();
                }

                mapping.Target.SetValue(item, converted);
            }

            return item;
        });
    }
}