使用 Epplus 导入 Excel 电子表格时的代码优化
Code Optimization when importing Excel Spreadsheet with Epplus
我需要从 Excel 电子表格中为一个活动(campaign)导入超过 35,000 个代码。下面的代码完全按照我需要的方式工作,但运行一次这个过程可能需要 20-30 分钟才能完成。
如果有任何办法可以加快这一过程,我将不胜感激。我算不上高级程序员,但我知道这个问题很可能可以用更高级的编码技术来解决。如果有人能为我指明正确的方向,我将不胜感激。
活动和主代码表的模型。
/// <summary>
/// A campaign together with the master codes that belong to it.
/// </summary>
public class Campaign
{
/// <summary>Integer identifier of the campaign.</summary>
public int CampaignId { get; set; }
/// <summary>Name of the campaign.</summary>
public string Name { get; set; }
/// <summary>Master codes attached to this campaign.</summary>
// NOTE(review): declared virtual, presumably so the ORM can lazy-load the collection - confirm.
public virtual List<MasterCode> MasterCodes { get; set; }
}
/// <summary>
/// A single code that belongs to a <see cref="Campaign"/>.
/// </summary>
public class MasterCode
{
/// <summary>Integer identifier of the master code.</summary>
public int MasterCodeId { get; set; }
/// <summary>Identifier of the campaign this code belongs to.</summary>
// NOTE(review): presumably the foreign key paired with the Campaign navigation property below - confirm.
public int CampaignId { get; set; }
/// <summary>The campaign this code belongs to.</summary>
public virtual Campaign Campaign { get; set; }
/// <summary>The code text itself.</summary>
public string Code { get; set; }
/// <summary>Flag recording whether the code has been used.</summary>
public bool Used { get; set; }
/// <summary>Timestamp recorded when the code was submitted; the import code in this file sets it to the import time.</summary>
public DateTime SubmittedOn { get; set; }
}
下面是视图中的代码。表单字段没有绑定到模型,因为这段代码位于一个弹出窗口中,而该窗口所在的视图已经绑定到了另一个模型。
@* Upload form for a campaign's master-code file: posts multipart/form-data
   to the UploadMasterCodes action of the Campaigns controller. *@
@* NOTE(review): no submit control is rendered inside this block; presumably the
   surrounding popup supplies one - confirm. *@
@using (Html.BeginForm("UploadMasterCodes", "Campaigns", FormMethod.Post, new { enctype = "multipart/form-data" }))
{
@Html.AntiForgeryToken()
@* Carries the campaign id of the page model along with the posted file. *@
@Html.Hidden("CampaignId", Model.CampaignId)
<div class="form-group">
@Html.Label("Master Code File")
@* A text-box helper whose type attribute is overridden to "file"; the field is named "File". *@
@Html.TextBox("File", null, new { type = "file", @class = "form-control" })
</div>
}
控制器代码
/// <summary>
/// Receives the posted master-code upload form, runs the import and redirects
/// back to the campaign's Details page.
/// </summary>
/// <param name="model">Posted form data: the uploaded file and the target campaign id.</param>
/// <returns>A redirect to the Details action for <c>model.CampaignId</c>.</returns>
[HttpPost]
[ValidateAntiForgeryToken]
public ActionResult UploadMasterCodes(CampaignMasterCodeUploadViewModel model)
{
    if (ModelState.IsValid)
    {
        // The import returns a textual report (per-code errors plus a summary line).
        TempData["SuccessMessage"] = CampaignMethods.uploadMasterCodes(model.File, model.CampaignId);
    }
    else
    {
        // An invalid post used to redirect without any feedback; tell the user why nothing happened.
        TempData["ErrorMessage"] = "Master code upload form error. Please refresh the page and try again.";
    }

    return RedirectToAction("Details", new { id = model.CampaignId });
}
最后是执行上传到数据库的代码。我正在构建一个字符串来跟踪发生的任何错误,因为我需要了解所有这些错误。
// Number of new codes sent to the database per SaveChanges call.
private const int MasterCodeBatchSize = 1000;

/// <summary>
/// Imports the codes found in column 1 of the first worksheet of an uploaded Excel
/// workbook into the given campaign and returns a textual report.
/// </summary>
/// <param name="file">Uploaded workbook. A null or empty upload produces an empty report.</param>
/// <param name="campaignId">Id of the campaign that receives the codes.</param>
/// <returns>
/// "Campaign {id} not found" when the campaign does not exist; otherwise one
/// "&lt;br /&gt;"-terminated line per rejected row followed by a summary line with
/// the row, success and error counts.
/// </returns>
public static string uploadMasterCodes(HttpPostedFileBase file, int campaignId)
{
    using (ApplicationDbContext _context = new ApplicationDbContext())
    {
        var campaign = _context.Campaigns.Find(campaignId);
        if (campaign == null)
        {
            return String.Format("Campaign {0} not found", campaignId.ToString());
        }

        if ((file == null) || (file.ContentLength <= 0) || string.IsNullOrEmpty(file.FileName))
        {
            return string.Empty;
        }

        // BCL types are fully qualified because this file's using directives are not visible here.
        var response = new System.Text.StringBuilder();
        var submitDate = DateTime.Now;
        int successCount = 0;
        int errorCount = 0;

        // Load the campaign's existing codes once into a set. The previous code lazy-loaded
        // the whole collection and scanned it linearly for every row (quadratic in the row count).
        var knownCodes = new System.Collections.Generic.HashSet<string>(
            _context.MasterCodes
                .Where(m => m.CampaignId == campaignId)
                .Select(m => m.Code));

        var pending = new System.Collections.Generic.List<MasterCode>(MasterCodeBatchSize);

        // The upload stream is handed to EPPlus untouched; it used to be read into an
        // unused buffer first, which only wasted memory and moved the stream position.
        using (var package = new ExcelPackage(file.InputStream))
        {
            var workSheet = package.Workbook.Worksheets.First();

            // Dimension is null when the worksheet contains no cells at all.
            var noOfRow = workSheet.Dimension == null ? 0 : workSheet.Dimension.End.Row;

            for (int i = 1; i <= noOfRow; i++)
            {
                // A blank cell has a null Value; report the row instead of crashing mid-import.
                object cellValue = workSheet.Cells[i, 1].Value;
                string code = cellValue == null ? null : cellValue.ToString();
                if (string.IsNullOrEmpty(code))
                {
                    response.AppendFormat("Row: {0} has no code <br />", i);
                    errorCount++;
                    continue;
                }

                // Add returns false for codes already in the database or seen earlier in this file.
                if (!knownCodes.Add(code))
                {
                    response.AppendFormat("Code: {0} already exists <br />", code);
                    errorCount++;
                    continue;
                }

                pending.Add(new MasterCode
                {
                    Code = code,
                    CampaignId = campaignId,
                    SubmittedOn = submitDate
                });

                if (pending.Count >= MasterCodeBatchSize)
                {
                    SaveMasterCodeBatch(_context, pending, response, ref successCount, ref errorCount);
                    pending.Clear();
                }
            }

            // Flush whatever is left after the last full batch.
            SaveMasterCodeBatch(_context, pending, response, ref successCount, ref errorCount);

            response.AppendFormat("Number of codes:{0} / Success: {1} / Errors {2}", noOfRow, successCount, errorCount);
        }

        return response.ToString();
    }
}

// Saves one batch with a single SaveChanges; if that fails, retries the batch row by row
// so the report can still name each failing code.
private static void SaveMasterCodeBatch(
    ApplicationDbContext context,
    System.Collections.Generic.List<MasterCode> batch,
    System.Text.StringBuilder response,
    ref int successCount,
    ref int errorCount)
{
    if (batch.Count == 0)
    {
        return;
    }

    try
    {
        context.MasterCodes.AddRange(batch);
        context.SaveChanges();
        successCount += batch.Count;
        return;
    }
    catch (Exception)
    {
        // Not swallowed: the rows are retried one at a time below and each failure is reported.
        // Removing entities that are still in the Added state detaches them from the context.
        context.MasterCodes.RemoveRange(batch);
    }

    foreach (var masterCode in batch)
    {
        try
        {
            context.MasterCodes.Add(masterCode);
            context.SaveChanges();
            successCount++;
        }
        catch (Exception ex)
        {
            // Detach the failed entity; left in the context it would make every later save fail too.
            context.MasterCodes.Remove(masterCode);
            response.AppendFormat("Code: {0} failed with error: {1} <br />", masterCode.Code, ex.Message);
            errorCount++;
        }
    }
}
每当我需要读取 Excel 文件时,我都会使用这个 OLEDB 提供程序:
Provider=Microsoft.ACE.OLEDB.12.0;Data Source={FilePath};Extended Properties='Excel 12.0 Xml;HDR=Yes;IMEX=1';
(可以找到有关此主题的更多信息 here and here)
然后您可以执行 bulk insert 以实现最快的插入。
关于如何在内存中执行此操作,请参阅 this answer。在您的情况下,您首先需要将文件保存到服务器上的某个位置,然后再进行处理并向用户报告进度。
根据@grmbl 的建议和大量阅读,我能够使用 SQLBulkCopy 解决速度问题。我没有使用 OLEDB 提供程序,而是将文件复制到服务器并创建了一个数据表。然后我将其用于批量复制。以下代码将 335,000 条记录的运行时间缩短到 10 秒左右。
为了不贴出一大堵代码墙,我删除了所有错误检查代码。
控制器代码。
/// <summary>
/// Receives the posted master-code upload form, stores the uploaded file on the
/// server, bulk-copies its codes into the database and redirects back to the
/// campaign's Details page.
/// </summary>
/// <param name="model">Posted form data: the uploaded file and the target campaign id.</param>
/// <returns>A redirect to the Details action for <c>model.CampaignId</c>.</returns>
[HttpPost]
[ValidateAntiForgeryToken]
public ActionResult UploadMasterCodes(CampaignMasterCodeUploadViewModel model)
{
    // Reject invalid posts up front so the import path below reads straight through.
    if (!ModelState.IsValid)
    {
        TempData["ErrorMessage"] = "Master code upload form error. Please refresh the page and try again.";
        return RedirectToAction("Details", new { id = model.CampaignId });
    }

    // Only the file-name part of the client-supplied name is kept.
    var uploadedName = Path.GetFileName(model.File.FileName);
    var savedPath = FileMethods.UploadFile(model.File, Server.MapPath("~/App_Data/Bsa4/"), uploadedName);

    var codeTable = CampaignMethods.ProcessMasterCodeCsvToDatatable(savedPath, model.CampaignId);
    var feedback = CampaignMethods.ProcessMastercodeSqlBulkCopy(codeTable);

    // The bulk-copy helper returns either its success text or the exception message.
    TempData["SuccessMessage"] = feedback;
    return RedirectToAction("Details", new { id = model.CampaignId });
}
处理代码。
/// <summary>
/// Reads a comma-separated text file and builds a <see cref="DataTable"/> with one row
/// per non-empty code, ready to be bulk-copied into the MasterCodes table.
/// </summary>
/// <param name="file">Path of the file to read. Only the first comma-separated field of each line is used.</param>
/// <param name="campaignId">Id of the campaign the codes belong to.</param>
/// <returns>
/// A table with the columns CampaignId, Code, Used (always false) and SubmittedOn
/// (the time this method ran).
/// </returns>
/// <exception cref="ArgumentException">No campaign with <paramref name="campaignId"/> exists.</exception>
public static DataTable ProcessMasterCodeCsvToDatatable(string file, int campaignId)
{
    using (ApplicationDbContext _context = new ApplicationDbContext())
    {
        var campaign = _context.Campaigns.Find(campaignId);
        if (campaign == null)
        {
            // Fail with a clear message before reading the file, instead of a
            // NullReferenceException on the first data row.
            throw new ArgumentException(String.Format("Campaign {0} not found", campaignId), "campaignId");
        }

        // Typed columns keep the values as int/bool/DateTime. Untyped columns default to
        // string, which stored the date as culture-formatted text.
        DataTable dt = new DataTable();
        dt.Columns.Add("CampaignId", typeof(int));
        dt.Columns.Add("Code", typeof(string));
        dt.Columns.Add("Used", typeof(bool));
        dt.Columns.Add("SubmittedOn", typeof(DateTime));

        var submitDate = DateTime.Now;
        using (StreamReader sr = File.OpenText(file))
        {
            string line;
            while ((line = sr.ReadLine()) != null)
            {
                // Split always returns at least one element, so index 0 is safe.
                string code = line.Split(',')[0];
                if (!string.IsNullOrEmpty(code))
                {
                    dt.Rows.Add(campaign.CampaignId, code, false, submitDate);
                }
            }
        }

        return dt;
    }
}
/// <summary>
/// Bulk-copies the rows of <paramref name="dt"/> into the MasterCodes table using the
/// "DefaultConnection" connection string.
/// </summary>
/// <param name="dt">
/// Table containing columns named CampaignId, Code, Used and SubmittedOn.
/// </param>
/// <returns>
/// "Master Code Upload completed successfully" on success; otherwise the message of the
/// exception thrown while writing to the server.
/// </returns>
public static String ProcessMastercodeSqlBulkCopy(DataTable dt)
{
    string feedback = string.Empty;
    using (SqlConnection cn = new SqlConnection(ConfigurationManager.ConnectionStrings["DefaultConnection"].ConnectionString))
    {
        cn.Open();
        using (SqlBulkCopy copy = new SqlBulkCopy(cn))
        {
            copy.DestinationTableName = "MasterCodes";

            // Map by column name. The previous ordinal mappings (0, 2, 3, 4) did not match a
            // table laid out as CampaignId, Code, Used, SubmittedOn: ordinal 4 does not exist
            // and Code/Used were shifted by one column.
            copy.ColumnMappings.Add("CampaignId", "CampaignId");
            copy.ColumnMappings.Add("Code", "Code");
            copy.ColumnMappings.Add("Used", "Used");
            copy.ColumnMappings.Add("SubmittedOn", "SubmittedOn");

            try
            {
                // Send it to the server
                copy.WriteToServer(dt);
                feedback = "Master Code Upload completed successfully";
            }
            catch (Exception ex)
            {
                // The caller shows this text to the user, so the failure is reported, not hidden.
                feedback = ex.Message;
            }
        }
    }

    return feedback;
}
我需要从 Excel 电子表格中为一个活动(campaign)导入超过 35,000 个代码。下面的代码完全按照我需要的方式工作,但运行一次这个过程可能需要 20-30 分钟才能完成。
如果有任何办法可以加快这一过程,我将不胜感激。我算不上高级程序员,但我知道这个问题很可能可以用更高级的编码技术来解决。如果有人能为我指明正确的方向,我将不胜感激。
活动和主代码表的模型。
/// <summary>
/// A campaign together with the master codes that belong to it.
/// </summary>
public class Campaign
{
/// <summary>Integer identifier of the campaign.</summary>
public int CampaignId { get; set; }
/// <summary>Name of the campaign.</summary>
public string Name { get; set; }
/// <summary>Master codes attached to this campaign.</summary>
// NOTE(review): declared virtual, presumably so the ORM can lazy-load the collection - confirm.
public virtual List<MasterCode> MasterCodes { get; set; }
}
/// <summary>
/// A single code that belongs to a <see cref="Campaign"/>.
/// </summary>
public class MasterCode
{
/// <summary>Integer identifier of the master code.</summary>
public int MasterCodeId { get; set; }
/// <summary>Identifier of the campaign this code belongs to.</summary>
// NOTE(review): presumably the foreign key paired with the Campaign navigation property below - confirm.
public int CampaignId { get; set; }
/// <summary>The campaign this code belongs to.</summary>
public virtual Campaign Campaign { get; set; }
/// <summary>The code text itself.</summary>
public string Code { get; set; }
/// <summary>Flag recording whether the code has been used.</summary>
public bool Used { get; set; }
/// <summary>Timestamp recorded when the code was submitted; the import code in this file sets it to the import time.</summary>
public DateTime SubmittedOn { get; set; }
}
下面是视图中的代码。表单字段没有绑定到模型,因为这段代码位于一个弹出窗口中,而该窗口所在的视图已经绑定到了另一个模型。
@* Upload form for a campaign's master-code file: posts multipart/form-data
   to the UploadMasterCodes action of the Campaigns controller. *@
@* NOTE(review): no submit control is rendered inside this block; presumably the
   surrounding popup supplies one - confirm. *@
@using (Html.BeginForm("UploadMasterCodes", "Campaigns", FormMethod.Post, new { enctype = "multipart/form-data" }))
{
@Html.AntiForgeryToken()
@* Carries the campaign id of the page model along with the posted file. *@
@Html.Hidden("CampaignId", Model.CampaignId)
<div class="form-group">
@Html.Label("Master Code File")
@* A text-box helper whose type attribute is overridden to "file"; the field is named "File". *@
@Html.TextBox("File", null, new { type = "file", @class = "form-control" })
</div>
}
控制器代码
/// <summary>
/// Receives the posted master-code upload form, runs the import and redirects
/// back to the campaign's Details page.
/// </summary>
/// <param name="model">Posted form data: the uploaded file and the target campaign id.</param>
/// <returns>A redirect to the Details action for <c>model.CampaignId</c>.</returns>
[HttpPost]
[ValidateAntiForgeryToken]
public ActionResult UploadMasterCodes(CampaignMasterCodeUploadViewModel model)
{
    if (ModelState.IsValid)
    {
        // The import returns a textual report (per-code errors plus a summary line).
        TempData["SuccessMessage"] = CampaignMethods.uploadMasterCodes(model.File, model.CampaignId);
    }
    else
    {
        // An invalid post used to redirect without any feedback; tell the user why nothing happened.
        TempData["ErrorMessage"] = "Master code upload form error. Please refresh the page and try again.";
    }

    return RedirectToAction("Details", new { id = model.CampaignId });
}
最后是执行上传到数据库的代码。我正在构建一个字符串来跟踪发生的任何错误,因为我需要了解所有这些错误。
// Number of new codes sent to the database per SaveChanges call.
private const int MasterCodeBatchSize = 1000;

/// <summary>
/// Imports the codes found in column 1 of the first worksheet of an uploaded Excel
/// workbook into the given campaign and returns a textual report.
/// </summary>
/// <param name="file">Uploaded workbook. A null or empty upload produces an empty report.</param>
/// <param name="campaignId">Id of the campaign that receives the codes.</param>
/// <returns>
/// "Campaign {id} not found" when the campaign does not exist; otherwise one
/// "&lt;br /&gt;"-terminated line per rejected row followed by a summary line with
/// the row, success and error counts.
/// </returns>
public static string uploadMasterCodes(HttpPostedFileBase file, int campaignId)
{
    using (ApplicationDbContext _context = new ApplicationDbContext())
    {
        var campaign = _context.Campaigns.Find(campaignId);
        if (campaign == null)
        {
            return String.Format("Campaign {0} not found", campaignId.ToString());
        }

        if ((file == null) || (file.ContentLength <= 0) || string.IsNullOrEmpty(file.FileName))
        {
            return string.Empty;
        }

        // BCL types are fully qualified because this file's using directives are not visible here.
        var response = new System.Text.StringBuilder();
        var submitDate = DateTime.Now;
        int successCount = 0;
        int errorCount = 0;

        // Load the campaign's existing codes once into a set. The previous code lazy-loaded
        // the whole collection and scanned it linearly for every row (quadratic in the row count).
        var knownCodes = new System.Collections.Generic.HashSet<string>(
            _context.MasterCodes
                .Where(m => m.CampaignId == campaignId)
                .Select(m => m.Code));

        var pending = new System.Collections.Generic.List<MasterCode>(MasterCodeBatchSize);

        // The upload stream is handed to EPPlus untouched; it used to be read into an
        // unused buffer first, which only wasted memory and moved the stream position.
        using (var package = new ExcelPackage(file.InputStream))
        {
            var workSheet = package.Workbook.Worksheets.First();

            // Dimension is null when the worksheet contains no cells at all.
            var noOfRow = workSheet.Dimension == null ? 0 : workSheet.Dimension.End.Row;

            for (int i = 1; i <= noOfRow; i++)
            {
                // A blank cell has a null Value; report the row instead of crashing mid-import.
                object cellValue = workSheet.Cells[i, 1].Value;
                string code = cellValue == null ? null : cellValue.ToString();
                if (string.IsNullOrEmpty(code))
                {
                    response.AppendFormat("Row: {0} has no code <br />", i);
                    errorCount++;
                    continue;
                }

                // Add returns false for codes already in the database or seen earlier in this file.
                if (!knownCodes.Add(code))
                {
                    response.AppendFormat("Code: {0} already exists <br />", code);
                    errorCount++;
                    continue;
                }

                pending.Add(new MasterCode
                {
                    Code = code,
                    CampaignId = campaignId,
                    SubmittedOn = submitDate
                });

                if (pending.Count >= MasterCodeBatchSize)
                {
                    SaveMasterCodeBatch(_context, pending, response, ref successCount, ref errorCount);
                    pending.Clear();
                }
            }

            // Flush whatever is left after the last full batch.
            SaveMasterCodeBatch(_context, pending, response, ref successCount, ref errorCount);

            response.AppendFormat("Number of codes:{0} / Success: {1} / Errors {2}", noOfRow, successCount, errorCount);
        }

        return response.ToString();
    }
}

// Saves one batch with a single SaveChanges; if that fails, retries the batch row by row
// so the report can still name each failing code.
private static void SaveMasterCodeBatch(
    ApplicationDbContext context,
    System.Collections.Generic.List<MasterCode> batch,
    System.Text.StringBuilder response,
    ref int successCount,
    ref int errorCount)
{
    if (batch.Count == 0)
    {
        return;
    }

    try
    {
        context.MasterCodes.AddRange(batch);
        context.SaveChanges();
        successCount += batch.Count;
        return;
    }
    catch (Exception)
    {
        // Not swallowed: the rows are retried one at a time below and each failure is reported.
        // Removing entities that are still in the Added state detaches them from the context.
        context.MasterCodes.RemoveRange(batch);
    }

    foreach (var masterCode in batch)
    {
        try
        {
            context.MasterCodes.Add(masterCode);
            context.SaveChanges();
            successCount++;
        }
        catch (Exception ex)
        {
            // Detach the failed entity; left in the context it would make every later save fail too.
            context.MasterCodes.Remove(masterCode);
            response.AppendFormat("Code: {0} failed with error: {1} <br />", masterCode.Code, ex.Message);
            errorCount++;
        }
    }
}
每当我需要读取 Excel 文件时,我都会使用这个 OLEDB 提供程序:
Provider=Microsoft.ACE.OLEDB.12.0;Data Source={FilePath};Extended Properties='Excel 12.0 Xml;HDR=Yes;IMEX=1';
(可以找到有关此主题的更多信息 here and here)
然后您可以执行批量插入(bulk insert)以获得最快的插入速度。关于如何在内存中执行此操作,请参阅 this answer。在您的情况下,您首先需要将文件保存到服务器上的某个位置,然后再进行处理并向用户报告进度。
根据@grmbl 的建议和大量阅读,我能够使用 SQLBulkCopy 解决速度问题。我没有使用 OLEDB 提供程序,而是将文件复制到服务器并创建了一个数据表。然后我将其用于批量复制。以下代码将 335,000 条记录的运行时间缩短到 10 秒左右。
为了不贴出一大堵代码墙,我删除了所有错误检查代码。控制器代码。
/// <summary>
/// Receives the posted master-code upload form, stores the uploaded file on the
/// server, bulk-copies its codes into the database and redirects back to the
/// campaign's Details page.
/// </summary>
/// <param name="model">Posted form data: the uploaded file and the target campaign id.</param>
/// <returns>A redirect to the Details action for <c>model.CampaignId</c>.</returns>
[HttpPost]
[ValidateAntiForgeryToken]
public ActionResult UploadMasterCodes(CampaignMasterCodeUploadViewModel model)
{
    // Reject invalid posts up front so the import path below reads straight through.
    if (!ModelState.IsValid)
    {
        TempData["ErrorMessage"] = "Master code upload form error. Please refresh the page and try again.";
        return RedirectToAction("Details", new { id = model.CampaignId });
    }

    // Only the file-name part of the client-supplied name is kept.
    var uploadedName = Path.GetFileName(model.File.FileName);
    var savedPath = FileMethods.UploadFile(model.File, Server.MapPath("~/App_Data/Bsa4/"), uploadedName);

    var codeTable = CampaignMethods.ProcessMasterCodeCsvToDatatable(savedPath, model.CampaignId);
    var feedback = CampaignMethods.ProcessMastercodeSqlBulkCopy(codeTable);

    // The bulk-copy helper returns either its success text or the exception message.
    TempData["SuccessMessage"] = feedback;
    return RedirectToAction("Details", new { id = model.CampaignId });
}
处理代码。
/// <summary>
/// Reads a comma-separated text file and builds a <see cref="DataTable"/> with one row
/// per non-empty code, ready to be bulk-copied into the MasterCodes table.
/// </summary>
/// <param name="file">Path of the file to read. Only the first comma-separated field of each line is used.</param>
/// <param name="campaignId">Id of the campaign the codes belong to.</param>
/// <returns>
/// A table with the columns CampaignId, Code, Used (always false) and SubmittedOn
/// (the time this method ran).
/// </returns>
/// <exception cref="ArgumentException">No campaign with <paramref name="campaignId"/> exists.</exception>
public static DataTable ProcessMasterCodeCsvToDatatable(string file, int campaignId)
{
    using (ApplicationDbContext _context = new ApplicationDbContext())
    {
        var campaign = _context.Campaigns.Find(campaignId);
        if (campaign == null)
        {
            // Fail with a clear message before reading the file, instead of a
            // NullReferenceException on the first data row.
            throw new ArgumentException(String.Format("Campaign {0} not found", campaignId), "campaignId");
        }

        // Typed columns keep the values as int/bool/DateTime. Untyped columns default to
        // string, which stored the date as culture-formatted text.
        DataTable dt = new DataTable();
        dt.Columns.Add("CampaignId", typeof(int));
        dt.Columns.Add("Code", typeof(string));
        dt.Columns.Add("Used", typeof(bool));
        dt.Columns.Add("SubmittedOn", typeof(DateTime));

        var submitDate = DateTime.Now;
        using (StreamReader sr = File.OpenText(file))
        {
            string line;
            while ((line = sr.ReadLine()) != null)
            {
                // Split always returns at least one element, so index 0 is safe.
                string code = line.Split(',')[0];
                if (!string.IsNullOrEmpty(code))
                {
                    dt.Rows.Add(campaign.CampaignId, code, false, submitDate);
                }
            }
        }

        return dt;
    }
}
/// <summary>
/// Bulk-copies the rows of <paramref name="dt"/> into the MasterCodes table using the
/// "DefaultConnection" connection string.
/// </summary>
/// <param name="dt">
/// Table containing columns named CampaignId, Code, Used and SubmittedOn.
/// </param>
/// <returns>
/// "Master Code Upload completed successfully" on success; otherwise the message of the
/// exception thrown while writing to the server.
/// </returns>
public static String ProcessMastercodeSqlBulkCopy(DataTable dt)
{
    string feedback = string.Empty;
    using (SqlConnection cn = new SqlConnection(ConfigurationManager.ConnectionStrings["DefaultConnection"].ConnectionString))
    {
        cn.Open();
        using (SqlBulkCopy copy = new SqlBulkCopy(cn))
        {
            copy.DestinationTableName = "MasterCodes";

            // Map by column name. The previous ordinal mappings (0, 2, 3, 4) did not match a
            // table laid out as CampaignId, Code, Used, SubmittedOn: ordinal 4 does not exist
            // and Code/Used were shifted by one column.
            copy.ColumnMappings.Add("CampaignId", "CampaignId");
            copy.ColumnMappings.Add("Code", "Code");
            copy.ColumnMappings.Add("Used", "Used");
            copy.ColumnMappings.Add("SubmittedOn", "SubmittedOn");

            try
            {
                // Send it to the server
                copy.WriteToServer(dt);
                feedback = "Master Code Upload completed successfully";
            }
            catch (Exception ex)
            {
                // The caller shows this text to the user, so the failure is reported, not hidden.
                feedback = ex.Message;
            }
        }
    }

    return feedback;
}