使用 CsvHelper 同时从 zip 存档读取多个文件
Reading multiple files from zip archive with CsvHelper simultaneously
我正在编写一段程序:先下载一个 zip 压缩包,然后把压缩包中的每个文件分别读取到一个列表中。以同步方式编写时程序可以正常工作,但耗时非常长。
我决定尝试使用任务来读取不同线程上的每个文件。当我这样做时,我得到以下信息:
End of Central Directory record could not be found
这是我为处理下载和提取而写的class:
/// <summary>
/// Downloads a zip archive into memory and reads individual CSV entries from it with CsvHelper.
/// </summary>
/// <remarks>
/// NOTE(review): the downloaded data is held in one <see cref="MemoryStream"/> instance
/// (<see cref="ZipStream"/>) that every <c>GetFileContents</c> call reads from. A MemoryStream has a
/// single position and its instance members are not documented as thread-safe, so calling
/// <c>GetFileContents</c> from several threads at once lets the callers interfere with each other;
/// this is consistent with the "End of Central Directory record could not be found" error
/// reported when the method is run from parallel tasks.
/// </remarks>
public class GtfsFileDownloader
{
/// <summary>First part of the download address; <see cref="FileName"/> is appended to it.</summary>
public string FileLocation { get; set; }
/// <summary>Second part of the download address, appended to <see cref="FileLocation"/>.</summary>
public string FileName { get; set; }
/// <summary>In-memory copy of the downloaded archive; assigned by <see cref="DownloadZip"/>.</summary>
public MemoryStream ZipStream { get; set; }
/// <summary>Stores the two parts of the archive address; nothing is downloaded here.</summary>
/// <param name="loc">Value for <see cref="FileLocation"/>.</param>
/// <param name="nm">Value for <see cref="FileName"/>.</param>
public GtfsFileDownloader(string loc, string nm)
{
FileLocation = loc;
FileName = nm;
}
/// <summary>
/// Downloads the bytes at <c>FileLocation + FileName</c> and wraps them in <see cref="ZipStream"/>.
/// </summary>
public void DownloadZip()
{
// NOTE(review): the WebClient created here is never disposed.
ZipStream = new MemoryStream(new WebClient().DownloadData(FileLocation + FileName));
}
/// <summary>
/// Parses the archive entry whose full name equals <paramref name="fileName"/> into a list of records.
/// </summary>
/// <typeparam name="T">Record type produced by CsvHelper.</typeparam>
/// <typeparam name="Q">CsvHelper class map registered before reading.</typeparam>
/// <param name="fileName">Full name of the entry inside the archive.</param>
/// <returns>The parsed records, or an empty list when the archive has no such entry.</returns>
/// <exception cref="System.Exception">
/// Thrown with the message of the underlying <c>CsvHelperException</c> when parsing fails.
/// </exception>
public List<T> GetFileContents<T, Q>(string fileName) where Q: ClassMap
{
var retList = new List<T>();
// NOTE(review): every call builds a new ZipArchive over the same shared ZipStream, and the
// archive is never disposed. Concurrent callers therefore read and seek one stream.
var entry = new ZipArchive(ZipStream).Entries.SingleOrDefault(x => x.FullName == fileName);
if(entry != null)
{
using (var reader = new StreamReader(entry.Open()))
{
using (var csv = new CsvReader(reader))
{
// Null handlers switch off header validation and missing-field callbacks.
csv.Configuration.HeaderValidated = null;
csv.Configuration.MissingFieldFound = null;
csv.Configuration.RegisterClassMap<Q>();
try
{
// ToList forces the records to be read while the reader is still open.
retList = csv.GetRecords<T>().ToList();
}
catch(CsvHelperException ex)
{
// NOTE(review): only the message is kept; the original exception is not attached.
throw new System.Exception(ex.Message);
}
}
}
}
return retList;
}
}
这是主要代码:
// Download the archive once, then parse every GTFS file on its own task.
var downloader = new GtfsFileDownloader(agency.GtfsZipUrlDirectory, agency.GtfsZipUrlFileName);
downloader.DownloadZip();

// Each list starts out empty so the variable is definitely assigned; the real
// contents are written from inside the corresponding task below.
var agencyInfo = new List<DbAgency>();
var stopInfo = new List<DbStop>();
var routeInfo = new List<DbRoute>();
var tripInfo = new List<DbTrip>();
var stopTimeInfo = new List<DbStopTime>();
var calendarInfo = new List<DbCalendar>();
var fareAttributeInfo = new List<DbFareAttribute>();
var shapeInfo = new List<DbShape>();
var frequencyInfo = new List<DbFrequency>();
var transferInfo = new List<DbTransfer>();
var pathwayInfo = new List<DbPathway>();
var levelInfo = new List<DbLevel>();
var feedInfoInfo = new List<DbFeedInfo>();

// Task.Run creates and schedules each task in one step, so no task can be
// left unstarted and no separate Start() loop is needed.
var tasks = new List<Task>
{
    Task.Run(() => { agencyInfo = downloader.GetFileContents<DbAgency, AgencyMap>("agencies.txt"); }),
    Task.Run(() => { stopInfo = downloader.GetFileContents<DbStop, StopMap>("stops.txt"); }),
    Task.Run(() => { routeInfo = downloader.GetFileContents<DbRoute, RouteMap>("routes.txt"); }),
    Task.Run(() => { tripInfo = downloader.GetFileContents<DbTrip, TripMap>("trips.txt"); }),
    Task.Run(() => { stopTimeInfo = downloader.GetFileContents<DbStopTime, StopTimeMap>("stop_times.txt"); }),
    Task.Run(() => { calendarInfo = downloader.GetFileContents<DbCalendar, CalendarMap>("calendar.txt"); }),
    Task.Run(() => { fareAttributeInfo = downloader.GetFileContents<DbFareAttribute, FareAttributeMap>("fare_attributes.txt"); }),
    Task.Run(() => { shapeInfo = downloader.GetFileContents<DbShape, ShapeMap>("shapes.txt"); }),
    Task.Run(() => { frequencyInfo = downloader.GetFileContents<DbFrequency, FrequencyMap>("frequencies.txt"); }),
    Task.Run(() => { transferInfo = downloader.GetFileContents<DbTransfer, TransferMap>("transfers.txt"); }),
    Task.Run(() => { pathwayInfo = downloader.GetFileContents<DbPathway, PathwayMap>("pathways.txt"); }),
    Task.Run(() => { levelInfo = downloader.GetFileContents<DbLevel, LevelMap>("levels.txt"); }),
    Task.Run(() => { feedInfoInfo = downloader.GetFileContents<DbFeedInfo, FeedInfoMap>("feed_info.txt"); }),
};

// Block until every file has been parsed; a failure in any task is rethrown
// here wrapped in an AggregateException.
Task.WaitAll(tasks.ToArray());
我猜是我在多线程方面做错了什么(我对多线程不太有经验)。正如前面提到的,如果去掉 Task 相关的代码、以单线程方式运行,就不会抛出上面的错误。
尝试为每个 Task 单独创建一个 MemoryStream,这样各个线程就不会共享同一个流的读取位置:
/// <summary>
/// Downloads a zip archive into memory and reads individual CSV entries from it with CsvHelper.
/// </summary>
/// <remarks>
/// Only the raw bytes are stored on the instance. Every <c>GetFileContents</c> call wraps them in
/// its own stream and archive, so concurrent calls never share a stream position.
/// </remarks>
public class GtfsFileDownloader
{
    /// <summary>First part of the download address; <see cref="FileName"/> is appended to it.</summary>
    public string FileLocation { get; set; }

    /// <summary>Second part of the download address, appended to <see cref="FileLocation"/>.</summary>
    public string FileName { get; set; }

    /// <summary>Raw bytes of the downloaded archive; assigned by <see cref="DownloadZip"/>.</summary>
    public byte[] ZipBytes { get; set; }

    /// <summary>Stores the two parts of the archive address; nothing is downloaded here.</summary>
    /// <param name="loc">Value for <see cref="FileLocation"/>.</param>
    /// <param name="nm">Value for <see cref="FileName"/>.</param>
    public GtfsFileDownloader(string loc, string nm)
    {
        FileLocation = loc;
        FileName = nm;
    }

    /// <summary>
    /// Downloads the bytes at <c>FileLocation + FileName</c> into <see cref="ZipBytes"/>.
    /// </summary>
    public void DownloadZip()
    {
        // WebClient is IDisposable; release it as soon as the download has finished.
        using (var client = new WebClient())
        {
            ZipBytes = client.DownloadData(FileLocation + FileName);
        }
    }

    /// <summary>
    /// Parses the archive entry whose full name equals <paramref name="fileName"/> into a list of records.
    /// </summary>
    /// <typeparam name="T">Record type produced by CsvHelper.</typeparam>
    /// <typeparam name="Q">CsvHelper class map registered before reading.</typeparam>
    /// <param name="fileName">Full name of the entry inside the archive.</param>
    /// <returns>The parsed records, or an empty list when the archive has no such entry.</returns>
    /// <exception cref="System.InvalidOperationException">
    /// <see cref="ZipBytes"/> is null, i.e. nothing has been downloaded yet.
    /// </exception>
    /// <exception cref="System.Exception">
    /// Parsing failed; the message is copied from the underlying <c>CsvHelperException</c>,
    /// which is attached as the inner exception.
    /// </exception>
    public List<T> GetFileContents<T, Q>(string fileName) where Q : ClassMap
    {
        if (ZipBytes == null)
        {
            throw new System.InvalidOperationException(
                "DownloadZip must be called before reading archive entries.");
        }

        // A fresh read-only stream and archive per call: the byte array is only read,
        // and no stream position is shared between callers.
        using (var zipStream = new MemoryStream(ZipBytes, writable: false))
        using (var archive = new ZipArchive(zipStream))
        {
            var entry = archive.Entries.SingleOrDefault(x => x.FullName == fileName);
            if (entry == null)
            {
                return new List<T>();
            }

            using (var reader = new StreamReader(entry.Open()))
            using (var csv = new CsvReader(reader))
            {
                // Null handlers switch off header validation and missing-field callbacks.
                csv.Configuration.HeaderValidated = null;
                csv.Configuration.MissingFieldFound = null;
                csv.Configuration.RegisterClassMap<Q>();

                try
                {
                    // ToList forces the records to be read while the reader is still open.
                    return csv.GetRecords<T>().ToList();
                }
                catch (CsvHelperException ex)
                {
                    // Same exception type and message as before, but the original
                    // exception is kept as InnerException so its details are not lost.
                    throw new System.Exception(ex.Message, ex);
                }
            }
        }
    }
}
我正在编写一段程序:先下载一个 zip 压缩包,然后把压缩包中的每个文件分别读取到一个列表中。以同步方式编写时程序可以正常工作,但耗时非常长。
我决定尝试使用任务来读取不同线程上的每个文件。当我这样做时,我得到以下信息:
End of Central Directory record could not be found
这是我为处理下载和提取而写的class:
/// <summary>
/// Downloads a zip archive into memory and reads individual CSV entries from it with CsvHelper.
/// </summary>
/// <remarks>
/// NOTE(review): the downloaded data is held in one <see cref="MemoryStream"/> instance
/// (<see cref="ZipStream"/>) that every <c>GetFileContents</c> call reads from. A MemoryStream has a
/// single position and its instance members are not documented as thread-safe, so calling
/// <c>GetFileContents</c> from several threads at once lets the callers interfere with each other;
/// this is consistent with the "End of Central Directory record could not be found" error
/// reported when the method is run from parallel tasks.
/// </remarks>
public class GtfsFileDownloader
{
/// <summary>First part of the download address; <see cref="FileName"/> is appended to it.</summary>
public string FileLocation { get; set; }
/// <summary>Second part of the download address, appended to <see cref="FileLocation"/>.</summary>
public string FileName { get; set; }
/// <summary>In-memory copy of the downloaded archive; assigned by <see cref="DownloadZip"/>.</summary>
public MemoryStream ZipStream { get; set; }
/// <summary>Stores the two parts of the archive address; nothing is downloaded here.</summary>
/// <param name="loc">Value for <see cref="FileLocation"/>.</param>
/// <param name="nm">Value for <see cref="FileName"/>.</param>
public GtfsFileDownloader(string loc, string nm)
{
FileLocation = loc;
FileName = nm;
}
/// <summary>
/// Downloads the bytes at <c>FileLocation + FileName</c> and wraps them in <see cref="ZipStream"/>.
/// </summary>
public void DownloadZip()
{
// NOTE(review): the WebClient created here is never disposed.
ZipStream = new MemoryStream(new WebClient().DownloadData(FileLocation + FileName));
}
/// <summary>
/// Parses the archive entry whose full name equals <paramref name="fileName"/> into a list of records.
/// </summary>
/// <typeparam name="T">Record type produced by CsvHelper.</typeparam>
/// <typeparam name="Q">CsvHelper class map registered before reading.</typeparam>
/// <param name="fileName">Full name of the entry inside the archive.</param>
/// <returns>The parsed records, or an empty list when the archive has no such entry.</returns>
/// <exception cref="System.Exception">
/// Thrown with the message of the underlying <c>CsvHelperException</c> when parsing fails.
/// </exception>
public List<T> GetFileContents<T, Q>(string fileName) where Q: ClassMap
{
var retList = new List<T>();
// NOTE(review): every call builds a new ZipArchive over the same shared ZipStream, and the
// archive is never disposed. Concurrent callers therefore read and seek one stream.
var entry = new ZipArchive(ZipStream).Entries.SingleOrDefault(x => x.FullName == fileName);
if(entry != null)
{
using (var reader = new StreamReader(entry.Open()))
{
using (var csv = new CsvReader(reader))
{
// Null handlers switch off header validation and missing-field callbacks.
csv.Configuration.HeaderValidated = null;
csv.Configuration.MissingFieldFound = null;
csv.Configuration.RegisterClassMap<Q>();
try
{
// ToList forces the records to be read while the reader is still open.
retList = csv.GetRecords<T>().ToList();
}
catch(CsvHelperException ex)
{
// NOTE(review): only the message is kept; the original exception is not attached.
throw new System.Exception(ex.Message);
}
}
}
}
return retList;
}
}
这是主要代码:
// Download the archive once, then parse every GTFS file on its own task.
var downloader = new GtfsFileDownloader(agency.GtfsZipUrlDirectory, agency.GtfsZipUrlFileName);
downloader.DownloadZip();

// Each list starts out empty so the variable is definitely assigned; the real
// contents are written from inside the corresponding task below.
var agencyInfo = new List<DbAgency>();
var stopInfo = new List<DbStop>();
var routeInfo = new List<DbRoute>();
var tripInfo = new List<DbTrip>();
var stopTimeInfo = new List<DbStopTime>();
var calendarInfo = new List<DbCalendar>();
var fareAttributeInfo = new List<DbFareAttribute>();
var shapeInfo = new List<DbShape>();
var frequencyInfo = new List<DbFrequency>();
var transferInfo = new List<DbTransfer>();
var pathwayInfo = new List<DbPathway>();
var levelInfo = new List<DbLevel>();
var feedInfoInfo = new List<DbFeedInfo>();

// Task.Run creates and schedules each task in one step, so no task can be
// left unstarted and no separate Start() loop is needed.
var tasks = new List<Task>
{
    Task.Run(() => { agencyInfo = downloader.GetFileContents<DbAgency, AgencyMap>("agencies.txt"); }),
    Task.Run(() => { stopInfo = downloader.GetFileContents<DbStop, StopMap>("stops.txt"); }),
    Task.Run(() => { routeInfo = downloader.GetFileContents<DbRoute, RouteMap>("routes.txt"); }),
    Task.Run(() => { tripInfo = downloader.GetFileContents<DbTrip, TripMap>("trips.txt"); }),
    Task.Run(() => { stopTimeInfo = downloader.GetFileContents<DbStopTime, StopTimeMap>("stop_times.txt"); }),
    Task.Run(() => { calendarInfo = downloader.GetFileContents<DbCalendar, CalendarMap>("calendar.txt"); }),
    Task.Run(() => { fareAttributeInfo = downloader.GetFileContents<DbFareAttribute, FareAttributeMap>("fare_attributes.txt"); }),
    Task.Run(() => { shapeInfo = downloader.GetFileContents<DbShape, ShapeMap>("shapes.txt"); }),
    Task.Run(() => { frequencyInfo = downloader.GetFileContents<DbFrequency, FrequencyMap>("frequencies.txt"); }),
    Task.Run(() => { transferInfo = downloader.GetFileContents<DbTransfer, TransferMap>("transfers.txt"); }),
    Task.Run(() => { pathwayInfo = downloader.GetFileContents<DbPathway, PathwayMap>("pathways.txt"); }),
    Task.Run(() => { levelInfo = downloader.GetFileContents<DbLevel, LevelMap>("levels.txt"); }),
    Task.Run(() => { feedInfoInfo = downloader.GetFileContents<DbFeedInfo, FeedInfoMap>("feed_info.txt"); }),
};

// Block until every file has been parsed; a failure in any task is rethrown
// here wrapped in an AggregateException.
Task.WaitAll(tasks.ToArray());
我猜是我在多线程方面做错了什么(我对多线程不太有经验)。正如前面提到的,如果去掉 Task 相关的代码、以单线程方式运行,就不会抛出上面的错误。
尝试为每个 Task 单独创建一个 MemoryStream,这样各个线程就不会共享同一个流的读取位置:
/// <summary>
/// Downloads a zip archive into memory and reads individual CSV entries from it with CsvHelper.
/// </summary>
/// <remarks>
/// Only the raw bytes are stored on the instance. Every <c>GetFileContents</c> call wraps them in
/// its own stream and archive, so concurrent calls never share a stream position.
/// </remarks>
public class GtfsFileDownloader
{
    /// <summary>First part of the download address; <see cref="FileName"/> is appended to it.</summary>
    public string FileLocation { get; set; }

    /// <summary>Second part of the download address, appended to <see cref="FileLocation"/>.</summary>
    public string FileName { get; set; }

    /// <summary>Raw bytes of the downloaded archive; assigned by <see cref="DownloadZip"/>.</summary>
    public byte[] ZipBytes { get; set; }

    /// <summary>Stores the two parts of the archive address; nothing is downloaded here.</summary>
    /// <param name="loc">Value for <see cref="FileLocation"/>.</param>
    /// <param name="nm">Value for <see cref="FileName"/>.</param>
    public GtfsFileDownloader(string loc, string nm)
    {
        FileLocation = loc;
        FileName = nm;
    }

    /// <summary>
    /// Downloads the bytes at <c>FileLocation + FileName</c> into <see cref="ZipBytes"/>.
    /// </summary>
    public void DownloadZip()
    {
        // WebClient is IDisposable; release it as soon as the download has finished.
        using (var client = new WebClient())
        {
            ZipBytes = client.DownloadData(FileLocation + FileName);
        }
    }

    /// <summary>
    /// Parses the archive entry whose full name equals <paramref name="fileName"/> into a list of records.
    /// </summary>
    /// <typeparam name="T">Record type produced by CsvHelper.</typeparam>
    /// <typeparam name="Q">CsvHelper class map registered before reading.</typeparam>
    /// <param name="fileName">Full name of the entry inside the archive.</param>
    /// <returns>The parsed records, or an empty list when the archive has no such entry.</returns>
    /// <exception cref="System.InvalidOperationException">
    /// <see cref="ZipBytes"/> is null, i.e. nothing has been downloaded yet.
    /// </exception>
    /// <exception cref="System.Exception">
    /// Parsing failed; the message is copied from the underlying <c>CsvHelperException</c>,
    /// which is attached as the inner exception.
    /// </exception>
    public List<T> GetFileContents<T, Q>(string fileName) where Q : ClassMap
    {
        if (ZipBytes == null)
        {
            throw new System.InvalidOperationException(
                "DownloadZip must be called before reading archive entries.");
        }

        // A fresh read-only stream and archive per call: the byte array is only read,
        // and no stream position is shared between callers.
        using (var zipStream = new MemoryStream(ZipBytes, writable: false))
        using (var archive = new ZipArchive(zipStream))
        {
            var entry = archive.Entries.SingleOrDefault(x => x.FullName == fileName);
            if (entry == null)
            {
                return new List<T>();
            }

            using (var reader = new StreamReader(entry.Open()))
            using (var csv = new CsvReader(reader))
            {
                // Null handlers switch off header validation and missing-field callbacks.
                csv.Configuration.HeaderValidated = null;
                csv.Configuration.MissingFieldFound = null;
                csv.Configuration.RegisterClassMap<Q>();

                try
                {
                    // ToList forces the records to be read while the reader is still open.
                    return csv.GetRecords<T>().ToList();
                }
                catch (CsvHelperException ex)
                {
                    // Same exception type and message as before, but the original
                    // exception is kept as InnerException so its details are not lost.
                    throw new System.Exception(ex.Message, ex);
                }
            }
        }
    }
}