如果 record/row 已存在于 csv 中,则替换或不追加
Replace or don't append if a record/row already exists in the CSV
我有以下代码,它循环遍历目录中的 JSON 文件,并创建一个包含以下记录的 CSV 文件:
results.csv
File Name Page Practice Name
fileXYZ.json 1 XYZ & Co
fileAB2.json 1 ABC & Co
file1.json 1 Associates & Co
但是,如果我停止执行并重新运行程序,会发生相同的记录再次插入到 csv 文件中,导致:
File Name Page Practice Name
fileXYZ.json 1 XYZ & Co
fileAB2.json 1 ABC & Co
file1.json 1 Associates & Co
fileXYZ.json 1 XYZ & Co
fileAB2.json 1 ABC & Co
file1.json 1 Associates & Co
我如何检查记录是否已经存在(即每个字段都与要插入的字段相同),并替换它(或者干脆不再追加它)?例如,如果因为 file1.json 的内容发生了变化、并且目录中新增了一个文件而再次运行程序,新的 CSV 应该如下所示:
results.csv:
File Name Page Practice Name
fileXYZ.json 1 XYZ & Co
fileAB2.json 1 ABC & Co
file1.json 1 Corpum & Co
file32.json 1 FirmA
代码:
using ChoETL;
using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;
/// <summary>
/// Entry point: runs <c>JsonToCsv</c> once for every file found in <c>jsonFilesPath</c>,
/// passing <c>csvFilePath</c> as the output file each time.
/// </summary>
static void Main(string[] args)
{
    // Output to CSV
    string[] inputFiles = Directory.GetFiles(jsonFilesPath);
    for (int i = 0; i < inputFiles.Length; i++)
    {
        JsonToCsv(inputFiles[i], csvFilePath);
    }
}
/// <summary>
/// Returns the <c>text</c> of the line that immediately follows the first line whose
/// <c>text</c> equals <paramref name="nameOfField"/> (i.e. the value printed after a label).
/// </summary>
/// <param name="lines">Objects exposing a <c>text</c> member, in document order.</param>
/// <param name="nameOfField">Exact label text to look for, e.g. "Practice Name".</param>
/// <param name="throwException">
/// When true, a missing label or a label with no following line raises
/// <see cref="InvalidDataException"/>. When false, the behaviour of <c>First()</c> is kept:
/// an <see cref="InvalidOperationException"/> is raised in those cases.
/// </param>
/// <returns>The text of the line following the label.</returns>
/// <exception cref="InvalidDataException">No value line exists and <paramref name="throwException"/> is true.</exception>
public static string fieldValue(IEnumerable<dynamic> lines, string nameOfField, bool throwException = false)
{
    // Skip everything before the label, then the label itself; the sequence
    // now starts at the value line (lines such as "Account Information" are dropped).
    var afterLabel = lines.SkipWhile(l => l.text != nameOfField).Skip(1);

    if (!throwException)
    {
        return afterLabel.First().text;
    }

    // The previous implementation inverted this check: it threw when the label
    // WAS present and called First() on an empty sequence when it was not.
    dynamic valueLine = afterLabel.FirstOrDefault();
    if ((object)valueLine == null)
    {
        throw new InvalidDataException($"No value line found after field '{nameOfField}'.");
    }

    return valueLine.text;
}
/// <summary>
/// Reads the <c>$..readResults</c> nodes of <paramref name="jsonInputFile"/> and appends one CSV row
/// (File Name, Page, Practice Name) per page-1 result to <paramref name="csvFile"/>.
/// </summary>
/// <param name="jsonInputFile">Path of the JSON file to read; also written to the "File Name" column.</param>
/// <param name="csvFile">Path of the CSV file, opened in append mode.</param>
public static void JsonToCsv(string jsonInputFile, string csvFile)
{
    using (var jsonReader = new ChoJSONReader(jsonInputFile).WithJSONPath("$..readResults"))
    using (var output = new FileStream(csvFile, FileMode.Append, FileAccess.Write))
    {
        using (var csv = new ChoCSVWriter(output))
        {
            csv.WithField("FileName", fieldName: "File Name")
               .WithField("Page")
               .WithField("PracticeName", fieldName: "Practice Name");

            // A header is only wanted when nothing has been written to the file yet.
            bool startsEmpty = output.Position == 0;
            if (startsEmpty)
            {
                csv.WithFirstLineHeader();
            }

            // Only page 1 is kept because the fields below exist on the first page only.
            var firstPageRows = jsonReader
                .Where(result => result.page == 1)
                .Select(result =>
                {
                    var lines = (dynamic[])result.lines;
                    return new
                    {
                        FileName = jsonInputFile,
                        Page = result.page,
                        PracticeName = fieldValue(lines, "Practice Name"),
                    };
                });

            csv.Write(firstPageRows);
        }

        // Trailing line break so the next append does not continue on the same line.
        output.Write(Environment.NewLine);
    }
}
样本JSON文件
{
"status": "succeeded",
"createdDateTime": "2020-10-30T15:56:11Z",
"lastUpdatedDateTime": "2020-10-30T15:56:12Z",
"analyzeResult": {
"version": "3.0.0",
"readResults": [
{
"page": 1,
"angle": 0.086,
"width": 684,
"height": 272,
"unit": "pixel",
"lines": [
{
"boundingBox": [
7,
6,
196,
5,
196,
24,
7,
25
],
"text": "Account Information",
"words": [
{
"boundingBox": [
10,
7,
83,
7,
81,
24,
7,
26
],
"text": "Account",
"confidence": 0.981
},
{
"boundingBox": [
87,
7,
196,
6,
196,
24,
85,
24
],
"text": "Information",
"confidence": 0.939
}
]
},
{
"boundingBox": [
120,
56,
223,
57,
223,
70,
120,
70
],
"text": "Practice Name",
"words": [
{
"boundingBox": [
120,
57,
176,
57,
176,
70,
120,
71
],
"text": "Practice",
"confidence": 0.982
},
{
"boundingBox": [
179,
57,
222,
57,
222,
71,
179,
70
],
"text": "Name",
"confidence": 0.985
}
]
},
{
"boundingBox": [
236,
62,
390,
62,
390,
77,
236,
77
],
"text": "Some Practice Name",
"words": [
{
"boundingBox": [
236,
62,
277,
62,
277,
78,
236,
78
],
"text": "Some",
"confidence": 0.987
},
{
"boundingBox": [
280,
62,
340,
62,
341,
78,
280,
77
],
"text": "Practice",
"confidence": 0.984
},
{
"boundingBox": [
343,
62,
390,
62,
390,
78,
344,
78
],
"text": "Name",
"confidence": 0.987
}
]
},
{
"boundingBox": [
107,
102,
223,
102,
223,
115,
107,
115
],
"text": "Owner Full Name",
"words": [
{
"boundingBox": [
108,
103,
151,
102,
151,
116,
107,
116
],
"text": "Owner",
"confidence": 0.985
},
{
"boundingBox": [
154,
102,
177,
102,
176,
116,
153,
116
],
"text": "Full",
"confidence": 0.954
},
{
"boundingBox": [
180,
102,
224,
103,
223,
116,
179,
116
],
"text": "Name",
"confidence": 0.987
}
]
},
{
"boundingBox": [
237,
104,
298,
104,
298,
119,
237,
119
],
"text": "Bob Lee",
"words": [
{
"boundingBox": [
238,
104,
266,
104,
266,
119,
238,
120
],
"text": "Bob",
"confidence": 0.987
},
{
"boundingBox": [
269,
104,
298,
105,
298,
120,
269,
119
],
"text": "Lee",
"confidence": 0.987
}
]
},
{
"boundingBox": [
136,
147,
223,
147,
223,
160,
137,
161
],
"text": "Owner Email",
"words": [
{
"boundingBox": [
137,
148,
181,
147,
181,
161,
137,
162
],
"text": "Owner",
"confidence": 0.985
},
{
"boundingBox": [
184,
147,
224,
147,
224,
161,
184,
161
],
"text": "Email",
"confidence": 0.985
}
]
},
{
"boundingBox": [
239,
144,
361,
144,
361,
162,
239,
162
],
"text": "bob@gmail.com",
"words": [
{
"boundingBox": [
240,
145,
362,
146,
361,
163,
240,
163
],
"text": "bob@gmail.com",
"confidence": 0.974
}
]
},
{
"boundingBox": [
137,
193,
224,
193,
224,
208,
137,
208
],
"text": "Server Setup",
"words": [
{
"boundingBox": [
137,
194,
179,
194,
179,
208,
137,
208
],
"text": "Server",
"confidence": 0.985
},
{
"boundingBox": [
182,
194,
224,
194,
224,
209,
182,
208
],
"text": "Setup",
"confidence": 0.985
}
]
},
{
"boundingBox": [
276,
188,
340,
192,
339,
211,
275,
209
],
"text": "cloud",
"words": [
{
"boundingBox": [
297,
192,
339,
194,
339,
211,
297,
211
],
"text": "cloud",
"confidence": 0.933
}
]
},
{
"boundingBox": [
376,
187,
461,
191,
460,
212,
376,
211
],
"text": "Location",
"words": [
{
"boundingBox": [
394,
191,
460,
196,
459,
211,
394,
211
],
"text": "Location",
"confidence": 0.844
}
]
},
{
"boundingBox": [
500,
189,
666,
192,
665,
212,
499,
211
],
"text": "LIcentral (multi-location)",
"words": [
{
"boundingBox": [
501,
190,
567,
195,
567,
212,
500,
212
],
"text": "LIcentral",
"confidence": 0.665
},
{
"boundingBox": [
572,
195,
665,
195,
665,
212,
571,
212
],
"text": "(multi-location)",
"confidence": 0.899
}
]
},
{
"boundingBox": [
21,
238,
224,
238,
223,
255,
21,
253
],
"text": "Number of Locations Enrolling",
"words": [
{
"boundingBox": [
21,
239,
76,
239,
76,
253,
21,
253
],
"text": "Number",
"confidence": 0.985
},
{
"boundingBox": [
79,
239,
92,
239,
92,
253,
79,
253
],
"text": "of",
"confidence": 0.983
},
{
"boundingBox": [
95,
239,
161,
239,
161,
254,
95,
253
],
"text": "Locations",
"confidence": 0.981
},
{
"boundingBox": [
164,
239,
224,
239,
223,
256,
163,
254
],
"text": "Enrolling",
"confidence": 0.983
}
]
},
{
"boundingBox": [
273,
237,
289,
239,
288,
257,
272,
255
],
"text": "1",
"words": [
{
"boundingBox": [
278,
237,
290,
239,
287,
257,
276,
255
],
"text": "1",
"confidence": 0.981
}
]
},
{
"boundingBox": [
337,
239,
670,
239,
670,
253,
337,
252
],
"text": "*If more than 1 location, add info on the locations form",
"words": [
{
"boundingBox": [
338,
239,
347,
239,
347,
252,
338,
252
],
"text": "*If",
"confidence": 0.874
},
{
"boundingBox": [
350,
239,
384,
239,
384,
253,
350,
252
],
"text": "more",
"confidence": 0.983
},
{
"boundingBox": [
386,
239,
416,
239,
416,
253,
386,
253
],
"text": "than",
"confidence": 0.986
},
{
"boundingBox": [
419,
239,
422,
239,
422,
253,
419,
253
],
"text": "1",
"confidence": 0.635
},
{
"boundingBox": [
425,
239,
478,
239,
478,
253,
425,
253
],
"text": "location,",
"confidence": 0.955
},
{
"boundingBox": [
481,
239,
506,
239,
506,
253,
481,
253
],
"text": "add",
"confidence": 0.986
},
{
"boundingBox": [
509,
239,
533,
239,
533,
253,
509,
253
],
"text": "info",
"confidence": 0.981
},
{
"boundingBox": [
535,
239,
551,
239,
552,
253,
535,
253
],
"text": "on",
"confidence": 0.988
},
{
"boundingBox": [
554,
239,
574,
239,
575,
253,
554,
253
],
"text": "the",
"confidence": 0.987
},
{
"boundingBox": [
577,
239,
634,
239,
634,
253,
577,
253
],
"text": "locations",
"confidence": 0.973
},
{
"boundingBox": [
636,
239,
666,
240,
666,
253,
637,
253
],
"text": "form",
"confidence": 0.986
}
]
}
]
}
]
}
}
采用 Supun De Silva 答案中的方法 2(approach 2)后的 CSV 屏幕截图:
仅供参考:您提供的示例文件无法直接使用,因为代码在 var lines = (dynamic[])r1.lines; 这一行失败。
方法 1 - 重命名旧文件并创建一个新文件用于数据附加
1.引入新函数
/// <summary>
/// Moves an existing file at <paramref name="csvFilePath"/> aside by appending
/// <c>_backup_yyyyMMdd_HHmmss</c> (current local time) to its path. Does nothing when no such file exists.
/// </summary>
/// <param name="csvFilePath">Path of the CSV file to move out of the way.</param>
private static void RenameIfExist(string csvFilePath)
{
    if (!File.Exists(csvFilePath))
    {
        return;
    }

    string suffix = DateTime.Now.ToString("backup_yyyyMMdd_HHmmss");
    File.Move(csvFilePath, csvFilePath + "_" + suffix);
}
2. 调用该移动函数,并以 Create 模式打开新文件
/// <summary>
/// Approach 1: moves any existing <paramref name="csvFile"/> aside via <c>Program.RenameIfExist</c>,
/// then creates a fresh CSV (with header) holding one row per page-1 result of <paramref name="jsonInputFile"/>.
/// </summary>
/// <param name="jsonInputFile">Path of the JSON file to read; also written to the "File Name" column.</param>
/// <param name="csvFile">Path of the CSV file to (re)create.</param>
public static void JsonToCsv(string jsonInputFile, string csvFile)
{
    using (var p = new ChoJSONReader(jsonInputFile).WithJSONPath("$..readResults"))
    {
        Program.RenameIfExist(csvFile);
        using (var fs = new FileStream(csvFile, FileMode.Create, FileAccess.Write))
        {
            // No try/catch here: the former `catch (Exception e) { throw e; }` only reset the
            // stack trace. Exceptions still propagate to the caller, now with the original trace.
            using (ChoCSVWriter<dynamic> writer = new ChoCSVWriter(fs)
                .WithField("FileName", fieldName: "File Name")
                .WithField("Page")
                .WithField("PracticeName", fieldName: "Practice Name")
                .WithFirstLineHeader())
            {
                // Limit the result to page 1 since the fields below only exist on the 1st page
                writer.Write(p
                    .Where(r1 => r1.page == 1)
                    .Select(r1 =>
                    {
                        var lines = (dynamic[])r1.lines;
                        return new
                        {
                            FileName = jsonInputFile,
                            Page = r1.page,
                            PracticeName = fieldValue(lines, "Practice Name"),
                        };
                    }));
            }
        }
    }
}
方法 2 - 打开现有文件并使用数据创建查找结构
- 您可能需要稍微调整一下
1. 声明一个新的数据结构,用来存储已处理记录的键
// Lookup of rows already present in the CSV. LoadOldStatsIfExist fills it with keys built from
// the first three columns of each row joined by '_'; the bool value is always true.
private static Dictionary<string, bool> processedfileStates = new Dictionary<string, bool>();
2. 预加载函数(读取已有 CSV 中的记录)
/// <summary>
/// Reads the existing CSV (if any) and records a key for every data row in
/// <c>Program.processedfileStates</c>. The key is the first three columns joined by '_'.
/// </summary>
/// <param name="csvFilePath">Path of the CSV file written by earlier runs.</param>
private static void LoadOldStatsIfExist(string csvFilePath)
{
    if (!File.Exists(csvFilePath))
    {
        return;
    }

    using (var fs = new FileStream(csvFilePath, FileMode.Open, FileAccess.Read))
    using (ChoCSVReader<dynamic> reader = new ChoCSVReader(fs).WithFirstLineHeader())
    using (var dataReader = reader.AsDataReader())
    {
        while (dataReader.Read())
        {
            string key = $"{dataReader[0]}_{dataReader[1]}_{dataReader[2]}";

            // Indexer assignment instead of Add: Add throws ArgumentException on a repeated key,
            // which happens when the CSV already contains duplicate rows or when this loader
            // runs more than once against the same static dictionary.
            Program.processedfileStates[key] = true;
        }
    }
}
3. JSON 转 CSV 的函数
/// <summary>
/// Approach 2: appends the page-1 row of <paramref name="jsonInputFile"/> to <paramref name="csvFile"/>
/// unless an identical row (same File Name, Page and Practice Name) is already in the file.
/// </summary>
/// <param name="jsonInputFile">Path of the JSON file to read; also written to the "File Name" column.</param>
/// <param name="csvFile">Path of the CSV file; created with a header when no earlier rows were loaded.</param>
public static void JsonToCsv(string jsonInputFile, string csvFile)
{
    using (var p = new ChoJSONReader(jsonInputFile).WithJSONPath("$..readResults"))
    {
        // This method runs once per JSON file, so start from a clean lookup before reloading;
        // otherwise keys loaded by the previous call are added a second time.
        Program.processedfileStates.Clear();
        Program.LoadOldStatsIfExist(csvFile);

        bool appendToExisting = Program.processedfileStates.Count != 0;
        using (var fs = new FileStream(csvFile, appendToExisting ? FileMode.Append : FileMode.Create, FileAccess.Write))
        {
            if (appendToExisting)
            {
                // Start on a new line so the new row is not glued to the last existing one.
                fs.Write(Environment.NewLine);
            }

            // The writer is created inside `using` so it is disposed even if configuration fails.
            // The former `catch (Exception e) { throw e; }` was dropped: it only reset the stack trace.
            using (ChoCSVWriter<dynamic> writer = new ChoCSVWriter(fs)
                .WithField("FileName", fieldName: "File Name")
                .WithField("Page")
                .WithField("PracticeName", fieldName: "Practice Name"))
            {
                if (!appendToExisting)
                {
                    // A header is only written for a freshly created file.
                    writer.WithFirstLineHeader();
                }

                // Limit the result to page 1 since the fields below only exist on the 1st page
                var data = p
                    .Where(r1 => r1.page == 1)
                    .Select(r1 =>
                    {
                        var lines = (dynamic[])r1.lines;
                        return new
                        {
                            FileName = jsonInputFile,
                            Page = r1.page,
                            PracticeName = fieldValue(lines, "Practice Name"),
                        };
                    })
                    // Same key layout as the one built by LoadOldStatsIfExist.
                    .Where(de => !processedfileStates.ContainsKey($"{de.FileName}_{de.Page}_{de.PracticeName}"));

                writer.Write(data);
            }
        }
    }
}
方法 2 - 重构
/// <summary>
/// One output row of the CSV: the source file, the page number and the extracted practice name.
/// </summary>
public class OPModel
{
/// <summary>Path of the JSON input file the row came from (CSV column "File Name").</summary>
public string FileName { get; set; }
/// <summary>Value of the <c>page</c> property of the read result (CSV column "Page").</summary>
public long Page { get; set; }
/// <summary>Text found after the "Practice Name" label (CSV column "Practice Name").</summary>
public string PracticeName { get; set; }
}
/// <summary>
/// Console tool that scans a directory of JSON files, extracts the "Practice Name" value from
/// page 1 of each file and appends the rows that are not yet present to a CSV file.
/// </summary>
public class Program
{
    // Verbatim literals: in a regular string, sequences such as \D, \C or \T are invalid escapes.
    const string jsonFilesPath = @"D:\DevWork\C#\TempProject1\ConsoleApp1\data";
    const string csvFilePath = @"D:\DevWork\C#\TempProject1\ConsoleApp1\output\op.csv";

    // Keys (see BuildKey) of the rows already present in the CSV file.
    private static readonly HashSet<string> processedfileStates = new HashSet<string>();

    /// <summary>Builds the lookup key that identifies a CSV row: the three column values joined by '_'.</summary>
    private static string BuildKey(object fileName, object page, object practiceName)
    {
        return $"{fileName}_{page}_{practiceName}";
    }

    /// <summary>
    /// Moves an existing file at <paramref name="csvFilePath"/> aside by appending
    /// <c>_backup_yyyyMMdd_HHmmss</c> (current local time) to its path.
    /// </summary>
    private static void RenameIfExist(string csvFilePath)
    {
        if (File.Exists(csvFilePath))
        {
            System.IO.File.Move(csvFilePath, $"{csvFilePath}_{DateTime.Now.ToString("backup_yyyyMMdd_HHmmss")}");
        }
    }

    /// <summary>
    /// Loads the keys of all rows of the existing CSV file (if any) into <c>processedfileStates</c>.
    /// </summary>
    private static void LoadOldStatsIfExist(string csvFilePath)
    {
        if (!File.Exists(csvFilePath))
        {
            return;
        }

        using (var fs = new FileStream(csvFilePath, FileMode.Open, FileAccess.Read))
        using (ChoCSVReader<dynamic> reader = new ChoCSVReader(fs).WithFirstLineHeader())
        using (var dTable = reader.AsDataTable())
        {
            // DataRow is fully qualified because System.Data is not among the file's usings.
            foreach (System.Data.DataRow row in dTable.Rows)
            {
                // A set tolerates rows that are already duplicated in the file;
                // Dictionary.Add would throw ArgumentException on the second occurrence.
                processedfileStates.Add(BuildKey(row["File Name"], row["Page"], row["Practice Name"]));
            }
        }
    }

    /// <summary>
    /// Entry point: loads the existing rows, collects the new rows from every JSON file and
    /// appends them to the CSV file in a single write.
    /// </summary>
    public static void Main(string[] args)
    {
        try
        {
            LoadOldStatsIfExist(csvFilePath);

            var dataToWrite = new List<OPModel>();
            foreach (var jsonFile in Directory.GetFiles(jsonFilesPath))
            {
                dataToWrite.AddRange(JsonToCsv(jsonFile));
            }

            if (dataToWrite.Count != 0)
            {
                // FileMode.Append creates the file when it is missing, so one mode covers both cases.
                using (var fs = new FileStream(csvFilePath, FileMode.Append, FileAccess.Write))
                {
                    // Header only when nothing has been written yet (new or empty file).
                    bool needsHeader = fs.Position == 0;

                    using (var writer = new ChoCSVWriter<OPModel>(fs))
                    {
                        writer.WithField("FileName", fieldName: "File Name")
                              .WithField("Page")
                              .WithField("PracticeName", fieldName: "Practice Name");
                        if (needsHeader)
                        {
                            writer.WithFirstLineHeader();
                        }

                        writer.Write(dataToWrite);
                    }

                    // Trailing line break so the next run does not continue on the same line.
                    fs.Write(Environment.NewLine);
                }
            }

            Console.ReadKey();
        }
        catch (Exception ex)
        {
            // Previously every failure was swallowed silently; report it and signal failure.
            Console.Error.WriteLine(ex);
            Environment.ExitCode = 1;
        }
    }

    /// <summary>
    /// Returns the <c>text</c> of the line that immediately follows the first line whose
    /// <c>text</c> equals <paramref name="nameOfField"/>.
    /// </summary>
    /// <param name="lines">Objects exposing a <c>text</c> member, in document order.</param>
    /// <param name="nameOfField">Exact label text to look for, e.g. "Practice Name".</param>
    /// <param name="throwException">
    /// When true, a missing label or a label with no following line raises
    /// <see cref="InvalidDataException"/>; when false, <c>First()</c> raises
    /// <see cref="InvalidOperationException"/> in those cases.
    /// </param>
    /// <returns>The text of the line following the label.</returns>
    public static string fieldValue(IEnumerable<dynamic> lines, string nameOfField, bool throwException = false)
    {
        // Skip everything before the label, then the label itself.
        var afterLabel = lines.SkipWhile(l => l.text != nameOfField).Skip(1);

        if (!throwException)
        {
            return afterLabel.First().text;
        }

        // The old check only counted the items in `lines`; it never looked at whether the
        // label and its value were actually present.
        dynamic valueLine = afterLabel.FirstOrDefault();
        if ((object)valueLine == null)
        {
            throw new InvalidDataException($"No value line found after field '{nameOfField}'.");
        }

        return valueLine.text;
    }

    /// <summary>
    /// Reads the <c>$..readResults</c> nodes of <paramref name="jsonInputFile"/> and returns one
    /// <see cref="OPModel"/> per page-1 result whose key is not yet in <c>processedfileStates</c>.
    /// </summary>
    /// <param name="jsonInputFile">Path of the JSON file to read; also stored in <c>FileName</c>.</param>
    /// <returns>The rows that still have to be written; empty when everything is already in the CSV.</returns>
    public static List<OPModel> JsonToCsv(string jsonInputFile)
    {
        using (var reader = new ChoJSONReader(jsonInputFile).WithJSONPath("$..readResults"))
        {
            // Only page 1 is considered: that is where the "Practice Name" field is looked up.
            return reader
                .Where(r1 => r1.page == 1)
                .Select(r1 =>
                {
                    var lines = (dynamic[])r1.lines;
                    return new OPModel
                    {
                        FileName = jsonInputFile,
                        Page = r1.page,
                        PracticeName = fieldValue(lines, "Practice Name")
                    };
                })
                .Where(de => !processedfileStates.Contains(BuildKey(de.FileName, de.Page, de.PracticeName)))
                .ToList();
        }
    }
}
我有以下代码,它循环遍历目录中的 JSON 文件,并创建一个包含以下记录的 CSV 文件:
results.csv
File Name Page Practice Name
fileXYZ.json 1 XYZ & Co
fileAB2.json 1 ABC & Co
file1.json 1 Associates & Co
但是,如果我停止执行并重新运行程序,会发生相同的记录再次插入到 csv 文件中,导致:
File Name Page Practice Name
fileXYZ.json 1 XYZ & Co
fileAB2.json 1 ABC & Co
file1.json 1 Associates & Co
fileXYZ.json 1 XYZ & Co
fileAB2.json 1 ABC & Co
file1.json 1 Associates & Co
我如何检查记录是否已经存在(即每个字段都与要插入的字段相同),并替换它(或者干脆不再追加它)?例如,如果因为 file1.json 的内容发生了变化、并且目录中新增了一个文件而再次运行程序,新的 CSV 应该如下所示:
results.csv:
File Name Page Practice Name
fileXYZ.json 1 XYZ & Co
fileAB2.json 1 ABC & Co
file1.json 1 Corpum & Co
file32.json 1 FirmA
代码:
using ChoETL;
using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;
/// <summary>
/// Entry point: runs <c>JsonToCsv</c> once for every file found in <c>jsonFilesPath</c>,
/// passing <c>csvFilePath</c> as the output file each time.
/// </summary>
static void Main(string[] args)
{
    // Output to CSV
    string[] inputFiles = Directory.GetFiles(jsonFilesPath);
    for (int i = 0; i < inputFiles.Length; i++)
    {
        JsonToCsv(inputFiles[i], csvFilePath);
    }
}
/// <summary>
/// Returns the <c>text</c> of the line that immediately follows the first line whose
/// <c>text</c> equals <paramref name="nameOfField"/> (i.e. the value printed after a label).
/// </summary>
/// <param name="lines">Objects exposing a <c>text</c> member, in document order.</param>
/// <param name="nameOfField">Exact label text to look for, e.g. "Practice Name".</param>
/// <param name="throwException">
/// When true, a missing label or a label with no following line raises
/// <see cref="InvalidDataException"/>. When false, the behaviour of <c>First()</c> is kept:
/// an <see cref="InvalidOperationException"/> is raised in those cases.
/// </param>
/// <returns>The text of the line following the label.</returns>
/// <exception cref="InvalidDataException">No value line exists and <paramref name="throwException"/> is true.</exception>
public static string fieldValue(IEnumerable<dynamic> lines, string nameOfField, bool throwException = false)
{
    // Skip everything before the label, then the label itself; the sequence
    // now starts at the value line (lines such as "Account Information" are dropped).
    var afterLabel = lines.SkipWhile(l => l.text != nameOfField).Skip(1);

    if (!throwException)
    {
        return afterLabel.First().text;
    }

    // The previous implementation inverted this check: it threw when the label
    // WAS present and called First() on an empty sequence when it was not.
    dynamic valueLine = afterLabel.FirstOrDefault();
    if ((object)valueLine == null)
    {
        throw new InvalidDataException($"No value line found after field '{nameOfField}'.");
    }

    return valueLine.text;
}
/// <summary>
/// Reads the <c>$..readResults</c> nodes of <paramref name="jsonInputFile"/> and appends one CSV row
/// (File Name, Page, Practice Name) per page-1 result to <paramref name="csvFile"/>.
/// </summary>
/// <param name="jsonInputFile">Path of the JSON file to read; also written to the "File Name" column.</param>
/// <param name="csvFile">Path of the CSV file, opened in append mode.</param>
public static void JsonToCsv(string jsonInputFile, string csvFile)
{
    using (var jsonReader = new ChoJSONReader(jsonInputFile).WithJSONPath("$..readResults"))
    using (var output = new FileStream(csvFile, FileMode.Append, FileAccess.Write))
    {
        using (var csv = new ChoCSVWriter(output))
        {
            csv.WithField("FileName", fieldName: "File Name")
               .WithField("Page")
               .WithField("PracticeName", fieldName: "Practice Name");

            // A header is only wanted when nothing has been written to the file yet.
            bool startsEmpty = output.Position == 0;
            if (startsEmpty)
            {
                csv.WithFirstLineHeader();
            }

            // Only page 1 is kept because the fields below exist on the first page only.
            var firstPageRows = jsonReader
                .Where(result => result.page == 1)
                .Select(result =>
                {
                    var lines = (dynamic[])result.lines;
                    return new
                    {
                        FileName = jsonInputFile,
                        Page = result.page,
                        PracticeName = fieldValue(lines, "Practice Name"),
                    };
                });

            csv.Write(firstPageRows);
        }

        // Trailing line break so the next append does not continue on the same line.
        output.Write(Environment.NewLine);
    }
}
样本JSON文件
{
"status": "succeeded",
"createdDateTime": "2020-10-30T15:56:11Z",
"lastUpdatedDateTime": "2020-10-30T15:56:12Z",
"analyzeResult": {
"version": "3.0.0",
"readResults": [
{
"page": 1,
"angle": 0.086,
"width": 684,
"height": 272,
"unit": "pixel",
"lines": [
{
"boundingBox": [
7,
6,
196,
5,
196,
24,
7,
25
],
"text": "Account Information",
"words": [
{
"boundingBox": [
10,
7,
83,
7,
81,
24,
7,
26
],
"text": "Account",
"confidence": 0.981
},
{
"boundingBox": [
87,
7,
196,
6,
196,
24,
85,
24
],
"text": "Information",
"confidence": 0.939
}
]
},
{
"boundingBox": [
120,
56,
223,
57,
223,
70,
120,
70
],
"text": "Practice Name",
"words": [
{
"boundingBox": [
120,
57,
176,
57,
176,
70,
120,
71
],
"text": "Practice",
"confidence": 0.982
},
{
"boundingBox": [
179,
57,
222,
57,
222,
71,
179,
70
],
"text": "Name",
"confidence": 0.985
}
]
},
{
"boundingBox": [
236,
62,
390,
62,
390,
77,
236,
77
],
"text": "Some Practice Name",
"words": [
{
"boundingBox": [
236,
62,
277,
62,
277,
78,
236,
78
],
"text": "Some",
"confidence": 0.987
},
{
"boundingBox": [
280,
62,
340,
62,
341,
78,
280,
77
],
"text": "Practice",
"confidence": 0.984
},
{
"boundingBox": [
343,
62,
390,
62,
390,
78,
344,
78
],
"text": "Name",
"confidence": 0.987
}
]
},
{
"boundingBox": [
107,
102,
223,
102,
223,
115,
107,
115
],
"text": "Owner Full Name",
"words": [
{
"boundingBox": [
108,
103,
151,
102,
151,
116,
107,
116
],
"text": "Owner",
"confidence": 0.985
},
{
"boundingBox": [
154,
102,
177,
102,
176,
116,
153,
116
],
"text": "Full",
"confidence": 0.954
},
{
"boundingBox": [
180,
102,
224,
103,
223,
116,
179,
116
],
"text": "Name",
"confidence": 0.987
}
]
},
{
"boundingBox": [
237,
104,
298,
104,
298,
119,
237,
119
],
"text": "Bob Lee",
"words": [
{
"boundingBox": [
238,
104,
266,
104,
266,
119,
238,
120
],
"text": "Bob",
"confidence": 0.987
},
{
"boundingBox": [
269,
104,
298,
105,
298,
120,
269,
119
],
"text": "Lee",
"confidence": 0.987
}
]
},
{
"boundingBox": [
136,
147,
223,
147,
223,
160,
137,
161
],
"text": "Owner Email",
"words": [
{
"boundingBox": [
137,
148,
181,
147,
181,
161,
137,
162
],
"text": "Owner",
"confidence": 0.985
},
{
"boundingBox": [
184,
147,
224,
147,
224,
161,
184,
161
],
"text": "Email",
"confidence": 0.985
}
]
},
{
"boundingBox": [
239,
144,
361,
144,
361,
162,
239,
162
],
"text": "bob@gmail.com",
"words": [
{
"boundingBox": [
240,
145,
362,
146,
361,
163,
240,
163
],
"text": "bob@gmail.com",
"confidence": 0.974
}
]
},
{
"boundingBox": [
137,
193,
224,
193,
224,
208,
137,
208
],
"text": "Server Setup",
"words": [
{
"boundingBox": [
137,
194,
179,
194,
179,
208,
137,
208
],
"text": "Server",
"confidence": 0.985
},
{
"boundingBox": [
182,
194,
224,
194,
224,
209,
182,
208
],
"text": "Setup",
"confidence": 0.985
}
]
},
{
"boundingBox": [
276,
188,
340,
192,
339,
211,
275,
209
],
"text": "cloud",
"words": [
{
"boundingBox": [
297,
192,
339,
194,
339,
211,
297,
211
],
"text": "cloud",
"confidence": 0.933
}
]
},
{
"boundingBox": [
376,
187,
461,
191,
460,
212,
376,
211
],
"text": "Location",
"words": [
{
"boundingBox": [
394,
191,
460,
196,
459,
211,
394,
211
],
"text": "Location",
"confidence": 0.844
}
]
},
{
"boundingBox": [
500,
189,
666,
192,
665,
212,
499,
211
],
"text": "LIcentral (multi-location)",
"words": [
{
"boundingBox": [
501,
190,
567,
195,
567,
212,
500,
212
],
"text": "LIcentral",
"confidence": 0.665
},
{
"boundingBox": [
572,
195,
665,
195,
665,
212,
571,
212
],
"text": "(multi-location)",
"confidence": 0.899
}
]
},
{
"boundingBox": [
21,
238,
224,
238,
223,
255,
21,
253
],
"text": "Number of Locations Enrolling",
"words": [
{
"boundingBox": [
21,
239,
76,
239,
76,
253,
21,
253
],
"text": "Number",
"confidence": 0.985
},
{
"boundingBox": [
79,
239,
92,
239,
92,
253,
79,
253
],
"text": "of",
"confidence": 0.983
},
{
"boundingBox": [
95,
239,
161,
239,
161,
254,
95,
253
],
"text": "Locations",
"confidence": 0.981
},
{
"boundingBox": [
164,
239,
224,
239,
223,
256,
163,
254
],
"text": "Enrolling",
"confidence": 0.983
}
]
},
{
"boundingBox": [
273,
237,
289,
239,
288,
257,
272,
255
],
"text": "1",
"words": [
{
"boundingBox": [
278,
237,
290,
239,
287,
257,
276,
255
],
"text": "1",
"confidence": 0.981
}
]
},
{
"boundingBox": [
337,
239,
670,
239,
670,
253,
337,
252
],
"text": "*If more than 1 location, add info on the locations form",
"words": [
{
"boundingBox": [
338,
239,
347,
239,
347,
252,
338,
252
],
"text": "*If",
"confidence": 0.874
},
{
"boundingBox": [
350,
239,
384,
239,
384,
253,
350,
252
],
"text": "more",
"confidence": 0.983
},
{
"boundingBox": [
386,
239,
416,
239,
416,
253,
386,
253
],
"text": "than",
"confidence": 0.986
},
{
"boundingBox": [
419,
239,
422,
239,
422,
253,
419,
253
],
"text": "1",
"confidence": 0.635
},
{
"boundingBox": [
425,
239,
478,
239,
478,
253,
425,
253
],
"text": "location,",
"confidence": 0.955
},
{
"boundingBox": [
481,
239,
506,
239,
506,
253,
481,
253
],
"text": "add",
"confidence": 0.986
},
{
"boundingBox": [
509,
239,
533,
239,
533,
253,
509,
253
],
"text": "info",
"confidence": 0.981
},
{
"boundingBox": [
535,
239,
551,
239,
552,
253,
535,
253
],
"text": "on",
"confidence": 0.988
},
{
"boundingBox": [
554,
239,
574,
239,
575,
253,
554,
253
],
"text": "the",
"confidence": 0.987
},
{
"boundingBox": [
577,
239,
634,
239,
634,
253,
577,
253
],
"text": "locations",
"confidence": 0.973
},
{
"boundingBox": [
636,
239,
666,
240,
666,
253,
637,
253
],
"text": "form",
"confidence": 0.986
}
]
}
]
}
]
}
}
采用 Supun De Silva 答案中的方法 2(approach 2)后的 CSV 屏幕截图:
仅供参考:您提供的示例文件无法直接使用,因为代码在 var lines = (dynamic[])r1.lines; 这一行失败。
方法 1 - 重命名旧文件并创建一个新文件用于数据附加
1.引入新函数
/// <summary>
/// Moves an existing file at <paramref name="csvFilePath"/> aside by appending
/// <c>_backup_yyyyMMdd_HHmmss</c> (current local time) to its path. Does nothing when no such file exists.
/// </summary>
/// <param name="csvFilePath">Path of the CSV file to move out of the way.</param>
private static void RenameIfExist(string csvFilePath)
{
    if (!File.Exists(csvFilePath))
    {
        return;
    }

    string suffix = DateTime.Now.ToString("backup_yyyyMMdd_HHmmss");
    File.Move(csvFilePath, csvFilePath + "_" + suffix);
}
2. 调用该移动函数,并以 Create 模式打开新文件
/// <summary>
/// Approach 1: moves any existing <paramref name="csvFile"/> aside via <c>Program.RenameIfExist</c>,
/// then creates a fresh CSV (with header) holding one row per page-1 result of <paramref name="jsonInputFile"/>.
/// </summary>
/// <param name="jsonInputFile">Path of the JSON file to read; also written to the "File Name" column.</param>
/// <param name="csvFile">Path of the CSV file to (re)create.</param>
public static void JsonToCsv(string jsonInputFile, string csvFile)
{
    using (var p = new ChoJSONReader(jsonInputFile).WithJSONPath("$..readResults"))
    {
        Program.RenameIfExist(csvFile);
        using (var fs = new FileStream(csvFile, FileMode.Create, FileAccess.Write))
        {
            // No try/catch here: the former `catch (Exception e) { throw e; }` only reset the
            // stack trace. Exceptions still propagate to the caller, now with the original trace.
            using (ChoCSVWriter<dynamic> writer = new ChoCSVWriter(fs)
                .WithField("FileName", fieldName: "File Name")
                .WithField("Page")
                .WithField("PracticeName", fieldName: "Practice Name")
                .WithFirstLineHeader())
            {
                // Limit the result to page 1 since the fields below only exist on the 1st page
                writer.Write(p
                    .Where(r1 => r1.page == 1)
                    .Select(r1 =>
                    {
                        var lines = (dynamic[])r1.lines;
                        return new
                        {
                            FileName = jsonInputFile,
                            Page = r1.page,
                            PracticeName = fieldValue(lines, "Practice Name"),
                        };
                    }));
            }
        }
    }
}
方法 2 - 打开现有文件并使用数据创建查找结构
- 您可能需要稍微调整一下
1. 声明一个新的数据结构,用来存储已处理记录的键
// Lookup of rows already present in the CSV. LoadOldStatsIfExist fills it with keys built from
// the first three columns of each row joined by '_'; the bool value is always true.
private static Dictionary<string, bool> processedfileStates = new Dictionary<string, bool>();
2. 预加载函数(读取已有 CSV 中的记录)
/// <summary>
/// Reads the existing CSV (if any) and records a key for every data row in
/// <c>Program.processedfileStates</c>. The key is the first three columns joined by '_'.
/// </summary>
/// <param name="csvFilePath">Path of the CSV file written by earlier runs.</param>
private static void LoadOldStatsIfExist(string csvFilePath)
{
    if (!File.Exists(csvFilePath))
    {
        return;
    }

    using (var fs = new FileStream(csvFilePath, FileMode.Open, FileAccess.Read))
    using (ChoCSVReader<dynamic> reader = new ChoCSVReader(fs).WithFirstLineHeader())
    using (var dataReader = reader.AsDataReader())
    {
        while (dataReader.Read())
        {
            string key = $"{dataReader[0]}_{dataReader[1]}_{dataReader[2]}";

            // Indexer assignment instead of Add: Add throws ArgumentException on a repeated key,
            // which happens when the CSV already contains duplicate rows or when this loader
            // runs more than once against the same static dictionary.
            Program.processedfileStates[key] = true;
        }
    }
}
3. JSON 转 CSV 的函数
/// <summary>
/// Approach 2: appends the page-1 row of <paramref name="jsonInputFile"/> to <paramref name="csvFile"/>
/// unless an identical row (same File Name, Page and Practice Name) is already in the file.
/// </summary>
/// <param name="jsonInputFile">Path of the JSON file to read; also written to the "File Name" column.</param>
/// <param name="csvFile">Path of the CSV file; created with a header when no earlier rows were loaded.</param>
public static void JsonToCsv(string jsonInputFile, string csvFile)
{
    using (var p = new ChoJSONReader(jsonInputFile).WithJSONPath("$..readResults"))
    {
        // This method runs once per JSON file, so start from a clean lookup before reloading;
        // otherwise keys loaded by the previous call are added a second time.
        Program.processedfileStates.Clear();
        Program.LoadOldStatsIfExist(csvFile);

        bool appendToExisting = Program.processedfileStates.Count != 0;
        using (var fs = new FileStream(csvFile, appendToExisting ? FileMode.Append : FileMode.Create, FileAccess.Write))
        {
            if (appendToExisting)
            {
                // Start on a new line so the new row is not glued to the last existing one.
                fs.Write(Environment.NewLine);
            }

            // The writer is created inside `using` so it is disposed even if configuration fails.
            // The former `catch (Exception e) { throw e; }` was dropped: it only reset the stack trace.
            using (ChoCSVWriter<dynamic> writer = new ChoCSVWriter(fs)
                .WithField("FileName", fieldName: "File Name")
                .WithField("Page")
                .WithField("PracticeName", fieldName: "Practice Name"))
            {
                if (!appendToExisting)
                {
                    // A header is only written for a freshly created file.
                    writer.WithFirstLineHeader();
                }

                // Limit the result to page 1 since the fields below only exist on the 1st page
                var data = p
                    .Where(r1 => r1.page == 1)
                    .Select(r1 =>
                    {
                        var lines = (dynamic[])r1.lines;
                        return new
                        {
                            FileName = jsonInputFile,
                            Page = r1.page,
                            PracticeName = fieldValue(lines, "Practice Name"),
                        };
                    })
                    // Same key layout as the one built by LoadOldStatsIfExist.
                    .Where(de => !processedfileStates.ContainsKey($"{de.FileName}_{de.Page}_{de.PracticeName}"));

                writer.Write(data);
            }
        }
    }
}
方法 2 - 重构
/// <summary>
/// One output row of the CSV: the source file, the page number and the extracted practice name.
/// </summary>
public class OPModel
{
/// <summary>Path of the JSON input file the row came from (CSV column "File Name").</summary>
public string FileName { get; set; }
/// <summary>Value of the <c>page</c> property of the read result (CSV column "Page").</summary>
public long Page { get; set; }
/// <summary>Text found after the "Practice Name" label (CSV column "Practice Name").</summary>
public string PracticeName { get; set; }
}
/// <summary>
/// Console tool that scans a directory of JSON files, extracts the "Practice Name" value from
/// page 1 of each file and appends the rows that are not yet present to a CSV file.
/// </summary>
public class Program
{
    // Verbatim literals: in a regular string, sequences such as \D, \C or \T are invalid escapes.
    const string jsonFilesPath = @"D:\DevWork\C#\TempProject1\ConsoleApp1\data";
    const string csvFilePath = @"D:\DevWork\C#\TempProject1\ConsoleApp1\output\op.csv";

    // Keys (see BuildKey) of the rows already present in the CSV file.
    private static readonly HashSet<string> processedfileStates = new HashSet<string>();

    /// <summary>Builds the lookup key that identifies a CSV row: the three column values joined by '_'.</summary>
    private static string BuildKey(object fileName, object page, object practiceName)
    {
        return $"{fileName}_{page}_{practiceName}";
    }

    /// <summary>
    /// Moves an existing file at <paramref name="csvFilePath"/> aside by appending
    /// <c>_backup_yyyyMMdd_HHmmss</c> (current local time) to its path.
    /// </summary>
    private static void RenameIfExist(string csvFilePath)
    {
        if (File.Exists(csvFilePath))
        {
            System.IO.File.Move(csvFilePath, $"{csvFilePath}_{DateTime.Now.ToString("backup_yyyyMMdd_HHmmss")}");
        }
    }

    /// <summary>
    /// Loads the keys of all rows of the existing CSV file (if any) into <c>processedfileStates</c>.
    /// </summary>
    private static void LoadOldStatsIfExist(string csvFilePath)
    {
        if (!File.Exists(csvFilePath))
        {
            return;
        }

        using (var fs = new FileStream(csvFilePath, FileMode.Open, FileAccess.Read))
        using (ChoCSVReader<dynamic> reader = new ChoCSVReader(fs).WithFirstLineHeader())
        using (var dTable = reader.AsDataTable())
        {
            // DataRow is fully qualified because System.Data is not among the file's usings.
            foreach (System.Data.DataRow row in dTable.Rows)
            {
                // A set tolerates rows that are already duplicated in the file;
                // Dictionary.Add would throw ArgumentException on the second occurrence.
                processedfileStates.Add(BuildKey(row["File Name"], row["Page"], row["Practice Name"]));
            }
        }
    }

    /// <summary>
    /// Entry point: loads the existing rows, collects the new rows from every JSON file and
    /// appends them to the CSV file in a single write.
    /// </summary>
    public static void Main(string[] args)
    {
        try
        {
            LoadOldStatsIfExist(csvFilePath);

            var dataToWrite = new List<OPModel>();
            foreach (var jsonFile in Directory.GetFiles(jsonFilesPath))
            {
                dataToWrite.AddRange(JsonToCsv(jsonFile));
            }

            if (dataToWrite.Count != 0)
            {
                // FileMode.Append creates the file when it is missing, so one mode covers both cases.
                using (var fs = new FileStream(csvFilePath, FileMode.Append, FileAccess.Write))
                {
                    // Header only when nothing has been written yet (new or empty file).
                    bool needsHeader = fs.Position == 0;

                    using (var writer = new ChoCSVWriter<OPModel>(fs))
                    {
                        writer.WithField("FileName", fieldName: "File Name")
                              .WithField("Page")
                              .WithField("PracticeName", fieldName: "Practice Name");
                        if (needsHeader)
                        {
                            writer.WithFirstLineHeader();
                        }

                        writer.Write(dataToWrite);
                    }

                    // Trailing line break so the next run does not continue on the same line.
                    fs.Write(Environment.NewLine);
                }
            }

            Console.ReadKey();
        }
        catch (Exception ex)
        {
            // Previously every failure was swallowed silently; report it and signal failure.
            Console.Error.WriteLine(ex);
            Environment.ExitCode = 1;
        }
    }

    /// <summary>
    /// Returns the <c>text</c> of the line that immediately follows the first line whose
    /// <c>text</c> equals <paramref name="nameOfField"/>.
    /// </summary>
    /// <param name="lines">Objects exposing a <c>text</c> member, in document order.</param>
    /// <param name="nameOfField">Exact label text to look for, e.g. "Practice Name".</param>
    /// <param name="throwException">
    /// When true, a missing label or a label with no following line raises
    /// <see cref="InvalidDataException"/>; when false, <c>First()</c> raises
    /// <see cref="InvalidOperationException"/> in those cases.
    /// </param>
    /// <returns>The text of the line following the label.</returns>
    public static string fieldValue(IEnumerable<dynamic> lines, string nameOfField, bool throwException = false)
    {
        // Skip everything before the label, then the label itself.
        var afterLabel = lines.SkipWhile(l => l.text != nameOfField).Skip(1);

        if (!throwException)
        {
            return afterLabel.First().text;
        }

        // The old check only counted the items in `lines`; it never looked at whether the
        // label and its value were actually present.
        dynamic valueLine = afterLabel.FirstOrDefault();
        if ((object)valueLine == null)
        {
            throw new InvalidDataException($"No value line found after field '{nameOfField}'.");
        }

        return valueLine.text;
    }

    /// <summary>
    /// Reads the <c>$..readResults</c> nodes of <paramref name="jsonInputFile"/> and returns one
    /// <see cref="OPModel"/> per page-1 result whose key is not yet in <c>processedfileStates</c>.
    /// </summary>
    /// <param name="jsonInputFile">Path of the JSON file to read; also stored in <c>FileName</c>.</param>
    /// <returns>The rows that still have to be written; empty when everything is already in the CSV.</returns>
    public static List<OPModel> JsonToCsv(string jsonInputFile)
    {
        using (var reader = new ChoJSONReader(jsonInputFile).WithJSONPath("$..readResults"))
        {
            // Only page 1 is considered: that is where the "Practice Name" field is looked up.
            return reader
                .Where(r1 => r1.page == 1)
                .Select(r1 =>
                {
                    var lines = (dynamic[])r1.lines;
                    return new OPModel
                    {
                        FileName = jsonInputFile,
                        Page = r1.page,
                        PracticeName = fieldValue(lines, "Practice Name")
                    };
                })
                .Where(de => !processedfileStates.Contains(BuildKey(de.FileName, de.Page, de.PracticeName)))
                .ToList();
        }
    }
}