ChoETL 嵌套 JSON 到 CSV
ChoETL nested JSON to CSV
我需要将 JSON 转换为 CSV。问题是我无法从嵌套的 JSON 结构中选出所需的全部字段。JSON 文件示例:
{
"system": {
"created": "2021-08-01T13:33:37.123Z",
"by": "web"
},
"location": {
"id": 100,
"country": "DE"
},
"order": [
{
"OrderID": 22,
"OrderName": "Soda",
"OrderArticles": [
{
"Size": 33,
"ProductName": "Coke",
"ProductId": "999"
},
{
"Size": 66,
"ProductName": "Fanta",
"ProductId": "888"
},
{
"Size": 50,
"ProductName": "Pepsi",
"ProductId": "444"
}
],
"ProcessedId": 1001,
"Date": "2021-08-02"
},
{
"OrderID": 23,
"OrderName": "Beverage",
"OrderArticles": [
{
"Size": 44,
"ProductName": "Coke",
"ProductId": "999"
}
],
"ProcessedId": 1002,
"Date": "2021-08-03"
}
]
}
这是我想要的输出:
created;by;id;country;OrderID;OrderName;Size;ProductName;ProductId
2021-08-01T13:33:37.123Z;web;100;DE;22;Soda;33;Coke;999
2021-08-01T13:33:37.123Z;web;100;DE;22;Soda;66;Fanta;888
2021-08-01T13:33:37.123Z;web;100;DE;22;Soda;50;Pepsi;444
2021-08-01T13:33:37.123Z;web;100;DE;23;Beverage;44;Coke;999
我可以分别获取 created 和 by 的值,以及 OrderArticles 的值,只是不知道如何把它们合并到同一行。下面是我目前使用的代码,它得到的是两个相互独立的结果:
// Reads every element matched by "$..order[*]" as a dynamic record and produces
// one row per entry of that order's "OrderArticles" array.
using (var r = new ChoJSONReader(inBlob).WithJSONPath("$..order[*]").AllowComplexJSONPath(true))
{
    // The sample JSON names the array "OrderArticles", and Size/ProductName/ProductId
    // are article-level values, so they are read from r2 rather than from the order.
    // ToList() runs the query while the reader is still open; a lazy query returned
    // from inside this using block would be enumerated after the reader is disposed.
    return r.SelectMany(r1 => ((dynamic[])r1.OrderArticles).Select(r2 => new
    {
        r1.OrderID,
        r1.OrderName,
        r2.Size,
        r2.ProductName,
        r2.ProductId
    })).ToList();
}
// Reads the document root ("$") as a dynamic record and projects the two
// values of its "system" object.
using (var r = new ChoJSONReader(inBlob).WithJSONPath("$").AllowComplexJSONPath(true))
{
    // ToList() runs the query while the reader is still open; a lazy query returned
    // from inside this using block would be enumerated after the reader is disposed.
    return r.Select(r1 => new
    {
        r1.system.created,
        r1.system.by
    }).ToList();
}
这是我的解决方案。
这是我的数据模型:
using System.Text.Json.Serialization;
namespace JsonToCSV.Models;
// Root myDeserializedClass = JsonSerializer.Deserialize<Root>(myJsonResponse);

/// <summary>
/// Maps the "system" object of the input document.
/// </summary>
/// <remarks>
/// NOTE(review): this type shares its name with the BCL <c>System</c> namespace, so
/// inside <c>JsonToCSV.Models</c> an expression such as <c>System.Text</c> binds to this
/// class. The name is kept so existing references continue to compile.
/// </remarks>
public class System
{
    /// <summary>Value of "created", kept as text.</summary>
    [JsonPropertyName("created")]
    public string Created { get; set; }

    /// <summary>Value of "by".</summary>
    [JsonPropertyName("by")]
    public string By { get; set; }
}

/// <summary>
/// Maps the "location" object of the input document.
/// </summary>
public class Location
{
    /// <summary>Value of "id".</summary>
    [JsonPropertyName("id")]
    public int Id { get; set; }

    /// <summary>Value of "country".</summary>
    [JsonPropertyName("country")]
    public string Country { get; set; }
}

/// <summary>
/// Maps one element of an order's "OrderArticles" array.
/// </summary>
public class OrderArticle
{
    /// <summary>Value of "Size".</summary>
    [JsonPropertyName("Size")]
    public int Size { get; set; }

    /// <summary>Value of "ProductName".</summary>
    [JsonPropertyName("ProductName")]
    public string ProductName { get; set; }

    /// <summary>Value of "ProductId"; a JSON string in the sample data.</summary>
    [JsonPropertyName("ProductId")]
    public string ProductId { get; set; }
}

/// <summary>
/// Maps one element of the root "order" array.
/// </summary>
public class Order
{
    /// <summary>Value of "OrderID".</summary>
    [JsonPropertyName("OrderID")]
    public int OrderID { get; set; }

    /// <summary>Value of "OrderName".</summary>
    [JsonPropertyName("OrderName")]
    public string OrderName { get; set; }

    /// <summary>
    /// Articles of this order. Starts as an empty list so that an order without an
    /// "OrderArticles" property can still be enumerated.
    /// </summary>
    [JsonPropertyName("OrderArticles")]
    public List<OrderArticle> OrderArticles { get; set; } = new();

    /// <summary>Value of "ProcessedId".</summary>
    [JsonPropertyName("ProcessedId")]
    public int ProcessedId { get; set; }

    /// <summary>Value of "Date", kept as text.</summary>
    [JsonPropertyName("Date")]
    public string Date { get; set; }
}

/// <summary>
/// Maps the whole input document.
/// </summary>
public class Root
{
    /// <summary>The "system" object.</summary>
    [JsonPropertyName("system")]
    public System System { get; set; }

    /// <summary>The "location" object.</summary>
    [JsonPropertyName("location")]
    public Location Location { get; set; }

    /// <summary>
    /// Elements of the "order" array. Starts as an empty list so that a document
    /// without an "order" property can still be enumerated.
    /// </summary>
    [JsonPropertyName("order")]
    public List<Order> Orders { get; set; } = new();
}
这是业务逻辑(如果你愿意,我可以用 LINQ 替换它):
using System.Text.Json;
using JsonToCSV.Models;
// Loads the whole document, because every CSV row repeats the root-level
// system/location values next to the per-article values.
var dataAsText = File.ReadAllText("data.json");

// Deserialize returns null when the file holds the JSON literal null; fail with a
// clear message instead of a NullReferenceException further down.
var data = JsonSerializer.Deserialize<Root>(dataAsText)
    ?? throw new InvalidDataException("data.json does not contain a JSON object.");

var csv = new List<string> { "created;by;id;country;OrderID;OrderName;Size;ProductName;ProductId" };

// One row per (order, article) pair. A missing "order" or "OrderArticles" array
// produces no rows; a missing "system" or "location" object produces empty fields.
foreach (var order in data.Orders ?? new List<Order>())
{
    foreach (var orderArticle in order.OrderArticles ?? new List<OrderArticle>())
    {
        csv.Add(string.Join(";",
            Field(data.System?.Created),
            Field(data.System?.By),
            Field(data.Location?.Id),
            Field(data.Location?.Country),
            Field(order.OrderID),
            Field(order.OrderName),
            Field(orderArticle.Size),
            Field(orderArticle.ProductName),
            Field(orderArticle.ProductId)));
    }
}

File.WriteAllLines("data.csv", csv);

// Converts a value to its CSV field text. A field containing the delimiter, a double
// quote or a line break is wrapped in double quotes, with embedded quotes doubled,
// so it cannot shift the columns of its row. Other values are written unchanged.
static string Field(object? value)
{
    var text = value?.ToString() ?? string.Empty;
    return text.IndexOfAny(new[] { ';', '"', '\r', '\n' }) < 0
        ? text
        : "\"" + text.Replace("\"", "\"\"") + "\"";
}
这会创建 .csv 文件,内容为:
created;by;id;country;OrderID;OrderName;Size;ProductName;ProductId
2021-08-01T13:33:37.123Z;web;100;DE;22;Soda;33;Coke;999
2021-08-01T13:33:37.123Z;web;100;DE;22;Soda;66;Fanta;888
2021-08-01T13:33:37.123Z;web;100;DE;22;Soda;50;Pepsi;444
2021-08-01T13:33:37.123Z;web;100;DE;23;Beverage;44;Coke;999
由于需要 system.created、system.by、location.id、location.country 字段,必须从根节点(root)加载整个 JSON,然后组合出预期 CSV 所需的对象。
这是工作示例(采用最新的 nuget 包)
方法一:(使用动态模型)
// Buffer that receives the generated CSV text.
StringBuilder csv = new StringBuilder();

// DateParseHandling.None is the Json.NET setting that leaves date-like strings as
// plain strings. NOTE(review): presumably that is why "created" reaches the CSV
// unchanged - confirm against the ChoETL version in use.
using (var reader = new ChoJSONReader("*** YOUR JSON FILE PATH ***")
    .JsonSerializationSettings(settings => settings.DateParseHandling = DateParseHandling.None))
using (var writer = new ChoCSVWriter(csv)
    .WithDelimiter(";")
    .WithFirstLineHeader())
{
    // Each root record is expanded to one row per (order, article) pair; the
    // root-level values are repeated on every row. "country" is not projected here.
    var rows = reader.SelectMany(doc =>
        ((Array)doc.order).Cast<dynamic>().SelectMany(ord =>
            ((Array)ord.OrderArticles).Cast<dynamic>().Select(article => new
            {
                created = doc.system.created,
                by = doc.system.by,
                id = doc.location.id,
                OrderID = ord.OrderID,
                OrderName = ord.OrderName,
                Size = article.Size,
                ProductName = article.ProductName,
                ProductId = article.ProductId,
            })));

    writer.Write(rows);
}

// Printed only after both using blocks have closed the reader and the writer.
Console.WriteLine(csv.ToString());
输出:
created;by;id;OrderID;OrderName;Size;ProductName;ProductId
2021-08-01T13:33:37.123Z;web;100;22;Soda;33;Coke;999
2021-08-01T13:33:37.123Z;web;100;22;Soda;66;Fanta;888
2021-08-01T13:33:37.123Z;web;100;22;Soda;50;Pepsi;444
2021-08-01T13:33:37.123Z;web;100;23;Beverage;44;Coke;999
方法二:使用POCO模型
定义与输入 JSON 匹配的 POCO 对象
/// <summary>
/// Maps the "system" object of the input document.
/// </summary>
/// <remarks>
/// NOTE(review): this type shares its name with the BCL <c>System</c> namespace, so it
/// has to be declared inside a namespace or an enclosing type. The name is kept so
/// existing references continue to compile.
/// </remarks>
public class System
{
    /// <summary>Value of "created", kept as text.</summary>
    [JsonProperty("created")]
    public string Created { get; set; }

    /// <summary>Value of "by".</summary>
    [JsonProperty("by")]
    public string By { get; set; }
}

/// <summary>
/// Maps the "location" object of the input document.
/// </summary>
public class Location
{
    /// <summary>Value of "id".</summary>
    [JsonProperty("id")]
    public int Id { get; set; }

    /// <summary>Value of "country".</summary>
    [JsonProperty("country")]
    public string Country { get; set; }
}

/// <summary>
/// Maps one element of an order's "OrderArticles" array.
/// </summary>
public class OrderArticle
{
    /// <summary>Value of "Size".</summary>
    [JsonProperty("Size")]
    public int Size { get; set; }

    /// <summary>Value of "ProductName".</summary>
    [JsonProperty("ProductName")]
    public string ProductName { get; set; }

    /// <summary>Value of "ProductId"; a JSON string in the sample data.</summary>
    [JsonProperty("ProductId")]
    public string ProductId { get; set; }
}

/// <summary>
/// Maps one element of the root "order" array.
/// </summary>
public class Order
{
    /// <summary>Value of "OrderID".</summary>
    [JsonProperty("OrderID")]
    public int OrderID { get; set; }

    /// <summary>Value of "OrderName".</summary>
    [JsonProperty("OrderName")]
    public string OrderName { get; set; }

    /// <summary>
    /// Articles of this order. Starts as an empty list so that an order without an
    /// "OrderArticles" property can still be enumerated.
    /// </summary>
    [JsonProperty("OrderArticles")]
    public List<OrderArticle> OrderArticles { get; set; } = new List<OrderArticle>();

    /// <summary>Value of "ProcessedId".</summary>
    [JsonProperty("ProcessedId")]
    public int ProcessedId { get; set; }

    /// <summary>Value of "Date", kept as text.</summary>
    [JsonProperty("Date")]
    public string Date { get; set; }
}

/// <summary>
/// Maps the whole input document.
/// </summary>
public class OrderRoot
{
    /// <summary>The "system" object.</summary>
    [JsonProperty("system")]
    public System System { get; set; }

    /// <summary>The "location" object.</summary>
    [JsonProperty("location")]
    public Location Location { get; set; }

    /// <summary>
    /// Elements of the "order" array. Starts as an empty list so that a document
    /// without an "order" property can still be enumerated.
    /// </summary>
    [JsonProperty("order")]
    public List<Order> Orders { get; set; } = new List<Order>();
}
然后使用下面的代码加载 json 并以预期格式输出 CSV
// Buffer that receives the generated CSV text.
StringBuilder csv = new StringBuilder();

// Reads the document as a typed OrderRoot.
// NOTE(review): UseJsonSerialization() presumably hands deserialization to the JSON
// serializer so the [JsonProperty] names on the POCOs apply - confirm.
using (var r = new ChoJSONReader<OrderRoot>("*** YOUR JSON FILE PATH ***")
    .UseJsonSerialization()
)
{
    using (var w = new ChoCSVWriter(csv)
        .WithDelimiter(";")
        .WithFirstLineHeader())
    {
        // One row per (order, article) pair, with the root-level values repeated on
        // each row. "country" is projected as well, so the columns match the requested
        // layout: created;by;id;country;OrderID;OrderName;Size;ProductName;ProductId.
        w.Write(r.SelectMany(root =>
            root.Orders
                .SelectMany(order => order.OrderArticles
                    .Select(orderarticle => new
                    {
                        created = root.System.Created,
                        by = root.System.By,
                        id = root.Location.Id,
                        country = root.Location.Country,
                        order.OrderID,
                        order.OrderName,
                        orderarticle.Size,
                        orderarticle.ProductName,
                        orderarticle.ProductId,
                    })
                )
            )
        );
    }
}

// Printed only after both using blocks have closed the reader and the writer.
Console.WriteLine(csv.ToString());
方法 3:简化动态模型方法
// Buffer that receives the generated CSV text.
StringBuilder csv = new StringBuilder();

// Root-level values are mapped through explicit JSON paths; the order and article
// values are declared by name only.
// NOTE(review): FlattenNode presumably expands the nested arrays into one record per
// article - confirm against the ChoETL documentation.
using (var r = new ChoJSONReader("*** YOUR JSON FILE PATH ***")
    // "HH" is the 24-hour clock specifier. The previous "hh" (12-hour clock) wrote
    // 13:33:37 as 01:33:37, with no AM/PM marker to tell the two apart.
    .WithField("created", jsonPath: "$..system.created", isArray: false, valueConverter: o => ((DateTime)o).ToString("yyyy-MM-ddTHH:mm:ss.fffZ"))
    .WithField("by", jsonPath: "$..system.by", isArray: false)
    .WithField("id", jsonPath: "$..location.id", isArray: false)
    .WithField("country", jsonPath: "$..location.country", isArray: false)
    .WithField("OrderID")
    .WithField("OrderName")
    .WithField("Size")
    .WithField("ProductName")
    .WithField("ProductId")
    .Configure(c => c.FlattenNode = true)
)
{
    using (var w = new ChoCSVWriter(csv)
        .WithDelimiter(";")
        .WithFirstLineHeader())
    {
        w.Write(r);
    }
}

// Printed only after both using blocks have closed the reader and the writer.
Console.WriteLine(csv.ToString());
方法 4:更简化的动态模型方法
// Buffer that receives the generated CSV text.
StringBuilder csv = new StringBuilder();

// No fields are declared; the reader is only configured.
// NOTE(review): FlattenNode presumably expands the nested arrays into flat records,
// and IgnoreDictionaryFieldPrefix presumably drops the parent-node prefix from the
// generated column names - confirm both against the ChoETL documentation.
// DateParseHandling.None is the Json.NET setting that leaves date-like strings as
// plain strings.
using (var reader = new ChoJSONReader("*** YOUR JSON FILE PATH ***")
    .Configure(readerConfig => readerConfig.FlattenNode = true)
    .JsonSerializationSettings(settings => settings.DateParseHandling = DateParseHandling.None))
using (var writer = new ChoCSVWriter(csv)
    .WithDelimiter(";")
    .WithFirstLineHeader()
    .Configure(writerConfig => writerConfig.IgnoreDictionaryFieldPrefix = true))
{
    writer.Write(reader);
}

// Printed only after both using blocks have closed the reader and the writer.
Console.WriteLine(csv.ToString());
样本fiddle:https://dotnetfiddle.net/VCezp8
我需要将 JSON 转换为 CSV。问题是我无法从嵌套的 JSON 结构中选出所需的全部字段。JSON 文件示例:
{
"system": {
"created": "2021-08-01T13:33:37.123Z",
"by": "web"
},
"location": {
"id": 100,
"country": "DE"
},
"order": [
{
"OrderID": 22,
"OrderName": "Soda",
"OrderArticles": [
{
"Size": 33,
"ProductName": "Coke",
"ProductId": "999"
},
{
"Size": 66,
"ProductName": "Fanta",
"ProductId": "888"
},
{
"Size": 50,
"ProductName": "Pepsi",
"ProductId": "444"
}
],
"ProcessedId": 1001,
"Date": "2021-08-02"
},
{
"OrderID": 23,
"OrderName": "Beverage",
"OrderArticles": [
{
"Size": 44,
"ProductName": "Coke",
"ProductId": "999"
}
],
"ProcessedId": 1002,
"Date": "2021-08-03"
}
]
}
这是我想要的输出:
created;by;id;country;OrderID;OrderName;Size;ProductName;ProductId
2021-08-01T13:33:37.123Z;web;100;DE;22;Soda;33;Coke;999
2021-08-01T13:33:37.123Z;web;100;DE;22;Soda;66;Fanta;888
2021-08-01T13:33:37.123Z;web;100;DE;22;Soda;50;Pepsi;444
2021-08-01T13:33:37.123Z;web;100;DE;23;Beverage;44;Coke;999
我可以分别获取 created 和 by 的值,以及 OrderArticles 的值,只是不知道如何把它们合并到同一行。下面是我目前使用的代码,它得到的是两个相互独立的结果:
// Reads every element matched by "$..order[*]" as a dynamic record and produces
// one row per entry of that order's "OrderArticles" array.
using (var r = new ChoJSONReader(inBlob).WithJSONPath("$..order[*]").AllowComplexJSONPath(true))
{
    // The sample JSON names the array "OrderArticles", and Size/ProductName/ProductId
    // are article-level values, so they are read from r2 rather than from the order.
    // ToList() runs the query while the reader is still open; a lazy query returned
    // from inside this using block would be enumerated after the reader is disposed.
    return r.SelectMany(r1 => ((dynamic[])r1.OrderArticles).Select(r2 => new
    {
        r1.OrderID,
        r1.OrderName,
        r2.Size,
        r2.ProductName,
        r2.ProductId
    })).ToList();
}
// Reads the document root ("$") as a dynamic record and projects the two
// values of its "system" object.
using (var r = new ChoJSONReader(inBlob).WithJSONPath("$").AllowComplexJSONPath(true))
{
    // ToList() runs the query while the reader is still open; a lazy query returned
    // from inside this using block would be enumerated after the reader is disposed.
    return r.Select(r1 => new
    {
        r1.system.created,
        r1.system.by
    }).ToList();
}
这是我的解决方案。
这是我的数据模型:
using System.Text.Json.Serialization;
namespace JsonToCSV.Models;
// Root myDeserializedClass = JsonSerializer.Deserialize<Root>(myJsonResponse);

/// <summary>
/// Maps the "system" object of the input document.
/// </summary>
/// <remarks>
/// NOTE(review): this type shares its name with the BCL <c>System</c> namespace, so
/// inside <c>JsonToCSV.Models</c> an expression such as <c>System.Text</c> binds to this
/// class. The name is kept so existing references continue to compile.
/// </remarks>
public class System
{
    /// <summary>Value of "created", kept as text.</summary>
    [JsonPropertyName("created")]
    public string Created { get; set; }

    /// <summary>Value of "by".</summary>
    [JsonPropertyName("by")]
    public string By { get; set; }
}

/// <summary>
/// Maps the "location" object of the input document.
/// </summary>
public class Location
{
    /// <summary>Value of "id".</summary>
    [JsonPropertyName("id")]
    public int Id { get; set; }

    /// <summary>Value of "country".</summary>
    [JsonPropertyName("country")]
    public string Country { get; set; }
}

/// <summary>
/// Maps one element of an order's "OrderArticles" array.
/// </summary>
public class OrderArticle
{
    /// <summary>Value of "Size".</summary>
    [JsonPropertyName("Size")]
    public int Size { get; set; }

    /// <summary>Value of "ProductName".</summary>
    [JsonPropertyName("ProductName")]
    public string ProductName { get; set; }

    /// <summary>Value of "ProductId"; a JSON string in the sample data.</summary>
    [JsonPropertyName("ProductId")]
    public string ProductId { get; set; }
}

/// <summary>
/// Maps one element of the root "order" array.
/// </summary>
public class Order
{
    /// <summary>Value of "OrderID".</summary>
    [JsonPropertyName("OrderID")]
    public int OrderID { get; set; }

    /// <summary>Value of "OrderName".</summary>
    [JsonPropertyName("OrderName")]
    public string OrderName { get; set; }

    /// <summary>
    /// Articles of this order. Starts as an empty list so that an order without an
    /// "OrderArticles" property can still be enumerated.
    /// </summary>
    [JsonPropertyName("OrderArticles")]
    public List<OrderArticle> OrderArticles { get; set; } = new();

    /// <summary>Value of "ProcessedId".</summary>
    [JsonPropertyName("ProcessedId")]
    public int ProcessedId { get; set; }

    /// <summary>Value of "Date", kept as text.</summary>
    [JsonPropertyName("Date")]
    public string Date { get; set; }
}

/// <summary>
/// Maps the whole input document.
/// </summary>
public class Root
{
    /// <summary>The "system" object.</summary>
    [JsonPropertyName("system")]
    public System System { get; set; }

    /// <summary>The "location" object.</summary>
    [JsonPropertyName("location")]
    public Location Location { get; set; }

    /// <summary>
    /// Elements of the "order" array. Starts as an empty list so that a document
    /// without an "order" property can still be enumerated.
    /// </summary>
    [JsonPropertyName("order")]
    public List<Order> Orders { get; set; } = new();
}
这是业务逻辑(如果你愿意,我可以用 LINQ 替换它):
using System.Text.Json;
using JsonToCSV.Models;
// Loads the whole document, because every CSV row repeats the root-level
// system/location values next to the per-article values.
var dataAsText = File.ReadAllText("data.json");

// Deserialize returns null when the file holds the JSON literal null; fail with a
// clear message instead of a NullReferenceException further down.
var data = JsonSerializer.Deserialize<Root>(dataAsText)
    ?? throw new InvalidDataException("data.json does not contain a JSON object.");

var csv = new List<string> { "created;by;id;country;OrderID;OrderName;Size;ProductName;ProductId" };

// One row per (order, article) pair. A missing "order" or "OrderArticles" array
// produces no rows; a missing "system" or "location" object produces empty fields.
foreach (var order in data.Orders ?? new List<Order>())
{
    foreach (var orderArticle in order.OrderArticles ?? new List<OrderArticle>())
    {
        csv.Add(string.Join(";",
            Field(data.System?.Created),
            Field(data.System?.By),
            Field(data.Location?.Id),
            Field(data.Location?.Country),
            Field(order.OrderID),
            Field(order.OrderName),
            Field(orderArticle.Size),
            Field(orderArticle.ProductName),
            Field(orderArticle.ProductId)));
    }
}

File.WriteAllLines("data.csv", csv);

// Converts a value to its CSV field text. A field containing the delimiter, a double
// quote or a line break is wrapped in double quotes, with embedded quotes doubled,
// so it cannot shift the columns of its row. Other values are written unchanged.
static string Field(object? value)
{
    var text = value?.ToString() ?? string.Empty;
    return text.IndexOfAny(new[] { ';', '"', '\r', '\n' }) < 0
        ? text
        : "\"" + text.Replace("\"", "\"\"") + "\"";
}
这会创建 .csv 文件,内容为:
created;by;id;country;OrderID;OrderName;Size;ProductName;ProductId
2021-08-01T13:33:37.123Z;web;100;DE;22;Soda;33;Coke;999
2021-08-01T13:33:37.123Z;web;100;DE;22;Soda;66;Fanta;888
2021-08-01T13:33:37.123Z;web;100;DE;22;Soda;50;Pepsi;444
2021-08-01T13:33:37.123Z;web;100;DE;23;Beverage;44;Coke;999
由于需要 system.created、system.by、location.id、location.country 字段,必须从根节点(root)加载整个 JSON,然后组合出预期的 CSV。
这是工作示例(采用最新的 nuget 包)
方法一:(使用动态模型)
// Buffer that receives the generated CSV text.
StringBuilder csv = new StringBuilder();

// DateParseHandling.None is the Json.NET setting that leaves date-like strings as
// plain strings. NOTE(review): presumably that is why "created" reaches the CSV
// unchanged - confirm against the ChoETL version in use.
using (var reader = new ChoJSONReader("*** YOUR JSON FILE PATH ***")
    .JsonSerializationSettings(settings => settings.DateParseHandling = DateParseHandling.None))
using (var writer = new ChoCSVWriter(csv)
    .WithDelimiter(";")
    .WithFirstLineHeader())
{
    // Each root record is expanded to one row per (order, article) pair; the
    // root-level values are repeated on every row. "country" is not projected here.
    var rows = reader.SelectMany(doc =>
        ((Array)doc.order).Cast<dynamic>().SelectMany(ord =>
            ((Array)ord.OrderArticles).Cast<dynamic>().Select(article => new
            {
                created = doc.system.created,
                by = doc.system.by,
                id = doc.location.id,
                OrderID = ord.OrderID,
                OrderName = ord.OrderName,
                Size = article.Size,
                ProductName = article.ProductName,
                ProductId = article.ProductId,
            })));

    writer.Write(rows);
}

// Printed only after both using blocks have closed the reader and the writer.
Console.WriteLine(csv.ToString());
输出:
created;by;id;OrderID;OrderName;Size;ProductName;ProductId
2021-08-01T13:33:37.123Z;web;100;22;Soda;33;Coke;999
2021-08-01T13:33:37.123Z;web;100;22;Soda;66;Fanta;888
2021-08-01T13:33:37.123Z;web;100;22;Soda;50;Pepsi;444
2021-08-01T13:33:37.123Z;web;100;23;Beverage;44;Coke;999
方法二:使用POCO模型
定义与输入 JSON 匹配的 POCO 对象
/// <summary>
/// Maps the "system" object of the input document.
/// </summary>
/// <remarks>
/// NOTE(review): this type shares its name with the BCL <c>System</c> namespace, so it
/// has to be declared inside a namespace or an enclosing type. The name is kept so
/// existing references continue to compile.
/// </remarks>
public class System
{
    /// <summary>Value of "created", kept as text.</summary>
    [JsonProperty("created")]
    public string Created { get; set; }

    /// <summary>Value of "by".</summary>
    [JsonProperty("by")]
    public string By { get; set; }
}

/// <summary>
/// Maps the "location" object of the input document.
/// </summary>
public class Location
{
    /// <summary>Value of "id".</summary>
    [JsonProperty("id")]
    public int Id { get; set; }

    /// <summary>Value of "country".</summary>
    [JsonProperty("country")]
    public string Country { get; set; }
}

/// <summary>
/// Maps one element of an order's "OrderArticles" array.
/// </summary>
public class OrderArticle
{
    /// <summary>Value of "Size".</summary>
    [JsonProperty("Size")]
    public int Size { get; set; }

    /// <summary>Value of "ProductName".</summary>
    [JsonProperty("ProductName")]
    public string ProductName { get; set; }

    /// <summary>Value of "ProductId"; a JSON string in the sample data.</summary>
    [JsonProperty("ProductId")]
    public string ProductId { get; set; }
}

/// <summary>
/// Maps one element of the root "order" array.
/// </summary>
public class Order
{
    /// <summary>Value of "OrderID".</summary>
    [JsonProperty("OrderID")]
    public int OrderID { get; set; }

    /// <summary>Value of "OrderName".</summary>
    [JsonProperty("OrderName")]
    public string OrderName { get; set; }

    /// <summary>
    /// Articles of this order. Starts as an empty list so that an order without an
    /// "OrderArticles" property can still be enumerated.
    /// </summary>
    [JsonProperty("OrderArticles")]
    public List<OrderArticle> OrderArticles { get; set; } = new List<OrderArticle>();

    /// <summary>Value of "ProcessedId".</summary>
    [JsonProperty("ProcessedId")]
    public int ProcessedId { get; set; }

    /// <summary>Value of "Date", kept as text.</summary>
    [JsonProperty("Date")]
    public string Date { get; set; }
}

/// <summary>
/// Maps the whole input document.
/// </summary>
public class OrderRoot
{
    /// <summary>The "system" object.</summary>
    [JsonProperty("system")]
    public System System { get; set; }

    /// <summary>The "location" object.</summary>
    [JsonProperty("location")]
    public Location Location { get; set; }

    /// <summary>
    /// Elements of the "order" array. Starts as an empty list so that a document
    /// without an "order" property can still be enumerated.
    /// </summary>
    [JsonProperty("order")]
    public List<Order> Orders { get; set; } = new List<Order>();
}
然后使用下面的代码加载 json 并以预期格式输出 CSV
// Buffer that receives the generated CSV text.
StringBuilder csv = new StringBuilder();

// Reads the document as a typed OrderRoot.
// NOTE(review): UseJsonSerialization() presumably hands deserialization to the JSON
// serializer so the [JsonProperty] names on the POCOs apply - confirm.
using (var r = new ChoJSONReader<OrderRoot>("*** YOUR JSON FILE PATH ***")
    .UseJsonSerialization()
)
{
    using (var w = new ChoCSVWriter(csv)
        .WithDelimiter(";")
        .WithFirstLineHeader())
    {
        // One row per (order, article) pair, with the root-level values repeated on
        // each row. "country" is projected as well, so the columns match the requested
        // layout: created;by;id;country;OrderID;OrderName;Size;ProductName;ProductId.
        w.Write(r.SelectMany(root =>
            root.Orders
                .SelectMany(order => order.OrderArticles
                    .Select(orderarticle => new
                    {
                        created = root.System.Created,
                        by = root.System.By,
                        id = root.Location.Id,
                        country = root.Location.Country,
                        order.OrderID,
                        order.OrderName,
                        orderarticle.Size,
                        orderarticle.ProductName,
                        orderarticle.ProductId,
                    })
                )
            )
        );
    }
}

// Printed only after both using blocks have closed the reader and the writer.
Console.WriteLine(csv.ToString());
方法 3:简化动态模型方法
// Buffer that receives the generated CSV text.
StringBuilder csv = new StringBuilder();

// Root-level values are mapped through explicit JSON paths; the order and article
// values are declared by name only.
// NOTE(review): FlattenNode presumably expands the nested arrays into one record per
// article - confirm against the ChoETL documentation.
using (var r = new ChoJSONReader("*** YOUR JSON FILE PATH ***")
    // "HH" is the 24-hour clock specifier. The previous "hh" (12-hour clock) wrote
    // 13:33:37 as 01:33:37, with no AM/PM marker to tell the two apart.
    .WithField("created", jsonPath: "$..system.created", isArray: false, valueConverter: o => ((DateTime)o).ToString("yyyy-MM-ddTHH:mm:ss.fffZ"))
    .WithField("by", jsonPath: "$..system.by", isArray: false)
    .WithField("id", jsonPath: "$..location.id", isArray: false)
    .WithField("country", jsonPath: "$..location.country", isArray: false)
    .WithField("OrderID")
    .WithField("OrderName")
    .WithField("Size")
    .WithField("ProductName")
    .WithField("ProductId")
    .Configure(c => c.FlattenNode = true)
)
{
    using (var w = new ChoCSVWriter(csv)
        .WithDelimiter(";")
        .WithFirstLineHeader())
    {
        w.Write(r);
    }
}

// Printed only after both using blocks have closed the reader and the writer.
Console.WriteLine(csv.ToString());
方法 4:更简化的动态模型方法
// Buffer that receives the generated CSV text.
StringBuilder csv = new StringBuilder();

// No fields are declared; the reader is only configured.
// NOTE(review): FlattenNode presumably expands the nested arrays into flat records,
// and IgnoreDictionaryFieldPrefix presumably drops the parent-node prefix from the
// generated column names - confirm both against the ChoETL documentation.
// DateParseHandling.None is the Json.NET setting that leaves date-like strings as
// plain strings.
using (var reader = new ChoJSONReader("*** YOUR JSON FILE PATH ***")
    .Configure(readerConfig => readerConfig.FlattenNode = true)
    .JsonSerializationSettings(settings => settings.DateParseHandling = DateParseHandling.None))
using (var writer = new ChoCSVWriter(csv)
    .WithDelimiter(";")
    .WithFirstLineHeader()
    .Configure(writerConfig => writerConfig.IgnoreDictionaryFieldPrefix = true))
{
    writer.Write(reader);
}

// Printed only after both using blocks have closed the reader and the writer.
Console.WriteLine(csv.ToString());
样本fiddle:https://dotnetfiddle.net/VCezp8