MultiLevelJsonExtractor - 提取所需的级别
MultiLevelJsonExtractor - Extract the desired level
我有一个 JSON 文档,如下所示:
{
"Region": "Main",
"MarketLocations": [
{
"MarketName": "Central",
"MarketId": 1,
"SalesCategories": {
"Produce": [
{
"Type": "Apple",
"Name": "Granny Smith",
"DatePicked": "2016-11-08T14:14:33.712Z",
"ShelfLifeInDays": 24,
"Calories": 45,
"Price": 0.29
}
],
"BakedGoods": [
{
"DateMade": "2016-11-08T14:14:33.712Z",
"Name": "Apple Pie",
"Price": 14.25
}
],
"RestaurantItems": [
{
"Name": "Turkey Sandwich",
"Price": 4.85,
"PreparationTimeInMinutes": 20
}
],
"NonPerishable": [
{
"Name": "Honey Mustard",
"Type": "Condiments"
}
]
}
},
{
"MarketName": "Southern",
"MarketId": 2,
"SalesCategories": {
"Produce": [
{
"Type": "Apple",
"Name": "Granny Smith",
"DatePicked": "2016-11-08T14:14:33.712Z",
"ShelfLifeInDays": 24,
"Calories": 45,
"Price": 0.29
},
{
"Type": "Plums",
"Name": "Red Plums",
"DatePicked": "2016-11-08T14:14:33.712Z",
"ShelfLifeInDays": 12,
"Calories": 21,
"Price": 0.33
},
{
"Type": "Pears",
"Name": "Golden Nature",
"DatePicked": "2016-11-08T14:14:33.712Z",
"ShelfLifeInDays": 20,
"Calories": 40,
"Price": 0.45
}
],
"BakedGoods": [
{
"DateMade": "2016-11-08T14:14:33.712Z",
"Name": "Apple Pie",
"Price": 14.25
}
],
"RestaurantItems": [
{
"Name": "Turkey Sandwich",
"Price": 4.85,
"PreparationTimeInMinutes": 20
}
],
"NonPerishable": [
{
"Name": "Honey Mustard",
"Type": "Condiments"
}
]
}
},
{
"MarketName": "Western",
"MarketId": 3,
"SalesCategories": {
"Produce": [
{
"Type": "Plums",
"Name": "Red Plums",
"DatePicked": "2016-11-08T14:14:33.712Z",
"ShelfLifeInDays": 12,
"Calories": 21,
"Price": 0.33
},
{
"Type": "Pears",
"Name": "Golden Nature",
"DatePicked": "2016-11-08T14:14:33.712Z",
"ShelfLifeInDays": 20,
"Calories": 40,
"Price": 0.45
}
],
"BakedGoods": [
{
"DateMade": "2016-11-08T14:14:33.712Z",
"Name": "Plum Pie",
"Price": 18.25
}
],
"RestaurantItems": [
{
"Name": "Ham Sandwich",
"Price": 4.85,
"PreparationTimeInMinutes": 20
},
{
"Name": "Chicken Soup",
"Price": 2.25,
"PreparationTimeInMinutes": 5
}
],
"NonPerishable": [
{
"Name": "Mayo",
"Type": "Condiments"
},
{
"Name": "Syrup",
"Type": "Condiments"
},
{
"Name": "Ginger",
"Type": "Spices"
}
]
}
}
]
}
我有以下 U-SQL 脚本,在 Visual Studio 中运行,用来处理这个 JSON 文件:
// Extracts every item under SalesCategories.Produce for each market in the
// input JSON document and writes the rows to a CSV file with a header line.
//
// MultiLevelJsonExtractor is not a built-in extractor: it comes from the
// Microsoft.Analytics.Samples.Formats assembly, so the script has to reference
// that assembly (together with Newtonsoft.Json) and import its namespace to be
// runnable on its own.
// NOTE(review): assumes both assemblies are already registered in the database
// this script runs against -- confirm before submitting the job.
REFERENCE ASSEMBLY [Newtonsoft.Json];
REFERENCE ASSEMBLY [Microsoft.Analytics.Samples.Formats];

USING Microsoft.Analytics.Samples.Formats.Json;

DECLARE @in string = @"/JsonDoc2.json";
DECLARE @out string = @"Output/JsonDoc2.csv";

// The row path selects each element of every market's Produce array.
// Name .. Price are properties of the Produce item itself; MarketId and
// MarketName are properties of the enclosing market object.
@produce =
    EXTRACT Name string,
            DatePicked DateTime,
            ShelfLifeInDays int,
            Calories int,
            Price decimal,
            MarketId string,
            MarketName string
    FROM @in
    USING new MultiLevelJsonExtractor(
              "MarketLocations[*].SalesCategories.Produce[*]",
              false,
              "Name",
              "DatePicked",
              "ShelfLifeInDays",
              "Calories",
              "Price",
              "MarketId",
              "MarketName");

// Emit the extracted rows as CSV, including a header row with the column names.
OUTPUT @produce
TO @out
USING Outputters.Csv(outputHeader : true);
这段脚本执行时没有报错。问题在于我明确指定了想要的销售类别('Produce')。我想修改这个查询,使其包含所有销售类别(农产品、烘焙食品等),并同时输出类别名称。我还没想出办法。
JsonFunctions 类(依赖 NewtonSoft 的 Newtonsoft.Json)的 JsonTuple 方法会返回一个 MAP 值,即一组键值对。然后,至少在使用 CROSS APPLY 和 EXPLODE 进行一些其他操作之后,您可以引用其中的键(key)来获取 JSON 属性 / 对象 / 数组的名称。
针对你的例子,下面的脚本可以正常工作:
// Flattens every sales category (not just one) of every market into one row
// per item, carrying the category name along in the "key" column.
REFERENCE ASSEMBLY [Newtonsoft.Json];
REFERENCE ASSEMBLY [Microsoft.Analytics.Samples.Formats];

USING Microsoft.Analytics.Samples.Formats.Json;

DECLARE @input string = @"/input/myinputfile.json";
DECLARE @output string = @"output/output.csv";

// Step 1: one row per match of the row path, keeping the whole
// SalesCategories object as a raw JSON string for later shredding.
@markets =
    EXTRACT Region string,
            MarketName string,
            SalesCategories string
    FROM @input
    USING new MultiLevelJsonExtractor(
              "MarketLocations[*].SalesCategories",
              true,
              "Region",
              "MarketName",
              "SalesCategories");

// Step 2: turn the SalesCategories JSON string into a tuple/MAP.
@categoryMaps =
    SELECT Region,
           MarketName,
           JsonFunctions.JsonTuple(SalesCategories) AS categoryMap
    FROM @markets;

// Step 3: explode the MAP into key-value rows; "key" is the category name
// and "value" is that category's JSON.
@categories =
    SELECT Region,
           MarketName,
           key,
           value
    FROM @categoryMaps
         CROSS APPLY
             EXPLODE(categoryMap) AS entry(key, value);

// Step 4: the value is itself JSON; explode its values into one row each and
// convert every one of them into its own MAP.
@items =
    SELECT Region,
           MarketName,
           key,
           JsonFunctions.JsonTuple(itemJson) AS item
    FROM @categories
         CROSS APPLY
             EXPLODE(JsonFunctions.JsonTuple(value).Values) AS element(itemJson);

// Step 5: pick the named properties wanted in the output. The selected list is
// the union of the properties used across the different categories.
@result =
    SELECT Region,
           MarketName,
           key,
           item["Type"] AS Type,
           item["Name"] AS Name,
           item["DatePicked"] AS DatePicked,
           item["ShelfLifeInDays"] AS ShelfLifeInDays,
           item["Calories"] AS Calories,
           item["Price"] AS Price,
           item["DateMade"] AS DateMade,
           item["PreparationTimeInMinutes"] AS PreparationTimeInMinutes
    FROM @items;

// Write the flattened rows as CSV with value quoting turned off.
OUTPUT @result
TO @output
USING Outputters.Csv(quoting: false);
我的结果:
感觉这个脚本还可以再简化,不过你先试试看效果如何。关于拆解(shredding)JSON 的示例很少,但可以参考这里和这里。
我有一个 JSON 文档,如下所示:
{
"Region": "Main",
"MarketLocations": [
{
"MarketName": "Central",
"MarketId": 1,
"SalesCategories": {
"Produce": [
{
"Type": "Apple",
"Name": "Granny Smith",
"DatePicked": "2016-11-08T14:14:33.712Z",
"ShelfLifeInDays": 24,
"Calories": 45,
"Price": 0.29
}
],
"BakedGoods": [
{
"DateMade": "2016-11-08T14:14:33.712Z",
"Name": "Apple Pie",
"Price": 14.25
}
],
"RestaurantItems": [
{
"Name": "Turkey Sandwich",
"Price": 4.85,
"PreparationTimeInMinutes": 20
}
],
"NonPerishable": [
{
"Name": "Honey Mustard",
"Type": "Condiments"
}
]
}
},
{
"MarketName": "Southern",
"MarketId": 2,
"SalesCategories": {
"Produce": [
{
"Type": "Apple",
"Name": "Granny Smith",
"DatePicked": "2016-11-08T14:14:33.712Z",
"ShelfLifeInDays": 24,
"Calories": 45,
"Price": 0.29
},
{
"Type": "Plums",
"Name": "Red Plums",
"DatePicked": "2016-11-08T14:14:33.712Z",
"ShelfLifeInDays": 12,
"Calories": 21,
"Price": 0.33
},
{
"Type": "Pears",
"Name": "Golden Nature",
"DatePicked": "2016-11-08T14:14:33.712Z",
"ShelfLifeInDays": 20,
"Calories": 40,
"Price": 0.45
}
],
"BakedGoods": [
{
"DateMade": "2016-11-08T14:14:33.712Z",
"Name": "Apple Pie",
"Price": 14.25
}
],
"RestaurantItems": [
{
"Name": "Turkey Sandwich",
"Price": 4.85,
"PreparationTimeInMinutes": 20
}
],
"NonPerishable": [
{
"Name": "Honey Mustard",
"Type": "Condiments"
}
]
}
},
{
"MarketName": "Western",
"MarketId": 3,
"SalesCategories": {
"Produce": [
{
"Type": "Plums",
"Name": "Red Plums",
"DatePicked": "2016-11-08T14:14:33.712Z",
"ShelfLifeInDays": 12,
"Calories": 21,
"Price": 0.33
},
{
"Type": "Pears",
"Name": "Golden Nature",
"DatePicked": "2016-11-08T14:14:33.712Z",
"ShelfLifeInDays": 20,
"Calories": 40,
"Price": 0.45
}
],
"BakedGoods": [
{
"DateMade": "2016-11-08T14:14:33.712Z",
"Name": "Plum Pie",
"Price": 18.25
}
],
"RestaurantItems": [
{
"Name": "Ham Sandwich",
"Price": 4.85,
"PreparationTimeInMinutes": 20
},
{
"Name": "Chicken Soup",
"Price": 2.25,
"PreparationTimeInMinutes": 5
}
],
"NonPerishable": [
{
"Name": "Mayo",
"Type": "Condiments"
},
{
"Name": "Syrup",
"Type": "Condiments"
},
{
"Name": "Ginger",
"Type": "Spices"
}
]
}
}
]
}
我有以下 U-SQL 脚本,在 Visual Studio 中运行,用来处理这个 JSON 文件:
// Extracts every item under SalesCategories.Produce for each market in the
// input JSON document and writes the rows to a CSV file with a header line.
//
// MultiLevelJsonExtractor is not a built-in extractor: it comes from the
// Microsoft.Analytics.Samples.Formats assembly, so the script has to reference
// that assembly (together with Newtonsoft.Json) and import its namespace to be
// runnable on its own.
// NOTE(review): assumes both assemblies are already registered in the database
// this script runs against -- confirm before submitting the job.
REFERENCE ASSEMBLY [Newtonsoft.Json];
REFERENCE ASSEMBLY [Microsoft.Analytics.Samples.Formats];

USING Microsoft.Analytics.Samples.Formats.Json;

DECLARE @in string = @"/JsonDoc2.json";
DECLARE @out string = @"Output/JsonDoc2.csv";

// The row path selects each element of every market's Produce array.
// Name .. Price are properties of the Produce item itself; MarketId and
// MarketName are properties of the enclosing market object.
@produce =
    EXTRACT Name string,
            DatePicked DateTime,
            ShelfLifeInDays int,
            Calories int,
            Price decimal,
            MarketId string,
            MarketName string
    FROM @in
    USING new MultiLevelJsonExtractor(
              "MarketLocations[*].SalesCategories.Produce[*]",
              false,
              "Name",
              "DatePicked",
              "ShelfLifeInDays",
              "Calories",
              "Price",
              "MarketId",
              "MarketName");

// Emit the extracted rows as CSV, including a header row with the column names.
OUTPUT @produce
TO @out
USING Outputters.Csv(outputHeader : true);
这段脚本执行时没有报错。问题在于我明确指定了想要的销售类别('Produce')。我想修改这个查询,使其包含所有销售类别(农产品、烘焙食品等),并同时输出类别名称。我还没想出办法。
JsonFunctions 类(依赖 NewtonSoft 的 Newtonsoft.Json)的 JsonTuple 方法会返回一个 MAP 值,即一组键值对。然后,至少在使用 CROSS APPLY 和 EXPLODE 之后,您可以引用其中的键(key)来获取 JSON 属性 / 对象 / 数组的名称。
针对你的例子,下面的脚本可以正常工作:
// Flattens every sales category (not just one) of every market into one row
// per item, carrying the category name along in the "key" column.
REFERENCE ASSEMBLY [Newtonsoft.Json];
REFERENCE ASSEMBLY [Microsoft.Analytics.Samples.Formats];

USING Microsoft.Analytics.Samples.Formats.Json;

DECLARE @input string = @"/input/myinputfile.json";
DECLARE @output string = @"output/output.csv";

// Step 1: one row per match of the row path, keeping the whole
// SalesCategories object as a raw JSON string for later shredding.
@markets =
    EXTRACT Region string,
            MarketName string,
            SalesCategories string
    FROM @input
    USING new MultiLevelJsonExtractor(
              "MarketLocations[*].SalesCategories",
              true,
              "Region",
              "MarketName",
              "SalesCategories");

// Step 2: turn the SalesCategories JSON string into a tuple/MAP.
@categoryMaps =
    SELECT Region,
           MarketName,
           JsonFunctions.JsonTuple(SalesCategories) AS categoryMap
    FROM @markets;

// Step 3: explode the MAP into key-value rows; "key" is the category name
// and "value" is that category's JSON.
@categories =
    SELECT Region,
           MarketName,
           key,
           value
    FROM @categoryMaps
         CROSS APPLY
             EXPLODE(categoryMap) AS entry(key, value);

// Step 4: the value is itself JSON; explode its values into one row each and
// convert every one of them into its own MAP.
@items =
    SELECT Region,
           MarketName,
           key,
           JsonFunctions.JsonTuple(itemJson) AS item
    FROM @categories
         CROSS APPLY
             EXPLODE(JsonFunctions.JsonTuple(value).Values) AS element(itemJson);

// Step 5: pick the named properties wanted in the output. The selected list is
// the union of the properties used across the different categories.
@result =
    SELECT Region,
           MarketName,
           key,
           item["Type"] AS Type,
           item["Name"] AS Name,
           item["DatePicked"] AS DatePicked,
           item["ShelfLifeInDays"] AS ShelfLifeInDays,
           item["Calories"] AS Calories,
           item["Price"] AS Price,
           item["DateMade"] AS DateMade,
           item["PreparationTimeInMinutes"] AS PreparationTimeInMinutes
    FROM @items;

// Write the flattened rows as CSV with value quoting turned off.
OUTPUT @result
TO @output
USING Outputters.Csv(quoting: false);
我的结果:
感觉这个脚本还可以再简化,不过你先试试看效果如何。关于拆解(shredding)JSON 的示例很少,但可以参考这里和这里。