如何使用 USQL 将 JSON 扁平化为 CSV

How do I flatten JSON to CSV using USQL

我可以使用 Microsoft.Analytics.Samples.Formats.Json.JsonFunctions.JsonTuple 获取一些数据,但我无法展平整个文件。

这是我正在使用的文件格式:

{
 "SourceUrl":"http://www.unittest.org/test.html",
 "Title":"Unit Test File",
 "Guest":"Unit Test Guest",
 "PublishDate":"2017-05-15T00:00:00",
 "TranscriptionSections":[  
    {  
     "SectionStartTime":"00:00:03",
     "Sentences":[  
        {  
           "Text":"Intro."
        },
        {  
           "Text":"Sentence one"
        },
        {  
           "Text":"Sentence two"
        }
     ]
  },
  {  
     "SectionStartTime":"00:04:46",
     "Sentences":[  
        {  
           "Text":"Sentence three"
        },
        {  
           "Text":"Sentence four"
        }
     ]
  }
 ],
 "Categories":null
}

我想要得到的是每个 "Text" 对应一行(本例中共 5 行),每行同时包含它所属的 'SectionStartTime' 以及所有顶级属性('PublishDate'、'Guest' 等)。

到目前为止,我用下面的脚本只能做到每个 'SectionStartTime' 得到一行:

// Writes one CSV row per element of the "TranscriptionSections" array.
// Each row carries the document-level columns plus that section's
// "SectionStartTime". The nested "Sentences" arrays are not expanded here.
USE econosphere;

REFERENCE ASSEMBLY [Newtonsoft.Json];
REFERENCE ASSEMBLY [Microsoft.Analytics.Samples.Formats];

// Lets JsonExtractor and JsonFunctions be written without the full namespace.
USING Microsoft.Analytics.Samples.Formats.Json;

DECLARE @sourceJson string = "adl://abc.azuredatalakestore.net/data/20170515UnitTest.json";
DECLARE @targetCsv string = "adl://abc.azuredatalakestore.net/processed/20170515UnitTest.csv";

// Read the top-level properties; the nested array is kept as a string
// so it can be parsed with JsonTuple below.
@episode =
    EXTRACT Title string,
            SourceUrl string,
            Guest string,
            PublishDate DateTime,
            TranscriptionSections string
    FROM @sourceJson
    USING new JsonExtractor();

// EXPLODE produces one row per value of the parsed TranscriptionSections;
// each exploded value is parsed again to read its "SectionStartTime".
@sectionRows =
    SELECT Title,
           SourceUrl,
           Guest,
           PublishDate,
           JsonFunctions.JsonTuple(sectionJson)["SectionStartTime"] AS TranscriptionSectionTimes
    FROM @episode
         CROSS APPLY
             EXPLODE(JsonFunctions.JsonTuple(TranscriptionSections).Values) AS sections(sectionJson);

OUTPUT @sectionRows
TO @targetCsv
USING Outputters.Csv();

这是对我有用的解决方案:

// Flattens the transcript JSON to one CSV row per sentence.
// Each row repeats the document-level columns and the "SectionStartTime"
// of the section the sentence belongs to. Two successive EXPLODE steps are
// used: first over "TranscriptionSections", then over each section's
// "Sentences".
REFERENCE ASSEMBLY JSONBlog.[Newtonsoft.Json];
REFERENCE ASSEMBLY JSONBlog.[Microsoft.Analytics.Samples.Formats];

USING Microsoft.Analytics.Samples.Formats.Json;

DECLARE @jsonPath string = "/input/data.json";
DECLARE @csvPath string = "/output/jsondata.csv";

// Step 1: read the top-level properties. The nested array is extracted as a
// string so it can be parsed with JsonTuple in the next step.
@document =
    EXTRACT SourceUrl string,
            Title string,
            Guest string,
            PublishDate DateTime,
            TranscriptionSections string,
            Categories string
    FROM @jsonPath
    USING new JsonExtractor();

// Step 2: one row per section. The section is parsed into a map
// so that both "SectionStartTime" and "Sentences" can be looked up later.
@sections =
    SELECT SourceUrl,
           Title,
           Guest,
           PublishDate,
           Categories,
           JsonFunctions.JsonTuple(section_json) AS section_map
    FROM @document
         CROSS APPLY
             EXPLODE(JsonFunctions.JsonTuple(TranscriptionSections).Values) AS Sec(section_json);

// Step 3: one row per sentence within each section.
// The output column is deliberately named lowercase "text"; it becomes the
// CSV header because outputHeader is enabled below.
@sentences =
    SELECT SourceUrl,
           Title,
           Guest,
           PublishDate,
           Categories,
           section_map["SectionStartTime"] AS SectionStartTime,
           JsonFunctions.JsonTuple(sentence_json)["Text"] AS text
    FROM @sections
         CROSS APPLY
             EXPLODE(JsonFunctions.JsonTuple(section_map["Sentences"]).Values) AS Sen(sentence_json);

OUTPUT @sentences
TO @csvPath
USING Outputters.Csv(outputHeader : true);