BigQuery 查询优化:过滤嵌套的 STRUCT 数组字段,并将结果重新分组还原
BigQuery optimisation query with filtering on nested array of STRUCT fields and grouping back
我想弄清楚如何编写 GQL (Google SQL) 查询:先对深度嵌套的结构进行过滤,然后把过滤后的结果重新嵌套回去,使其仍然是与原来同一层级的 ARRAY。
我准备了一个模式示例
WITH
  -- Sample data: a single sale row carrying a service-feedback STRUCT,
  -- an array of key/value tag STRUCTs, and an array of sale items that
  -- each embed a product-feedback STRUCT.
  Sale AS (
    SELECT
      "1" AS _id,
      STRUCT(
        "11" AS _id,
        "SERVICE" AS feedbackType,
        DATE(TIMESTAMP("2017-01-20 14:05:51.655")) AS createDate
      ) AS serviceFeedback,
      [
        STRUCT("host" AS key, "localhost" AS value),
        STRUCT("location" AS key, "Paris" AS value)
      ] AS tags,
      TRUE AS reviewed,
      [
        STRUCT(
          "1" AS saleId,
          STRUCT(
            "101" AS _id,
            "PRODUCT" AS feedbackType,
            DATE(TIMESTAMP("2017-01-20 14:05:51.655")) AS createDate
          ) AS productFeedback
        ),
        STRUCT(
          "1" AS saleId,
          STRUCT(
            "102" AS _id,
            "PRODUCT" AS feedbackType,
            DATE(TIMESTAMP("2017-01-20 14:06:51.655")) AS createDate
          ) AS productFeedback
        )
      ] AS saleItems,
      DATE(TIMESTAMP("2017-01-20 14:05:51.655")) AS latestFeedbackDate
  )
要进行过滤,需要先用一个查询把所有嵌套字段展平,再在展平后的行上应用过滤条件:
-- Flattens every sale into one row per sale item and keeps only the rows
-- where the sale is reviewed, its service feedback has a non-NULL _id, and
-- both the service feedback and the item's product feedback are dated on or
-- after the cutoff date. Output is sorted by latestFeedbackDate (newest
-- first) and capped at 20 rows.
SELECT
  sale._id AS saleId,
  sale.serviceFeedback,
  sale.tags AS saleTags,
  sale.reviewed,
  item AS saleItems,  -- one array element per output row
  sale.latestFeedbackDate
FROM
  `Sale` AS sale
CROSS JOIN
  UNNEST(sale.saleItems) AS item
WHERE
  sale.reviewed = TRUE
  AND sale.serviceFeedback.createDate >= DATE(TIMESTAMP("2017-01-18 14:05:51.655"))
  AND sale.serviceFeedback._id IS NOT NULL
  AND item.productFeedback.createDate >= DATE(TIMESTAMP("2017-01-18 14:05:51.655"))
ORDER BY
  latestFeedbackDate DESC
LIMIT
  20
主要问题是:过滤之后,我想把所有 saleItems 按 sale._id 重新分组(即恢复初始结构),
并取回保留原有类型的 serviceFeedback 结构字段。
JSON 格式的预期结果是:
{
"saleId":"1",
"serviceFeedback":{"_id":"11","feedbackType":"SERVICE","createDate":"2017-01-20"},
"saleTags":[{"key":"host","value":"localhost"},{"key":"location","value":"Paris"}],
"reviewed":"true",
"saleItems":[
{"saleId":"1","productFeedback":{"_id":"101","feedbackType":"PRODUCT","createDate":"2017-01-20"}},
{"saleId":"1","productFeedback":{"_id":"102","feedbackType":"PRODUCT","createDate":"2017-01-20"}}
],
"latestFeedbackDate":"2017-01-20"
}
我写下了我能想到的最简单的查询。它能产生正确的结果,但也许可以把它重写得更高效:
-- Re-nests the filtered sale items: the derived table unnests saleItems,
-- applies the sale-level and item-level filters, and aggregates the surviving
-- items back into one array per sale id; the outer query then joins back to
-- `Sale` to pick up the sale-level columns.
SELECT
  subQuery.saleId,
  sale.serviceFeedback,
  sale.latestFeedbackDate,
  subQuery.saleItems AS saleItems
FROM (
  SELECT
    sale._id AS saleId,
    ARRAY_AGG(item) AS saleItems
  FROM
    `Sale` AS sale
  CROSS JOIN
    UNNEST(sale.saleItems) AS item
  WHERE
    sale.reviewed = TRUE
    AND sale.serviceFeedback.createDate >= DATE(TIMESTAMP("2017-01-18 14:05:51.655"))
    AND sale.serviceFeedback._id IS NOT NULL
    AND item.productFeedback.createDate >= DATE(TIMESTAMP("2017-01-18 14:05:51.655"))
  GROUP BY
    sale._id
) AS subQuery
-- Same preserved side as the former `sale RIGHT JOIN subQuery`: every
-- aggregated group is kept.
LEFT JOIN
  `Sale` AS sale
ON
  sale._id = subQuery.saleId
-- The sort lives on the outermost query: an ORDER BY inside a derived table
-- that feeds GROUP BY does not order the final result.
ORDER BY
  sale.latestFeedbackDate DESC
你能建议我一个更好的解决方案来达到同样的效果吗?
Could you suggest me a better solution to achieve the same results?
下面的查询生成与原始表完全相同的 schema,只是对 saleItems 应用了所需的过滤条件:
#standardSQL
-- Returns rows with the same schema as `sale`, with saleItems narrowed to the
-- items whose product feedback is dated on or after the cutoff date.
SELECT * REPLACE(
  ARRAY(
    SELECT item
    FROM UNNEST(saleItems) AS item
    WHERE item.productFeedback.createDate >= DATE(TIMESTAMP("2017-01-18 14:05:51.655"))
  ) AS saleItems)
FROM sale
WHERE
  -- Sale-level predicates are applied to the row itself. Inside the ARRAY
  -- subquery they would only empty the array and still return the sale.
  reviewed = TRUE
  AND serviceFeedback.createDate >= DATE(TIMESTAMP("2017-01-18 14:05:51.655"))
  AND serviceFeedback._id IS NOT NULL
  -- Skip sales that have no matching item, as an inner join against the
  -- unnested items would.
  AND EXISTS (
    SELECT 1
    FROM UNNEST(saleItems) AS item
    WHERE item.productFeedback.createDate >= DATE(TIMESTAMP("2017-01-18 14:05:51.655"))
  )
如果您只需要字段的子集 - 使用下面的示例
#standardSQL
-- Returns a subset of the sale columns, with saleItems narrowed to the items
-- whose product feedback is dated on or after the cutoff date.
SELECT
  _id AS saleId,
  serviceFeedback,
  ARRAY(
    SELECT item
    FROM UNNEST(saleItems) AS item
    WHERE item.productFeedback.createDate >= DATE(TIMESTAMP("2017-01-18 14:05:51.655"))
  ) AS saleItems
FROM sale
WHERE
  -- Sale-level predicates are applied to the row itself. Inside the ARRAY
  -- subquery they would only empty the array and still return the sale.
  reviewed = TRUE
  AND serviceFeedback.createDate >= DATE(TIMESTAMP("2017-01-18 14:05:51.655"))
  AND serviceFeedback._id IS NOT NULL
  -- Skip sales that have no matching item, as an inner join against the
  -- unnested items would.
  AND EXISTS (
    SELECT 1
    FROM UNNEST(saleItems) AS item
    WHERE item.productFeedback.createDate >= DATE(TIMESTAMP("2017-01-18 14:05:51.655"))
  )
我想弄清楚如何编写 GQL (Google SQL) 查询:先对深度嵌套的结构进行过滤,然后把过滤后的结果重新嵌套回去,使其仍然是与原来同一层级的 ARRAY。
我准备了一个模式示例
WITH
  -- Sample data: a single sale row carrying a service-feedback STRUCT,
  -- an array of key/value tag STRUCTs, and an array of sale items that
  -- each embed a product-feedback STRUCT.
  Sale AS (
    SELECT
      "1" AS _id,
      STRUCT(
        "11" AS _id,
        "SERVICE" AS feedbackType,
        DATE(TIMESTAMP("2017-01-20 14:05:51.655")) AS createDate
      ) AS serviceFeedback,
      [
        STRUCT("host" AS key, "localhost" AS value),
        STRUCT("location" AS key, "Paris" AS value)
      ] AS tags,
      TRUE AS reviewed,
      [
        STRUCT(
          "1" AS saleId,
          STRUCT(
            "101" AS _id,
            "PRODUCT" AS feedbackType,
            DATE(TIMESTAMP("2017-01-20 14:05:51.655")) AS createDate
          ) AS productFeedback
        ),
        STRUCT(
          "1" AS saleId,
          STRUCT(
            "102" AS _id,
            "PRODUCT" AS feedbackType,
            DATE(TIMESTAMP("2017-01-20 14:06:51.655")) AS createDate
          ) AS productFeedback
        )
      ] AS saleItems,
      DATE(TIMESTAMP("2017-01-20 14:05:51.655")) AS latestFeedbackDate
  )
要进行过滤,需要先用一个查询把所有嵌套字段展平,再在展平后的行上应用过滤条件:
-- Flattens every sale into one row per sale item and keeps only the rows
-- where the sale is reviewed, its service feedback has a non-NULL _id, and
-- both the service feedback and the item's product feedback are dated on or
-- after the cutoff date. Output is sorted by latestFeedbackDate (newest
-- first) and capped at 20 rows.
SELECT
  sale._id AS saleId,
  sale.serviceFeedback,
  sale.tags AS saleTags,
  sale.reviewed,
  item AS saleItems,  -- one array element per output row
  sale.latestFeedbackDate
FROM
  `Sale` AS sale
CROSS JOIN
  UNNEST(sale.saleItems) AS item
WHERE
  sale.reviewed = TRUE
  AND sale.serviceFeedback.createDate >= DATE(TIMESTAMP("2017-01-18 14:05:51.655"))
  AND sale.serviceFeedback._id IS NOT NULL
  AND item.productFeedback.createDate >= DATE(TIMESTAMP("2017-01-18 14:05:51.655"))
ORDER BY
  latestFeedbackDate DESC
LIMIT
  20
主要问题是:过滤之后,我想把所有 saleItems 按 sale._id 重新分组(即恢复初始结构),
并取回保留原有类型的 serviceFeedback 结构字段。
JSON 格式的预期结果是:
{
"saleId":"1",
"serviceFeedback":{"_id":"11","feedbackType":"SERVICE","createDate":"2017-01-20"},
"saleTags":[{"key":"host","value":"localhost"},{"key":"location","value":"Paris"}],
"reviewed":"true",
"saleItems":[
{"saleId":"1","productFeedback":{"_id":"101","feedbackType":"PRODUCT","createDate":"2017-01-20"}},
{"saleId":"1","productFeedback":{"_id":"102","feedbackType":"PRODUCT","createDate":"2017-01-20"}}
],
"latestFeedbackDate":"2017-01-20"
}
我写下了我能想到的最简单的查询。它能产生正确的结果,但也许可以把它重写得更高效:
-- Re-nests the filtered sale items: the derived table unnests saleItems,
-- applies the sale-level and item-level filters, and aggregates the surviving
-- items back into one array per sale id; the outer query then joins back to
-- `Sale` to pick up the sale-level columns.
SELECT
  subQuery.saleId,
  sale.serviceFeedback,
  sale.latestFeedbackDate,
  subQuery.saleItems AS saleItems
FROM (
  SELECT
    sale._id AS saleId,
    ARRAY_AGG(item) AS saleItems
  FROM
    `Sale` AS sale
  CROSS JOIN
    UNNEST(sale.saleItems) AS item
  WHERE
    sale.reviewed = TRUE
    AND sale.serviceFeedback.createDate >= DATE(TIMESTAMP("2017-01-18 14:05:51.655"))
    AND sale.serviceFeedback._id IS NOT NULL
    AND item.productFeedback.createDate >= DATE(TIMESTAMP("2017-01-18 14:05:51.655"))
  GROUP BY
    sale._id
) AS subQuery
-- Same preserved side as the former `sale RIGHT JOIN subQuery`: every
-- aggregated group is kept.
LEFT JOIN
  `Sale` AS sale
ON
  sale._id = subQuery.saleId
-- The sort lives on the outermost query: an ORDER BY inside a derived table
-- that feeds GROUP BY does not order the final result.
ORDER BY
  sale.latestFeedbackDate DESC
你能建议我一个更好的解决方案来达到同样的效果吗?
Could you suggest me a better solution to achieve the same results?
下面的查询生成与原始表完全相同的 schema,只是对 saleItems 应用了所需的过滤条件:
#standardSQL
-- Returns rows with the same schema as `sale`, with saleItems narrowed to the
-- items whose product feedback is dated on or after the cutoff date.
SELECT * REPLACE(
  ARRAY(
    SELECT item
    FROM UNNEST(saleItems) AS item
    WHERE item.productFeedback.createDate >= DATE(TIMESTAMP("2017-01-18 14:05:51.655"))
  ) AS saleItems)
FROM sale
WHERE
  -- Sale-level predicates are applied to the row itself. Inside the ARRAY
  -- subquery they would only empty the array and still return the sale.
  reviewed = TRUE
  AND serviceFeedback.createDate >= DATE(TIMESTAMP("2017-01-18 14:05:51.655"))
  AND serviceFeedback._id IS NOT NULL
  -- Skip sales that have no matching item, as an inner join against the
  -- unnested items would.
  AND EXISTS (
    SELECT 1
    FROM UNNEST(saleItems) AS item
    WHERE item.productFeedback.createDate >= DATE(TIMESTAMP("2017-01-18 14:05:51.655"))
  )
如果您只需要字段的子集 - 使用下面的示例
#standardSQL
-- Returns a subset of the sale columns, with saleItems narrowed to the items
-- whose product feedback is dated on or after the cutoff date.
SELECT
  _id AS saleId,
  serviceFeedback,
  ARRAY(
    SELECT item
    FROM UNNEST(saleItems) AS item
    WHERE item.productFeedback.createDate >= DATE(TIMESTAMP("2017-01-18 14:05:51.655"))
  ) AS saleItems
FROM sale
WHERE
  -- Sale-level predicates are applied to the row itself. Inside the ARRAY
  -- subquery they would only empty the array and still return the sale.
  reviewed = TRUE
  AND serviceFeedback.createDate >= DATE(TIMESTAMP("2017-01-18 14:05:51.655"))
  AND serviceFeedback._id IS NOT NULL
  -- Skip sales that have no matching item, as an inner join against the
  -- unnested items would.
  AND EXISTS (
    SELECT 1
    FROM UNNEST(saleItems) AS item
    WHERE item.productFeedback.createDate >= DATE(TIMESTAMP("2017-01-18 14:05:51.655"))
  )