优化动态 7 天队列 Firebase BigQuery
Optimizing a Dynamic 7-Day Cohort Firebase BigQuery
我在下面针对我们移动应用程序的数据编写了以下查询。由于用户基数高,我在底部添加 ORDER BY
时收到 400 请求错误 "Resources exceeded during query execution: The query could not be executed in the allotted memory"
。
问题:有什么办法可以优化查询,但仍然保留底部的ORDER BY
吗?
我已经添加了 firebase 的演示数据集,但我认为他们的数据集太小而不会出现问题(与我的 5-1000 万条记录大的数据集相比)。
SELECT
f.user_pseudo_id,
f.event_timestamp,
DATE(TIMESTAMP_MICROS(f.event_timestamp)) as event_timestamp_date,
f.event_name,
f.user_first_touch_timestamp,
DATE(TIMESTAMP_MICROS(f.user_first_touch_timestamp)) as user_first_touch_date,
CASE WHEN r.has_appRemove >= 1 THEN "removed" ELSE "not-removed" END AS status_after_first7days
FROM `firebase-analytics-sample-data.ios_dataset.app_events_*` f
LEFT JOIN (
SELECT user_pseudo_id, 1 has_appRemove
FROM `firebase-analytics-sample-data.ios_dataset.app_events_*`
WHERE DATE(TIMESTAMP_MICROS(user_first_touch_timestamp)) >= DATE_SUB(CURRENT_DATE(), INTERVAL 10 DAY)
AND DATE(TIMESTAMP_MICROS(user_first_touch_timestamp)) < DATE_SUB(CURRENT_DATE(), INTERVAL 9 DAY)
AND _TABLE_SUFFIX >= FORMAT_DATE('%Y%m%d', DATE_SUB(CURRENT_DATE(), INTERVAL 10 DAY))
AND _TABLE_SUFFIX < FORMAT_DATE('%Y%m%d', DATE_SUB(CURRENT_DATE(), INTERVAL 3 DAY))
AND platform = "ANDROID"
AND event_name = "app_remove"
GROUP BY user_pseudo_id
) r on f.user_pseudo_id = r.user_pseudo_id
WHERE
DATE(TIMESTAMP_MICROS(user_first_touch_timestamp)) >= DATE_SUB(CURRENT_DATE(), INTERVAL 10 DAY)
AND DATE(TIMESTAMP_MICROS(user_first_touch_timestamp)) < DATE_SUB(CURRENT_DATE(), INTERVAL 9 DAY)
AND _TABLE_SUFFIX >= FORMAT_DATE('%Y%m%d', DATE_SUB(CURRENT_DATE(), INTERVAL 10 DAY))
AND _TABLE_SUFFIX < FORMAT_DATE('%Y%m%d', DATE_SUB(CURRENT_DATE(), INTERVAL 3 DAY))
AND platform = "ANDROID"
ORDER BY 1,2 ASC
您可以应用 windowing/analytical 函数而不是加入 - 如下例所示(未测试)
#standardSQL
SELECT
user_pseudo_id,
event_timestamp,
DATE(TIMESTAMP_MICROS(event_timestamp)) AS event_timestamp_date,
event_name,
user_first_touch_timestamp,
DATE(TIMESTAMP_MICROS(user_first_touch_timestamp)) AS user_first_touch_date,
COUNTIF(event_name = "app_remove") OVER(PARTITION BY user_pseudo_id) > 0 isRemoved
FROM `firebase-analytics-sample-data.ios_dataset.app_events_*`
WHERE
DATE(TIMESTAMP_MICROS(user_first_touch_timestamp)) >= DATE_SUB(CURRENT_DATE(), INTERVAL 10 DAY)
AND DATE(TIMESTAMP_MICROS(user_first_touch_timestamp)) < DATE_SUB(CURRENT_DATE(), INTERVAL 9 DAY)
AND _TABLE_SUFFIX >= FORMAT_DATE('%Y%m%d', DATE_SUB(CURRENT_DATE(), INTERVAL 10 DAY))
AND _TABLE_SUFFIX < FORMAT_DATE('%Y%m%d', DATE_SUB(CURRENT_DATE(), INTERVAL 3 DAY))
AND platform = "ANDROID"
ORDER BY 1,2 ASC
我在下面针对我们移动应用程序的数据编写了以下查询。由于用户基数高,我在底部添加 ORDER BY
时收到 400 请求错误 "Resources exceeded during query execution: The query could not be executed in the allotted memory"
。
问题:有什么办法可以优化查询,但仍然保留底部的ORDER BY
吗?
我已经添加了 firebase 的演示数据集,但我认为他们的数据集太小而不会出现问题(与我的 5-1000 万条记录大的数据集相比)。
SELECT
f.user_pseudo_id,
f.event_timestamp,
DATE(TIMESTAMP_MICROS(f.event_timestamp)) as event_timestamp_date,
f.event_name,
f.user_first_touch_timestamp,
DATE(TIMESTAMP_MICROS(f.user_first_touch_timestamp)) as user_first_touch_date,
CASE WHEN r.has_appRemove >= 1 THEN "removed" ELSE "not-removed" END AS status_after_first7days
FROM `firebase-analytics-sample-data.ios_dataset.app_events_*` f
LEFT JOIN (
SELECT user_pseudo_id, 1 has_appRemove
FROM `firebase-analytics-sample-data.ios_dataset.app_events_*`
WHERE DATE(TIMESTAMP_MICROS(user_first_touch_timestamp)) >= DATE_SUB(CURRENT_DATE(), INTERVAL 10 DAY)
AND DATE(TIMESTAMP_MICROS(user_first_touch_timestamp)) < DATE_SUB(CURRENT_DATE(), INTERVAL 9 DAY)
AND _TABLE_SUFFIX >= FORMAT_DATE('%Y%m%d', DATE_SUB(CURRENT_DATE(), INTERVAL 10 DAY))
AND _TABLE_SUFFIX < FORMAT_DATE('%Y%m%d', DATE_SUB(CURRENT_DATE(), INTERVAL 3 DAY))
AND platform = "ANDROID"
AND event_name = "app_remove"
GROUP BY user_pseudo_id
) r on f.user_pseudo_id = r.user_pseudo_id
WHERE
DATE(TIMESTAMP_MICROS(user_first_touch_timestamp)) >= DATE_SUB(CURRENT_DATE(), INTERVAL 10 DAY)
AND DATE(TIMESTAMP_MICROS(user_first_touch_timestamp)) < DATE_SUB(CURRENT_DATE(), INTERVAL 9 DAY)
AND _TABLE_SUFFIX >= FORMAT_DATE('%Y%m%d', DATE_SUB(CURRENT_DATE(), INTERVAL 10 DAY))
AND _TABLE_SUFFIX < FORMAT_DATE('%Y%m%d', DATE_SUB(CURRENT_DATE(), INTERVAL 3 DAY))
AND platform = "ANDROID"
ORDER BY 1,2 ASC
您可以应用 windowing/analytical 函数而不是加入 - 如下例所示(未测试)
#standardSQL
SELECT
user_pseudo_id,
event_timestamp,
DATE(TIMESTAMP_MICROS(event_timestamp)) AS event_timestamp_date,
event_name,
user_first_touch_timestamp,
DATE(TIMESTAMP_MICROS(user_first_touch_timestamp)) AS user_first_touch_date,
COUNTIF(event_name = "app_remove") OVER(PARTITION BY user_pseudo_id) > 0 isRemoved
FROM `firebase-analytics-sample-data.ios_dataset.app_events_*`
WHERE
DATE(TIMESTAMP_MICROS(user_first_touch_timestamp)) >= DATE_SUB(CURRENT_DATE(), INTERVAL 10 DAY)
AND DATE(TIMESTAMP_MICROS(user_first_touch_timestamp)) < DATE_SUB(CURRENT_DATE(), INTERVAL 9 DAY)
AND _TABLE_SUFFIX >= FORMAT_DATE('%Y%m%d', DATE_SUB(CURRENT_DATE(), INTERVAL 10 DAY))
AND _TABLE_SUFFIX < FORMAT_DATE('%Y%m%d', DATE_SUB(CURRENT_DATE(), INTERVAL 3 DAY))
AND platform = "ANDROID"
ORDER BY 1,2 ASC