使用 CASE WHEN 将语言代码整理成一个组合区域设置代码,并计算组合区域设置代码在某个日期出现的次数
Collate language codes into one combined locale code using CASE WHEN, and count the number of times the combined locale code occurs on a date
我正在学习 CASE WHEN,在 Analytics 中看到多个区域设置代码时遇到了一个用例。这个问题比我之前发布的问题简单得多,也更容易阅读和回答。
例如:
en-us(美国英语)
en-au(澳大利亚英语)
en-br(巴西英语)
es-es(西班牙西班牙语)
es-419(拉丁美洲西班牙语)
pt-br(巴西葡萄牙语)
pt-pt(葡萄牙葡萄牙语)
我如何在 BigQuery 中聚合这些值,使其不是按每个不同的值分别计数,而是只按区域设置代码的前两个字符统计出现次数?
这个问题的第二部分是:如何构建我的表,以便能够按日期绘制这些计数?
目前,输出是:
date:language_code:CombinedLocale
示例数据表链接: https://docs.google.com/spreadsheets/d/1XZp1nhNZySWI39kKhb3ydYYIImmrfAMcGJDS6ASThqg/edit?usp=sharing
我试过:
-- First attempt: count language codes per date and combined locale.
-- There is no GROUP BY clause, so `date` and the CASE expression are selected
-- next to COUNT() without being grouped or aggregated.
SELECT date, COUNT(language_code),
CASE
WHEN language_code like '%af%' THEN 'AF'
WHEN language_code like '%en%' THEN 'EN'
WHEN language_code like '%ar%' THEN 'AR'
WHEN language_code like '%ba%' THEN 'BA'
ELSE "Others"  -- fallback for codes matching none of the patterns above
END AS CombinedLocale
FROM date_locales
并且:
-- Second attempt: the CASE expression is used directly as the WHERE condition.
-- Every branch yields a string ('AF', 'EN', ..., "Others") rather than a
-- boolean, and `date` is again selected beside COUNT() without a GROUP BY.
Select date, COUNT(language_code)
FROM date_locales
WHERE CASE
WHEN language_code like '%af%' THEN 'AF'
WHEN language_code like '%en%' THEN 'EN'
WHEN language_code like '%ar%' THEN 'AR'
WHEN language_code like '%ba%' THEN 'BA'
ELSE "Others"
END
这是我目前可以运行的代码:
-- Row-level listing: every date and raw language_code together with the
-- bucket (CombinedLocale) the code falls into. Nothing is aggregated here.
SELECT
  date,
  language_code,
  (
    -- Branches are tested top to bottom; the first matching pattern wins.
    CASE
      WHEN language_code LIKE '%af%' THEN 'AF'
      WHEN language_code LIKE '%en%' THEN 'EN'
      WHEN language_code LIKE '%ar%' THEN 'AR'
      WHEN language_code LIKE '%ba%' THEN 'BA'
      ELSE "Others"
    END
  ) AS CombinedLocale
FROM
  date_locales
我希望结果按时间显示 CombinedLocale 的计数,如下所示:
Jan AF 3
JAN EN 5
FEB AF 5
FEB EN 6
MAR EN 2
MAR EN 3
但我收到一条错误消息,指出:
SELECT 列表表达式引用了既未分组也未聚合的列 date(第 1 行,第 8 列)
我想我需要先把日期按月汇总?我原以为 BigQuery 与 Data Studio 的集成会自动聚合日期列。
您只是在寻找聚合查询吗?
-- Count rows per date and combined locale.
-- The language is the part of language_code before the first '-', compared
-- case-insensitively. Matching on that prefix (instead of LIKE '%ar%' etc.)
-- keeps a region suffix from selecting the wrong bucket: with a substring
-- match, a code such as 'es-ar' would be counted as 'AR'.
SELECT
  date,
  CASE LOWER(SPLIT(language_code, '-')[SAFE_OFFSET(0)])
    WHEN 'af' THEN 'AF'
    WHEN 'en' THEN 'EN'
    WHEN 'ar' THEN 'AR'
    WHEN 'ba' THEN 'BA'
    ELSE 'Others'  -- also reached when language_code is NULL or empty
  END AS CombinedLocale,
  COUNT(*) AS cnt
FROM date_locales
GROUP BY date, CombinedLocale;
以下内容适用于 BigQuery 标准 SQL,并回答了您问题中的两个部分
#standardSQL
-- Monthly number of rows per language, where the language is the part of
-- `code` before the first '-'.
WITH parsed AS (
  SELECT
    -- dt is read as a month/day/year string and rendered like 'Mar 2019'.
    FORMAT_DATE('%b %Y', PARSE_DATE('%m/%d/%Y', dt)) AS month_year,
    -- Leading run of non-'-' characters: a code without any '-' (e.g. 'en')
    -- is kept whole instead of becoming NULL.
    REGEXP_EXTRACT(code, r'^([^-]+)') AS code
  FROM `project.dataset.date_locales`
)
-- Grouping happens over the derived columns only, so `code` below can never
-- be confused with the raw source column of the same name.
SELECT
  month_year,
  code,
  COUNT(*) AS cnt
FROM parsed
GROUP BY month_year, code
您可以使用下面示例中的虚拟数据来测试和试用上面的查询
#standardSQL
-- Self-contained demo: the CTE below stands in for the real table so the
-- monthly per-language count can be run without any stored data.
WITH `project.dataset.date_locales` AS (
  SELECT '3/14/2019' AS dt, 'af-ZA' AS code UNION ALL
  SELECT '3/14/2019', 'am-ET' UNION ALL
  SELECT '5/7/2019', 'ar-AE' UNION ALL
  SELECT '5/19/2019', 'ar-BH' UNION ALL
  SELECT '3/5/2019', 'ar-DZ' UNION ALL
  SELECT '1/1/2019', 'ar-EG' UNION ALL
  SELECT '3/31/2019', 'ar-IQ' UNION ALL
  SELECT '4/20/2019', 'ar-JO' UNION ALL
  SELECT '3/17/2019', 'ar-KW' UNION ALL
  SELECT '1/8/2019', 'ar-LB' UNION ALL
  SELECT '3/26/2019', 'ar-LY' UNION ALL
  SELECT '5/7/2019', 'ar-MA' UNION ALL
  SELECT '3/12/2019', 'arn-CL' UNION ALL
  SELECT '5/19/2019', 'ar-OM' UNION ALL
  SELECT '4/19/2019', 'ar-QA' UNION ALL
  SELECT '4/20/2019', 'ar-SA' UNION ALL
  SELECT '5/22/2019', 'ar-SY' UNION ALL
  SELECT '5/23/2019', 'ar-TN' UNION ALL
  SELECT '3/10/2019', 'ar-YE' UNION ALL
  SELECT '4/6/2019', 'as-IN' UNION ALL
  SELECT '2/5/2019', 'az-Cyrl' UNION ALL
  SELECT '3/1/2019', 'az-Latn' UNION ALL
  SELECT '3/25/2019', 'ba-RU' UNION ALL
  SELECT '1/1/2019', 'be-BY' UNION ALL
  SELECT '2/1/2019', 'bg-BG' UNION ALL
  SELECT '5/3/2019', 'bn-BD' UNION ALL
  SELECT '5/2/2019', 'bn-IN' UNION ALL
  SELECT '3/19/2019', 'bo-CN' UNION ALL
  SELECT '1/19/2019', 'br-FR'
),
parsed AS (
  SELECT
    -- dt is read as a month/day/year string and rendered like 'Mar 2019'.
    FORMAT_DATE('%b %Y', PARSE_DATE('%m/%d/%Y', dt)) AS month_year,
    -- Leading run of non-'-' characters: a code without any '-' (e.g. 'en')
    -- is kept whole instead of becoming NULL.
    REGEXP_EXTRACT(code, r'^([^-]+)') AS code
  FROM `project.dataset.date_locales`
)
-- Grouping happens over the derived columns only, so `code` below can never
-- be confused with the raw source column of the same name.
SELECT
  month_year,
  code,
  COUNT(*) AS cnt
FROM parsed
GROUP BY month_year, code
结果为
Row month_year code cnt
1 Jan 2019 ar 2
2 Mar 2019 ar 5
3 Mar 2019 af 1
4 Feb 2019 az 1
5 Mar 2019 am 1
6 Apr 2019 as 1
7 May 2019 ar 6
8 Mar 2019 ba 1
9 May 2019 bn 2
10 Feb 2019 bg 1
11 Mar 2019 arn 1
12 Mar 2019 bo 1
13 Mar 2019 az 1
14 Jan 2019 br 1
15 Apr 2019 ar 3
16 Jan 2019 be 1
我正在学习 CASE WHEN,在 Analytics 中看到多个区域设置代码时遇到了一个用例。这个问题比我之前发布的问题简单得多,也更容易阅读和回答。
例如: en-us(美国英语) en-au(澳大利亚英语) en-br(巴西英语) es-es(西班牙西班牙语) es-419(拉丁美洲西班牙语) pt-br(巴西葡萄牙语) pt-pt(葡萄牙葡萄牙语)
我如何在 BigQuery 中聚合这些值,使其不是按每个不同的值分别计数,而是只按区域设置代码的前两个字符统计出现次数?
这个问题的第二部分是:如何构建我的表,以便能够按日期绘制这些计数?
目前,输出是: date:language_code:CombinedLocale
示例数据表链接: https://docs.google.com/spreadsheets/d/1XZp1nhNZySWI39kKhb3ydYYIImmrfAMcGJDS6ASThqg/edit?usp=sharing
我试过:
-- First attempt: count language codes per date and combined locale.
-- There is no GROUP BY clause, so `date` and the CASE expression are selected
-- next to COUNT() without being grouped or aggregated.
SELECT date, COUNT(language_code),
CASE
WHEN language_code like '%af%' THEN 'AF'
WHEN language_code like '%en%' THEN 'EN'
WHEN language_code like '%ar%' THEN 'AR'
WHEN language_code like '%ba%' THEN 'BA'
ELSE "Others"  -- fallback for codes matching none of the patterns above
END AS CombinedLocale
FROM date_locales
并且:
-- Second attempt: the CASE expression is used directly as the WHERE condition.
-- Every branch yields a string ('AF', 'EN', ..., "Others") rather than a
-- boolean, and `date` is again selected beside COUNT() without a GROUP BY.
Select date, COUNT(language_code)
FROM date_locales
WHERE CASE
WHEN language_code like '%af%' THEN 'AF'
WHEN language_code like '%en%' THEN 'EN'
WHEN language_code like '%ar%' THEN 'AR'
WHEN language_code like '%ba%' THEN 'BA'
ELSE "Others"
END
这是我目前可以运行的代码:
-- Row-level listing: every date and raw language_code together with the
-- bucket (CombinedLocale) the code falls into. Nothing is aggregated here.
SELECT
  date,
  language_code,
  (
    -- Branches are tested top to bottom; the first matching pattern wins.
    CASE
      WHEN language_code LIKE '%af%' THEN 'AF'
      WHEN language_code LIKE '%en%' THEN 'EN'
      WHEN language_code LIKE '%ar%' THEN 'AR'
      WHEN language_code LIKE '%ba%' THEN 'BA'
      ELSE "Others"
    END
  ) AS CombinedLocale
FROM
  date_locales
我希望结果按时间显示 CombinedLocale 的计数,如下所示:
Jan AF 3 JAN EN 5 FEB AF 5 FEB EN 6 MAR EN 2 MAR EN 3
但我收到一条错误消息,指出: SELECT 列表表达式引用了既未分组也未聚合的列 date(第 1 行,第 8 列)
我想我需要先把日期按月汇总?我原以为 BigQuery 与 Data Studio 的集成会自动聚合日期列。
您只是在寻找聚合查询吗?
-- Count rows per date and combined locale.
-- The language is the part of language_code before the first '-', compared
-- case-insensitively. Matching on that prefix (instead of LIKE '%ar%' etc.)
-- keeps a region suffix from selecting the wrong bucket: with a substring
-- match, a code such as 'es-ar' would be counted as 'AR'.
SELECT
  date,
  CASE LOWER(SPLIT(language_code, '-')[SAFE_OFFSET(0)])
    WHEN 'af' THEN 'AF'
    WHEN 'en' THEN 'EN'
    WHEN 'ar' THEN 'AR'
    WHEN 'ba' THEN 'BA'
    ELSE 'Others'  -- also reached when language_code is NULL or empty
  END AS CombinedLocale,
  COUNT(*) AS cnt
FROM date_locales
GROUP BY date, CombinedLocale;
以下内容适用于 BigQuery 标准 SQL,并回答了您问题中的两个部分
#standardSQL
-- Monthly number of rows per language, where the language is the part of
-- `code` before the first '-'.
WITH parsed AS (
  SELECT
    -- dt is read as a month/day/year string and rendered like 'Mar 2019'.
    FORMAT_DATE('%b %Y', PARSE_DATE('%m/%d/%Y', dt)) AS month_year,
    -- Leading run of non-'-' characters: a code without any '-' (e.g. 'en')
    -- is kept whole instead of becoming NULL.
    REGEXP_EXTRACT(code, r'^([^-]+)') AS code
  FROM `project.dataset.date_locales`
)
-- Grouping happens over the derived columns only, so `code` below can never
-- be confused with the raw source column of the same name.
SELECT
  month_year,
  code,
  COUNT(*) AS cnt
FROM parsed
GROUP BY month_year, code
您可以使用下面示例中的虚拟数据来测试和试用上面的查询
#standardSQL
-- Self-contained demo: the CTE below stands in for the real table so the
-- monthly per-language count can be run without any stored data.
WITH `project.dataset.date_locales` AS (
  SELECT '3/14/2019' AS dt, 'af-ZA' AS code UNION ALL
  SELECT '3/14/2019', 'am-ET' UNION ALL
  SELECT '5/7/2019', 'ar-AE' UNION ALL
  SELECT '5/19/2019', 'ar-BH' UNION ALL
  SELECT '3/5/2019', 'ar-DZ' UNION ALL
  SELECT '1/1/2019', 'ar-EG' UNION ALL
  SELECT '3/31/2019', 'ar-IQ' UNION ALL
  SELECT '4/20/2019', 'ar-JO' UNION ALL
  SELECT '3/17/2019', 'ar-KW' UNION ALL
  SELECT '1/8/2019', 'ar-LB' UNION ALL
  SELECT '3/26/2019', 'ar-LY' UNION ALL
  SELECT '5/7/2019', 'ar-MA' UNION ALL
  SELECT '3/12/2019', 'arn-CL' UNION ALL
  SELECT '5/19/2019', 'ar-OM' UNION ALL
  SELECT '4/19/2019', 'ar-QA' UNION ALL
  SELECT '4/20/2019', 'ar-SA' UNION ALL
  SELECT '5/22/2019', 'ar-SY' UNION ALL
  SELECT '5/23/2019', 'ar-TN' UNION ALL
  SELECT '3/10/2019', 'ar-YE' UNION ALL
  SELECT '4/6/2019', 'as-IN' UNION ALL
  SELECT '2/5/2019', 'az-Cyrl' UNION ALL
  SELECT '3/1/2019', 'az-Latn' UNION ALL
  SELECT '3/25/2019', 'ba-RU' UNION ALL
  SELECT '1/1/2019', 'be-BY' UNION ALL
  SELECT '2/1/2019', 'bg-BG' UNION ALL
  SELECT '5/3/2019', 'bn-BD' UNION ALL
  SELECT '5/2/2019', 'bn-IN' UNION ALL
  SELECT '3/19/2019', 'bo-CN' UNION ALL
  SELECT '1/19/2019', 'br-FR'
),
parsed AS (
  SELECT
    -- dt is read as a month/day/year string and rendered like 'Mar 2019'.
    FORMAT_DATE('%b %Y', PARSE_DATE('%m/%d/%Y', dt)) AS month_year,
    -- Leading run of non-'-' characters: a code without any '-' (e.g. 'en')
    -- is kept whole instead of becoming NULL.
    REGEXP_EXTRACT(code, r'^([^-]+)') AS code
  FROM `project.dataset.date_locales`
)
-- Grouping happens over the derived columns only, so `code` below can never
-- be confused with the raw source column of the same name.
SELECT
  month_year,
  code,
  COUNT(*) AS cnt
FROM parsed
GROUP BY month_year, code
结果为
Row month_year code cnt
1 Jan 2019 ar 2
2 Mar 2019 ar 5
3 Mar 2019 af 1
4 Feb 2019 az 1
5 Mar 2019 am 1
6 Apr 2019 as 1
7 May 2019 ar 6
8 Mar 2019 ba 1
9 May 2019 bn 2
10 Feb 2019 bg 1
11 Mar 2019 arn 1
12 Mar 2019 bo 1
13 Mar 2019 az 1
14 Jan 2019 br 1
15 Apr 2019 ar 3
16 Jan 2019 be 1