Google BigQuery Legacy SQL - 如果没有返回结果,如何返回 null 或零?
Google Bigquery Legacy SQL - How to return a null or zero if no results returned?
假设我有以下 table
month region revenue
------ -------- ----------
jan north 100
feb north 150
mar north 250
我怎样才能查询上面的 table 以获得以下结果?:
month region revenue
------ -------- ----------
jan north 100
feb north 150
mar north 250
apr north 0
may north 0
jun north 0
0 可以为空,反之亦然。本质上是尝试将 empty/null 字段添加到我的查询中(在本例中为 apr、may、jun 行)。任何帮助将不胜感激
谢谢
一种做法:将查询结果与您希望看到的值列表做 LEFT/RIGHT JOIN。
让我们从一个缺少 null/零值的查询开始:
#standardSQL
-- Baseline: yearly totals for a single name. Years that have no matching
-- rows are simply absent from the output (no NULL/0 placeholder rows).
SELECT
  year,
  SUM(number) AS c
FROM `bigquery-public-data.usa_names.usa_1910_current`
WHERE year > 2014
  AND name = 'Felipe'
GROUP BY year
ORDER BY year
如果我们想为 2015 年之前的值取 0:
-- Build the full list of wanted years first, then LEFT JOIN the aggregated
-- totals onto it so that years with no data still appear, with c set to 0.
WITH wanted_years AS (
  SELECT year
  FROM UNNEST(GENERATE_ARRAY(2012, 2016)) AS year
),
yearly_totals AS (
  SELECT
    year,
    SUM(number) AS c
  FROM `bigquery-public-data.usa_names.usa_1910_current`
  WHERE name = 'Felipe'
    AND year > 2014
  GROUP BY year
)
SELECT
  wanted_years.year,
  COALESCE(yearly_totals.c, 0) AS c
FROM wanted_years
LEFT JOIN yearly_totals
  ON yearly_totals.year = wanted_years.year
ORDER BY wanted_years.year
相关子查询也可以挽救局面:
-- Same result via a correlated scalar subquery: for each wanted year, sum the
-- matching rows. SUM over zero rows is NULL, so it is defaulted to 0.
SELECT
  wanted.year,
  (
    SELECT COALESCE(SUM(src.number), 0)
    FROM `bigquery-public-data.usa_names.usa_1910_current` AS src
    WHERE src.name = 'Felipe'
      AND src.year > 2014
      AND src.year = wanted.year
  ) AS c
FROM (
  SELECT year
  FROM UNNEST(GENERATE_ARRAY(2012, 2016)) AS year
) AS wanted
ORDER BY wanted.year
以下适用于 BigQuery Legacy SQL(旧版),但请注意:BigQuery 团队强烈建议迁移到 BigQuery Standard SQL
下面的例子应该会给你一个想法
#legacySQL
-- Emit one row per (month, region) pair; revenue defaults to 0 when the
-- pair has no matching row in revenues.
SELECT
  months.month_abr AS month_abr,
  regions.region AS region,
  COALESCE(revenues.revenue, 0) AS revenue
FROM months
CROSS JOIN (
  -- One row per region. Without the GROUP BY every revenues row would
  -- contribute its own copy of its region, and the CROSS JOIN would then
  -- repeat each (month, region) output row once per copy.
  SELECT region FROM revenues GROUP BY region
) regions
LEFT JOIN revenues
  ON months.month_abr = revenues.month_abr
  AND regions.region = revenues.region
-- Optional, if the months table also carries a month_number column:
-- ORDER BY regions.region, months.month_number
其中 revenues 是带有收入数据的原始 table,months 是带有月份列表的 table(或者您可以使用子查询,如下例所示)
您可以用下面的示例,借助问题中的虚拟数据来测试/试用上面的查询
#legacySQL
-- Self-contained demo of the months x regions LEFT JOIN using inline data.
-- In legacy SQL a comma between subqueries in FROM means UNION ALL.
SELECT
  months.month_abr AS month_abr,
  regions.region AS region,
  COALESCE(revenues.revenue, 0) AS revenue
FROM (
  -- Months to report on; month_number is used only for ordering.
  SELECT month_number, month_abr FROM
    (SELECT 1 AS month_number, 'jan' AS month_abr),
    (SELECT 2 AS month_number, 'feb' AS month_abr),
    (SELECT 3 AS month_number, 'mar' AS month_abr),
    (SELECT 4 AS month_number, 'apr' AS month_abr),
    (SELECT 5 AS month_number, 'may' AS month_abr),
    (SELECT 6 AS month_number, 'jun' AS month_abr)
) AS months
CROSS JOIN (
  -- Distinct regions present in the sample revenue rows.
  SELECT region FROM (
    SELECT region FROM
      (SELECT 'jan' AS month_abr, 'north' AS region, 100 AS revenue),
      (SELECT 'feb' AS month_abr, 'north' AS region, 150 AS revenue),
      (SELECT 'mar' AS month_abr, 'north' AS region, 250 AS revenue)
  ) GROUP BY region
) AS regions
LEFT JOIN (
  -- The sample revenue rows from the question.
  SELECT month_abr, region, revenue FROM
    (SELECT 'jan' AS month_abr, 'north' AS region, 100 AS revenue),
    (SELECT 'feb' AS month_abr, 'north' AS region, 150 AS revenue),
    (SELECT 'mar' AS month_abr, 'north' AS region, 250 AS revenue)
) AS revenues
  ON months.month_abr = revenues.month_abr
  AND regions.region = revenues.region
ORDER BY regions.region, months.month_number
结果如下
Row month_abr region revenue
1 jan north 100
2 feb north 150
3 mar north 250
4 apr north 0
5 may north 0
6 jun north 0
最后 - 下面是 BigQuery Standard SQL
#standardSQL
-- Every (month, region) pair, with revenue defaulted to 0 where revenues
-- has no matching row.
WITH months AS (
  -- One row per calendar month; the year 2010 only serves to generate
  -- twelve month-start dates.
  SELECT
    EXTRACT(MONTH FROM month_start) AS month_number,
    LOWER(FORMAT_DATE('%b', month_start)) AS month_abr
  FROM UNNEST(
    GENERATE_DATE_ARRAY(DATE '2010-01-01', DATE '2010-12-01', INTERVAL 1 MONTH)
  ) AS month_start
),
regions AS (
  SELECT DISTINCT region
  FROM revenues
)
SELECT
  months.month_abr,
  regions.region,
  COALESCE(revenues.revenue, 0) AS revenue
FROM months
CROSS JOIN regions
LEFT JOIN revenues
  ON revenues.month_abr = months.month_abr
  AND revenues.region = regions.region
ORDER BY regions.region, months.month_number
您可以使用问题中的虚拟数据来测试/试用这个查询
#standardSQL
-- Self-contained demo: every (month, region) pair over inline sample data,
-- with revenue defaulted to 0 where there is no matching sample row.
WITH revenues AS (
  -- Sample rows from the question.
  SELECT 'jan' AS month_abr, 'north' AS region, 100 AS revenue
  UNION ALL SELECT 'feb', 'north', 150
  UNION ALL SELECT 'mar', 'north', 250
),
months AS (
  -- One row per calendar month; the year 2010 only serves to generate
  -- twelve month-start dates.
  SELECT
    EXTRACT(MONTH FROM month_start) AS month_number,
    LOWER(FORMAT_DATE('%b', month_start)) AS month_abr
  FROM UNNEST(
    GENERATE_DATE_ARRAY(DATE '2010-01-01', DATE '2010-12-01', INTERVAL 1 MONTH)
  ) AS month_start
),
regions AS (
  SELECT DISTINCT region
  FROM revenues
)
SELECT
  months.month_abr,
  regions.region,
  COALESCE(revenues.revenue, 0) AS revenue
FROM months
CROSS JOIN regions
LEFT JOIN revenues
  ON revenues.month_abr = months.month_abr
  AND revenues.region = regions.region
ORDER BY regions.region, months.month_number
您应该能够将以上应用到您的实际用例中
假设我有以下 table
month region revenue
------ -------- ----------
jan north 100
feb north 150
mar north 250
我怎样才能查询上面的 table 以获得以下结果?:
month region revenue
------ -------- ----------
jan north 100
feb north 150
mar north 250
apr north 0
may north 0
jun north 0
0 可以为空,反之亦然。本质上是尝试将 empty/null 字段添加到我的查询中(在本例中为 apr、may、jun 行)。任何帮助将不胜感激
谢谢
一种做法:将查询结果与您希望看到的值列表做 LEFT/RIGHT JOIN。
让我们从一个缺少 null/零值的查询开始:
#standardSQL
-- Baseline: yearly totals for a single name. Years that have no matching
-- rows are simply absent from the output (no NULL/0 placeholder rows).
SELECT
  year,
  SUM(number) AS c
FROM `bigquery-public-data.usa_names.usa_1910_current`
WHERE year > 2014
  AND name = 'Felipe'
GROUP BY year
ORDER BY year
如果我们想为 2015 年之前的值取 0:
-- Build the full list of wanted years first, then LEFT JOIN the aggregated
-- totals onto it so that years with no data still appear, with c set to 0.
WITH wanted_years AS (
  SELECT year
  FROM UNNEST(GENERATE_ARRAY(2012, 2016)) AS year
),
yearly_totals AS (
  SELECT
    year,
    SUM(number) AS c
  FROM `bigquery-public-data.usa_names.usa_1910_current`
  WHERE name = 'Felipe'
    AND year > 2014
  GROUP BY year
)
SELECT
  wanted_years.year,
  COALESCE(yearly_totals.c, 0) AS c
FROM wanted_years
LEFT JOIN yearly_totals
  ON yearly_totals.year = wanted_years.year
ORDER BY wanted_years.year
相关子查询也可以挽救局面:
-- Same result via a correlated scalar subquery: for each wanted year, sum the
-- matching rows. SUM over zero rows is NULL, so it is defaulted to 0.
SELECT
  wanted.year,
  (
    SELECT COALESCE(SUM(src.number), 0)
    FROM `bigquery-public-data.usa_names.usa_1910_current` AS src
    WHERE src.name = 'Felipe'
      AND src.year > 2014
      AND src.year = wanted.year
  ) AS c
FROM (
  SELECT year
  FROM UNNEST(GENERATE_ARRAY(2012, 2016)) AS year
) AS wanted
ORDER BY wanted.year
以下适用于 BigQuery Legacy SQL(旧版),但请注意:BigQuery 团队强烈建议迁移到 BigQuery Standard SQL
下面的例子应该会给你一个想法
#legacySQL
-- Emit one row per (month, region) pair; revenue defaults to 0 when the
-- pair has no matching row in revenues.
SELECT
  months.month_abr AS month_abr,
  regions.region AS region,
  COALESCE(revenues.revenue, 0) AS revenue
FROM months
CROSS JOIN (
  -- One row per region. Without the GROUP BY every revenues row would
  -- contribute its own copy of its region, and the CROSS JOIN would then
  -- repeat each (month, region) output row once per copy.
  SELECT region FROM revenues GROUP BY region
) regions
LEFT JOIN revenues
  ON months.month_abr = revenues.month_abr
  AND regions.region = revenues.region
-- Optional, if the months table also carries a month_number column:
-- ORDER BY regions.region, months.month_number
其中 revenues 是带有收入数据的原始 table,months 是带有月份列表的 table(或者您可以使用子查询,如下例所示)
您可以用下面的示例,借助问题中的虚拟数据来测试/试用上面的查询
#legacySQL
-- Self-contained demo of the months x regions LEFT JOIN using inline data.
-- In legacy SQL a comma between subqueries in FROM means UNION ALL.
SELECT
  months.month_abr AS month_abr,
  regions.region AS region,
  COALESCE(revenues.revenue, 0) AS revenue
FROM (
  -- Months to report on; month_number is used only for ordering.
  SELECT month_number, month_abr FROM
    (SELECT 1 AS month_number, 'jan' AS month_abr),
    (SELECT 2 AS month_number, 'feb' AS month_abr),
    (SELECT 3 AS month_number, 'mar' AS month_abr),
    (SELECT 4 AS month_number, 'apr' AS month_abr),
    (SELECT 5 AS month_number, 'may' AS month_abr),
    (SELECT 6 AS month_number, 'jun' AS month_abr)
) AS months
CROSS JOIN (
  -- Distinct regions present in the sample revenue rows.
  SELECT region FROM (
    SELECT region FROM
      (SELECT 'jan' AS month_abr, 'north' AS region, 100 AS revenue),
      (SELECT 'feb' AS month_abr, 'north' AS region, 150 AS revenue),
      (SELECT 'mar' AS month_abr, 'north' AS region, 250 AS revenue)
  ) GROUP BY region
) AS regions
LEFT JOIN (
  -- The sample revenue rows from the question.
  SELECT month_abr, region, revenue FROM
    (SELECT 'jan' AS month_abr, 'north' AS region, 100 AS revenue),
    (SELECT 'feb' AS month_abr, 'north' AS region, 150 AS revenue),
    (SELECT 'mar' AS month_abr, 'north' AS region, 250 AS revenue)
) AS revenues
  ON months.month_abr = revenues.month_abr
  AND regions.region = revenues.region
ORDER BY regions.region, months.month_number
结果如下
Row month_abr region revenue
1 jan north 100
2 feb north 150
3 mar north 250
4 apr north 0
5 may north 0
6 jun north 0
最后 - 下面是 BigQuery Standard SQL
#standardSQL
-- Every (month, region) pair, with revenue defaulted to 0 where revenues
-- has no matching row.
WITH months AS (
  -- One row per calendar month; the year 2010 only serves to generate
  -- twelve month-start dates.
  SELECT
    EXTRACT(MONTH FROM month_start) AS month_number,
    LOWER(FORMAT_DATE('%b', month_start)) AS month_abr
  FROM UNNEST(
    GENERATE_DATE_ARRAY(DATE '2010-01-01', DATE '2010-12-01', INTERVAL 1 MONTH)
  ) AS month_start
),
regions AS (
  SELECT DISTINCT region
  FROM revenues
)
SELECT
  months.month_abr,
  regions.region,
  COALESCE(revenues.revenue, 0) AS revenue
FROM months
CROSS JOIN regions
LEFT JOIN revenues
  ON revenues.month_abr = months.month_abr
  AND revenues.region = regions.region
ORDER BY regions.region, months.month_number
您可以使用问题中的虚拟数据来测试/试用这个查询
#standardSQL
-- Self-contained demo: every (month, region) pair over inline sample data,
-- with revenue defaulted to 0 where there is no matching sample row.
WITH revenues AS (
  -- Sample rows from the question.
  SELECT 'jan' AS month_abr, 'north' AS region, 100 AS revenue
  UNION ALL SELECT 'feb', 'north', 150
  UNION ALL SELECT 'mar', 'north', 250
),
months AS (
  -- One row per calendar month; the year 2010 only serves to generate
  -- twelve month-start dates.
  SELECT
    EXTRACT(MONTH FROM month_start) AS month_number,
    LOWER(FORMAT_DATE('%b', month_start)) AS month_abr
  FROM UNNEST(
    GENERATE_DATE_ARRAY(DATE '2010-01-01', DATE '2010-12-01', INTERVAL 1 MONTH)
  ) AS month_start
),
regions AS (
  SELECT DISTINCT region
  FROM revenues
)
SELECT
  months.month_abr,
  regions.region,
  COALESCE(revenues.revenue, 0) AS revenue
FROM months
CROSS JOIN regions
LEFT JOIN revenues
  ON revenues.month_abr = months.month_abr
  AND revenues.region = regions.region
ORDER BY regions.region, months.month_number
您应该能够将以上应用到您的实际用例中