将表与自身连接,以获取包含缺失日期的去年结果
Join Table to Itself for Last Year Results with Missing Dates
我有一个包含日期、频道和会话数的表,我正在尝试使用 join 为每一行添加一列,用来存放去年对应日期的值。但是,我还想包含去年有数据而今年没有数据的日期,反之亦然。问题是,对于在另一年中不存在的日期,结果中的行数翻倍了。有什么解决办法吗?
-- Asker's attempt: pair each row with the same channel's row dated exactly one year earlier.
-- A full outer join keeps unmatched rows from BOTH sides, so a row that has no
-- counterpart on the other side is emitted on its own, with NULLs in the other side's columns.
SELECT
  ty.*,
  ly.Date AS Date_LY,
  ly.Sessions AS Sessions_LY
FROM `testjoin` AS ty
FULL OUTER JOIN `testjoin` AS ly
  ON ly.Channel = ty.Channel
  AND ly.Date = DATE_SUB(ty.Date, INTERVAL 1 YEAR)
数据:
Date Channel Sessions
01/01/2017 Email 5
02/02/2017 Email 10
01/01/2018 Email 11
02/02/2018 Email 17
01/01/2017 Organic 10
02/02/2017 Organic 15
01/01/2018 Organic 20
期望的输出:
Date Channel Sessions Sessions_LY
01/01/2017 Email 5 null
02/02/2017 Email 10 null
01/01/2018 Email 11 5
02/02/2018 Email 17 10
01/01/2017 Organic 10 null
02/02/2017 Organic 15 null
01/01/2018 Organic 20 10
02/02/2018 Organic null 15
实际输出:
Date Channel Sessions Sessions_LY
01/01/2017 Organic 10 null
02/02/2017 Email 10 null
02/02/2017 Organic 15 null
01/01/2017 Email 5 null
01/01/2018 Email 11 5
01/01/2018 Organic 20 10
02/02/2018 Email 17 10
null null null 15
null null null 11
null null null 20
null null null 17
我认为你需要先用 cross join 生成所有(日期、频道)组合的行,再用 left join 引入对应的值:
-- Build every (date, channel) combination, then look up the current and the
-- prior-year sessions for each combination; missing values come back as NULL.
SELECT
  d.Date,
  c.Channel,
  ty.Sessions,
  ty_prev.Sessions AS Sessions_LY
FROM (SELECT DISTINCT ty.Date
      FROM testjoin ty
     ) d
CROSS JOIN (SELECT DISTINCT ty.Channel FROM testjoin ty) c
LEFT JOIN testjoin ty
  ON ty.Date = d.Date
  AND ty.Channel = c.Channel
-- The prior-year lookup has to be restricted by ty_prev's own channel.
-- Comparing ty.Channel instead matches prior-year rows of every channel and
-- finds nothing at all when ty is NULL (a date with no current-year row).
LEFT JOIN testjoin ty_prev
  ON ty_prev.Date = d.Date - interval 1 year
  AND ty_prev.Channel = c.Channel;
根据需要使用 datepart
-- SQL Server variant: sample data from the question, expanded to every
-- (date, channel) combination, with the prior year's sessions fetched via LAG.
-- NOTE(review): the dates are string literals, so datepart() relies on implicit
-- conversion; this presumably assumes an mdy session date format -- confirm.
with t (date, channel, sessions) as
(
    select '01/01/2017', 'Email', 5 union all
    select '02/02/2017', 'Email', 10 union all
    select '01/01/2018', 'Email', 11 union all
    select '02/02/2018', 'Email', 17 union all
    select '01/01/2017', 'Organic', 10 union all
    select '02/02/2017', 'Organic', 15 union all
    select '01/01/2018', 'Organic', 20
)
select
    d.date,
    d.channel,
    t.sessions,
    -- Partition by channel and calendar day, order by year: ordering by the
    -- partition keys themselves would leave the "previous" row undefined.
    lag(t.sessions) over (
        partition by d.channel, datepart(mm, d.date), datepart(dd, d.date)
        order by datepart(yyyy, d.date)
    ) as sessions_ly
from (
    -- every distinct date paired with every distinct channel
    select dt.date, c.channel
    from (select distinct t.date from t) dt
    cross join (select distinct t.channel from t) c
) d
left join t
    on d.date = t.date
    and d.channel = t.channel
order by d.channel, datepart(yyyy, d.date), datepart(mm, d.date), datepart(dd, d.date)
您问题中的所有内容都表明您只有当前(2018 年)和之前(2017 年),因此以下内容基于此假设并且适用于 BigQuery 标准 SQL
#standardSQL
-- Parse the MM/DD/YYYY strings into DATE values.
WITH parsed AS (
  SELECT
    PARSE_DATE('%m/%d/%Y', Date) AS Date,
    Channel,
    Sessions
  FROM `project.dataset.your_table`
),
-- Every existing (Date, Channel) pair, plus each 2017 pair shifted forward one
-- year, so a 2018 row exists even where 2018 has no data of its own.
date_channel_grid AS (
  SELECT Date, Channel
  FROM parsed
  UNION DISTINCT
  SELECT DATE_ADD(Date, INTERVAL 1 YEAR), Channel
  FROM parsed
  WHERE EXTRACT(YEAR FROM Date) = 2017
),
-- Attach sessions where they exist; `day` is the month+day key shared by the
-- same calendar day across years.
grid_with_sessions AS (
  SELECT
    Date,
    Channel,
    Sessions,
    FORMAT_DATE('%m%d', Date) AS day
  FROM date_channel_grid
  LEFT JOIN parsed USING (Date, Channel)
)
-- Within one calendar day and channel, the previous row by Date supplies Sessions_LY.
SELECT
  Date,
  Channel,
  Sessions,
  LAG(Sessions) OVER (PARTITION BY day, Channel ORDER BY Date) AS Sessions_LY
FROM grid_with_sessions
您可以使用问题中的示例数据来测试/试用上面的查询,如下所示
#standardSQL
-- Inline copy of the question's sample data, standing in for the real table.
WITH `project.dataset.your_table` AS (
  SELECT '01/01/2017' AS Date, 'Email' AS Channel, 5 AS Sessions UNION ALL
  SELECT '02/02/2017', 'Email', 10 UNION ALL
  SELECT '01/01/2018', 'Email', 11 UNION ALL
  SELECT '02/02/2018', 'Email', 17 UNION ALL
  SELECT '01/01/2017', 'Organic', 10 UNION ALL
  SELECT '02/02/2017', 'Organic', 15 UNION ALL
  SELECT '01/01/2018', 'Organic', 20
),
-- Parse the MM/DD/YYYY strings into DATE values.
parsed AS (
  SELECT
    PARSE_DATE('%m/%d/%Y', Date) AS Date,
    Channel,
    Sessions
  FROM `project.dataset.your_table`
),
-- Every existing (Date, Channel) pair, plus each 2017 pair shifted forward one
-- year, so a 2018 row exists even where 2018 has no data of its own.
date_channel_grid AS (
  SELECT Date, Channel
  FROM parsed
  UNION DISTINCT
  SELECT DATE_ADD(Date, INTERVAL 1 YEAR), Channel
  FROM parsed
  WHERE EXTRACT(YEAR FROM Date) = 2017
),
-- Attach sessions where they exist; `day` is the month+day key shared by the
-- same calendar day across years.
grid_with_sessions AS (
  SELECT
    Date,
    Channel,
    Sessions,
    FORMAT_DATE('%m%d', Date) AS day
  FROM date_channel_grid
  LEFT JOIN parsed USING (Date, Channel)
)
-- Within one calendar day and channel, the previous row by Date supplies Sessions_LY.
SELECT
  Date,
  Channel,
  Sessions,
  LAG(Sessions) OVER (PARTITION BY day, Channel ORDER BY Date) AS Sessions_LY
FROM grid_with_sessions
ORDER BY Channel, Date
结果是
Row Date Channel Sessions Sessions_LY
1 2017-01-01 Email 5 null
2 2017-02-02 Email 10 null
3 2018-01-01 Email 11 5
4 2018-02-02 Email 17 10
5 2017-01-01 Organic 10 null
6 2017-02-02 Organic 15 null
7 2018-01-01 Organic 20 10
8 2018-02-02 Organic null 15
我有一个包含日期、频道和会话数的表,我正在尝试使用 join 为每一行添加一列,用来存放去年对应日期的值。但是,我还想包含去年有数据而今年没有数据的日期,反之亦然。问题是,对于在另一年中不存在的日期,结果中的行数翻倍了。有什么解决办法吗?
-- Asker's attempt: pair each row with the same channel's row dated exactly one year earlier.
-- A full outer join keeps unmatched rows from BOTH sides, so a row that has no
-- counterpart on the other side is emitted on its own, with NULLs in the other side's columns.
SELECT
  ty.*,
  ly.Date AS Date_LY,
  ly.Sessions AS Sessions_LY
FROM `testjoin` AS ty
FULL OUTER JOIN `testjoin` AS ly
  ON ly.Channel = ty.Channel
  AND ly.Date = DATE_SUB(ty.Date, INTERVAL 1 YEAR)
数据:
Date Channel Sessions
01/01/2017 Email 5
02/02/2017 Email 10
01/01/2018 Email 11
02/02/2018 Email 17
01/01/2017 Organic 10
02/02/2017 Organic 15
01/01/2018 Organic 20
期望的输出:
Date Channel Sessions Sessions_LY
01/01/2017 Email 5 null
02/02/2017 Email 10 null
01/01/2018 Email 11 5
02/02/2018 Email 17 10
01/01/2017 Organic 10 null
02/02/2017 Organic 15 null
01/01/2018 Organic 20 10
02/02/2018 Organic null 15
实际输出:
Date Channel Sessions Sessions_LY
01/01/2017 Organic 10 null
02/02/2017 Email 10 null
02/02/2017 Organic 15 null
01/01/2017 Email 5 null
01/01/2018 Email 11 5
01/01/2018 Organic 20 10
02/02/2018 Email 17 10
null null null 15
null null null 11
null null null 20
null null null 17
我认为你需要先用 cross join 生成所有(日期、频道)组合的行,再用 left join 引入对应的值:
-- Build every (date, channel) combination, then look up the current and the
-- prior-year sessions for each combination; missing values come back as NULL.
SELECT
  d.Date,
  c.Channel,
  ty.Sessions,
  ty_prev.Sessions AS Sessions_LY
FROM (SELECT DISTINCT ty.Date
      FROM testjoin ty
     ) d
CROSS JOIN (SELECT DISTINCT ty.Channel FROM testjoin ty) c
LEFT JOIN testjoin ty
  ON ty.Date = d.Date
  AND ty.Channel = c.Channel
-- The prior-year lookup has to be restricted by ty_prev's own channel.
-- Comparing ty.Channel instead matches prior-year rows of every channel and
-- finds nothing at all when ty is NULL (a date with no current-year row).
LEFT JOIN testjoin ty_prev
  ON ty_prev.Date = d.Date - interval 1 year
  AND ty_prev.Channel = c.Channel;
根据需要使用 datepart
-- SQL Server variant: sample data from the question, expanded to every
-- (date, channel) combination, with the prior year's sessions fetched via LAG.
-- NOTE(review): the dates are string literals, so datepart() relies on implicit
-- conversion; this presumably assumes an mdy session date format -- confirm.
with t (date, channel, sessions) as
(
    select '01/01/2017', 'Email', 5 union all
    select '02/02/2017', 'Email', 10 union all
    select '01/01/2018', 'Email', 11 union all
    select '02/02/2018', 'Email', 17 union all
    select '01/01/2017', 'Organic', 10 union all
    select '02/02/2017', 'Organic', 15 union all
    select '01/01/2018', 'Organic', 20
)
select
    d.date,
    d.channel,
    t.sessions,
    -- Partition by channel and calendar day, order by year: ordering by the
    -- partition keys themselves would leave the "previous" row undefined.
    lag(t.sessions) over (
        partition by d.channel, datepart(mm, d.date), datepart(dd, d.date)
        order by datepart(yyyy, d.date)
    ) as sessions_ly
from (
    -- every distinct date paired with every distinct channel
    select dt.date, c.channel
    from (select distinct t.date from t) dt
    cross join (select distinct t.channel from t) c
) d
left join t
    on d.date = t.date
    and d.channel = t.channel
order by d.channel, datepart(yyyy, d.date), datepart(mm, d.date), datepart(dd, d.date)
您问题中的所有内容都表明您只有当前(2018 年)和之前(2017 年),因此以下内容基于此假设并且适用于 BigQuery 标准 SQL
#standardSQL
-- Parse the MM/DD/YYYY strings into DATE values.
WITH parsed AS (
  SELECT
    PARSE_DATE('%m/%d/%Y', Date) AS Date,
    Channel,
    Sessions
  FROM `project.dataset.your_table`
),
-- Every existing (Date, Channel) pair, plus each 2017 pair shifted forward one
-- year, so a 2018 row exists even where 2018 has no data of its own.
date_channel_grid AS (
  SELECT Date, Channel
  FROM parsed
  UNION DISTINCT
  SELECT DATE_ADD(Date, INTERVAL 1 YEAR), Channel
  FROM parsed
  WHERE EXTRACT(YEAR FROM Date) = 2017
),
-- Attach sessions where they exist; `day` is the month+day key shared by the
-- same calendar day across years.
grid_with_sessions AS (
  SELECT
    Date,
    Channel,
    Sessions,
    FORMAT_DATE('%m%d', Date) AS day
  FROM date_channel_grid
  LEFT JOIN parsed USING (Date, Channel)
)
-- Within one calendar day and channel, the previous row by Date supplies Sessions_LY.
SELECT
  Date,
  Channel,
  Sessions,
  LAG(Sessions) OVER (PARTITION BY day, Channel ORDER BY Date) AS Sessions_LY
FROM grid_with_sessions
您可以使用问题中的示例数据来测试/试用上面的查询,如下所示
#standardSQL
-- Inline copy of the question's sample data, standing in for the real table.
WITH `project.dataset.your_table` AS (
  SELECT '01/01/2017' AS Date, 'Email' AS Channel, 5 AS Sessions UNION ALL
  SELECT '02/02/2017', 'Email', 10 UNION ALL
  SELECT '01/01/2018', 'Email', 11 UNION ALL
  SELECT '02/02/2018', 'Email', 17 UNION ALL
  SELECT '01/01/2017', 'Organic', 10 UNION ALL
  SELECT '02/02/2017', 'Organic', 15 UNION ALL
  SELECT '01/01/2018', 'Organic', 20
),
-- Parse the MM/DD/YYYY strings into DATE values.
parsed AS (
  SELECT
    PARSE_DATE('%m/%d/%Y', Date) AS Date,
    Channel,
    Sessions
  FROM `project.dataset.your_table`
),
-- Every existing (Date, Channel) pair, plus each 2017 pair shifted forward one
-- year, so a 2018 row exists even where 2018 has no data of its own.
date_channel_grid AS (
  SELECT Date, Channel
  FROM parsed
  UNION DISTINCT
  SELECT DATE_ADD(Date, INTERVAL 1 YEAR), Channel
  FROM parsed
  WHERE EXTRACT(YEAR FROM Date) = 2017
),
-- Attach sessions where they exist; `day` is the month+day key shared by the
-- same calendar day across years.
grid_with_sessions AS (
  SELECT
    Date,
    Channel,
    Sessions,
    FORMAT_DATE('%m%d', Date) AS day
  FROM date_channel_grid
  LEFT JOIN parsed USING (Date, Channel)
)
-- Within one calendar day and channel, the previous row by Date supplies Sessions_LY.
SELECT
  Date,
  Channel,
  Sessions,
  LAG(Sessions) OVER (PARTITION BY day, Channel ORDER BY Date) AS Sessions_LY
FROM grid_with_sessions
ORDER BY Channel, Date
结果是
Row Date Channel Sessions Sessions_LY
1 2017-01-01 Email 5 null
2 2017-02-02 Email 10 null
3 2018-01-01 Email 11 5
4 2018-02-02 Email 17 10
5 2017-01-01 Organic 10 null
6 2017-02-02 Organic 15 null
7 2018-01-01 Organic 20 10
8 2018-02-02 Organic null 15