BigQuery 的最长连续天数
Longest Consecutive Days Count for BigQuery
现在我只有一个用户工作了多少天的总和。我正在尝试将此查询更改为最连续的工作天数。
其中 u12345 将是 4
而 u1 将是 2
.
这可能与 BigQuery 语句有关吗?
EDIT 我正在 Kind of close 进行以下查询,但我的 u1 得到的是 3 而不是 2.
SELECT MIN(e.timestamp) as date_created, e.uid, COUNT(e.uid) + 1 AS streak
FROM OnSite e
LEFT JOIN OnSite ee
ON e.uid = ee.uid
AND DATE(e.timestamp) = DATE(DATE_ADD(ee.timestamp, INTERVAL -1 DAY))
WHERE ee.uid IS NOT NULL
GROUP BY e.uid;
架构(MySQL v5.7)
CREATE TABLE OnSite
(`uid` varchar(55), `worksite_id` varchar(55), `timestamp` datetime)
;
INSERT INTO OnSite
(`uid`, `worksite_id`, `timestamp`)
VALUES
("u12345", "worksite_1", '2019-01-01'),
("u12345", "worksite_1", '2019-01-02'),
("u12345", "worksite_1", '2019-01-03'),
("u12345", "worksite_1", '2019-01-04'),
("u12345", "worksite_1", '2019-01-06'),
("u1", "worksite_1", '2019-01-01'),
("u1", "worksite_1", '2019-01-02'),
("u1", "worksite_1", '2019-01-05'),
("u1", "worksite_1", '2019-01-06')
;
查询#1
SELECT uid, COUNT(DISTINCT timestamp) Total
FROM OnSite
GROUP BY uid;
| uid | Total |
| ------ | ----- |
| u1 | 4 |
| u12345 | 5 |
这适合你吗?
set @gr=1;
select uid, max(cnt) max_cnt from (
select uid, grp, count(*) cnt from (
select uid,
case when ifnull(DATE_ADD(oldDate, INTERVAL 1 DAY), timestamp)= timestamp then
@gr
else
@gr := @gr +1
end grp
from
(
SELECT
uid,
timestamp,
lag(timestamp) over (partition by uid order by timestamp asc) as oldDate
FROM OnSite
) t
)t2
group by uid, grp
)t3
group by uid
结果
| uid | max_cnt |
| ------ | ------- |
| u1 | 2 |
| u12345 | 4 |
以下适用于 BigQuery 标准 SQL
如果您对同一工作站点上用户的最大连续天数感兴趣:
#standardSQL
SELECT uid, MAX(consecuitive_days) max_consecuitive_days
FROM (
SELECT uid, grp, COUNT(1) consecuitive_days
FROM (
SELECT uid,
COUNTIF(step > 1) OVER(PARTITION BY uid, worksite_id ORDER BY ts) grp
FROM (
SELECT uid, worksite_id, ts,
DATE_DIFF(ts, LAG(ts) OVER(PARTITION BY uid, worksite_id ORDER BY ts), DAY) step
FROM `project.dataset.table`
)
) GROUP BY uid, grp
) GROUP BY uid
如果工作地点无关紧要,而您只是在寻找最大连续天数:
#standardSQL
SELECT uid, MAX(consecuitive_days) max_consecuitive_days
FROM (
SELECT uid, grp, COUNT(1) consecuitive_days
FROM (
SELECT uid,
COUNTIF(step > 1) OVER(PARTITION BY uid ORDER BY ts) grp
FROM (
SELECT uid, ts,
DATE_DIFF(ts, LAG(ts) OVER(PARTITION BY uid ORDER BY ts), DAY) step
FROM `project.dataset.table`
)
) GROUP BY uid, grp
) GROUP BY uid
您可以使用您问题中的示例数据来测试、播放以上任何内容,如下例所示
#standardSQL
WITH `project.dataset.table` AS (
SELECT 'u12345' uid, 'worksite_1' worksite_id, DATE '2019-01-01' ts UNION ALL
SELECT 'u12345', 'worksite_1', '2019-01-02' UNION ALL
SELECT 'u12345', 'worksite_1', '2019-01-03' UNION ALL
SELECT 'u12345', 'worksite_1', '2019-01-04' UNION ALL
SELECT 'u12345', 'worksite_1', '2019-01-06' UNION ALL
SELECT 'u1', 'worksite_1', '2019-01-01' UNION ALL
SELECT 'u1', 'worksite_1', '2019-01-02' UNION ALL
SELECT 'u1', 'worksite_1', '2019-01-05' UNION ALL
SELECT 'u1', 'worksite_1', '2019-01-06'
)
SELECT uid, MAX(consecuitive_days) max_consecuitive_days
FROM (
SELECT uid, grp, COUNT(1) consecuitive_days
FROM (
SELECT uid,
COUNTIF(step > 1) OVER(PARTITION BY uid ORDER BY ts) grp
FROM (
SELECT uid, ts,
DATE_DIFF(ts, LAG(ts) OVER(PARTITION BY uid ORDER BY ts), DAY) step
FROM `project.dataset.table`
)
) GROUP BY uid, grp
) GROUP BY uid
结果:
Row uid max_consecuitive_days
1 u12345 4
2 u1 2
现在我只有一个用户工作了多少天的总和。我正在尝试将此查询更改为最连续的工作天数。
其中 u12345 将是 4
而 u1 将是 2
.
这可能与 BigQuery 语句有关吗?
EDIT 我正在 Kind of close 进行以下查询,但我的 u1 得到的是 3 而不是 2.
SELECT MIN(e.timestamp) as date_created, e.uid, COUNT(e.uid) + 1 AS streak
FROM OnSite e
LEFT JOIN OnSite ee
ON e.uid = ee.uid
AND DATE(e.timestamp) = DATE(DATE_ADD(ee.timestamp, INTERVAL -1 DAY))
WHERE ee.uid IS NOT NULL
GROUP BY e.uid;
架构(MySQL v5.7)
CREATE TABLE OnSite
(`uid` varchar(55), `worksite_id` varchar(55), `timestamp` datetime)
;
INSERT INTO OnSite
(`uid`, `worksite_id`, `timestamp`)
VALUES
("u12345", "worksite_1", '2019-01-01'),
("u12345", "worksite_1", '2019-01-02'),
("u12345", "worksite_1", '2019-01-03'),
("u12345", "worksite_1", '2019-01-04'),
("u12345", "worksite_1", '2019-01-06'),
("u1", "worksite_1", '2019-01-01'),
("u1", "worksite_1", '2019-01-02'),
("u1", "worksite_1", '2019-01-05'),
("u1", "worksite_1", '2019-01-06')
;
查询#1
SELECT uid, COUNT(DISTINCT timestamp) Total
FROM OnSite
GROUP BY uid;
| uid | Total |
| ------ | ----- |
| u1 | 4 |
| u12345 | 5 |
这适合你吗?
set @gr=1;
select uid, max(cnt) max_cnt from (
select uid, grp, count(*) cnt from (
select uid,
case when ifnull(DATE_ADD(oldDate, INTERVAL 1 DAY), timestamp)= timestamp then
@gr
else
@gr := @gr +1
end grp
from
(
SELECT
uid,
timestamp,
lag(timestamp) over (partition by uid order by timestamp asc) as oldDate
FROM OnSite
) t
)t2
group by uid, grp
)t3
group by uid
结果
| uid | max_cnt |
| ------ | ------- |
| u1 | 2 |
| u12345 | 4 |
以下适用于 BigQuery 标准 SQL
如果您对同一工作站点上用户的最大连续天数感兴趣:
#standardSQL
SELECT uid, MAX(consecuitive_days) max_consecuitive_days
FROM (
SELECT uid, grp, COUNT(1) consecuitive_days
FROM (
SELECT uid,
COUNTIF(step > 1) OVER(PARTITION BY uid, worksite_id ORDER BY ts) grp
FROM (
SELECT uid, worksite_id, ts,
DATE_DIFF(ts, LAG(ts) OVER(PARTITION BY uid, worksite_id ORDER BY ts), DAY) step
FROM `project.dataset.table`
)
) GROUP BY uid, grp
) GROUP BY uid
如果工作地点无关紧要,而您只是在寻找最大连续天数:
#standardSQL
SELECT uid, MAX(consecuitive_days) max_consecuitive_days
FROM (
SELECT uid, grp, COUNT(1) consecuitive_days
FROM (
SELECT uid,
COUNTIF(step > 1) OVER(PARTITION BY uid ORDER BY ts) grp
FROM (
SELECT uid, ts,
DATE_DIFF(ts, LAG(ts) OVER(PARTITION BY uid ORDER BY ts), DAY) step
FROM `project.dataset.table`
)
) GROUP BY uid, grp
) GROUP BY uid
您可以使用您问题中的示例数据来测试、播放以上任何内容,如下例所示
#standardSQL
WITH `project.dataset.table` AS (
SELECT 'u12345' uid, 'worksite_1' worksite_id, DATE '2019-01-01' ts UNION ALL
SELECT 'u12345', 'worksite_1', '2019-01-02' UNION ALL
SELECT 'u12345', 'worksite_1', '2019-01-03' UNION ALL
SELECT 'u12345', 'worksite_1', '2019-01-04' UNION ALL
SELECT 'u12345', 'worksite_1', '2019-01-06' UNION ALL
SELECT 'u1', 'worksite_1', '2019-01-01' UNION ALL
SELECT 'u1', 'worksite_1', '2019-01-02' UNION ALL
SELECT 'u1', 'worksite_1', '2019-01-05' UNION ALL
SELECT 'u1', 'worksite_1', '2019-01-06'
)
SELECT uid, MAX(consecuitive_days) max_consecuitive_days
FROM (
SELECT uid, grp, COUNT(1) consecuitive_days
FROM (
SELECT uid,
COUNTIF(step > 1) OVER(PARTITION BY uid ORDER BY ts) grp
FROM (
SELECT uid, ts,
DATE_DIFF(ts, LAG(ts) OVER(PARTITION BY uid ORDER BY ts), DAY) step
FROM `project.dataset.table`
)
) GROUP BY uid, grp
) GROUP BY uid
结果:
Row uid max_consecuitive_days
1 u12345 4
2 u1 2