根据在另一个 table 中配置的时间 window 周期聚合事件 table
Aggregating over an Event table based on time-window periods configured in another table
我有三个表,UpEvent、DownEvent 和 AnalysisWindow
UpEvent:
up_event_id | event_date | EventMetric
1 2015-01-01T06:00:00 54
2 2015-01-01T07:30:00 76
DownEvent:
down_event_id | event_date | EventMetric
1 2015-01-01T06:46:00 22
2 2015-01-01T07:33:00 34
AnalysisWindow:
window_id | win_start | win_end
1 2015-01-01T00:00:00 2015-01-01T04:00:00
2 2015-01-01T00:00:00 2015-01-01T08:00:00
.
.
我想在每个 AnalysisWindow 上进行分析,以便汇总在定义的 window 之间发生的 UpEvent 和 DownEvent。
所以对于每个 AnalysisWindow 记录,我最终会得到 1 个特征行:
WinStart | WinEnd | TotalUpEvents | TotalDownEvents
2015-01-01T00:00:00 2015-01-01T04:00:00 0 0
2015-01-01T00:00:00 2015-01-01T08:00:00 2 2
我的第一个想法是做类似的事情
-- First attempt from the question: join every analysis window to the UpEvent
-- rows whose event_date falls inside it, then count those rows and sum their
-- EventMetric.
-- NOTE(review): there is no GROUP BY, so the plain win_start / win_end columns
-- are selected next to aggregates; this is presumably why the query fails.
select win.win_start,
win.win_end,
count(ue.*),
sum(ue.EventMetric)
from AnalysisWindow win
-- left join so that windows without any UpEvent row are still kept
left join UpEvent ue on (ue.event_date between win.win_start and win.win_end)
这显然行不通。
我处理这个问题的方式是否不对?我想按照我配置的各个 window 对这些表进行 windowed 分析,每个 window
获得 1 条聚合记录。
一种方法使用相关子查询:
-- One output row per analysis window, with the number of UpEvent and DownEvent
-- rows whose event_date lies inside the window (BETWEEN includes both ends).
-- count(*) in a scalar subquery yields 0 when nothing matches, so windows
-- without events still appear with zero counts.
select aw.*,
       (select count(*)
        from UpEvent ue
        where ue.event_date between aw.win_start and aw.win_end
       ) as ups,
       (select count(*)
        from DownEvent de
        where de.event_date between aw.win_start and aw.win_end
       ) as downs
from AnalysisWindow aw;
以上是有效的,至少当表述为:
-- Self-contained demonstration: the three tables are mocked as CTEs holding the
-- sample rows, then each window is paired with its up / down event counts.
-- The timestamps are string literals in a fixed-width ISO-8601 layout, so the
-- BETWEEN comparison on strings follows chronological order.
WITH UpEvent AS (
    SELECT 1 AS up_event_id, '2015-01-01T06:00:00' AS event_date, 54 AS EventMetric
    UNION ALL
    SELECT 2, '2015-01-01T07:30:00', 76
),
DownEvent AS (
    SELECT 1 AS down_event_id, '2015-01-01T06:46:00' AS event_date, 22 AS EventMetric
    UNION ALL
    SELECT 2, '2015-01-01T07:33:00', 34
),
AnalysisWindow AS (
    SELECT 1 AS window_id, '2015-01-01T00:00:00' AS win_start, '2015-01-01T04:00:00' AS win_end
    UNION ALL
    SELECT 2, '2015-01-01T00:00:00', '2015-01-01T08:00:00'
)
SELECT
    w.*,
    -- events inside the window, both boundaries included
    (
        SELECT COUNT(*)
        FROM UpEvent u
        WHERE u.event_date BETWEEN w.win_start AND w.win_end
    ) AS ups,
    (
        SELECT COUNT(*)
        FROM DownEvent d
        WHERE d.event_date BETWEEN w.win_start AND w.win_end
    ) AS downs
FROM AnalysisWindow w;
另一种方法是使用 union all:
-- Alternative: stack both event tables into one stream in which each row is
-- flagged as an up (1, 0) or a down (0, 1) event, then aggregate per window.
with ud as (
    select event_date, 1 as ups, 0 as downs from UpEvent
    union all
    select event_date, 0 as ups, 1 as downs from DownEvent
)
select aw.window_id, aw.win_start, aw.win_end,
       sum(ud.ups) as ups,
       sum(ud.downs) as downs
from AnalysisWindow aw
join ud
  on ud.event_date between aw.win_start and aw.win_end
group by aw.window_id, aw.win_start, aw.win_end
union all
-- the inner join above drops windows that contain no event at all;
-- this branch adds them back with zero counts
select aw.window_id, aw.win_start, aw.win_end, 0, 0
from AnalysisWindow aw
where not exists (select 1 from ud where ud.event_date between aw.win_start and aw.win_end);
以下适用于 BigQuery 标准 SQL(并且确实有效!)
#standardSQL
-- Per-window event totals: count up and down events separately, then attach
-- both counts to the full window list so that empty windows report 0.
WITH ue_win AS (
  -- UpEvent rows per window; a window with no match has no row here
  SELECT
    win.window_id,
    COUNT(*) AS TotalUpEvents
  FROM `project.dataset.AnalysisWindow` AS win
  INNER JOIN `project.dataset.UpEvent` AS ue
    ON ue.event_date BETWEEN win.win_start AND win.win_end
  GROUP BY win.window_id
),
de_win AS (
  -- DownEvent rows per window; a window with no match has no row here
  SELECT
    win.window_id,
    COUNT(*) AS TotalDownEvents
  FROM `project.dataset.AnalysisWindow` AS win
  INNER JOIN `project.dataset.DownEvent` AS de
    ON de.event_date BETWEEN win.win_start AND win.win_end
  GROUP BY win.window_id
)
SELECT
  window_id,
  win_start,
  win_end,
  -- a missing count row surfaces as NULL after the left join; report it as 0
  COALESCE(TotalUpEvents, 0) AS TotalUpEvents,
  COALESCE(TotalDownEvents, 0) AS TotalDownEvents
FROM `project.dataset.AnalysisWindow` AS win
LEFT JOIN ue_win USING (window_id)
LEFT JOIN de_win USING (window_id)
我有三个表,UpEvent、DownEvent 和 AnalysisWindow
UpEvent:
up_event_id | event_date | EventMetric
1 2015-01-01T06:00:00 54
2 2015-01-01T07:30:00 76
DownEvent:
down_event_id | event_date | EventMetric
1 2015-01-01T06:46:00 22
2 2015-01-01T07:33:00 34
AnalysisWindow:
window_id | win_start | win_end
1 2015-01-01T00:00:00 2015-01-01T04:00:00
2 2015-01-01T00:00:00 2015-01-01T08:00:00
.
.
我想在每个 AnalysisWindow 上进行分析,以便汇总在定义的 window 之间发生的 UpEvent 和 DownEvent。
所以对于每个 AnalysisWindow 记录,我最终会得到 1 个特征行:
WinStart | WinEnd | TotalUpEvents | TotalDownEvents
2015-01-01T00:00:00 2015-01-01T04:00:00 0 0
2015-01-01T00:00:00 2015-01-01T08:00:00 2 2
我的第一个想法是做类似的事情
-- First attempt from the question: join every analysis window to the UpEvent
-- rows whose event_date falls inside it, then count those rows and sum their
-- EventMetric.
-- NOTE(review): there is no GROUP BY, so the plain win_start / win_end columns
-- are selected next to aggregates; this is presumably why the query fails.
select win.win_start,
win.win_end,
count(ue.*),
sum(ue.EventMetric)
from AnalysisWindow win
-- left join so that windows without any UpEvent row are still kept
left join UpEvent ue on (ue.event_date between win.win_start and win.win_end)
这显然行不通。
我处理这个问题的方式是否不对?我想按照我配置的各个 window 对这些表进行 windowed 分析,每个 window
获得 1 条聚合记录。
一种方法是使用相关子查询:
-- One output row per analysis window, with the number of UpEvent and DownEvent
-- rows whose event_date lies inside the window (BETWEEN includes both ends).
-- count(*) in a scalar subquery yields 0 when nothing matches, so windows
-- without events still appear with zero counts.
select aw.*,
       (select count(*)
        from UpEvent ue
        where ue.event_date between aw.win_start and aw.win_end
       ) as ups,
       (select count(*)
        from DownEvent de
        where de.event_date between aw.win_start and aw.win_end
       ) as downs
from AnalysisWindow aw;
以上是有效的,至少当表述为:
-- Self-contained demonstration: the three tables are mocked as CTEs holding the
-- sample rows, then each window is paired with its up / down event counts.
-- The timestamps are string literals in a fixed-width ISO-8601 layout, so the
-- BETWEEN comparison on strings follows chronological order.
WITH UpEvent AS (
    SELECT 1 AS up_event_id, '2015-01-01T06:00:00' AS event_date, 54 AS EventMetric
    UNION ALL
    SELECT 2, '2015-01-01T07:30:00', 76
),
DownEvent AS (
    SELECT 1 AS down_event_id, '2015-01-01T06:46:00' AS event_date, 22 AS EventMetric
    UNION ALL
    SELECT 2, '2015-01-01T07:33:00', 34
),
AnalysisWindow AS (
    SELECT 1 AS window_id, '2015-01-01T00:00:00' AS win_start, '2015-01-01T04:00:00' AS win_end
    UNION ALL
    SELECT 2, '2015-01-01T00:00:00', '2015-01-01T08:00:00'
)
SELECT
    w.*,
    -- events inside the window, both boundaries included
    (
        SELECT COUNT(*)
        FROM UpEvent u
        WHERE u.event_date BETWEEN w.win_start AND w.win_end
    ) AS ups,
    (
        SELECT COUNT(*)
        FROM DownEvent d
        WHERE d.event_date BETWEEN w.win_start AND w.win_end
    ) AS downs
FROM AnalysisWindow w;
另一种方法是使用 union all:
-- Alternative: stack both event tables into one stream in which each row is
-- flagged as an up (1, 0) or a down (0, 1) event, then aggregate per window.
with ud as (
    select event_date, 1 as ups, 0 as downs from UpEvent
    union all
    select event_date, 0 as ups, 1 as downs from DownEvent
)
select aw.window_id, aw.win_start, aw.win_end,
       sum(ud.ups) as ups,
       sum(ud.downs) as downs
from AnalysisWindow aw
join ud
  on ud.event_date between aw.win_start and aw.win_end
group by aw.window_id, aw.win_start, aw.win_end
union all
-- the inner join above drops windows that contain no event at all;
-- this branch adds them back with zero counts
select aw.window_id, aw.win_start, aw.win_end, 0, 0
from AnalysisWindow aw
where not exists (select 1 from ud where ud.event_date between aw.win_start and aw.win_end);
以下适用于 BigQuery 标准 SQL(并且确实有效!)
#standardSQL
-- Per-window event totals: count up and down events separately, then attach
-- both counts to the full window list so that empty windows report 0.
WITH ue_win AS (
  -- UpEvent rows per window; a window with no match has no row here
  SELECT
    win.window_id,
    COUNT(*) AS TotalUpEvents
  FROM `project.dataset.AnalysisWindow` AS win
  INNER JOIN `project.dataset.UpEvent` AS ue
    ON ue.event_date BETWEEN win.win_start AND win.win_end
  GROUP BY win.window_id
),
de_win AS (
  -- DownEvent rows per window; a window with no match has no row here
  SELECT
    win.window_id,
    COUNT(*) AS TotalDownEvents
  FROM `project.dataset.AnalysisWindow` AS win
  INNER JOIN `project.dataset.DownEvent` AS de
    ON de.event_date BETWEEN win.win_start AND win.win_end
  GROUP BY win.window_id
)
SELECT
  window_id,
  win_start,
  win_end,
  -- a missing count row surfaces as NULL after the left join; report it as 0
  COALESCE(TotalUpEvents, 0) AS TotalUpEvents,
  COALESCE(TotalDownEvents, 0) AS TotalDownEvents
FROM `project.dataset.AnalysisWindow` AS win
LEFT JOIN ue_win USING (window_id)
LEFT JOIN de_win USING (window_id)