重写查询以使用分析函数
Rewrite query to use Analytic Functions
我有一张事件表,用于记录元素的 Insert、Update 和 Delete 事件。
最小可复现示例(MWE)见此处:http://sqlfiddle.com/#!4/6c2cb1/1
DDL 语句
-- Event log: one row per protocolled change (insert, update or delete).
CREATE TABLE events (
    event_id          VARCHAR2(30 CHAR),  -- name of the event
    event_location    VARCHAR2(60 CHAR),  -- location at which the event occurred
    event_trigger     VARCHAR2(2 CHAR),   -- trigger that logged the event: I, U or D
    event_change_id   NUMBER,             -- unique sequence number
    event_change_date DATE DEFAULT SYSTIMESTAMP
);

-- EVENT1 (stamped one day back): insert at LOC1, two updates, delete at LOC7.
INSERT INTO events (event_id, event_location, event_trigger, event_change_id, event_change_date)
VALUES ('EVENT1', 'LOC1', 'I', 1, SYSTIMESTAMP - 1);

INSERT INTO events (event_id, event_location, event_trigger, event_change_id, event_change_date)
VALUES ('EVENT1', 'LOC2', 'U', 11, SYSTIMESTAMP - 1);

INSERT INTO events (event_id, event_location, event_trigger, event_change_id, event_change_date)
VALUES ('EVENT1', 'LOC4', 'U', 117, SYSTIMESTAMP - 1);

INSERT INTO events (event_id, event_location, event_trigger, event_change_id, event_change_date)
VALUES ('EVENT1', 'LOC7', 'D', 1430, SYSTIMESTAMP - 1);

-- EVENT2 (stamped 1/48 day = 30 minutes back): insert at LOC1, one update,
-- a delete at LOC5 and then a delete at LOC7.
INSERT INTO events (event_id, event_location, event_trigger, event_change_id, event_change_date)
VALUES ('EVENT2', 'LOC1', 'I', 2, SYSTIMESTAMP - 1/48);

INSERT INTO events (event_id, event_location, event_trigger, event_change_id, event_change_date)
VALUES ('EVENT2', 'LOC2', 'U', 131, SYSTIMESTAMP - 1/48);

INSERT INTO events (event_id, event_location, event_trigger, event_change_id, event_change_date)
VALUES ('EVENT2', 'LOC5', 'D', 11337, SYSTIMESTAMP - 1/48);

INSERT INTO events (event_id, event_location, event_trigger, event_change_id, event_change_date)
VALUES ('EVENT2', 'LOC7', 'D', 14430, SYSTIMESTAMP - 1/48);
我想统计满足以下条件的事件数量:在 LOC1 被插入(I)、在 LOC7 被删除(D),并且两者之间没有任何其他删除(D)记录。
-- Per day of the insert, count LOC1 inserts that are followed, within the same
-- event, by a LOC7 delete with no other delete in between.
-- "Before/after" within an event is decided by EVENT_CHANGE_ID.
WITH loc1_inserts AS (
    SELECT EVENT_ID, EVENT_CHANGE_ID, EVENT_CHANGE_DATE
    FROM EVENTS
    WHERE EVENT_TRIGGER = 'I'
      AND EVENT_LOCATION = 'LOC1'
),
loc7_deletes AS (
    SELECT EVENT_ID, EVENT_CHANGE_ID
    FROM EVENTS
    WHERE EVENT_TRIGGER = 'D'
      AND EVENT_LOCATION = 'LOC7'
)
SELECT
    COUNT(*) AS QTY,
    TRUNC(ins.EVENT_CHANGE_DATE) AS DAY
FROM loc1_inserts ins
INNER JOIN loc7_deletes del
    ON del.EVENT_ID = ins.EVENT_ID
   AND del.EVENT_CHANGE_ID > ins.EVENT_CHANGE_ID
-- Reject the pair if any delete of the same event lies strictly between them.
WHERE NOT EXISTS (
    SELECT 1
    FROM EVENTS mid
    WHERE mid.EVENT_TRIGGER = 'D'
      AND mid.EVENT_ID = ins.EVENT_ID
      AND mid.EVENT_CHANGE_ID > ins.EVENT_CHANGE_ID
      AND mid.EVENT_CHANGE_ID < del.EVENT_CHANGE_ID
)
GROUP BY TRUNC(ins.EVENT_CHANGE_DATE)
ORDER BY TRUNC(ins.EVENT_CHANGE_DATE);
我这种朴素的写法可以得到正确结果,但我想知道能否用分析函数重写这个查询。
原表最多包含 100 万条记录,对表做 3 次全表扫描在执行时间和性能上都不划算。
是否可以使用分析函数使此查询更高效?
这看起来很适合 SQL 模式匹配:
-- Per event, find an insert at LOC1 followed by any number of updates and
-- then a delete at LOC7.
-- Rows are sequenced by event_change_id (the unique sequence number) instead
-- of event_change_date: the DATE column only has second precision, so several
-- changes of one event can carry the same value, which would leave the row
-- order, and therefore the match, undefined.
select * from events
match_recognize (
  partition by event_id
  order by event_change_id
  measures
    count ( ins.* ) ins_count,    -- number of rows mapped to INS in the match
    min ( event_change_date ) dt  -- earliest change date within the match
  pattern ( ins upd* del )
  define
    ins as event_trigger = 'I' and event_location = 'LOC1',
    upd as event_trigger = 'U',
    del as event_trigger = 'D' and event_location = 'LOC7'
);
INS_COUNT DT
1 16-MAR-2020 12:33:58
这会在 LOC1 处搜索 I(nserts),然后在 LOC7 处搜索 D(elete),中间有任意数量的 U(pdates)。
也可以只使用经典的分析函数来实现。
首先只筛选出相关的事件:
(EVENT_TRIGGER = 'I' AND EVENT_LOCATION = 'LOC1') OR -- only LOC1 inserts
EVENT_TRIGGER = 'D') -- all deletes
然后用 LEAD 取得下一条 D(删除)记录,并检查它的位置:
-- Keep only the rows that matter (inserts at LOC1 and every delete), then look
-- one row ahead within each event. An insert qualifies when the row that
-- follows it is a delete at LOC7.
-- The look-ahead is sequenced by EVENT_CHANGE_ID (the unique sequence number):
-- EVENT_CHANGE_DATE can tie within an event, and breaking ties by
-- EVENT_LOCATION would order rows alphabetically, not chronologically.
with evnt as
(
select EVENT_ID, EVENT_LOCATION, EVENT_TRIGGER, EVENT_CHANGE_DATE,
  lead(EVENT_TRIGGER) over (PARTITION BY EVENT_ID
                            order by EVENT_CHANGE_ID)
    as EVENT_TRIGGER_LEAD,
  lead(EVENT_LOCATION) over (PARTITION BY EVENT_ID
                             order by EVENT_CHANGE_ID)
    as EVENT_LOCATION_LEAD
from EVENTS
where (EVENT_TRIGGER = 'I' AND EVENT_LOCATION = 'LOC1') OR EVENT_TRIGGER = 'D'
)
select
  EVENT_ID, EVENT_LOCATION, EVENT_TRIGGER, EVENT_CHANGE_DATE,
  EVENT_TRIGGER_LEAD, EVENT_LOCATION_LEAD
from evnt
where EVENT_TRIGGER = 'I'
  and EVENT_TRIGGER_LEAD = 'D'
  and EVENT_LOCATION_LEAD = 'LOC7'
order by EVENT_ID, EVENT_CHANGE_DATE, EVENT_LOCATION;
你可以使用分析函数 SUM:遇到位于 LOC1 的 I 时加 1,遇到 D 时加 -1;最终结果就是 sum = 0 且 location 为 LOC7 的记录。
查看答案:
-- Running balance per event: +1 for an insert at LOC1, -1 for every delete
-- (updates contribute NULL and are ignored by SUM). A LOC7 row whose balance
-- is 0 is reported.
-- The running total is sequenced by EVENT_CHANGE_ID with an explicit ROWS
-- frame: ordering by EVENT_CHANGE_DATE under the default RANGE frame would add
-- all rows sharing the same date at once instead of one row at a time.
-- (SQL*Plus prompt and line-number prefixes removed so the statement can be
-- run as-is.)
SELECT EVENT_ID
FROM (
    SELECT SUM(CASE
                   WHEN EVENT_LOCATION = 'LOC1' AND EVENT_TRIGGER = 'I' THEN 1
                   WHEN EVENT_TRIGGER = 'D' THEN -1
               END) OVER (PARTITION BY EVENT_ID
                          ORDER BY EVENT_CHANGE_ID
                          ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) AS SM,
           T.*
    FROM EVENTS T
) T
WHERE EVENT_LOCATION = 'LOC7' AND SM = 0;
EVENT_ID
------------
EVENT1
SQL>
干杯!!
使用 LEAD 分析函数:
-- For each calendar day, order that day's rows by the trailing digit of
-- event_location and pair every row with the trigger of the row after it.
-- A row is counted when it is not a delete and the following row carries the
-- same trigger.
-- NOTE(review): the window is partitioned by day only, not by event_id, and
-- rows sharing a location digit have no defined relative order; confirm this
-- pairing is what is intended.
WITH paired AS (
    SELECT
        event_trigger,
        event_change_date,
        LEAD(event_trigger) OVER (
            PARTITION BY TRUNC(event_change_date)
            ORDER BY TO_NUMBER(SUBSTR(event_location, -1, 1))
        ) AS next_trigger
    FROM events
)
SELECT
    COUNT(*) AS qty,
    TRUNC(event_change_date) AS day
FROM paired
-- next_trigger = event_trigger together with event_trigger <> 'D' already
-- implies next_trigger <> 'D'.
WHERE event_trigger <> 'D'
  AND next_trigger = event_trigger
GROUP BY TRUNC(event_change_date);
QTY DAY
---------- --------
1 16-03-20
逻辑:
- 按天对事件分组,用 SUBSTR 从字符串末尾取出位置编号(1 到 7),并据此排序。
- 使用 LEAD 将 event_trigger 与其后一行(lead)的值进行比较。
- 在按日期划分的每个分区中,位置 1 到 7 之间的 event_trigger 不应出现 DELETE。
我有一张事件表,用于记录元素的 Insert、Update 和 Delete 事件。最小可复现示例(MWE)见此处:http://sqlfiddle.com/#!4/6c2cb1/1
DDL 语句
-- Event log: one row per protocolled change (insert, update or delete).
CREATE TABLE events (
    event_id          VARCHAR2(30 CHAR),  -- name of the event
    event_location    VARCHAR2(60 CHAR),  -- location at which the event occurred
    event_trigger     VARCHAR2(2 CHAR),   -- trigger that logged the event: I, U or D
    event_change_id   NUMBER,             -- unique sequence number
    event_change_date DATE DEFAULT SYSTIMESTAMP
);

-- EVENT1 (stamped one day back): insert at LOC1, two updates, delete at LOC7.
INSERT INTO events (event_id, event_location, event_trigger, event_change_id, event_change_date)
VALUES ('EVENT1', 'LOC1', 'I', 1, SYSTIMESTAMP - 1);

INSERT INTO events (event_id, event_location, event_trigger, event_change_id, event_change_date)
VALUES ('EVENT1', 'LOC2', 'U', 11, SYSTIMESTAMP - 1);

INSERT INTO events (event_id, event_location, event_trigger, event_change_id, event_change_date)
VALUES ('EVENT1', 'LOC4', 'U', 117, SYSTIMESTAMP - 1);

INSERT INTO events (event_id, event_location, event_trigger, event_change_id, event_change_date)
VALUES ('EVENT1', 'LOC7', 'D', 1430, SYSTIMESTAMP - 1);

-- EVENT2 (stamped 1/48 day = 30 minutes back): insert at LOC1, one update,
-- a delete at LOC5 and then a delete at LOC7.
INSERT INTO events (event_id, event_location, event_trigger, event_change_id, event_change_date)
VALUES ('EVENT2', 'LOC1', 'I', 2, SYSTIMESTAMP - 1/48);

INSERT INTO events (event_id, event_location, event_trigger, event_change_id, event_change_date)
VALUES ('EVENT2', 'LOC2', 'U', 131, SYSTIMESTAMP - 1/48);

INSERT INTO events (event_id, event_location, event_trigger, event_change_id, event_change_date)
VALUES ('EVENT2', 'LOC5', 'D', 11337, SYSTIMESTAMP - 1/48);

INSERT INTO events (event_id, event_location, event_trigger, event_change_id, event_change_date)
VALUES ('EVENT2', 'LOC7', 'D', 14430, SYSTIMESTAMP - 1/48);
我想统计满足以下条件的事件数量:在 LOC1 被插入(I)、在 LOC7 被删除(D),并且两者之间没有任何其他删除(D)记录。
-- Per day of the insert, count LOC1 inserts that are followed, within the same
-- event, by a LOC7 delete with no other delete in between.
-- "Before/after" within an event is decided by EVENT_CHANGE_ID.
WITH loc1_inserts AS (
    SELECT EVENT_ID, EVENT_CHANGE_ID, EVENT_CHANGE_DATE
    FROM EVENTS
    WHERE EVENT_TRIGGER = 'I'
      AND EVENT_LOCATION = 'LOC1'
),
loc7_deletes AS (
    SELECT EVENT_ID, EVENT_CHANGE_ID
    FROM EVENTS
    WHERE EVENT_TRIGGER = 'D'
      AND EVENT_LOCATION = 'LOC7'
)
SELECT
    COUNT(*) AS QTY,
    TRUNC(ins.EVENT_CHANGE_DATE) AS DAY
FROM loc1_inserts ins
INNER JOIN loc7_deletes del
    ON del.EVENT_ID = ins.EVENT_ID
   AND del.EVENT_CHANGE_ID > ins.EVENT_CHANGE_ID
-- Reject the pair if any delete of the same event lies strictly between them.
WHERE NOT EXISTS (
    SELECT 1
    FROM EVENTS mid
    WHERE mid.EVENT_TRIGGER = 'D'
      AND mid.EVENT_ID = ins.EVENT_ID
      AND mid.EVENT_CHANGE_ID > ins.EVENT_CHANGE_ID
      AND mid.EVENT_CHANGE_ID < del.EVENT_CHANGE_ID
)
GROUP BY TRUNC(ins.EVENT_CHANGE_DATE)
ORDER BY TRUNC(ins.EVENT_CHANGE_DATE);
我这种朴素的写法可以得到正确结果,但我想知道能否用分析函数重写这个查询。原表最多包含 100 万条记录,对表做 3 次全表扫描在执行时间和性能上都不划算。
是否可以使用分析函数使此查询更高效?
这看起来很适合 SQL 模式匹配:
-- Per event, find an insert at LOC1 followed by any number of updates and
-- then a delete at LOC7.
-- Rows are sequenced by event_change_id (the unique sequence number) instead
-- of event_change_date: the DATE column only has second precision, so several
-- changes of one event can carry the same value, which would leave the row
-- order, and therefore the match, undefined.
select * from events
match_recognize (
  partition by event_id
  order by event_change_id
  measures
    count ( ins.* ) ins_count,    -- number of rows mapped to INS in the match
    min ( event_change_date ) dt  -- earliest change date within the match
  pattern ( ins upd* del )
  define
    ins as event_trigger = 'I' and event_location = 'LOC1',
    upd as event_trigger = 'U',
    del as event_trigger = 'D' and event_location = 'LOC7'
);
INS_COUNT DT
1 16-MAR-2020 12:33:58
这会在 LOC1 处搜索 I(nserts),然后在 LOC7 处搜索 D(elete),中间有任意数量的 U(pdates)。
也可以只使用经典的分析函数来实现。
首先只筛选出相关的事件:
(EVENT_TRIGGER = 'I' AND EVENT_LOCATION = 'LOC1') OR -- only LOC1 inserts
EVENT_TRIGGER = 'D') -- all deletes
然后用 LEAD 取得下一条 D(删除)记录,并检查它的位置:
-- Keep only the rows that matter (inserts at LOC1 and every delete), then look
-- one row ahead within each event. An insert qualifies when the row that
-- follows it is a delete at LOC7.
-- The look-ahead is sequenced by EVENT_CHANGE_ID (the unique sequence number):
-- EVENT_CHANGE_DATE can tie within an event, and breaking ties by
-- EVENT_LOCATION would order rows alphabetically, not chronologically.
with evnt as
(
select EVENT_ID, EVENT_LOCATION, EVENT_TRIGGER, EVENT_CHANGE_DATE,
  lead(EVENT_TRIGGER) over (PARTITION BY EVENT_ID
                            order by EVENT_CHANGE_ID)
    as EVENT_TRIGGER_LEAD,
  lead(EVENT_LOCATION) over (PARTITION BY EVENT_ID
                             order by EVENT_CHANGE_ID)
    as EVENT_LOCATION_LEAD
from EVENTS
where (EVENT_TRIGGER = 'I' AND EVENT_LOCATION = 'LOC1') OR EVENT_TRIGGER = 'D'
)
select
  EVENT_ID, EVENT_LOCATION, EVENT_TRIGGER, EVENT_CHANGE_DATE,
  EVENT_TRIGGER_LEAD, EVENT_LOCATION_LEAD
from evnt
where EVENT_TRIGGER = 'I'
  and EVENT_TRIGGER_LEAD = 'D'
  and EVENT_LOCATION_LEAD = 'LOC7'
order by EVENT_ID, EVENT_CHANGE_DATE, EVENT_LOCATION;
你可以使用分析函数 SUM:遇到位于 LOC1 的 I 时加 1,遇到 D 时加 -1;最终结果就是 sum = 0 且 location 为 LOC7 的记录。
查看答案:
-- Running balance per event: +1 for an insert at LOC1, -1 for every delete
-- (updates contribute NULL and are ignored by SUM). A LOC7 row whose balance
-- is 0 is reported.
-- The running total is sequenced by EVENT_CHANGE_ID with an explicit ROWS
-- frame: ordering by EVENT_CHANGE_DATE under the default RANGE frame would add
-- all rows sharing the same date at once instead of one row at a time.
-- (SQL*Plus prompt and line-number prefixes removed so the statement can be
-- run as-is.)
SELECT EVENT_ID
FROM (
    SELECT SUM(CASE
                   WHEN EVENT_LOCATION = 'LOC1' AND EVENT_TRIGGER = 'I' THEN 1
                   WHEN EVENT_TRIGGER = 'D' THEN -1
               END) OVER (PARTITION BY EVENT_ID
                          ORDER BY EVENT_CHANGE_ID
                          ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) AS SM,
           T.*
    FROM EVENTS T
) T
WHERE EVENT_LOCATION = 'LOC7' AND SM = 0;
EVENT_ID
------------
EVENT1
SQL>
干杯!!
使用 LEAD 分析函数:
-- For each calendar day, order that day's rows by the trailing digit of
-- event_location and pair every row with the trigger of the row after it.
-- A row is counted when it is not a delete and the following row carries the
-- same trigger.
-- NOTE(review): the window is partitioned by day only, not by event_id, and
-- rows sharing a location digit have no defined relative order; confirm this
-- pairing is what is intended.
WITH paired AS (
    SELECT
        event_trigger,
        event_change_date,
        LEAD(event_trigger) OVER (
            PARTITION BY TRUNC(event_change_date)
            ORDER BY TO_NUMBER(SUBSTR(event_location, -1, 1))
        ) AS next_trigger
    FROM events
)
SELECT
    COUNT(*) AS qty,
    TRUNC(event_change_date) AS day
FROM paired
-- next_trigger = event_trigger together with event_trigger <> 'D' already
-- implies next_trigger <> 'D'.
WHERE event_trigger <> 'D'
  AND next_trigger = event_trigger
GROUP BY TRUNC(event_change_date);
QTY DAY
---------- --------
1 16-03-20
逻辑:
- 按天对事件分组,用 SUBSTR 从字符串末尾取出位置编号(1 到 7),并据此排序。
- 使用 LEAD 将 event_trigger 与其后一行(lead)的值进行比较。
- 在按日期划分的每个分区中,位置 1 到 7 之间的 event_trigger 不应出现 DELETE。