用于数据过滤的 Oracle SQL 函数或桶
Oracle SQL function or buckets for data filtering
-- Number each mail's transactions in date order.
-- Oracle fixes: DATE and ROWNUM are reserved words, so the date column is
-- referenced as a quoted identifier and the row-number alias is renamed to rn.
-- NOTE(review): assumes the columns were created as quoted lowercase
-- identifiers ("date", "transaction") -- confirm against the table DDL.
SELECT
"transaction"
,"date"
,mail
,status
,ROW_NUMBER() OVER (PARTITION BY mail ORDER BY "date") AS rn
FROM table1
基于上面的表和脚本,我希望按行号(rowid)来筛选交易:如果前 3 行的状态都是 'failed',则显示第 4 行;同样,如果第 4、5、6 行的交易也都失败,则显示第 7 行,依此类推。我考虑过把数据放进 pandas 数据框,然后在其中运行一个简单的 lambda 函数,但真的很想找到只用 SQL 的解决方案。
试试这个:
-- Keep every third FAILED row (1st, 4th, 7th, ...) of each mail.
-- The row number is computed after the status filter, so it counts FAILED
-- rows only; rows with other statuses in between do not restart the numbering.
-- Oracle fixes: the alias was ROWNUM (a reserved word, and in the outer query
-- it would mean the ROWNUM pseudocolumn rather than the window result), so it
-- is renamed to rn; the reserved word DATE is referenced as a quoted identifier.
-- NOTE(review): assumes the columns were created as quoted lowercase
-- identifiers ("date", "transaction") -- confirm against the table DDL.
SELECT *
FROM (
    SELECT
        "transaction"
        ,"date"
        ,mail
        ,status
        ,ROW_NUMBER() OVER (PARTITION BY mail ORDER BY "date") AS rn
    FROM table1
    WHERE status = 'FAILED'
)
WHERE MOD(rn, 3) = 1;
理查德
一种选择是使用窗口函数。使用 lag
获取前一行的状态值(基于指定的排序),将其与当前行的值进行比较,并用累计和(running sum)来划分分组。然后统计每组中的行数,最后按该条件进行筛选。
-- Return every row that belongs to a group of at least 4 rows, where a group
-- is a run of consecutive FAILED rows of one mail (ordered by "date",
-- "transaction"). Non-FAILED rows always form single-row groups.
WITH with_prev AS (
    -- Step 1: attach the previous row's status within the same mail.
    SELECT src.*,
           LAG(status) OVER (PARTITION BY mail ORDER BY "date", "transaction") AS prev_status
    FROM tbl src
),
with_grp AS (
    -- Step 2: running sum used as a group id. It stays flat (adds 0) while a
    -- FAILED row follows another FAILED row or is the first row of the
    -- partition; every other row adds 1 and therefore opens a new group.
    SELECT p.*,
           SUM(CASE
                   WHEN status = 'FAILED'
                        AND (prev_status IS NULL OR prev_status = 'FAILED') THEN 0
                   ELSE 1
               END) OVER (PARTITION BY mail ORDER BY "date", "transaction") AS grp
    FROM with_prev p
),
with_count AS (
    -- Step 3: size of each group.
    SELECT g.*,
           COUNT(*) OVER (PARTITION BY mail, grp) AS grp_count
    FROM with_grp g
)
SELECT c.*
FROM with_count c
WHERE grp_count >= 4
如果您使用的是从 Oracle 12c 开始的版本,可以选择使用 MATCH_RECOGNIZE
来简化这一过程。
-- Row pattern matching: per mail, in "date"/"transaction" order, find runs of
-- four or more consecutive rows whose status is 'FAILED' and return every row
-- of each run.
SELECT *
FROM tbl
MATCH_RECOGNIZE (
    PARTITION BY mail
    ORDER BY "date", "transaction"
    ALL ROWS PER MATCH
    AFTER MATCH SKIP TO LAST failed_row
    PATTERN (failed_row{4,})
    DEFINE
        failed_row AS (status = 'FAILED')
) mr
ORDER BY "date", "transaction"
您可以使用 lead()
和 lag()
明确检查:
-- Return every FAILED row that is part of at least 4 consecutive FAILED rows
-- of the same mail, by looking up to 3 rows back and 3 rows ahead.
-- status_N  = status N rows before the current row,
-- status_Nn = status N rows after the current row.
-- Fixes relative to the earlier version: the lag offsets are now 3/2/1 (all
-- three were 3), the first lead is aliased status_1n (it duplicated
-- status_3n), the inner table gets the t1 alias that t1.* refers to, the
-- unterminated 'FAILED literal is closed, and the reserved word DATE is
-- referenced as a quoted identifier.
-- NOTE(review): assumes the column was created as the quoted lowercase
-- identifier "date" -- confirm against the table DDL.
select t.*
from (select t1.*,
             lag(status, 3) over (partition by mail order by "date") as status_3,
             lag(status, 2) over (partition by mail order by "date") as status_2,
             lag(status, 1) over (partition by mail order by "date") as status_1,
             lead(status, 1) over (partition by mail order by "date") as status_1n,
             lead(status, 2) over (partition by mail order by "date") as status_2n,
             lead(status, 3) over (partition by mail order by "date") as status_3n
      from t t1
     ) t
where status = 'FAILED' and
      ( -- each branch is one of the four 4-row windows that contain the current row
        (status_3 = 'FAILED' and status_2 = 'FAILED' and status_1 = 'FAILED') or
        (status_2 = 'FAILED' and status_1 = 'FAILED' and status_1n = 'FAILED') or
        (status_1 = 'FAILED' and status_1n = 'FAILED' and status_2n = 'FAILED') or
        (status_1n = 'FAILED' and status_2n = 'FAILED' and status_3n = 'FAILED')
      )
这个有点暴力,不过我觉得逻辑还是挺清晰的
您可以将逻辑简化为:
-- Concatenate the 7-row window around the current row and require four
-- FAILED values in a row. The group is parenthesised because 'FAILED{4}'
-- would repeat only the final D; status3n was a typo for status_3n.
where regexp_like(status_3 || status_2 || status_1 || status || status_1n || status_2n || status_3n,
                  '(FAILED){4}'
                 )
-- Number each mail's transactions in date order.
-- Oracle fixes: DATE and ROWNUM are reserved words, so the date column is
-- referenced as a quoted identifier and the row-number alias is renamed to rn.
-- NOTE(review): assumes the columns were created as quoted lowercase
-- identifiers ("date", "transaction") -- confirm against the table DDL.
SELECT
"transaction"
,"date"
,mail
,status
,ROW_NUMBER() OVER (PARTITION BY mail ORDER BY "date") AS rn
FROM table1
基于上面的表和脚本,我希望按行号(rowid)来筛选交易:如果前 3 行的状态都是 'failed',则显示第 4 行;同样,如果第 4、5、6 行的交易也都失败,则显示第 7 行,依此类推。我考虑过把数据放进 pandas 数据框,然后在其中运行一个简单的 lambda 函数,但真的很想找到只用 SQL 的解决方案。
试试这个:
-- Keep every third FAILED row (1st, 4th, 7th, ...) of each mail.
-- The row number is computed after the status filter, so it counts FAILED
-- rows only; rows with other statuses in between do not restart the numbering.
-- Oracle fixes: the alias was ROWNUM (a reserved word, and in the outer query
-- it would mean the ROWNUM pseudocolumn rather than the window result), so it
-- is renamed to rn; the reserved word DATE is referenced as a quoted identifier.
-- NOTE(review): assumes the columns were created as quoted lowercase
-- identifiers ("date", "transaction") -- confirm against the table DDL.
SELECT *
FROM (
    SELECT
        "transaction"
        ,"date"
        ,mail
        ,status
        ,ROW_NUMBER() OVER (PARTITION BY mail ORDER BY "date") AS rn
    FROM table1
    WHERE status = 'FAILED'
)
WHERE MOD(rn, 3) = 1;
理查德
一种选择是使用窗口函数。使用 lag
获取前一行的状态值(基于指定的排序),将其与当前行的值进行比较,并用累计和(running sum)来划分分组。然后统计每组中的行数,最后按该条件进行筛选。
-- Return every row that belongs to a group of at least 4 rows, where a group
-- is a run of consecutive FAILED rows of one mail (ordered by "date",
-- "transaction"). Non-FAILED rows always form single-row groups.
WITH with_prev AS (
    -- Step 1: attach the previous row's status within the same mail.
    SELECT src.*,
           LAG(status) OVER (PARTITION BY mail ORDER BY "date", "transaction") AS prev_status
    FROM tbl src
),
with_grp AS (
    -- Step 2: running sum used as a group id. It stays flat (adds 0) while a
    -- FAILED row follows another FAILED row or is the first row of the
    -- partition; every other row adds 1 and therefore opens a new group.
    SELECT p.*,
           SUM(CASE
                   WHEN status = 'FAILED'
                        AND (prev_status IS NULL OR prev_status = 'FAILED') THEN 0
                   ELSE 1
               END) OVER (PARTITION BY mail ORDER BY "date", "transaction") AS grp
    FROM with_prev p
),
with_count AS (
    -- Step 3: size of each group.
    SELECT g.*,
           COUNT(*) OVER (PARTITION BY mail, grp) AS grp_count
    FROM with_grp g
)
SELECT c.*
FROM with_count c
WHERE grp_count >= 4
如果您使用的是从 Oracle 12c 开始的版本,可以选择使用 MATCH_RECOGNIZE
来简化这一过程。
-- Row pattern matching: per mail, in "date"/"transaction" order, find runs of
-- four or more consecutive rows whose status is 'FAILED' and return every row
-- of each run.
SELECT *
FROM tbl
MATCH_RECOGNIZE (
    PARTITION BY mail
    ORDER BY "date", "transaction"
    ALL ROWS PER MATCH
    AFTER MATCH SKIP TO LAST failed_row
    PATTERN (failed_row{4,})
    DEFINE
        failed_row AS (status = 'FAILED')
) mr
ORDER BY "date", "transaction"
您可以使用 lead()
和 lag()
明确检查:
-- Return every FAILED row that is part of at least 4 consecutive FAILED rows
-- of the same mail, by looking up to 3 rows back and 3 rows ahead.
-- status_N  = status N rows before the current row,
-- status_Nn = status N rows after the current row.
-- Fixes relative to the earlier version: the lag offsets are now 3/2/1 (all
-- three were 3), the first lead is aliased status_1n (it duplicated
-- status_3n), the inner table gets the t1 alias that t1.* refers to, the
-- unterminated 'FAILED literal is closed, and the reserved word DATE is
-- referenced as a quoted identifier.
-- NOTE(review): assumes the column was created as the quoted lowercase
-- identifier "date" -- confirm against the table DDL.
select t.*
from (select t1.*,
             lag(status, 3) over (partition by mail order by "date") as status_3,
             lag(status, 2) over (partition by mail order by "date") as status_2,
             lag(status, 1) over (partition by mail order by "date") as status_1,
             lead(status, 1) over (partition by mail order by "date") as status_1n,
             lead(status, 2) over (partition by mail order by "date") as status_2n,
             lead(status, 3) over (partition by mail order by "date") as status_3n
      from t t1
     ) t
where status = 'FAILED' and
      ( -- each branch is one of the four 4-row windows that contain the current row
        (status_3 = 'FAILED' and status_2 = 'FAILED' and status_1 = 'FAILED') or
        (status_2 = 'FAILED' and status_1 = 'FAILED' and status_1n = 'FAILED') or
        (status_1 = 'FAILED' and status_1n = 'FAILED' and status_2n = 'FAILED') or
        (status_1n = 'FAILED' and status_2n = 'FAILED' and status_3n = 'FAILED')
      )
这个有点暴力,不过我觉得逻辑还是挺清晰的
您可以将逻辑简化为:
-- Concatenate the 7-row window around the current row and require four
-- FAILED values in a row. The group is parenthesised because 'FAILED{4}'
-- would repeat only the final D; status3n was a typo for status_3n.
where regexp_like(status_3 || status_2 || status_1 || status || status_1n || status_2n || status_3n,
                  '(FAILED){4}'
                 )