如何在 Oracle SQL 中的键中的特定值之后的行中添加标志?
How to add a flag to the rows after a specific value within a key in Oracle SQL?
我有以下数据:
Key Stage CreateDate
AAF 0 01-Jan-2018
AAF 0 02-Jan-2018
AAF 0 10-Jan-2018
AAF 20 20-Jan-2018
AAF 40 20-Mar-2018
AAF 0 01-May-2018
AAF 0 10-May-2018
AAF 0 20-May-2018
AAF 30 20-Jun-2018
AAF 0 20-Jul-2018
AAF 100 20-Jul-2018
我基本上是在尝试计算每个阶段花费的天数。我目前在每个阶段取最小日期,并找出下一阶段的最小日期之间的差异:
select
key,
stage,
cast(extract (day from max(next_dt) - min(createddate)) as number) as interval_days
from
(
select
key,
stage,
createddate
lead(createddate,1) over (partition by key order by createddate) next_dt
from oppstages
)
group by key,stage
可以看出,有时阶段从 0-40 前进,但又回到 0。所以上面的逻辑不能正常工作,我认为有必要将 0-40 分组为一个类别,40 之后的任何内容作为下一类别,依此类推(如果阶段减少并以新的较小阶段编号重新开始)。下面的查询给出了概率下降的点,但我无法标记以进一步对行进行分组。
select key,
stage,
createddate,
next_dt,
next_prob,
case when next_prob < stage then 1 else 0 end as valid_flag,
from
(
select
key,
stage,
createddate,
lead(createddate,1) over (partition by key order by createddate) next_dt,
coalesce(lead(stage,1) over (partition by key order by createddate),101) next_prob,
from oppstages
) a
我期待此输出,以便我可以使用标志进行分组来计算在每个实例上花费的天数:
Key Stage CreateDate Flag
AAF 0 01-Jan-2018 1
AAF 0 02-Jan-2018 1
AAF 0 10-Jan-2018 1
AAF 20 20-Jan-2018 1
AAF 40 20-Mar-2018 1
AAF 0 01-May-2018 2
AAF 0 10-May-2018 2
AAF 0 20-May-2018 2
AAF 30 20-Jun-2018 2
AAF 10 20-Jul-2018 3
AAF 100 20-Jul-2018 3
谢谢。
您可以尝试使用 lag
window 函数获取 Stage
之前的值。
然后使用CASE WHEN
检查PREVAL > STAGE
做增加1
。
CREATE TABLE T(
Key varchar(50),
Stage int,
CreateDate date
);
INSERT INTO T VALUES ('AAF',0,TO_DATE('01-01-2018','dd-mm-yyyy'));
INSERT INTO T VALUES ('AAF',0,TO_DATE('02-01-2018','dd-mm-yyyy'));
INSERT INTO T VALUES ('AAF',0,TO_DATE('10-01-2018','dd-mm-yyyy'));
INSERT INTO T VALUES ('AAF',20,TO_DATE('20-01-2018','dd-mm-yyyy'));
INSERT INTO T VALUES ('AAF',40,TO_DATE('20-03-2018','dd-mm-yyyy'));
INSERT INTO T VALUES ('AAF',0,TO_DATE('01-05-2018','dd-mm-yyyy'));
INSERT INTO T VALUES ('AAF',0,TO_DATE('10-05-2018','dd-mm-yyyy'));
INSERT INTO T VALUES ('AAF',0,TO_DATE('20-05-2018','dd-mm-yyyy'));
INSERT INTO T VALUES ('AAF',30,TO_DATE('20-06-2018','dd-mm-yyyy'));
INSERT INTO T VALUES ('AAF',10,TO_DATE('20-07-2018','dd-mm-yyyy'));
INSERT INTO T VALUES ('AAF',100,TO_DATE('20-07-2018','dd-mm-yyyy'));
查询 1:
SELECT t1.KEY,
t1.STAGE,
(SUM(CASE WHEN PREVAL > STAGE THEN 1 ELSE 0 END) over (partition by Key order by CreateDate) + 1) Flag
FROM (
SELECT T.*,lag(Stage) over (partition by Key order by CreateDate) preVAL
FROM T
)t1
| KEY | STAGE | FLAG |
|-----|-------|------|
| AAF | 0 | 1 |
| AAF | 0 | 1 |
| AAF | 0 | 1 |
| AAF | 20 | 1 |
| AAF | 40 | 1 |
| AAF | 0 | 2 |
| AAF | 0 | 2 |
| AAF | 0 | 2 |
| AAF | 30 | 2 |
| AAF | 10 | 3 |
| AAF | 100 | 3 |
您遇到了间隙和孤岛问题。一个简单的解决方案是使用行号的差异。这定义了组。
select t.*, (seqnum_2 - seqnum_1) as grp
from (select os.*,
row_number() over (partition by key order by createdate) as seqnum,
row_number() over (partition by key, stage order by createdate) as seqnum_2
from oppstages os
) os;
您可能想要的是聚合:
select key, stage, min(createdate), max(createdate),
lead(min(createdate)) over (partition by key, stage, seqnum - seqnum_2 order by createdate) as next_creatdate
from (select os.*,
row_number() over (partition by key order by createdate) as seqnum,
row_number() over (partition by key, stage order by createdate) as seqnum_2
from oppstages os
) os
group by key, stage, (seqnum_2 - seqnum)
我不确定你想要什么逻辑,但这应该有你需要的所有信息。
我有以下数据:
Key Stage CreateDate
AAF 0 01-Jan-2018
AAF 0 02-Jan-2018
AAF 0 10-Jan-2018
AAF 20 20-Jan-2018
AAF 40 20-Mar-2018
AAF 0 01-May-2018
AAF 0 10-May-2018
AAF 0 20-May-2018
AAF 30 20-Jun-2018
AAF 0 20-Jul-2018
AAF 100 20-Jul-2018
我基本上是在尝试计算每个阶段花费的天数。我目前在每个阶段取最小日期,并找出下一阶段的最小日期之间的差异:
select
key,
stage,
cast(extract (day from max(next_dt) - min(createddate)) as number) as interval_days
from
(
select
key,
stage,
createddate
lead(createddate,1) over (partition by key order by createddate) next_dt
from oppstages
)
group by key,stage
可以看出,有时阶段从 0-40 前进,但又回到 0。所以上面的逻辑不能正常工作,我认为有必要将 0-40 分组为一个类别,40 之后的任何内容作为下一类别,依此类推(如果阶段减少并以新的较小阶段编号重新开始)。下面的查询给出了概率下降的点,但我无法标记以进一步对行进行分组。
select key,
stage,
createddate,
next_dt,
next_prob,
case when next_prob < stage then 1 else 0 end as valid_flag,
from
(
select
key,
stage,
createddate,
lead(createddate,1) over (partition by key order by createddate) next_dt,
coalesce(lead(stage,1) over (partition by key order by createddate),101) next_prob,
from oppstages
) a
我期待此输出,以便我可以使用标志进行分组来计算在每个实例上花费的天数:
Key Stage CreateDate Flag
AAF 0 01-Jan-2018 1
AAF 0 02-Jan-2018 1
AAF 0 10-Jan-2018 1
AAF 20 20-Jan-2018 1
AAF 40 20-Mar-2018 1
AAF 0 01-May-2018 2
AAF 0 10-May-2018 2
AAF 0 20-May-2018 2
AAF 30 20-Jun-2018 2
AAF 10 20-Jul-2018 3
AAF 100 20-Jul-2018 3
谢谢。
您可以尝试使用 lag
window 函数获取 Stage
之前的值。
然后使用CASE WHEN
检查PREVAL > STAGE
做增加1
。
CREATE TABLE T(
Key varchar(50),
Stage int,
CreateDate date
);
INSERT INTO T VALUES ('AAF',0,TO_DATE('01-01-2018','dd-mm-yyyy'));
INSERT INTO T VALUES ('AAF',0,TO_DATE('02-01-2018','dd-mm-yyyy'));
INSERT INTO T VALUES ('AAF',0,TO_DATE('10-01-2018','dd-mm-yyyy'));
INSERT INTO T VALUES ('AAF',20,TO_DATE('20-01-2018','dd-mm-yyyy'));
INSERT INTO T VALUES ('AAF',40,TO_DATE('20-03-2018','dd-mm-yyyy'));
INSERT INTO T VALUES ('AAF',0,TO_DATE('01-05-2018','dd-mm-yyyy'));
INSERT INTO T VALUES ('AAF',0,TO_DATE('10-05-2018','dd-mm-yyyy'));
INSERT INTO T VALUES ('AAF',0,TO_DATE('20-05-2018','dd-mm-yyyy'));
INSERT INTO T VALUES ('AAF',30,TO_DATE('20-06-2018','dd-mm-yyyy'));
INSERT INTO T VALUES ('AAF',10,TO_DATE('20-07-2018','dd-mm-yyyy'));
INSERT INTO T VALUES ('AAF',100,TO_DATE('20-07-2018','dd-mm-yyyy'));
查询 1:
SELECT t1.KEY,
t1.STAGE,
(SUM(CASE WHEN PREVAL > STAGE THEN 1 ELSE 0 END) over (partition by Key order by CreateDate) + 1) Flag
FROM (
SELECT T.*,lag(Stage) over (partition by Key order by CreateDate) preVAL
FROM T
)t1
| KEY | STAGE | FLAG |
|-----|-------|------|
| AAF | 0 | 1 |
| AAF | 0 | 1 |
| AAF | 0 | 1 |
| AAF | 20 | 1 |
| AAF | 40 | 1 |
| AAF | 0 | 2 |
| AAF | 0 | 2 |
| AAF | 0 | 2 |
| AAF | 30 | 2 |
| AAF | 10 | 3 |
| AAF | 100 | 3 |
您遇到了间隙和孤岛问题。一个简单的解决方案是使用行号的差异。这定义了组。
select t.*, (seqnum_2 - seqnum_1) as grp
from (select os.*,
row_number() over (partition by key order by createdate) as seqnum,
row_number() over (partition by key, stage order by createdate) as seqnum_2
from oppstages os
) os;
您可能想要的是聚合:
select key, stage, min(createdate), max(createdate),
lead(min(createdate)) over (partition by key, stage, seqnum - seqnum_2 order by createdate) as next_creatdate
from (select os.*,
row_number() over (partition by key order by createdate) as seqnum,
row_number() over (partition by key, stage order by createdate) as seqnum_2
from oppstages os
) os
group by key, stage, (seqnum_2 - seqnum)
我不确定你想要什么逻辑,但这应该有你需要的所有信息。