当类别之间的变化次数 m 大于类别数 n 时,在窗口内查找每次类别变化之前的最大值
Finding max value prior to a change between n categories across a window for m>n changes between categories
我有一个类似于下面测试数据的数据集:
-- Sample data for the question: one row per (month, group) holding the color
-- the group had in that month. Months are stored as integers such as 201501.
CREATE TABLE #colors (mon int, grp varchar(1), color varchar(5));

-- The target columns are named explicitly so the load does not depend on the
-- physical column order of the table.
INSERT INTO #colors (mon, grp, color)
VALUES
    -- Group A: Red in every month.
    (201501, 'A', 'Red'),
    (201502, 'A', 'Red'),
    (201503, 'A', 'Red'),
    (201504, 'A', 'Red'),
    (201505, 'A', 'Red'),
    (201506, 'A', 'Red'),
    -- Group B: Red, then Blue.
    (201501, 'B', 'Red'),
    (201502, 'B', 'Red'),
    (201503, 'B', 'Blue'),
    (201504, 'B', 'Blue'),
    (201505, 'B', 'Blue'),
    (201506, 'B', 'Blue'),
    -- Group C: Red, then Blue, then Green.
    (201501, 'C', 'Red'),
    (201502, 'C', 'Red'),
    (201503, 'C', 'Blue'),
    (201504, 'C', 'Green'),
    (201505, 'C', 'Green'),
    (201506, 'C', 'Green'),
    -- Group D: Red, then Blue, then Red again (a color that recurs).
    (201501, 'D', 'Red'),
    (201502, 'D', 'Red'),
    (201503, 'D', 'Blue'),
    (201504, 'D', 'Blue'),
    (201505, 'D', 'Red'),
    (201506, 'D', 'Red');
我想知道每个组的颜色变化路径,以及在每次颜色变化之前,该组保持某种颜色的最后一个月份。这样,与颜色相关联的月份就是该组—颜色组合在时间上的上限。
我已经尝试使用 CTE 和 row_number() 函数来实现这一点,如下面的代码所示,但结果并不正确。
示例代码如下:
-- The asker's first attempt, reproduced with its original behavior.
-- last_seen keeps one row per (grp, color) carrying that color's MAX(mon), so
-- a color that occurs in two separate stretches of the same group is merged
-- into a single entry. The ranked rows are then chained rank -> rank + 1 to
-- lay up to three colors side by side, and the chain that starts at the
-- earliest month is kept for each group.
;WITH last_seen AS (
    SELECT
        grp,
        color,
        MAX(mon) AS mon
    FROM #colors
    GROUP BY grp, color
),
colors (grp, color, mon, rn) AS (
    SELECT
        ls.grp,
        ls.color,
        ls.mon,
        ROW_NUMBER() OVER (PARTITION BY ls.grp ORDER BY ls.mon ASC)
    FROM last_seen AS ls
),
chained AS (
    SELECT
        c1.grp,
        c1.color AS firstColor,
        c1.mon   AS firstMonth,
        c2.color AS secondColor,
        c2.mon   AS secondMonth,
        c3.color AS thirdColor,
        c3.mon   AS thirdMonth,
        ROW_NUMBER() OVER (PARTITION BY c1.grp ORDER BY c1.mon ASC) AS rn
    FROM colors AS c1
    LEFT JOIN colors AS c2
        ON  c2.grp = c1.grp
        AND c2.color <> c1.color
        AND c2.rn - 1 = c1.rn
    LEFT JOIN colors AS c3
        ON  c3.grp = c1.grp
        AND c3.color <> c2.color
        AND c3.rn - 1 = c2.rn
)
SELECT
    chained.grp,
    chained.firstColor,
    chained.firstMonth,
    chained.secondColor,
    chained.secondMonth,
    chained.thirdColor,
    chained.thirdMonth
FROM chained
WHERE chained.rn = 1
ORDER BY chained.grp
这导致以下 (不正确) 结果集:
如您所见,结果中看不出 D 组最初的颜色是红色——正确的路径应该是红色(201502) --> 蓝色(201504) --> 红色(201506)。这是因为使用了 max() 函数,但如果去掉它,就需要修改连接逻辑,而我想不出该怎么改。
我试过去掉 max() 函数,并把颜色加入 row_number() 的分区中,但我认为这在逻辑上会得到相同的结果集。
当类别数量少于类别之间的变化次数时,我该如何处理这种情况?
编辑 - 现在可以使用了!
-- Walks each group's history backwards from its latest month and reports up to
-- three colors on one row: the color of the latest month, the color that
-- preceded it, and the color before that. Because the walk runs newest to
-- oldest, firstColor/firstMonth describe the group's most recent row.
-- NOTE(review): this reads from a table named colorss, while the sample data
-- earlier on the page is created as #colors -- confirm which table is intended.
WITH colors AS (
    -- RowNumberOrder = 1 is the latest month of the group; larger is older.
    SELECT
        mon,
        grp,
        color,
        ROW_NUMBER() OVER (PARTITION BY grp ORDER BY mon DESC) AS RowNumberOrder
    FROM colorss
)
SELECT
    a.rn,
    a.grp,
    a.firstColor,
    a.firstMonth,
    a.secondColor,
    a.secondMonth,
    a.thirdColor,
    a.thirdMonth
FROM (
    SELECT
        -- The joins below match every older row with a different color, so many
        -- candidate rows share the same c1. Ordering by c2 and then c3 as well
        -- makes rn = 1 deterministic: it is the candidate whose c2 and c3 are
        -- the closest older rows, i.e. the last month before each color change.
        ROW_NUMBER() OVER (
            PARTITION BY c1.grp
            ORDER BY c1.RowNumberOrder ASC,
                     c2.RowNumberOrder ASC,
                     c3.RowNumberOrder ASC
        ) AS rn,
        c1.grp,
        c1.color AS firstColor,
        c1.mon   AS firstMonth,
        c2.color AS secondColor,
        c2.mon   AS secondMonth,
        c3.color AS thirdColor,
        c3.mon   AS thirdMonth
    FROM colors AS c1
    LEFT JOIN colors AS c2
        ON  c1.grp = c2.grp
        AND c1.RowNumberOrder < c2.RowNumberOrder
        AND c1.color <> c2.color
    LEFT JOIN colors AS c3
        ON  c3.grp = c2.grp
        AND c2.RowNumberOrder < c3.RowNumberOrder
        AND c2.color <> c3.color
) AS a
WHERE a.rn = 1
SQL fiddle: http://sqlfiddle.com/#!3/e0d90/36
我会采用不同的方法:通常我会尽量避免在列中“预先定义”月份的个数(如果可能的话)。下面这个解决方案本来可以把每个月份作为单独的行返回,不过这里它仍然把结果组合成了您期望的输出格式:
-- Lists, per group, up to three color stretches in month order on one row:
-- for each stretch, its color and the last month before the color changed
-- (or the group's last month, for the final stretch).
-- The output repeats the column names color and mon three times, unaliased as
-- in the original, so consumers have to read them by position.
-- NOTE(review): this reads from a table named colors, while the sample data
-- earlier on the page is created as #colors -- confirm which table is intended.
WITH nCTE (mon, grp, color, n) AS (
    -- Number each group's rows in month order so neighbouring rows can be
    -- matched on n. Columns are listed explicitly so the CTE's column list
    -- does not depend on the table's column order or column count.
    SELECT
        mon,
        grp,
        color,
        ROW_NUMBER() OVER (PARTITION BY grp ORDER BY mon) AS n
    FROM colors
), monthsCTE (mon, grp, color, n) AS (
    -- Keep a row when the following row of the same group has a different
    -- color, or when there is no following row (r.color IS NULL); then
    -- renumber the surviving rows per group.
    SELECT
        l.mon,
        l.grp,
        l.color,
        ROW_NUMBER() OVER (PARTITION BY l.grp ORDER BY l.mon) AS n
    FROM nCTE AS l
    LEFT JOIN nCTE AS r
        ON l.grp = r.grp AND l.n = r.n - 1
    WHERE l.color <> r.color OR r.color IS NULL
)
SELECT m1.grp, m1.color, m1.mon, m2.color, m2.mon, m3.color, m3.mon
FROM monthsCTE AS m1
LEFT JOIN monthsCTE AS m2
    ON m1.grp = m2.grp AND m2.n = 2
LEFT JOIN monthsCTE AS m3
    ON m1.grp = m3.grp AND m3.n = 3
WHERE m1.n = 1
ORDER BY m1.grp
还有一个fiddle
您可以只使用 monthsCTE 内部的查询而不是外层的 SELECT,从而在单独的行中获取结果(这样就不需要 ROW_NUMBER... 部分),或者保持现状也可以...
EDIT: It's actually easier to do what you REALLY wanted. Just remove the GROUP BY clause (and the interrupting MAX() functions).
EDIT2: As noted by Me.Name, old solution would fail over years. Corrected code fragment & fiddle.
使用略有不同的方法:首先使用 LEAD 窗口函数来确定颜色是否发生变化,然后只对颜色发生变化位置的行进行排名:
-- nextcols: attach to every row the color of the following month in the same
--           group; the group's last row gets the placeholder 'none'.
-- ranked:   keep only rows whose color differs from the next one and number
--           them per group in month order.
-- The final SELECT places the first three ranked rows of a group side by side.
;WITH nextcols AS (
    SELECT
        grp,
        color,
        mon,
        LEAD(color, 1, 'none') OVER (PARTITION BY grp ORDER BY mon) AS nextcol
    FROM #colors
),
ranked AS (
    SELECT
        grp,
        color,
        mon,
        nextcol,
        ROW_NUMBER() OVER (PARTITION BY grp ORDER BY mon) AS MonthIndex
    FROM nextcols
    WHERE color <> nextcol
)
-- A pivot might also work here; joining on MonthIndex is enough.
SELECT
    r1.grp,
    r1.color AS firstCol,
    r1.mon   AS firstMon,
    r2.color AS secondCol,
    r2.mon   AS secondMon,
    r3.color AS thirdCol,
    r3.mon   AS thirdMon
FROM ranked AS r1
LEFT JOIN ranked AS r2
    ON r2.grp = r1.grp AND r2.MonthIndex = 2
LEFT JOIN ranked AS r3
    ON r3.grp = r1.grp AND r3.MonthIndex = 3
WHERE r1.MonthIndex = 1
我有一个类似于下面测试数据的数据集:
-- Sample data for the question: one row per (month, group) holding the color
-- the group had in that month. Months are stored as integers such as 201501.
CREATE TABLE #colors (mon int, grp varchar(1), color varchar(5));

-- The target columns are named explicitly so the load does not depend on the
-- physical column order of the table.
INSERT INTO #colors (mon, grp, color)
VALUES
    -- Group A: Red in every month.
    (201501, 'A', 'Red'),
    (201502, 'A', 'Red'),
    (201503, 'A', 'Red'),
    (201504, 'A', 'Red'),
    (201505, 'A', 'Red'),
    (201506, 'A', 'Red'),
    -- Group B: Red, then Blue.
    (201501, 'B', 'Red'),
    (201502, 'B', 'Red'),
    (201503, 'B', 'Blue'),
    (201504, 'B', 'Blue'),
    (201505, 'B', 'Blue'),
    (201506, 'B', 'Blue'),
    -- Group C: Red, then Blue, then Green.
    (201501, 'C', 'Red'),
    (201502, 'C', 'Red'),
    (201503, 'C', 'Blue'),
    (201504, 'C', 'Green'),
    (201505, 'C', 'Green'),
    (201506, 'C', 'Green'),
    -- Group D: Red, then Blue, then Red again (a color that recurs).
    (201501, 'D', 'Red'),
    (201502, 'D', 'Red'),
    (201503, 'D', 'Blue'),
    (201504, 'D', 'Blue'),
    (201505, 'D', 'Red'),
    (201506, 'D', 'Red');
我想知道每个组的颜色变化路径,以及在每次颜色变化之前,该组保持某种颜色的最后一个月份。这样,与颜色相关联的月份就是该组—颜色组合在时间上的上限。
我已经尝试使用 CTE 和 row_number() 函数来实现这一点,如下面的代码所示,但结果并不正确。
示例代码如下:
-- The asker's first attempt, reproduced with its original behavior.
-- last_seen keeps one row per (grp, color) carrying that color's MAX(mon), so
-- a color that occurs in two separate stretches of the same group is merged
-- into a single entry. The ranked rows are then chained rank -> rank + 1 to
-- lay up to three colors side by side, and the chain that starts at the
-- earliest month is kept for each group.
;WITH last_seen AS (
    SELECT
        grp,
        color,
        MAX(mon) AS mon
    FROM #colors
    GROUP BY grp, color
),
colors (grp, color, mon, rn) AS (
    SELECT
        ls.grp,
        ls.color,
        ls.mon,
        ROW_NUMBER() OVER (PARTITION BY ls.grp ORDER BY ls.mon ASC)
    FROM last_seen AS ls
),
chained AS (
    SELECT
        c1.grp,
        c1.color AS firstColor,
        c1.mon   AS firstMonth,
        c2.color AS secondColor,
        c2.mon   AS secondMonth,
        c3.color AS thirdColor,
        c3.mon   AS thirdMonth,
        ROW_NUMBER() OVER (PARTITION BY c1.grp ORDER BY c1.mon ASC) AS rn
    FROM colors AS c1
    LEFT JOIN colors AS c2
        ON  c2.grp = c1.grp
        AND c2.color <> c1.color
        AND c2.rn - 1 = c1.rn
    LEFT JOIN colors AS c3
        ON  c3.grp = c1.grp
        AND c3.color <> c2.color
        AND c3.rn - 1 = c2.rn
)
SELECT
    chained.grp,
    chained.firstColor,
    chained.firstMonth,
    chained.secondColor,
    chained.secondMonth,
    chained.thirdColor,
    chained.thirdMonth
FROM chained
WHERE chained.rn = 1
ORDER BY chained.grp
这导致以下 (不正确) 结果集:
如您所见,结果中看不出 D 组最初的颜色是红色——正确的路径应该是红色(201502) --> 蓝色(201504) --> 红色(201506)。这是因为使用了 max() 函数,但如果去掉它,就需要修改连接逻辑,而我想不出该怎么改。
我试过去掉 max() 函数,并把颜色加入 row_number() 的分区中,但我认为这在逻辑上会得到相同的结果集。
当类别数量少于类别之间的变化次数时,我该如何处理这种情况?
编辑 - 现在可以使用了!
-- Walks each group's history backwards from its latest month and reports up to
-- three colors on one row: the color of the latest month, the color that
-- preceded it, and the color before that. Because the walk runs newest to
-- oldest, firstColor/firstMonth describe the group's most recent row.
-- NOTE(review): this reads from a table named colorss, while the sample data
-- earlier on the page is created as #colors -- confirm which table is intended.
WITH colors AS (
    -- RowNumberOrder = 1 is the latest month of the group; larger is older.
    SELECT
        mon,
        grp,
        color,
        ROW_NUMBER() OVER (PARTITION BY grp ORDER BY mon DESC) AS RowNumberOrder
    FROM colorss
)
SELECT
    a.rn,
    a.grp,
    a.firstColor,
    a.firstMonth,
    a.secondColor,
    a.secondMonth,
    a.thirdColor,
    a.thirdMonth
FROM (
    SELECT
        -- The joins below match every older row with a different color, so many
        -- candidate rows share the same c1. Ordering by c2 and then c3 as well
        -- makes rn = 1 deterministic: it is the candidate whose c2 and c3 are
        -- the closest older rows, i.e. the last month before each color change.
        ROW_NUMBER() OVER (
            PARTITION BY c1.grp
            ORDER BY c1.RowNumberOrder ASC,
                     c2.RowNumberOrder ASC,
                     c3.RowNumberOrder ASC
        ) AS rn,
        c1.grp,
        c1.color AS firstColor,
        c1.mon   AS firstMonth,
        c2.color AS secondColor,
        c2.mon   AS secondMonth,
        c3.color AS thirdColor,
        c3.mon   AS thirdMonth
    FROM colors AS c1
    LEFT JOIN colors AS c2
        ON  c1.grp = c2.grp
        AND c1.RowNumberOrder < c2.RowNumberOrder
        AND c1.color <> c2.color
    LEFT JOIN colors AS c3
        ON  c3.grp = c2.grp
        AND c2.RowNumberOrder < c3.RowNumberOrder
        AND c2.color <> c3.color
) AS a
WHERE a.rn = 1
SQL fiddle: http://sqlfiddle.com/#!3/e0d90/36
我会采用不同的方法:通常我会尽量避免在列中“预先定义”月份的个数(如果可能的话)。下面这个解决方案本来可以把每个月份作为单独的行返回,不过这里它仍然把结果组合成了您期望的输出格式:
-- Lists, per group, up to three color stretches in month order on one row:
-- for each stretch, its color and the last month before the color changed
-- (or the group's last month, for the final stretch).
-- The output repeats the column names color and mon three times, unaliased as
-- in the original, so consumers have to read them by position.
-- NOTE(review): this reads from a table named colors, while the sample data
-- earlier on the page is created as #colors -- confirm which table is intended.
WITH nCTE (mon, grp, color, n) AS (
    -- Number each group's rows in month order so neighbouring rows can be
    -- matched on n. Columns are listed explicitly so the CTE's column list
    -- does not depend on the table's column order or column count.
    SELECT
        mon,
        grp,
        color,
        ROW_NUMBER() OVER (PARTITION BY grp ORDER BY mon) AS n
    FROM colors
), monthsCTE (mon, grp, color, n) AS (
    -- Keep a row when the following row of the same group has a different
    -- color, or when there is no following row (r.color IS NULL); then
    -- renumber the surviving rows per group.
    SELECT
        l.mon,
        l.grp,
        l.color,
        ROW_NUMBER() OVER (PARTITION BY l.grp ORDER BY l.mon) AS n
    FROM nCTE AS l
    LEFT JOIN nCTE AS r
        ON l.grp = r.grp AND l.n = r.n - 1
    WHERE l.color <> r.color OR r.color IS NULL
)
SELECT m1.grp, m1.color, m1.mon, m2.color, m2.mon, m3.color, m3.mon
FROM monthsCTE AS m1
LEFT JOIN monthsCTE AS m2
    ON m1.grp = m2.grp AND m2.n = 2
LEFT JOIN monthsCTE AS m3
    ON m1.grp = m3.grp AND m3.n = 3
WHERE m1.n = 1
ORDER BY m1.grp
还有一个fiddle
您可以只使用 monthsCTE 内部的查询而不是外层的 SELECT,从而在单独的行中获取结果(这样就不需要 ROW_NUMBER... 部分),或者保持现状也可以...
EDIT: It's actually easier to do what you REALLY wanted. Just remove the GROUP BY clause (and the interrupting MAX() functions).
EDIT2: As noted by Me.Name, old solution would fail over years. Corrected code fragment & fiddle.
使用略有不同的方法:首先使用 LEAD 窗口函数来确定颜色是否发生变化,然后只对颜色发生变化位置的行进行排名:
-- nextcols: attach to every row the color of the following month in the same
--           group; the group's last row gets the placeholder 'none'.
-- ranked:   keep only rows whose color differs from the next one and number
--           them per group in month order.
-- The final SELECT places the first three ranked rows of a group side by side.
;WITH nextcols AS (
    SELECT
        grp,
        color,
        mon,
        LEAD(color, 1, 'none') OVER (PARTITION BY grp ORDER BY mon) AS nextcol
    FROM #colors
),
ranked AS (
    SELECT
        grp,
        color,
        mon,
        nextcol,
        ROW_NUMBER() OVER (PARTITION BY grp ORDER BY mon) AS MonthIndex
    FROM nextcols
    WHERE color <> nextcol
)
-- A pivot might also work here; joining on MonthIndex is enough.
SELECT
    r1.grp,
    r1.color AS firstCol,
    r1.mon   AS firstMon,
    r2.color AS secondCol,
    r2.mon   AS secondMon,
    r3.color AS thirdCol,
    r3.mon   AS thirdMon
FROM ranked AS r1
LEFT JOIN ranked AS r2
    ON r2.grp = r1.grp AND r2.MonthIndex = 2
LEFT JOIN ranked AS r3
    ON r3.grp = r1.grp AND r3.MonthIndex = 3
WHERE r1.MonthIndex = 1