T-SQL 按条件分组数据
T-SQL grouping data with condition
我有一些数据:
period | id_account | float_value
24217 | 303003 | 0
24218 | 303003 | 0
24219 | 303003 | 1
24220 | 303003 | 1
24221 | 303003 | 0
24222 | 303003 | 0
我需要像这样对这些数据进行分组:
begin_period | end_period| id_account | float_value
24217 | 24218 | 303003 | 0
24219 | 24220 | 303003 | 1
24221 | 24222 | 303003 | 0
我尝试了 row_number 分区和 while 循环,但没有成功。
我将此理解为一个 gaps-and-islands(间隙与孤岛)问题:您希望在 id_account 和 parameter 相同的行中,把 float_value 相同的“相邻”行分到同一组。
这里,我认为最简单的方法是使用行号之间的差异来计算每条记录属于哪个组:
-- Gaps-and-islands via the difference of two row numbers.
-- seq_all numbers every row of an (id_account, parameter) pair in period order;
-- seq_same_value numbers only the rows that also share the same float_value.
-- Rows of one value that are consecutive in period order keep a constant
-- difference between the two counters, so that difference labels the island.
WITH numbered AS (
    SELECT
        src.*,
        ROW_NUMBER() OVER (
            PARTITION BY id_account, parameter
            ORDER BY period
        ) AS seq_all,
        ROW_NUMBER() OVER (
            PARTITION BY id_account, parameter, float_value
            ORDER BY period
        ) AS seq_same_value
    FROM mytable AS src
)
SELECT
    MIN(period) AS begin_period,
    MAX(period) AS end_period,
    id_account,
    parameter,
    float_value
FROM numbered
GROUP BY
    id_account,
    parameter,
    float_value,
    seq_all - seq_same_value  -- island key
ORDER BY
    id_account,
    parameter,
    begin_period
begin_period | end_period | id_account | parameter | float_value
-----------: | ---------: | ---------: | :------------- | ----------:
24217 | 24218 | 303003 | ACCOUNT_STATUS | 0
24219 | 24220 | 303003 | ACCOUNT_STATUS | 1
24221 | 24222 | 303003 | ACCOUNT_STATUS | 0
这是一个间隙与孤岛(gaps-and-islands)问题,一种方法是使用行号差值法:
-- Gaps-and-islands via the difference of two row numbers.
-- rn1 numbers every row of an (ID_Account, Parameter) pair in Period order;
-- rn2 numbers only the rows of that pair that share the same Float_Value.
-- Within a run of equal Float_Value rows, rn1 - rn2 stays constant, so it
-- identifies the island once combined with the grouping columns.
-- Parameter is part of every partition and of the GROUP BY, so an account that
-- carries more than one Parameter gets separate islands per Parameter instead
-- of having them merged and labelled with MAX(Parameter).
WITH cte AS (
    SELECT
        *,
        ROW_NUMBER() OVER (
            PARTITION BY ID_Account, Parameter
            ORDER BY Period
        ) AS rn1,
        ROW_NUMBER() OVER (
            PARTITION BY ID_Account, Parameter, Float_Value
            ORDER BY Period
        ) AS rn2
    FROM yourTable
)
SELECT
    MIN(Period) AS Begin_Period,
    MAX(Period) AS End_Period,
    ID_Account,
    Parameter,
    Float_Value
FROM cte
GROUP BY
    ID_Account,
    Parameter,
    Float_Value,
    rn1 - rn2  -- island key
ORDER BY
    MIN(Period),
    -- tie-breakers: keep the order stable when several pairs start on the same Period
    ID_Account,
    Parameter;
您可以按如下方式使用 row_number。这是一个演示(demo):
-- Gaps-and-islands via the difference of two row numbers.
-- rn   : position of the row in Period order inside its (ID_Account, Parameter) pair.
-- nrnk : rn minus the row's position when the same pair is ordered by
--        Float_Value and then rn. For consecutive rows with the same Float_Value
--        both counters advance together, so nrnk is constant across an island.
-- Both window functions are partitioned by ID_Account and Parameter; without
-- that, rows from other accounts/parameters interleave in the numbering and a
-- single island can be split into several groups.
-- No ORDER BY is applied to the final result, so the row order is not guaranteed.
with cte as
(
    select
        *,
        rn - row_number() over (
            partition by ID_Account, Parameter
            order by Float_Value, rn
        ) as nrnk
    from
    (
        select
            *,
            row_number() over (
                partition by ID_Account, Parameter
                order by Period
            ) as rn
        from Table1
    ) subq
)
select
    min(Period) as Begin_Period,
    max(Period) as End_Period,
    ID_Account,
    Parameter,
    Float_Value
from cte
group by
    ID_Account,
    Parameter,
    Float_Value,
    nrnk
输出:
Begin_Period End_Period ID_Account Parameter Float_Value
---------------------------------------------------------------------
24217 24218 303003 ACCOUNT_STATUS 0
24221 24222 303003 ACCOUNT_STATUS 0
24219 24220 303003 ACCOUNT_STATUS 1
我有一些数据:
period | id_account | float_value
24217 | 303003 | 0
24218 | 303003 | 0
24219 | 303003 | 1
24220 | 303003 | 1
24221 | 303003 | 0
24222 | 303003 | 0
我需要像这样对这些数据进行分组:
begin_period | end_period| id_account | float_value
24217 | 24218 | 303003 | 0
24219 | 24220 | 303003 | 1
24221 | 24222 | 303003 | 0
我尝试了 row_number 分区和 while 循环,但没有成功。
我将此理解为一个 gaps-and-islands(间隙与孤岛)问题:您希望在 id_account 和 parameter 相同的行中,把 float_value 相同的“相邻”行分到同一组。
这里,我认为最简单的方法是使用行号之间的差异来计算每条记录属于哪个组:
-- Gaps-and-islands via the difference of two row numbers.
-- seq_all numbers every row of an (id_account, parameter) pair in period order;
-- seq_same_value numbers only the rows that also share the same float_value.
-- Rows of one value that are consecutive in period order keep a constant
-- difference between the two counters, so that difference labels the island.
WITH numbered AS (
    SELECT
        src.*,
        ROW_NUMBER() OVER (
            PARTITION BY id_account, parameter
            ORDER BY period
        ) AS seq_all,
        ROW_NUMBER() OVER (
            PARTITION BY id_account, parameter, float_value
            ORDER BY period
        ) AS seq_same_value
    FROM mytable AS src
)
SELECT
    MIN(period) AS begin_period,
    MAX(period) AS end_period,
    id_account,
    parameter,
    float_value
FROM numbered
GROUP BY
    id_account,
    parameter,
    float_value,
    seq_all - seq_same_value  -- island key
ORDER BY
    id_account,
    parameter,
    begin_period
begin_period | end_period | id_account | parameter | float_value
-----------: | ---------: | ---------: | :------------- | ----------:
24217 | 24218 | 303003 | ACCOUNT_STATUS | 0
24219 | 24220 | 303003 | ACCOUNT_STATUS | 1
24221 | 24222 | 303003 | ACCOUNT_STATUS | 0
这是一个间隙与孤岛(gaps-and-islands)问题,一种方法是使用行号差值法:
-- Gaps-and-islands via the difference of two row numbers.
-- rn1 numbers every row of an (ID_Account, Parameter) pair in Period order;
-- rn2 numbers only the rows of that pair that share the same Float_Value.
-- Within a run of equal Float_Value rows, rn1 - rn2 stays constant, so it
-- identifies the island once combined with the grouping columns.
-- Parameter is part of every partition and of the GROUP BY, so an account that
-- carries more than one Parameter gets separate islands per Parameter instead
-- of having them merged and labelled with MAX(Parameter).
WITH cte AS (
    SELECT
        *,
        ROW_NUMBER() OVER (
            PARTITION BY ID_Account, Parameter
            ORDER BY Period
        ) AS rn1,
        ROW_NUMBER() OVER (
            PARTITION BY ID_Account, Parameter, Float_Value
            ORDER BY Period
        ) AS rn2
    FROM yourTable
)
SELECT
    MIN(Period) AS Begin_Period,
    MAX(Period) AS End_Period,
    ID_Account,
    Parameter,
    Float_Value
FROM cte
GROUP BY
    ID_Account,
    Parameter,
    Float_Value,
    rn1 - rn2  -- island key
ORDER BY
    MIN(Period),
    -- tie-breakers: keep the order stable when several pairs start on the same Period
    ID_Account,
    Parameter;
您可以按如下方式使用 row_number。这是一个演示(demo):
-- Gaps-and-islands via the difference of two row numbers.
-- rn   : position of the row in Period order inside its (ID_Account, Parameter) pair.
-- nrnk : rn minus the row's position when the same pair is ordered by
--        Float_Value and then rn. For consecutive rows with the same Float_Value
--        both counters advance together, so nrnk is constant across an island.
-- Both window functions are partitioned by ID_Account and Parameter; without
-- that, rows from other accounts/parameters interleave in the numbering and a
-- single island can be split into several groups.
-- No ORDER BY is applied to the final result, so the row order is not guaranteed.
with cte as
(
    select
        *,
        rn - row_number() over (
            partition by ID_Account, Parameter
            order by Float_Value, rn
        ) as nrnk
    from
    (
        select
            *,
            row_number() over (
                partition by ID_Account, Parameter
                order by Period
            ) as rn
        from Table1
    ) subq
)
select
    min(Period) as Begin_Period,
    max(Period) as End_Period,
    ID_Account,
    Parameter,
    Float_Value
from cte
group by
    ID_Account,
    Parameter,
    Float_Value,
    nrnk
输出:
Begin_Period End_Period ID_Account Parameter Float_Value
---------------------------------------------------------------------
24217 24218 303003 ACCOUNT_STATUS 0
24221 24222 303003 ACCOUNT_STATUS 0
24219 24220 303003 ACCOUNT_STATUS 1