SQL 服务器中基于列值的条件聚合
Conditional aggregation in SQL Server based on a column value
我的数据是这样的:
IdType Date ActualEst Value PriceType
1 01-06 A 1 Settle
1 02-06 A 2 Settle
1 02-06 F 3 Settle
1 03-06 A 4 Settle
2 01-06 A 5 Settle
2 02-06 A 6 Settle
2 03-06 F 7 Settle
我想对 "Value" 列进行平均,对 "IdType" 和 "PriceType" 列进行分组,这样如果对于特定日期(列),ActualEst 都有 'A' 和 'F',我选择 'A' 而不是 'F',否则如果日期只有其中一个存在,则选择其中任何一个。
因此,对于以上数据,我应该选择这些记录进行平均 值:
IdType Date ActualEst Value PriceType
1 01-06 A 1 Settle
1 02-06 A 2 Settle
1 03-06 A 4 Settle
对于02-06的日期,'A'和'F'记录都存在,如果一个日期存在这两种类型的记录,我优先考虑'A'。
第二组..
IdType Date ActualEst Value PriceType
2 01-06 A 5 Settle
2 02-06 A 6 Settle
2 03-06 F 7 Settle
等等...
Row_number() 根据需要的优先级排列行,select 只有最高优先级的行:
-- Average Value per (IdType, PriceType). For every date inside a group only
-- the preferred row is used: an 'A' row wins over an 'F' row when both exist.
with tbl as (
    -- source data (sample rows from the question)
    select IdType, [Date], ActualEst, Value, PriceType
    from (values
         (1, '01-06', 'A', 1, 'Settle')
        ,(1, '02-06', 'A', 2, 'Settle')
        ,(1, '02-06', 'F', 3, 'Settle')
        ,(1, '03-06', 'A', 4, 'Settle')
        ,(2, '01-06', 'A', 5, 'Settle')
        ,(2, '02-06', 'A', 6, 'Settle')
        ,(2, '03-06', 'F', 7, 'Settle')
    ) t (IdType, [Date], ActualEst, Value, PriceType)
),
-- the query: number the rows of each date within its (IdType, PriceType)
-- group; rn = 1 marks the preferred row for that date
prioritized as (
    select IdType, [Date], ActualEst, Value, PriceType,
           -- explicit priority ('A' first) instead of relying on 'A' sorting before 'F'
           rn = row_number() over (
                    partition by IdType, PriceType, [Date]
                    order by case ActualEst when 'A' then 0 else 1 end
                )
    from tbl
)
select IdType,
       PriceType,
       -- cast before averaging: AVG over an int column returns a truncated int
       avg(cast(Value as decimal(18, 4))) as v
from prioritized
where rn = 1
-- group by the requested keys; grouping by [Date] would leave one row per
-- group and make the average a no-op
group by IdType, PriceType;
使用 UNION ALL 可以获得要分组的行:
-- Rows that take part in the aggregation: every 'A' row, plus each 'F' row
-- for which no 'A' row exists with the same IdType, PriceType and Date.
select a.IdType, a.[Date], a.ActualEst, a.Value, a.PriceType
from tablename as a
where a.ActualEst = 'A'
union all
select f.IdType, f.[Date], f.ActualEst, f.Value, f.PriceType
from tablename as f
where f.ActualEst = 'F'
  and not exists (
        select 1
        from tablename as a
        where a.IdType = f.IdType
          -- compare within the same price type, since that is a grouping key
          and a.PriceType = f.PriceType
          and a.[Date] = f.[Date]
          and a.ActualEst = 'A'
      );
然后像这样使用它:
-- Average Value per (IdType, PriceType) over the preferred rows: every 'A'
-- row, plus each 'F' row whose IdType / PriceType / Date has no 'A' row.
select
    picked.IdType,
    picked.PriceType,
    -- cast before averaging: AVG over an int column returns a truncated int
    avg(cast(picked.Value as decimal(18, 4))) as averagevalue
from (
    select a.IdType, a.PriceType, a.Value
    from tablename as a
    where a.ActualEst = 'A'
    union all
    select f.IdType, f.PriceType, f.Value
    from tablename as f
    where f.ActualEst = 'F'
      and not exists (
            select 1
            from tablename as a
            where a.IdType = f.IdType
              and a.PriceType = f.PriceType
              and a.[Date] = f.[Date]
              and a.ActualEst = 'A'
          )
) as picked
-- group by the requested keys; grouping by Date would leave one row per
-- group and make the average a no-op
group by picked.IdType, picked.PriceType;
我的数据是这样的:
IdType Date ActualEst Value PriceType 1 01-06 A 1 Settle 1 02-06 A 2 Settle 1 02-06 F 3 Settle 1 03-06 A 4 Settle 2 01-06 A 5 Settle 2 02-06 A 6 Settle 2 03-06 F 7 Settle
我想对 "Value" 列进行平均,对 "IdType" 和 "PriceType" 列进行分组,这样如果对于特定日期(列),ActualEst 都有 'A' 和 'F',我选择 'A' 而不是 'F',否则如果日期只有其中一个存在,则选择其中任何一个。
因此,对于以上数据,我应该选择这些记录进行平均 值:
IdType Date ActualEst Value PriceType 1 01-06 A 1 Settle 1 02-06 A 2 Settle 1 03-06 A 4 Settle
对于02-06的日期,'A'和'F'记录都存在,如果一个日期存在这两种类型的记录,我优先考虑'A'。
第二组..
IdType Date ActualEst Value PriceType 2 01-06 A 5 Settle 2 02-06 A 6 Settle 2 03-06 F 7 Settle
等等...
Row_number() 根据需要的优先级排列行,select 只有最高优先级的行:
-- Average Value per (IdType, PriceType). For every date inside a group only
-- the preferred row is used: an 'A' row wins over an 'F' row when both exist.
with tbl as (
    -- source data (sample rows from the question)
    select IdType, [Date], ActualEst, Value, PriceType
    from (values
         (1, '01-06', 'A', 1, 'Settle')
        ,(1, '02-06', 'A', 2, 'Settle')
        ,(1, '02-06', 'F', 3, 'Settle')
        ,(1, '03-06', 'A', 4, 'Settle')
        ,(2, '01-06', 'A', 5, 'Settle')
        ,(2, '02-06', 'A', 6, 'Settle')
        ,(2, '03-06', 'F', 7, 'Settle')
    ) t (IdType, [Date], ActualEst, Value, PriceType)
),
-- the query: number the rows of each date within its (IdType, PriceType)
-- group; rn = 1 marks the preferred row for that date
prioritized as (
    select IdType, [Date], ActualEst, Value, PriceType,
           -- explicit priority ('A' first) instead of relying on 'A' sorting before 'F'
           rn = row_number() over (
                    partition by IdType, PriceType, [Date]
                    order by case ActualEst when 'A' then 0 else 1 end
                )
    from tbl
)
select IdType,
       PriceType,
       -- cast before averaging: AVG over an int column returns a truncated int
       avg(cast(Value as decimal(18, 4))) as v
from prioritized
where rn = 1
-- group by the requested keys; grouping by [Date] would leave one row per
-- group and make the average a no-op
group by IdType, PriceType;
使用 UNION ALL 可以获得要分组的行:
-- Rows that take part in the aggregation: every 'A' row, plus each 'F' row
-- for which no 'A' row exists with the same IdType, PriceType and Date.
select a.IdType, a.[Date], a.ActualEst, a.Value, a.PriceType
from tablename as a
where a.ActualEst = 'A'
union all
select f.IdType, f.[Date], f.ActualEst, f.Value, f.PriceType
from tablename as f
where f.ActualEst = 'F'
  and not exists (
        select 1
        from tablename as a
        where a.IdType = f.IdType
          -- compare within the same price type, since that is a grouping key
          and a.PriceType = f.PriceType
          and a.[Date] = f.[Date]
          and a.ActualEst = 'A'
      );
然后像这样使用它:
-- Average Value per (IdType, PriceType) over the preferred rows: every 'A'
-- row, plus each 'F' row whose IdType / PriceType / Date has no 'A' row.
select
    picked.IdType,
    picked.PriceType,
    -- cast before averaging: AVG over an int column returns a truncated int
    avg(cast(picked.Value as decimal(18, 4))) as averagevalue
from (
    select a.IdType, a.PriceType, a.Value
    from tablename as a
    where a.ActualEst = 'A'
    union all
    select f.IdType, f.PriceType, f.Value
    from tablename as f
    where f.ActualEst = 'F'
      and not exists (
            select 1
            from tablename as a
            where a.IdType = f.IdType
              and a.PriceType = f.PriceType
              and a.[Date] = f.[Date]
              and a.ActualEst = 'A'
          )
) as picked
-- group by the requested keys; grouping by Date would leave one row per
-- group and make the average a no-op
group by picked.IdType, picked.PriceType;