在 T-SQL 中使用 Over 和 Partition by 时缺少 Min() 上的日期
Missing dates on Min() while using Over and Partition by in T-SQL
我正在使用 OVER 和 Partition by 来获取数据集的 mindate 和最大日期。
|ResdetId | bookingdate | Amount | AmountExcl |
-----------------------------------------------
|120106 | 2018-02-04 | 75.00 | 70.7547 |
|120106 | 2018-02-05 | 75.00 | 70.7547 |
|120106 | 2018-02-06 | 90.00 | 84.9057 |
|120106 | 2018-02-08 | 75.00 | 70.7547 |
|120106 | 2018-02-09 | 75.00 | 70.7547 |
我正在使用这个查询
-- For every (ResDetId, Amount, AmountExcl) combination, report the earliest
-- and the latest Bookingdate. The windowed MIN/MAX repeat the same value on
-- each row of a partition, so DISTINCT collapses them to one row per
-- combination. Bookingdate itself is not part of the partition.
SELECT DISTINCT
    src.ResDetId,
    src.Amount,
    src.AmountExcl,
    MIN(src.Bookingdate) OVER (
        PARTITION BY src.ResDetId, src.Amount, src.AmountExcl
    ) AS Mindate,
    MAX(src.Bookingdate) OVER (
        PARTITION BY src.ResDetId, src.Amount, src.AmountExcl
    ) AS MaxDate
FROM @Cumulatedbookingdetails AS src
我得到了这个结果
|ResdetId | Amount | AmountExcl | MinDate | MaxDate |
------------------------------------------------------------
|120106 | 75.00 | 70.7547 | 2018-02-04 | 2018-02-09 |
|120106 | 90.00 | 84.9057 | 2018-02-06 | 2018-02-06 |
如我们所见,数据集中缺少日期 2018-02-07 的记录。所以,我需要这样的结果
|ResdetId | Amount | AmountExcl | MinDate | MaxDate |
------------------------------------------------------------
|120106 | 75.00 | 70.7547 | 2018-02-04 | 2018-02-05 |
|120106 | 75.00 | 70.7547 | 2018-02-08 | 2018-02-09 |
|120106 | 90.00 | 84.9057 | 2018-02-06 | 2018-02-06 |
您没有看到 2018-02-07,因为预订日期不在您的分区中,所以
|ResdetId | Amount | AmountExcl
--------------------------------
|120106 | 75.00 | 70.7547
|120106 | 90.00 | 84.9057
在您的分区中是独一无二的。所以它就像一把钥匙。您需要另一个属性来区分相同的数据:
|ResdetId | Amount | AmountExcl
--------------------------------
|120106 | 75.00 | 70.7547
使用 GROUP BY
会容易得多。 OVER
和 DISTINCT
有很多 "harder" 方法来执行相同的查询:
-- Inline sample data standing in for the bookings table. The yyyymmdd
-- strings are cast to DATE so that MIN/MAX operate on dates, not on text.
WITH SampleBookings AS (
    SELECT
        src.ResdetId,
        CAST(src.bookingdate AS date) AS bookingdate,
        src.Amount,
        src.AmountExcl
    FROM (
        VALUES
            (120106, '20180204', 75.00, 70.7547),
            (120106, '20180205', 75.00, 70.7547),
            (120106, '20180206', 90.00, 84.9057),
            (120106, '20180208', 75.00, 70.7547),
            (120106, '20180209', 75.00, 70.7547)
    ) AS src (ResdetId, bookingdate, Amount, AmountExcl)
)
-- One row per (ResdetId, Amount, AmountExcl) with its first and last booking
-- date. Gaps between dates are not considered: all rows of a combination are
-- aggregated together.
SELECT
    sb.ResdetId,
    sb.Amount,
    sb.AmountExcl,
    MIN(sb.bookingdate) AS MinBookingDate,
    MAX(sb.bookingdate) AS MaxBookingDate
FROM SampleBookings AS sb
GROUP BY
    sb.ResdetId,
    sb.Amount,
    sb.AmountExcl;
正如我的 Sami 所说,我读错了结果,这是一个 Gaps and Island 问题:
-- Gaps and islands: return one row per run of consecutive booking dates that
-- share the same ResdetId, Amount and AmountExcl, with the first and last
-- date of that run.

-- Inline sample data; the yyyymmdd strings are converted to DATE.
WITH VTE AS (
    SELECT ResdetId,
           CONVERT(date, bookingdate) AS bookingdate,
           Amount,
           AmountExcl
    FROM (VALUES (120106, '20180204', 75.00, 70.7547),
                 (120106, '20180205', 75.00, 70.7547),
                 (120106, '20180206', 90.00, 84.9057),
                 (120106, '20180208', 75.00, 70.7547),
                 (120106, '20180209', 75.00, 70.7547)) V(ResdetId, bookingdate, Amount, AmountExcl)),
Grps AS (
    SELECT V.ResdetId,
           V.bookingdate,
           V.Amount,
           V.AmountExcl,
           -- Inside one (ResdetId, Amount, AmountExcl) partition, consecutive
           -- dates and their rank both advance by one, so "date minus rank"
           -- is constant for an island and jumps whenever a day is missing.
           -- A day booked at a different price is absent from this partition
           -- and therefore also breaks the island. Comparing two row numbers
           -- instead would only notice price changes, never a missing date.
           -- DENSE_RANK keeps duplicate rows for the same day in one island.
           DATEADD(DAY,
                   -DENSE_RANK() OVER (PARTITION BY V.ResdetId, V.Amount, V.AmountExcl
                                       ORDER BY V.bookingdate),
                   V.bookingdate) AS Grp
    FROM VTE V)
SELECT ResdetId,
       Amount,
       AmountExcl,
       MIN(bookingdate) AS MinBookingDate,
       MAX(bookingdate) AS MaxBookingDate
FROM Grps
GROUP BY ResdetId,
         Amount,
         AmountExcl,
         Grp
ORDER BY ResdetId,
         Amount,
         MinBookingDate;
解决诸如此类的 "Islands and Gaps" 问题的一种方法是使用递归 CTE 来构建孤岛。我们让非递归部分(在union
之上)找到标记每个岛开始的行,递归部分使每个岛一次增长一个匹配项。
不幸的是,CTE 的最终结果包含了用于构建岛屿的所有中间行,因此您需要一个最终的 GROUP
到 select 最终的岛屿:
-- Sample data. Note that decimal(9,3) stores 70.7547 rounded to 70.755.
declare @t table (ResdetId int, bookingdate date, Amount decimal(9,3), AmountExcl decimal (9,3));

insert into @t(ResdetId,bookingdate,Amount,AmountExcl) values
(120106,'20180204',75.00,70.7547),
(120106,'20180205',75.00,70.7547),
(120106,'20180206',90.00,84.9057),
(120106,'20180208',75.00,70.7547),
(120106,'20180209',75.00,70.7547);

-- Build each island (run of consecutive days with the same ResdetId, Amount
-- and AmountExcl) one day at a time.
with Islands as (
    -- Anchor: rows that start an island, i.e. rows with no matching row for
    -- the previous day.
    select ResdetId, Amount, AmountExcl, bookingdate as MinDate, bookingdate as MaxDate
    from @t t
    where not exists (select * from @t t2
                      where t2.ResdetId = t.ResdetId
                        and t2.Amount = t.Amount
                        and t2.AmountExcl = t.AmountExcl
                        and t2.bookingdate = DATEADD(day,-1,t.bookingdate))
    union all
    -- Recursive step: extend an island by the matching row for the day right
    -- after its current MaxDate, keeping the island's original MinDate.
    select i.ResdetId, i.Amount, i.AmountExcl, i.MinDate, t.bookingdate
    from Islands i
        inner join
        @t t
            on t.ResdetId = i.ResdetId
           and t.Amount = i.Amount
           and t.AmountExcl = i.AmountExcl
           and t.bookingdate = DATEADD(day,1,i.MaxDate)
)
-- The CTE also returns every intermediate, partially grown island; keep only
-- the furthest MaxDate reached for each island start.
select
    ResdetId, Amount, AmountExcl, MinDate, MAX(MaxDate) as MaxDate
from
    Islands
group by ResdetId, Amount, AmountExcl, MinDate
-- Each recursion level adds one day, and the default limit of 100 levels
-- would abort the query for an island longer than 101 days. The recursion
-- always ends because every step moves to a strictly later date in @t, so
-- lifting the limit is safe.
option (maxrecursion 0);
结果:
ResdetId Amount AmountExcl MinDate MaxDate
----------- --------- ------------ ---------- ----------
120106 75.000 70.755 2018-02-04 2018-02-05
120106 75.000 70.755 2018-02-08 2018-02-09
120106 90.000 84.906 2018-02-06 2018-02-06
试试这个,它使用行号差异技术:
-- Sample data. Amounts are exact decimals rather than FLOAT: they are money
-- values and are used as grouping keys, where approximate types are unsafe.
declare @tbl table(ResdetId int, bookingdate date, Amount decimal(9,4), AmountExcl decimal(9,4));

insert into @tbl (ResdetId, bookingdate, Amount, AmountExcl) values
(120106 , '2018-02-04' , 75.00 , 70.7547 ),
(120106 , '2018-02-05' , 75.00 , 70.7547 ),
(120106 , '2018-02-06' , 90.00 , 84.9057 ),
(120106 , '2018-02-08' , 75.00 , 70.7547 ),
(120106 , '2018-02-09' , 75.00 , 70.7547 );

-- One row per island: a run of consecutive booking dates for the same
-- reservation detail at the same price.
select a.ResdetId,
       MIN(a.bookingdate) as MinDate,
       MAX(a.bookingdate) as MaxDate,
       a.Amount,
       a.AmountExcl
from (
    select t.ResdetId,
           t.bookingdate,
           t.Amount,
           t.AmountExcl,
           -- Row-number difference: within one (ResdetId, Amount, AmountExcl)
           -- partition, "date minus row number" stays constant while the
           -- dates are consecutive and changes after any missing day, so it
           -- identifies the island. Partitioning by ResdetId keeps different
           -- reservations from being merged.
           -- NOTE(review): assumes at most one row per reservation detail,
           -- price and day; duplicate rows would split an island.
           DATEADD(day,
                   -ROW_NUMBER() over (partition by t.ResdetId, t.Amount, t.AmountExcl
                                       order by t.bookingdate),
                   t.bookingdate) as grp
    from @tbl t
) a
group by a.ResdetId, a.Amount, a.AmountExcl, a.grp
order by a.ResdetId, MinDate;
我正在使用 OVER 和 Partition by 来获取数据集的 mindate 和最大日期。
|ResdetId | bookingdate | Amount | AmountExcl |
-----------------------------------------------
|120106 | 2018-02-04 | 75.00 | 70.7547 |
|120106 | 2018-02-05 | 75.00 | 70.7547 |
|120106 | 2018-02-06 | 90.00 | 84.9057 |
|120106 | 2018-02-08 | 75.00 | 70.7547 |
|120106 | 2018-02-09 | 75.00 | 70.7547 |
我正在使用这个查询
-- For every (ResDetId, Amount, AmountExcl) combination, report the earliest
-- and the latest Bookingdate. The windowed MIN/MAX repeat the same value on
-- each row of a partition, so DISTINCT collapses them to one row per
-- combination. Bookingdate itself is not part of the partition.
SELECT DISTINCT
    src.ResDetId,
    src.Amount,
    src.AmountExcl,
    MIN(src.Bookingdate) OVER (
        PARTITION BY src.ResDetId, src.Amount, src.AmountExcl
    ) AS Mindate,
    MAX(src.Bookingdate) OVER (
        PARTITION BY src.ResDetId, src.Amount, src.AmountExcl
    ) AS MaxDate
FROM @Cumulatedbookingdetails AS src
我得到了这个结果
|ResdetId | Amount | AmountExcl | MinDate | MaxDate |
------------------------------------------------------------
|120106 | 75.00 | 70.7547 | 2018-02-04 | 2018-02-09 |
|120106 | 90.00 | 84.9057 | 2018-02-06 | 2018-02-06 |
如我们所见,数据集中缺少日期 2018-02-07 的记录。所以,我需要这样的结果
|ResdetId | Amount | AmountExcl | MinDate | MaxDate |
------------------------------------------------------------
|120106 | 75.00 | 70.7547 | 2018-02-04 | 2018-02-05 |
|120106 | 75.00 | 70.7547 | 2018-02-08 | 2018-02-09 |
|120106 | 90.00 | 84.9057 | 2018-02-06 | 2018-02-06 |
您没有看到 2018-02-07,因为预订日期不在您的分区中,所以
|ResdetId | Amount | AmountExcl
--------------------------------
|120106 | 75.00 | 70.7547
|120106 | 90.00 | 84.9057
在您的分区中是独一无二的。所以它就像一把钥匙。您需要另一个属性来区分相同的数据:
|ResdetId | Amount | AmountExcl
--------------------------------
|120106 | 75.00 | 70.7547
使用 GROUP BY
会容易得多。 OVER
和 DISTINCT
有很多 "harder" 方法来执行相同的查询:
-- Inline sample data standing in for the bookings table. The yyyymmdd
-- strings are cast to DATE so that MIN/MAX operate on dates, not on text.
WITH SampleBookings AS (
    SELECT
        src.ResdetId,
        CAST(src.bookingdate AS date) AS bookingdate,
        src.Amount,
        src.AmountExcl
    FROM (
        VALUES
            (120106, '20180204', 75.00, 70.7547),
            (120106, '20180205', 75.00, 70.7547),
            (120106, '20180206', 90.00, 84.9057),
            (120106, '20180208', 75.00, 70.7547),
            (120106, '20180209', 75.00, 70.7547)
    ) AS src (ResdetId, bookingdate, Amount, AmountExcl)
)
-- One row per (ResdetId, Amount, AmountExcl) with its first and last booking
-- date. Gaps between dates are not considered: all rows of a combination are
-- aggregated together.
SELECT
    sb.ResdetId,
    sb.Amount,
    sb.AmountExcl,
    MIN(sb.bookingdate) AS MinBookingDate,
    MAX(sb.bookingdate) AS MaxBookingDate
FROM SampleBookings AS sb
GROUP BY
    sb.ResdetId,
    sb.Amount,
    sb.AmountExcl;
正如我的 Sami 所说,我读错了结果,这是一个 Gaps and Island 问题:
-- Gaps and islands: return one row per run of consecutive booking dates that
-- share the same ResdetId, Amount and AmountExcl, with the first and last
-- date of that run.

-- Inline sample data; the yyyymmdd strings are converted to DATE.
WITH VTE AS (
    SELECT ResdetId,
           CONVERT(date, bookingdate) AS bookingdate,
           Amount,
           AmountExcl
    FROM (VALUES (120106, '20180204', 75.00, 70.7547),
                 (120106, '20180205', 75.00, 70.7547),
                 (120106, '20180206', 90.00, 84.9057),
                 (120106, '20180208', 75.00, 70.7547),
                 (120106, '20180209', 75.00, 70.7547)) V(ResdetId, bookingdate, Amount, AmountExcl)),
Grps AS (
    SELECT V.ResdetId,
           V.bookingdate,
           V.Amount,
           V.AmountExcl,
           -- Inside one (ResdetId, Amount, AmountExcl) partition, consecutive
           -- dates and their rank both advance by one, so "date minus rank"
           -- is constant for an island and jumps whenever a day is missing.
           -- A day booked at a different price is absent from this partition
           -- and therefore also breaks the island. Comparing two row numbers
           -- instead would only notice price changes, never a missing date.
           -- DENSE_RANK keeps duplicate rows for the same day in one island.
           DATEADD(DAY,
                   -DENSE_RANK() OVER (PARTITION BY V.ResdetId, V.Amount, V.AmountExcl
                                       ORDER BY V.bookingdate),
                   V.bookingdate) AS Grp
    FROM VTE V)
SELECT ResdetId,
       Amount,
       AmountExcl,
       MIN(bookingdate) AS MinBookingDate,
       MAX(bookingdate) AS MaxBookingDate
FROM Grps
GROUP BY ResdetId,
         Amount,
         AmountExcl,
         Grp
ORDER BY ResdetId,
         Amount,
         MinBookingDate;
解决诸如此类的 "Islands and Gaps" 问题的一种方法是使用递归 CTE 来构建孤岛。我们让非递归部分(在union
之上)找到标记每个岛开始的行,递归部分使每个岛一次增长一个匹配项。
不幸的是,CTE 的最终结果包含了用于构建岛屿的所有中间行,因此您需要一个最终的 GROUP
到 select 最终的岛屿:
-- Sample data. Note that decimal(9,3) stores 70.7547 rounded to 70.755.
declare @t table (ResdetId int, bookingdate date, Amount decimal(9,3), AmountExcl decimal (9,3));

insert into @t(ResdetId,bookingdate,Amount,AmountExcl) values
(120106,'20180204',75.00,70.7547),
(120106,'20180205',75.00,70.7547),
(120106,'20180206',90.00,84.9057),
(120106,'20180208',75.00,70.7547),
(120106,'20180209',75.00,70.7547);

-- Build each island (run of consecutive days with the same ResdetId, Amount
-- and AmountExcl) one day at a time.
with Islands as (
    -- Anchor: rows that start an island, i.e. rows with no matching row for
    -- the previous day.
    select ResdetId, Amount, AmountExcl, bookingdate as MinDate, bookingdate as MaxDate
    from @t t
    where not exists (select * from @t t2
                      where t2.ResdetId = t.ResdetId
                        and t2.Amount = t.Amount
                        and t2.AmountExcl = t.AmountExcl
                        and t2.bookingdate = DATEADD(day,-1,t.bookingdate))
    union all
    -- Recursive step: extend an island by the matching row for the day right
    -- after its current MaxDate, keeping the island's original MinDate.
    select i.ResdetId, i.Amount, i.AmountExcl, i.MinDate, t.bookingdate
    from Islands i
        inner join
        @t t
            on t.ResdetId = i.ResdetId
           and t.Amount = i.Amount
           and t.AmountExcl = i.AmountExcl
           and t.bookingdate = DATEADD(day,1,i.MaxDate)
)
-- The CTE also returns every intermediate, partially grown island; keep only
-- the furthest MaxDate reached for each island start.
select
    ResdetId, Amount, AmountExcl, MinDate, MAX(MaxDate) as MaxDate
from
    Islands
group by ResdetId, Amount, AmountExcl, MinDate
-- Each recursion level adds one day, and the default limit of 100 levels
-- would abort the query for an island longer than 101 days. The recursion
-- always ends because every step moves to a strictly later date in @t, so
-- lifting the limit is safe.
option (maxrecursion 0);
结果:
ResdetId Amount AmountExcl MinDate MaxDate
----------- --------- ------------ ---------- ----------
120106 75.000 70.755 2018-02-04 2018-02-05
120106 75.000 70.755 2018-02-08 2018-02-09
120106 90.000 84.906 2018-02-06 2018-02-06
试试这个,它使用行号差异技术:
-- Sample data. Amounts are exact decimals rather than FLOAT: they are money
-- values and are used as grouping keys, where approximate types are unsafe.
declare @tbl table(ResdetId int, bookingdate date, Amount decimal(9,4), AmountExcl decimal(9,4));

insert into @tbl (ResdetId, bookingdate, Amount, AmountExcl) values
(120106 , '2018-02-04' , 75.00 , 70.7547 ),
(120106 , '2018-02-05' , 75.00 , 70.7547 ),
(120106 , '2018-02-06' , 90.00 , 84.9057 ),
(120106 , '2018-02-08' , 75.00 , 70.7547 ),
(120106 , '2018-02-09' , 75.00 , 70.7547 );

-- One row per island: a run of consecutive booking dates for the same
-- reservation detail at the same price.
select a.ResdetId,
       MIN(a.bookingdate) as MinDate,
       MAX(a.bookingdate) as MaxDate,
       a.Amount,
       a.AmountExcl
from (
    select t.ResdetId,
           t.bookingdate,
           t.Amount,
           t.AmountExcl,
           -- Row-number difference: within one (ResdetId, Amount, AmountExcl)
           -- partition, "date minus row number" stays constant while the
           -- dates are consecutive and changes after any missing day, so it
           -- identifies the island. Partitioning by ResdetId keeps different
           -- reservations from being merged.
           -- NOTE(review): assumes at most one row per reservation detail,
           -- price and day; duplicate rows would split an island.
           DATEADD(day,
                   -ROW_NUMBER() over (partition by t.ResdetId, t.Amount, t.AmountExcl
                                       order by t.bookingdate),
                   t.bookingdate) as grp
    from @tbl t
) a
group by a.ResdetId, a.Amount, a.AmountExcl, a.grp
order by a.ResdetId, MinDate;