SQL - LEAD 和 LAG 查询
SQL - LEAD and LAG Query
我不确定如何给这个问题起标题...希望没问题。
我有一个相当基础的数据集,其中显示的是价格(Rate)值:
它由下面这段逻辑生成:
-- Lists the rows of #TempRateShop whose Rate or RateRemark differs from the
-- neighbouring row (previous or next) within the same
-- HotelId / CompetitorId / ChannelId / RequestedRateType series.
WITH Rates AS (
    -- Number every source row once, in series order and then by shop date.
    SELECT
        HotelId,
        CompetitorId,
        RateShopDate,
        ChannelId,
        RequestedRateType,
        Rate,
        RateRemark,
        ROW_NUMBER() OVER (
            ORDER BY HotelId, CompetitorId, ChannelId, RequestedRateType, RateShopDate
        ) AS RowNumber
    FROM #TempRateShop
),
PrevNextRates AS (
    -- Attach the previous and next Rate / RateRemark of the same series to each row.
    SELECT
        HotelId,
        CompetitorId,
        RateShopDate,
        ChannelId,
        RequestedRateType,
        Rate,
        RateRemark,
        RowNumber,
        LAG(Rate) OVER (
            PARTITION BY HotelId, CompetitorId, ChannelId, RequestedRateType
            ORDER BY RowNumber
        ) AS PreviousRate,
        LAG(RateRemark) OVER (
            PARTITION BY HotelId, CompetitorId, ChannelId, RequestedRateType
            ORDER BY RowNumber
        ) AS PreviousRateRemark,
        LEAD(Rate) OVER (
            PARTITION BY HotelId, CompetitorId, ChannelId, RequestedRateType
            ORDER BY RowNumber
        ) AS NextRate,
        LEAD(RateRemark) OVER (
            PARTITION BY HotelId, CompetitorId, ChannelId, RequestedRateType
            ORDER BY RowNumber
        ) AS NextRateRemark
    FROM Rates
),
FilteredRates AS (
    -- Keep only rows that differ from a neighbour, then renumber the survivors.
    -- NULL rates compare as 0 and NULL remarks compare as '' in this filter.
    SELECT
        HotelId,
        CompetitorId,
        RateShopDate,
        ChannelId,
        RequestedRateType,
        Rate,
        RateRemark,
        PreviousRate,
        PreviousRateRemark,
        NextRate,
        NextRateRemark,
        ROW_NUMBER() OVER (
            ORDER BY HotelId, CompetitorId, ChannelId, RequestedRateType, RateShopDate
        ) AS RowNumber
    FROM PrevNextRates
    WHERE COALESCE(Rate, 0) <> COALESCE(PreviousRate, 0)
       OR COALESCE(RateRemark, '') <> COALESCE(PreviousRateRemark, '')
       OR COALESCE(Rate, 0) <> COALESCE(NextRate, 0)
       OR COALESCE(RateRemark, '') <> COALESCE(NextRateRemark, '')
)
SELECT
    fr.HotelId,
    fr.CompetitorId,
    fr.RateShopDate AS ShopDate,
    fr.Rate,
    fr.RateRemark,
    fr.RowNumber
FROM FilteredRates AS fr
ORDER BY fr.RateShopDate;
我正在尝试获取每次价格(Rate)变化的开始和结束 ShopDate。但是由于价格有可能从一个值变为另一个值、之后又变回原来的值,我遇到了一点困难;例如价格在一段时间内从 90 美元涨到 95 美元,然后又回落到 90 美元。
我想得到的是:
我想出的唯一解决方案是使用 LEAD/LAG 并按 RowNumber 列排序,然后按奇偶行号进行过滤……但我真的不认为这是最好的解决方案。
-- End date candidate: the RateShopDate of the following row in RowNumber order.
-- The window has no PARTITION BY, so "following row" is taken across the whole result set.
LEAD(RateShopDate) OVER (ORDER BY RowNumber) AS ShopEndDate
这是一类“间隙与孤岛”(gaps-and-islands)问题。
解决方法有很多,这里是一个:
- 使用 LAG 来识别每组开头的行
- 用一个带条件的累计(running)COUNT 为每个组生成一个 ID
- 然后简单地按这个 ID 分组
-- Gaps-and-islands: collapse consecutive rows of #RateShop that carry the same
-- Rate into one row per run, reporting the first and last RateShopDate of the run.
--
-- Output columns: HotelId, CompetitorId, StartDate, EndDate, Rate, RateRemark.
--
-- NOTE(review): runs are tracked per (HotelId, CompetitorId) only. If the data
-- also varies by other keys (e.g. channel or rate type), add them to both
-- PARTITION BY clauses and to the GROUP BY -- confirm against the table.
-- NOTE(review): assumes RateShopDate is unique within each partition; with ties
-- the row order seen by LAG is not deterministic.
WITH PrevValues AS (
    -- Pair each row with the Rate of the row just before it (NULL when there is none).
    SELECT
        rs.HotelId,
        rs.CompetitorId,
        rs.RateShopDate,
        rs.Rate,
        rs.RateRemark,
        LAG(rs.Rate) OVER (
            PARTITION BY rs.HotelId, rs.CompetitorId
            ORDER BY rs.RateShopDate
        ) AS PrevRate
    FROM #RateShop rs
),
StartFlags AS (
    -- Flag the first row of every run. A plain "<>" yields UNKNOWN when either
    -- side is NULL, so transitions to and from a NULL Rate are tested explicitly;
    -- otherwise a NULL row (and the row after it) would be merged into the
    -- preceding run.
    SELECT
        pv.HotelId,
        pv.CompetitorId,
        pv.RateShopDate,
        pv.Rate,
        pv.RateRemark,
        CASE
            WHEN pv.Rate <> pv.PrevRate THEN 1
            WHEN pv.Rate IS NULL AND pv.PrevRate IS NOT NULL THEN 1
            WHEN pv.Rate IS NOT NULL AND pv.PrevRate IS NULL THEN 1
        END AS IsStart
    FROM PrevValues pv
),
Groupings AS (
    -- Running count of start flags = identifier of the run the row belongs to.
    -- ROWS (not the default RANGE) keeps the frame strictly row-by-row.
    SELECT
        sf.HotelId,
        sf.CompetitorId,
        sf.RateShopDate,
        sf.Rate,
        sf.RateRemark,
        COUNT(sf.IsStart) OVER (
            PARTITION BY sf.HotelId, sf.CompetitorId
            ORDER BY sf.RateShopDate
            ROWS UNBOUNDED PRECEDING
        ) AS GroupId
    FROM StartFlags sf
)
SELECT
    g.HotelId,
    g.CompetitorId,
    MIN(g.RateShopDate) AS StartDate,
    MAX(g.RateShopDate) AS EndDate,
    MIN(g.Rate) AS Rate,  -- every row of a run has the same Rate
    -- Concatenate remarks in date order so the output is reproducible.
    STRING_AGG(g.RateRemark, '; ') WITHIN GROUP (ORDER BY g.RateShopDate) AS RateRemark
FROM Groupings g
GROUP BY
    g.HotelId,
    g.CompetitorId,
    g.GroupId
ORDER BY
    g.HotelId,
    g.CompetitorId,
    StartDate;
我不确定如何给这个问题起标题...希望没问题。
我有一个相当基础的数据集,其中显示的是价格(Rate)值:
它由下面这段逻辑生成:
-- Lists the rows of #TempRateShop whose Rate or RateRemark differs from the
-- neighbouring row (previous or next) within the same
-- HotelId / CompetitorId / ChannelId / RequestedRateType series.
WITH Rates AS (
    -- Number every source row once, in series order and then by shop date.
    SELECT
        HotelId,
        CompetitorId,
        RateShopDate,
        ChannelId,
        RequestedRateType,
        Rate,
        RateRemark,
        ROW_NUMBER() OVER (
            ORDER BY HotelId, CompetitorId, ChannelId, RequestedRateType, RateShopDate
        ) AS RowNumber
    FROM #TempRateShop
),
PrevNextRates AS (
    -- Attach the previous and next Rate / RateRemark of the same series to each row.
    SELECT
        HotelId,
        CompetitorId,
        RateShopDate,
        ChannelId,
        RequestedRateType,
        Rate,
        RateRemark,
        RowNumber,
        LAG(Rate) OVER (
            PARTITION BY HotelId, CompetitorId, ChannelId, RequestedRateType
            ORDER BY RowNumber
        ) AS PreviousRate,
        LAG(RateRemark) OVER (
            PARTITION BY HotelId, CompetitorId, ChannelId, RequestedRateType
            ORDER BY RowNumber
        ) AS PreviousRateRemark,
        LEAD(Rate) OVER (
            PARTITION BY HotelId, CompetitorId, ChannelId, RequestedRateType
            ORDER BY RowNumber
        ) AS NextRate,
        LEAD(RateRemark) OVER (
            PARTITION BY HotelId, CompetitorId, ChannelId, RequestedRateType
            ORDER BY RowNumber
        ) AS NextRateRemark
    FROM Rates
),
FilteredRates AS (
    -- Keep only rows that differ from a neighbour, then renumber the survivors.
    -- NULL rates compare as 0 and NULL remarks compare as '' in this filter.
    SELECT
        HotelId,
        CompetitorId,
        RateShopDate,
        ChannelId,
        RequestedRateType,
        Rate,
        RateRemark,
        PreviousRate,
        PreviousRateRemark,
        NextRate,
        NextRateRemark,
        ROW_NUMBER() OVER (
            ORDER BY HotelId, CompetitorId, ChannelId, RequestedRateType, RateShopDate
        ) AS RowNumber
    FROM PrevNextRates
    WHERE COALESCE(Rate, 0) <> COALESCE(PreviousRate, 0)
       OR COALESCE(RateRemark, '') <> COALESCE(PreviousRateRemark, '')
       OR COALESCE(Rate, 0) <> COALESCE(NextRate, 0)
       OR COALESCE(RateRemark, '') <> COALESCE(NextRateRemark, '')
)
SELECT
    fr.HotelId,
    fr.CompetitorId,
    fr.RateShopDate AS ShopDate,
    fr.Rate,
    fr.RateRemark,
    fr.RowNumber
FROM FilteredRates AS fr
ORDER BY fr.RateShopDate;
我正在尝试获取每次价格(Rate)变化的开始和结束 ShopDate。但是由于价格有可能从一个值变为另一个值、之后又变回原来的值,我遇到了一点困难;例如价格在一段时间内从 90 美元涨到 95 美元,然后又回落到 90 美元。
我想得到的是:
我想出的唯一解决方案是使用 LEAD/LAG 并按 RowNumber 列排序,然后按奇偶行号进行过滤……但我真的不认为这是最好的解决方案。
-- End date candidate: the RateShopDate of the following row in RowNumber order.
-- The window has no PARTITION BY, so "following row" is taken across the whole result set.
LEAD(RateShopDate) OVER (ORDER BY RowNumber) AS ShopEndDate
这是一类“间隙与孤岛”(gaps-and-islands)问题。
解决方法有很多,这里是一个:
- 使用 LAG 来识别每组开头的行
- 用一个带条件的累计(running)COUNT 为每个组生成一个 ID
- 然后简单地按这个 ID 分组
-- Gaps-and-islands: collapse consecutive rows of #RateShop that carry the same
-- Rate into one row per run, reporting the first and last RateShopDate of the run.
--
-- Output columns: HotelId, CompetitorId, StartDate, EndDate, Rate, RateRemark.
--
-- NOTE(review): runs are tracked per (HotelId, CompetitorId) only. If the data
-- also varies by other keys (e.g. channel or rate type), add them to both
-- PARTITION BY clauses and to the GROUP BY -- confirm against the table.
-- NOTE(review): assumes RateShopDate is unique within each partition; with ties
-- the row order seen by LAG is not deterministic.
WITH PrevValues AS (
    -- Pair each row with the Rate of the row just before it (NULL when there is none).
    SELECT
        rs.HotelId,
        rs.CompetitorId,
        rs.RateShopDate,
        rs.Rate,
        rs.RateRemark,
        LAG(rs.Rate) OVER (
            PARTITION BY rs.HotelId, rs.CompetitorId
            ORDER BY rs.RateShopDate
        ) AS PrevRate
    FROM #RateShop rs
),
StartFlags AS (
    -- Flag the first row of every run. A plain "<>" yields UNKNOWN when either
    -- side is NULL, so transitions to and from a NULL Rate are tested explicitly;
    -- otherwise a NULL row (and the row after it) would be merged into the
    -- preceding run.
    SELECT
        pv.HotelId,
        pv.CompetitorId,
        pv.RateShopDate,
        pv.Rate,
        pv.RateRemark,
        CASE
            WHEN pv.Rate <> pv.PrevRate THEN 1
            WHEN pv.Rate IS NULL AND pv.PrevRate IS NOT NULL THEN 1
            WHEN pv.Rate IS NOT NULL AND pv.PrevRate IS NULL THEN 1
        END AS IsStart
    FROM PrevValues pv
),
Groupings AS (
    -- Running count of start flags = identifier of the run the row belongs to.
    -- ROWS (not the default RANGE) keeps the frame strictly row-by-row.
    SELECT
        sf.HotelId,
        sf.CompetitorId,
        sf.RateShopDate,
        sf.Rate,
        sf.RateRemark,
        COUNT(sf.IsStart) OVER (
            PARTITION BY sf.HotelId, sf.CompetitorId
            ORDER BY sf.RateShopDate
            ROWS UNBOUNDED PRECEDING
        ) AS GroupId
    FROM StartFlags sf
)
SELECT
    g.HotelId,
    g.CompetitorId,
    MIN(g.RateShopDate) AS StartDate,
    MAX(g.RateShopDate) AS EndDate,
    MIN(g.Rate) AS Rate,  -- every row of a run has the same Rate
    -- Concatenate remarks in date order so the output is reproducible.
    STRING_AGG(g.RateRemark, '; ') WITHIN GROUP (ORDER BY g.RateShopDate) AS RateRemark
FROM Groupings g
GROUP BY
    g.HotelId,
    g.CompetitorId,
    g.GroupId
ORDER BY
    g.HotelId,
    g.CompetitorId,
    StartDate;