T-SQL:选择最小值和最大值的每个实例
T-SQL: Selecting every instance of min and max values
我有一个叫 #TimeAtHome
的 table。它包括一个 address
、date
和一个标志 atHome
,以指示该人当天是否在家。我需要针对每个 address,为该人不在家(atHome = 0)的每一段连续日期分别获取 min 和 max date。
下面是一些示例代码:
-- Sample fixture: one row per address per calendar day; atHome = 1 means the
-- person was at home that day, 0 means they were away.
-- Drop any leftover copy first so the script can be re-run in the same session.
IF OBJECT_ID('tempdb..#TimeAtHome') IS NOT NULL
    DROP TABLE #TimeAtHome;

CREATE TABLE #TimeAtHome (
    [address] varchar(100),
    [date]    date,
    [atHome]  bit
);

-- Explicit column list and numeric bit literals: no reliance on column order
-- or on implicit varchar-to-bit conversion.
INSERT INTO #TimeAtHome ([address], [date], [atHome])
VALUES
    ('123 ABC Street', '2020-01-01', 1),
    ('123 ABC Street', '2020-01-02', 1),
    ('123 ABC Street', '2020-01-03', 0),
    ('123 ABC Street', '2020-01-04', 0),
    ('123 ABC Street', '2020-01-05', 0),
    ('123 ABC Street', '2020-01-06', 0),
    ('123 ABC Street', '2020-01-07', 1),
    ('123 ABC Street', '2020-01-08', 0),
    ('123 ABC Street', '2020-01-09', 0),
    ('123 ABC Street', '2020-01-10', 1),
    ('777 Hello Ct', '2020-01-01', 1),
    ('777 Hello Ct', '2020-01-02', 1),
    ('777 Hello Ct', '2020-01-03', 1),
    ('777 Hello Ct', '2020-01-04', 0),
    ('777 Hello Ct', '2020-01-05', 1),
    ('777 Hello Ct', '2020-01-06', 1);
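这是我想要的结果(根据上面的示例数据推导):
address | minDate | maxDate
123 ABC Street | 2020-01-03 | 2020-01-06
123 ABC Street | 2020-01-08 | 2020-01-09
777 Hello Ct | 2020-01-04 | 2020-01-04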
我们可以尝试以下方法:
- 获取所有
minDate
值将 table 与其自身连接起来,并检查当前日期此人是否在家以及下一个日期不在家(将是子查询 1)。
- 以与第 1 点相同的方式获取所有
maxDate
,除了检查该人是否在下一个日期回来(子查询 2)。
- 每个地址首先
minDate
与第一个 maxDate
匹配,第二个 minDate
与第二个 maxDate
匹配,依此类推(加入子查询 1 和 2)。
-- Pairs the first and last day of each away streak per address using two
-- self-joins on consecutive calendar days.
-- Pairing is by ordinal position, so it assumes every away streak has an
-- at-home row on the day before it and on the day after it.
;WITH q1 AS (
    -- Subquery 1: away days (t2) whose previous calendar day (t1) was at home.
    SELECT
        t2.address,
        t2.date AS minDate,
        ROW_NUMBER() OVER (PARTITION BY t2.address ORDER BY t2.date) AS streakNo
    FROM #TimeAtHome AS t1
    INNER JOIN #TimeAtHome AS t2
        ON t1.address = t2.address
       AND t1.date = DATEADD(DAY, -1, t2.date)
    WHERE t1.atHome = 1
      AND t2.atHome = 0
),
q2 AS (
    -- Subquery 2: away days (t1) whose next calendar day (t2) is at home.
    SELECT
        t1.address,
        t1.date AS maxDate,
        ROW_NUMBER() OVER (PARTITION BY t1.address ORDER BY t1.date) AS streakNo
    FROM #TimeAtHome AS t1
    INNER JOIN #TimeAtHome AS t2
        ON t1.address = t2.address
       AND t1.date = DATEADD(DAY, -1, t2.date)
    WHERE t1.atHome = 0
      AND t2.atHome = 1
)
-- The n-th streak start of an address is matched with its n-th streak end.
SELECT
    q1.address,
    q1.minDate,
    q2.maxDate
FROM q1
INNER JOIN q2
    ON q2.address = q1.address
   AND q2.streakNo = q1.streakNo
请注意此查询的限制条件
- table 中的日期应该是连续的,因此要在 table 中找到下一条记录,我们可以简单地减去一天
t1.date = DATEADD(DAY, -1, t2.date)
。
- 这个人从家里开始,所以他外出时的第一个
minDate
与他回来时的第一个 maxDate
相匹配。
查询如下:cte1 用于为每一行补充相邻记录的信息,供后续步骤使用;cte2 用于查找 minDate,cte3 用于获取 maxDate,最后按 ROW_NUMBER() 生成的行号将两者连接。
-- Returns one row per away streak (consecutive atHome = 0 rows) per address,
-- with the streak's first day (minDate) and last day (maxDate).
-- Streak numbers are assigned per address and the final join also matches on
-- address, so a streak in one address can never be paired with another's.
;WITH cte1 AS (
    -- Attach the atHome flag of the previous and next row of the same address.
    SELECT
        address,
        date,
        atHome,
        LAG(atHome)  OVER (PARTITION BY address ORDER BY date) AS PrevAtHome,
        LEAD(atHome) OVER (PARTITION BY address ORDER BY date) AS NextAtHome
    FROM #TimeAtHome
),
cte2 AS (
    -- Streak starts: an away row whose previous row is at home, or which has
    -- no previous row at all (the data begins while the person is away).
    -- NOTE(review): a NULL atHome on the previous row is treated the same way.
    SELECT
        address,
        date AS minDate,
        ROW_NUMBER() OVER (PARTITION BY address ORDER BY date) AS R1
    FROM cte1
    WHERE atHome = 0
      AND (PrevAtHome = 1 OR PrevAtHome IS NULL)
),
cte3 AS (
    -- Streak ends: an away row whose next row is at home, or which has no
    -- next row at all (the data ends while the person is still away).
    SELECT
        address,
        date AS maxDate,
        ROW_NUMBER() OVER (PARTITION BY address ORDER BY date) AS R2
    FROM cte1
    WHERE atHome = 0
      AND (NextAtHome = 1 OR NextAtHome IS NULL)
)
-- Every streak has exactly one start and one end, so the n-th start of an
-- address always belongs with its n-th end.
SELECT
    cte2.address,
    cte2.minDate,
    cte3.maxDate
FROM cte2
INNER JOIN cte3
    ON cte3.address = cte2.address
   AND cte3.R2 = cte2.R1
ORDER BY cte2.address, cte2.minDate;
我想我使用了一个更简单的解决方案,因为这看起来像是一个缺口和孤岛问题。因此,我使用 LAG() 函数根据 AtHome 标志查找岛屿的起点和终点。然后我使用 SUM() 函数创建一个组并从那里聚合日期:
-- Gaps-and-islands: returns the first and last day of every away streak
-- (consecutive atHome = 0 rows) per address.
;WITH T AS (
    -- Previous row's flag; the first row of an address defaults to its own
    -- flag so it never counts as a change.
    SELECT
        address,
        date,
        atHome,
        LAG(atHome, 1, atHome) OVER (PARTITION BY address ORDER BY date) AS PrevAtHome
    FROM #TimeAtHome
),
Final AS (
    -- Running count of flag changes: rows of one streak share the same Grp.
    -- ROWS framing keeps this a strict row-by-row running total.
    SELECT
        address,
        date,
        atHome,
        SUM(CASE WHEN atHome <> PrevAtHome THEN 1 ELSE 0 END)
            OVER (PARTITION BY address ORDER BY date ROWS UNBOUNDED PRECEDING) AS Grp
    FROM T
)
SELECT
    address,
    MIN(date) AS minDate,
    MAX(date) AS maxDate
FROM Final
WHERE atHome = 0
GROUP BY address, Grp
-- minDate as a tie-breaker makes the streak order within an address deterministic.
ORDER BY address, minDate;
还有一种可能性:
-- Sample fixture: one row per address per calendar day; atHome = 1 means the
-- person was at home that day, 0 means they were away.
-- Drop any leftover copy first so the script can be re-run in the same session.
IF OBJECT_ID('tempdb..#TimeAtHome') IS NOT NULL
    DROP TABLE #TimeAtHome;

CREATE TABLE #TimeAtHome (
    [address] varchar(100),
    [date]    date,
    [atHome]  bit
);

-- Explicit column list and numeric bit literals: no reliance on column order
-- or on implicit varchar-to-bit conversion.
INSERT INTO #TimeAtHome ([address], [date], [atHome])
VALUES
    ('123 ABC Street', '2020-01-01', 1),
    ('123 ABC Street', '2020-01-02', 1),
    ('123 ABC Street', '2020-01-03', 0),
    ('123 ABC Street', '2020-01-04', 0),
    ('123 ABC Street', '2020-01-05', 0),
    ('123 ABC Street', '2020-01-06', 0),
    ('123 ABC Street', '2020-01-07', 1),
    ('123 ABC Street', '2020-01-08', 0),
    ('123 ABC Street', '2020-01-09', 0),
    ('123 ABC Street', '2020-01-10', 1),
    ('777 Hello Ct', '2020-01-01', 1),
    ('777 Hello Ct', '2020-01-02', 1),
    ('777 Hello Ct', '2020-01-03', 1),
    ('777 Hello Ct', '2020-01-04', 0),
    ('777 Hello Ct', '2020-01-05', 1),
    ('777 Hello Ct', '2020-01-06', 1);
-- Returns the first and last day of every run of consecutive away dates per
-- address. Within such a run, "date minus row number" is constant, so that
-- value (DtRange) serves as the grouping key.
-- Assumes dates are unique per address; a missing calendar day splits a run.
;WITH dt AS (
    SELECT
        t.[address],
        t.[date] AS Dt,
        -- Explicit date arithmetic instead of passing an integer row number to
        -- DATEDIFF and relying on its implicit conversion to a datetime.
        DATEADD(
            DAY,
            -CAST(ROW_NUMBER() OVER (PARTITION BY t.[address] ORDER BY t.[date]) AS int),
            t.[date]
        ) AS DtRange
    FROM #TimeAtHome AS t
    WHERE t.[atHome] = 0
)
SELECT
    dt.address,
    MIN(dt.Dt) AS minDate,
    MAX(dt.Dt) AS maxDate
FROM dt
GROUP BY dt.address, dt.DtRange
ORDER BY address, minDate;
还有另一种方法
-- Returns the boundary rows (first and last away day) of EVERY away streak per
-- address. Ranking within each streak, not within the whole address, keeps
-- streaks in the middle of the date range from being dropped.
-- A one-day streak yields a single row with both rank columns equal to 1.
;WITH numbered AS (
    -- For away rows, the difference of the two row numbers equals the number
    -- of non-away rows that precede them, which is constant within one streak
    -- and grows between streaks.
    SELECT
        [address],
        [date],
        [atHome],
        ROW_NUMBER() OVER (PARTITION BY [address] ORDER BY [date])
      - ROW_NUMBER() OVER (PARTITION BY [address], [atHome] ORDER BY [date]) AS streakId
    FROM #TimeAtHome
),
A AS (
    -- Rank each away row from both ends of its streak.
    SELECT
        [address],
        [date],
        [atHome],
        ROW_NUMBER() OVER (PARTITION BY [address], streakId ORDER BY [date])      AS PrevAtHome_A,
        ROW_NUMBER() OVER (PARTITION BY [address], streakId ORDER BY [date] DESC) AS PrevAtHome_D
    FROM numbered
    WHERE [atHome] = 0
)
SELECT
    [address],
    [date],
    [atHome],
    PrevAtHome_A,
    PrevAtHome_D
FROM A
WHERE PrevAtHome_A = 1
   OR PrevAtHome_D = 1
ORDER BY [address], [date];
我有一个叫 #TimeAtHome
的 table。它包括一个 address
、date
和一个标志 atHome
,以指示该人当天是否在家。我需要针对每个 address,为该人不在家(atHome = 0)的每一段连续日期分别获取 min 和 max date。
下面是一些示例代码:
-- Sample fixture: one row per address per calendar day; atHome = 1 means the
-- person was at home that day, 0 means they were away.
-- Drop any leftover copy first so the script can be re-run in the same session.
IF OBJECT_ID('tempdb..#TimeAtHome') IS NOT NULL
    DROP TABLE #TimeAtHome;

CREATE TABLE #TimeAtHome (
    [address] varchar(100),
    [date]    date,
    [atHome]  bit
);

-- Explicit column list and numeric bit literals: no reliance on column order
-- or on implicit varchar-to-bit conversion.
INSERT INTO #TimeAtHome ([address], [date], [atHome])
VALUES
    ('123 ABC Street', '2020-01-01', 1),
    ('123 ABC Street', '2020-01-02', 1),
    ('123 ABC Street', '2020-01-03', 0),
    ('123 ABC Street', '2020-01-04', 0),
    ('123 ABC Street', '2020-01-05', 0),
    ('123 ABC Street', '2020-01-06', 0),
    ('123 ABC Street', '2020-01-07', 1),
    ('123 ABC Street', '2020-01-08', 0),
    ('123 ABC Street', '2020-01-09', 0),
    ('123 ABC Street', '2020-01-10', 1),
    ('777 Hello Ct', '2020-01-01', 1),
    ('777 Hello Ct', '2020-01-02', 1),
    ('777 Hello Ct', '2020-01-03', 1),
    ('777 Hello Ct', '2020-01-04', 0),
    ('777 Hello Ct', '2020-01-05', 1),
    ('777 Hello Ct', '2020-01-06', 1);
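这是我想要的结果(根据上面的示例数据推导):
address | minDate | maxDate
123 ABC Street | 2020-01-03 | 2020-01-06
123 ABC Street | 2020-01-08 | 2020-01-09
777 Hello Ct | 2020-01-04 | 2020-01-04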
我们可以尝试以下方法:
- 获取所有
minDate
值将 table 与其自身连接起来,并检查当前日期此人是否在家以及下一个日期不在家(将是子查询 1)。 - 以与第 1 点相同的方式获取所有
maxDate
,除了检查该人是否在下一个日期回来(子查询 2)。 - 每个地址首先
minDate
与第一个maxDate
匹配,第二个minDate
与第二个maxDate
匹配,依此类推(加入子查询 1 和 2)。
-- Pairs the first and last day of each away streak per address using two
-- self-joins on consecutive calendar days.
-- Pairing is by ordinal position, so it assumes every away streak has an
-- at-home row on the day before it and on the day after it.
;WITH q1 AS (
    -- Subquery 1: away days (t2) whose previous calendar day (t1) was at home.
    SELECT
        t2.address,
        t2.date AS minDate,
        ROW_NUMBER() OVER (PARTITION BY t2.address ORDER BY t2.date) AS streakNo
    FROM #TimeAtHome AS t1
    INNER JOIN #TimeAtHome AS t2
        ON t1.address = t2.address
       AND t1.date = DATEADD(DAY, -1, t2.date)
    WHERE t1.atHome = 1
      AND t2.atHome = 0
),
q2 AS (
    -- Subquery 2: away days (t1) whose next calendar day (t2) is at home.
    SELECT
        t1.address,
        t1.date AS maxDate,
        ROW_NUMBER() OVER (PARTITION BY t1.address ORDER BY t1.date) AS streakNo
    FROM #TimeAtHome AS t1
    INNER JOIN #TimeAtHome AS t2
        ON t1.address = t2.address
       AND t1.date = DATEADD(DAY, -1, t2.date)
    WHERE t1.atHome = 0
      AND t2.atHome = 1
)
-- The n-th streak start of an address is matched with its n-th streak end.
SELECT
    q1.address,
    q1.minDate,
    q2.maxDate
FROM q1
INNER JOIN q2
    ON q2.address = q1.address
   AND q2.streakNo = q1.streakNo
请注意此查询的限制条件
- table 中的日期应该是连续的,因此要在 table 中找到下一条记录,我们可以简单地减去一天
t1.date = DATEADD(DAY, -1, t2.date)
。 - 这个人从家里开始,所以他外出时的第一个
minDate
与他回来时的第一个maxDate
相匹配。
查询如下:cte1 用于为每一行补充相邻记录的信息,供后续步骤使用;cte2 用于查找 minDate,cte3 用于获取 maxDate,最后按 ROW_NUMBER() 生成的行号将两者连接。
-- Returns one row per away streak (consecutive atHome = 0 rows) per address,
-- with the streak's first day (minDate) and last day (maxDate).
-- Streak numbers are assigned per address and the final join also matches on
-- address, so a streak in one address can never be paired with another's.
;WITH cte1 AS (
    -- Attach the atHome flag of the previous and next row of the same address.
    SELECT
        address,
        date,
        atHome,
        LAG(atHome)  OVER (PARTITION BY address ORDER BY date) AS PrevAtHome,
        LEAD(atHome) OVER (PARTITION BY address ORDER BY date) AS NextAtHome
    FROM #TimeAtHome
),
cte2 AS (
    -- Streak starts: an away row whose previous row is at home, or which has
    -- no previous row at all (the data begins while the person is away).
    -- NOTE(review): a NULL atHome on the previous row is treated the same way.
    SELECT
        address,
        date AS minDate,
        ROW_NUMBER() OVER (PARTITION BY address ORDER BY date) AS R1
    FROM cte1
    WHERE atHome = 0
      AND (PrevAtHome = 1 OR PrevAtHome IS NULL)
),
cte3 AS (
    -- Streak ends: an away row whose next row is at home, or which has no
    -- next row at all (the data ends while the person is still away).
    SELECT
        address,
        date AS maxDate,
        ROW_NUMBER() OVER (PARTITION BY address ORDER BY date) AS R2
    FROM cte1
    WHERE atHome = 0
      AND (NextAtHome = 1 OR NextAtHome IS NULL)
)
-- Every streak has exactly one start and one end, so the n-th start of an
-- address always belongs with its n-th end.
SELECT
    cte2.address,
    cte2.minDate,
    cte3.maxDate
FROM cte2
INNER JOIN cte3
    ON cte3.address = cte2.address
   AND cte3.R2 = cte2.R1
ORDER BY cte2.address, cte2.minDate;
我想我使用了一个更简单的解决方案,因为这看起来像是一个缺口和孤岛问题。因此,我使用 LAG() 函数根据 AtHome 标志查找岛屿的起点和终点。然后我使用 SUM() 函数创建一个组并从那里聚合日期:
-- Gaps-and-islands: returns the first and last day of every away streak
-- (consecutive atHome = 0 rows) per address.
;WITH T AS (
    -- Previous row's flag; the first row of an address defaults to its own
    -- flag so it never counts as a change.
    SELECT
        address,
        date,
        atHome,
        LAG(atHome, 1, atHome) OVER (PARTITION BY address ORDER BY date) AS PrevAtHome
    FROM #TimeAtHome
),
Final AS (
    -- Running count of flag changes: rows of one streak share the same Grp.
    -- ROWS framing keeps this a strict row-by-row running total.
    SELECT
        address,
        date,
        atHome,
        SUM(CASE WHEN atHome <> PrevAtHome THEN 1 ELSE 0 END)
            OVER (PARTITION BY address ORDER BY date ROWS UNBOUNDED PRECEDING) AS Grp
    FROM T
)
SELECT
    address,
    MIN(date) AS minDate,
    MAX(date) AS maxDate
FROM Final
WHERE atHome = 0
GROUP BY address, Grp
-- minDate as a tie-breaker makes the streak order within an address deterministic.
ORDER BY address, minDate;
还有一种可能性:
-- Sample fixture: one row per address per calendar day; atHome = 1 means the
-- person was at home that day, 0 means they were away.
-- Drop any leftover copy first so the script can be re-run in the same session.
IF OBJECT_ID('tempdb..#TimeAtHome') IS NOT NULL
    DROP TABLE #TimeAtHome;

CREATE TABLE #TimeAtHome (
    [address] varchar(100),
    [date]    date,
    [atHome]  bit
);

-- Explicit column list and numeric bit literals: no reliance on column order
-- or on implicit varchar-to-bit conversion.
INSERT INTO #TimeAtHome ([address], [date], [atHome])
VALUES
    ('123 ABC Street', '2020-01-01', 1),
    ('123 ABC Street', '2020-01-02', 1),
    ('123 ABC Street', '2020-01-03', 0),
    ('123 ABC Street', '2020-01-04', 0),
    ('123 ABC Street', '2020-01-05', 0),
    ('123 ABC Street', '2020-01-06', 0),
    ('123 ABC Street', '2020-01-07', 1),
    ('123 ABC Street', '2020-01-08', 0),
    ('123 ABC Street', '2020-01-09', 0),
    ('123 ABC Street', '2020-01-10', 1),
    ('777 Hello Ct', '2020-01-01', 1),
    ('777 Hello Ct', '2020-01-02', 1),
    ('777 Hello Ct', '2020-01-03', 1),
    ('777 Hello Ct', '2020-01-04', 0),
    ('777 Hello Ct', '2020-01-05', 1),
    ('777 Hello Ct', '2020-01-06', 1);
-- Returns the first and last day of every run of consecutive away dates per
-- address. Within such a run, "date minus row number" is constant, so that
-- value (DtRange) serves as the grouping key.
-- Assumes dates are unique per address; a missing calendar day splits a run.
;WITH dt AS (
    SELECT
        t.[address],
        t.[date] AS Dt,
        -- Explicit date arithmetic instead of passing an integer row number to
        -- DATEDIFF and relying on its implicit conversion to a datetime.
        DATEADD(
            DAY,
            -CAST(ROW_NUMBER() OVER (PARTITION BY t.[address] ORDER BY t.[date]) AS int),
            t.[date]
        ) AS DtRange
    FROM #TimeAtHome AS t
    WHERE t.[atHome] = 0
)
SELECT
    dt.address,
    MIN(dt.Dt) AS minDate,
    MAX(dt.Dt) AS maxDate
FROM dt
GROUP BY dt.address, dt.DtRange
ORDER BY address, minDate;
还有另一种方法
-- Returns the boundary rows (first and last away day) of EVERY away streak per
-- address. Ranking within each streak, not within the whole address, keeps
-- streaks in the middle of the date range from being dropped.
-- A one-day streak yields a single row with both rank columns equal to 1.
;WITH numbered AS (
    -- For away rows, the difference of the two row numbers equals the number
    -- of non-away rows that precede them, which is constant within one streak
    -- and grows between streaks.
    SELECT
        [address],
        [date],
        [atHome],
        ROW_NUMBER() OVER (PARTITION BY [address] ORDER BY [date])
      - ROW_NUMBER() OVER (PARTITION BY [address], [atHome] ORDER BY [date]) AS streakId
    FROM #TimeAtHome
),
A AS (
    -- Rank each away row from both ends of its streak.
    SELECT
        [address],
        [date],
        [atHome],
        ROW_NUMBER() OVER (PARTITION BY [address], streakId ORDER BY [date])      AS PrevAtHome_A,
        ROW_NUMBER() OVER (PARTITION BY [address], streakId ORDER BY [date] DESC) AS PrevAtHome_D
    FROM numbered
    WHERE [atHome] = 0
)
SELECT
    [address],
    [date],
    [atHome],
    PrevAtHome_A,
    PrevAtHome_D
FROM A
WHERE PrevAtHome_A = 1
   OR PrevAtHome_D = 1
ORDER BY [address], [date];