查找分区中的第一个非空值
Finding the first non-null in a partition
我正在使用 SQL Server。我有一张表,其中人(Person)和年(Year)的组合是唯一的,另有一列(我们称之为婚姻状况)存在空值。我想填补(插补)这些空值。我认为由于此列通常不会经常更改,所以我将为该人取下一个非空值;如果空值位于该人数据的末尾,则取前一个非空值。例如:
人 | 年 | 婚姻状况 |
---|---|---|
萌 | 2001 | 空 |
萌 | 2002 | 空 |
萌 | 2003 | 已婚 |
拉里 | 2001 | 单身 |
拉里 | 2002 | 空 |
拉里 | 2003 | 空 |
卷发 | 2001 | 单身 |
卷发 | 2002 | 空 |
卷发 | 2003 | 已婚 |
Moe(萌)的空值应更改为已婚,Larry(拉里)的空值应更改为单身,Curly(卷发)的空值应更改为已婚。
我的想法是像这样使用 coalesce with over(使用类似的逻辑来选择前面的空值):
-- Asker's attempted query, kept as posted; it is NOT valid T-SQL.
-- COALESCE is a scalar function, not an aggregate or window function, so it
-- cannot be followed by an OVER clause.
-- Intent: per Person, pick the first non-null MaritalStatus found from the
-- current row through the last row when ordered by Year.
select
Person,
Year,
coalesce(MaritalStatus) over (partition by Person order by Year rows between current row and unbounded following)
from mytable
但 coalesce 似乎不能这样与 over 一起使用。有没有一些简单的方法可以在没有 CTE 或子查询的情况下执行此操作(如果可能,我会尽量避免使用它们,因为这会让下一个人更难理解)?
编辑:
根据蒂姆的回答,我想我有一些东西:
-- Fill each NULL MaritalStatus with the same person's next non-NULL value
-- (by Year); when no later value exists, fall back to the previous one.
-- Fixes over the posted version:
--   * adds the leading WITH keyword, without which the CTE list does not parse;
--   * COALESCE now prefers the NEXT known value over the PREVIOUS one, as the
--     requirement states (the posted order returned the previous value first).
WITH cte AS (
    -- Number rows per person, separately for NULL and non-NULL statuses,
    -- newest year first: among non-NULL rows, rn = 1 is the latest year.
    SELECT
        *,
        ROW_NUMBER() OVER (
            PARTITION BY
                Person,
                CASE WHEN MaritalStatus IS NULL THEN 0 ELSE 1 END
            ORDER BY Year DESC
        ) AS rn
    FROM mytable
),
cte2 AS (
    -- For every source row, find the rn of the neighbouring non-NULL rows.
    -- rn grows as Year decreases, therefore:
    --   maxrn = largest rn among LATER years    -> closest later known row
    --   minrn = smallest rn among EARLIER years -> closest earlier known row
    SELECT
        t1.Person,
        t1.Year,
        MAX(t2.rn) AS maxrn,
        MIN(t3.rn) AS minrn
    FROM mytable AS t1
    LEFT JOIN cte AS t2
        ON t2.Person = t1.Person
        AND t2.MaritalStatus IS NOT NULL
        AND t1.Year < t2.Year
    LEFT JOIN cte AS t3
        ON t3.Person = t1.Person
        AND t3.MaritalStatus IS NOT NULL
        AND t1.Year > t3.Year
    GROUP BY
        t1.Person,
        t1.Year
),
cte3 AS (
    SELECT
        t1.person,
        t1.year,
        -- Own value first, then the next known value, then the previous one.
        COALESCE(t1.maritalstatus, nxt.maritalstatus, prv.maritalstatus) AS maritalstatus
    FROM mytable AS t1
    LEFT JOIN cte2 AS t2
        ON t1.person = t2.person
        AND t1.year = t2.year
    -- nxt: closest later row with a known status (matched through maxrn)
    LEFT JOIN cte AS nxt
        ON t1.person = nxt.person
        AND nxt.maritalstatus IS NOT NULL
        AND t2.maxrn = nxt.rn
    -- prv: closest earlier row with a known status (matched through minrn)
    LEFT JOIN cte AS prv
        ON t1.person = prv.person
        AND prv.maritalstatus IS NOT NULL
        AND t2.minrn = prv.rn
)
SELECT * FROM cte3
我们可以试试下面的方法。在这里,我们应用 ROW_NUMBER,既按人进行分区,也按婚姻状况值是否为 NULL 进行分区。然后,我们使用每人最近的非 NULL 婚姻状况值来填补所有缺失(NULL)的婚姻状况值。
-- Replace every NULL MaritalStatus with the person's most recent (highest
-- Year) non-NULL MaritalStatus; rows that already have a value keep it.
WITH ranked AS (
    -- Rank rows newest-first within each (Person, is-NULL flag) group, so the
    -- non-NULL row with rn = 1 is that person's latest known status.
    SELECT
        src.Person,
        src.MaritalStatus,
        ROW_NUMBER() OVER (
            PARTITION BY
                src.Person,
                CASE WHEN src.MaritalStatus IS NULL THEN 0 ELSE 1 END
            ORDER BY src.Year DESC
        ) AS rn
    FROM mytable AS src
)
SELECT
    base.Person,
    base.Year,
    COALESCE(base.MaritalStatus, latest.MaritalStatus) AS MaritalStatus
FROM mytable AS base
LEFT JOIN ranked AS latest
    ON latest.Person = base.Person
    AND latest.rn = 1
    AND latest.MaritalStatus IS NOT NULL;
您可以只使用窗口函数来完成此操作。关键思路是先找出每个人最早有婚姻状况的年份,然后将该年的婚姻状况填充到这个人的所有行上:
-- Spread each person's earliest known marital status across all of that
-- person's rows, using window functions only.
SELECT
    s.*,
    -- Only the row of the first known year contributes a value to MAX.
    MAX(CASE WHEN s.year = s.first_year_ms THEN s.MaritalStatus END)
        OVER (PARTITION BY s.person) AS first_marital_status
FROM (
    SELECT
        src.*,
        -- Earliest year in which this person has a non-NULL status.
        MIN(CASE WHEN src.MaritalStatus IS NOT NULL THEN src.year END)
            OVER (PARTITION BY src.person) AS first_year_ms
    FROM t AS src
) AS s
ORDER BY
    s.person,
    s.year;
一个更简单的方法可能是使用横向连接:
-- For every row of t, attach the same person's earliest non-NULL marital
-- status; OUTER APPLY keeps rows for people who have no known status at all.
SELECT *
FROM t
OUTER APPLY (
    SELECT TOP (1)
        known.maritalstatus
    FROM t AS known
    WHERE known.maritalstatus IS NOT NULL
        AND known.person = t.person
    ORDER BY known.year ASC
) AS first_known;
如果在 (person, maritalstatus, year) 上建有索引,这可能是最快的方法。
Here 是一个 db<>fiddle.
编辑:
您仍然可以使用 window 函数执行此操作:
-- Next-known-else-previous-known marital status using window functions only.
SELECT
    g.*,
    COALESCE(
        -- Rows sharing grp_after: one known status plus the NULL rows of the
        -- years just before it, so MAX yields the next known status.
        MAX(g.MaritalStatus) OVER (PARTITION BY g.person, g.grp_after),
        -- Fallback when nothing is known later: the previous known status.
        MAX(g.MaritalStatus) OVER (PARTITION BY g.person, g.grp_before)
    ) AS next_marital_status
FROM (
    SELECT
        src.*,
        -- Running count of non-NULL statuses, walking from the newest year back.
        COUNT(src.MaritalStatus)
            OVER (PARTITION BY src.person ORDER BY src.year DESC) AS grp_after,
        -- Running count of non-NULL statuses, walking from the oldest year on.
        COUNT(src.MaritalStatus)
            OVER (PARTITION BY src.person ORDER BY src.year ASC) AS grp_before
    FROM t AS src
) AS g
ORDER BY
    g.person,
    g.year;
或使用 apply:
-- For every row of t, attach the nearest known marital status of the same
-- person: the current or closest later year first, otherwise the closest
-- earlier year.
SELECT *
FROM t
OUTER APPLY (
    SELECT TOP (1)
        cand.maritalstatus
    FROM t AS cand
    WHERE cand.maritalstatus IS NOT NULL
        AND cand.person = t.person
    ORDER BY
        -- 1) candidates at or after the current year sort before earlier ones
        CASE WHEN cand.year >= t.year THEN 1 ELSE 2 END,
        -- 2) among those, the closest year first (NULL for earlier candidates)
        CASE WHEN cand.year >= t.year THEN cand.year END ASC,
        -- 3) among earlier candidates, the most recent year first
        cand.year DESC
) AS nearest
Here 是 SQL Fiddle
我正在使用 SQL Server。我有一张表,其中人(Person)和年(Year)的组合是唯一的,另有一列(我们称之为婚姻状况)存在空值。我想填补(插补)这些空值。我认为由于此列通常不会经常更改,所以我将为该人取下一个非空值;如果空值位于该人数据的末尾,则取前一个非空值。例如:
人 | 年 | 婚姻状况 |
---|---|---|
萌 | 2001 | 空 |
萌 | 2002 | 空 |
萌 | 2003 | 已婚 |
拉里 | 2001 | 单身 |
拉里 | 2002 | 空 |
拉里 | 2003 | 空 |
卷发 | 2001 | 单身 |
卷发 | 2002 | 空 |
卷发 | 2003 | 已婚 |
Moe(萌)的空值应更改为已婚,Larry(拉里)的空值应更改为单身,Curly(卷发)的空值应更改为已婚。
我的想法是像这样使用 coalesce with over(使用类似的逻辑来选择前面的空值):
-- Asker's attempted query, kept as posted; it is NOT valid T-SQL.
-- COALESCE is a scalar function, not an aggregate or window function, so it
-- cannot be followed by an OVER clause.
-- Intent: per Person, pick the first non-null MaritalStatus found from the
-- current row through the last row when ordered by Year.
select
Person,
Year,
coalesce(MaritalStatus) over (partition by Person order by Year rows between current row and unbounded following)
from mytable
但 coalesce 似乎不能这样与 over 一起使用。有没有一些简单的方法可以在没有 CTE 或子查询的情况下执行此操作(如果可能,我会尽量避免使用它们,因为这会让下一个人更难理解)?
编辑: 根据蒂姆的回答,我想我有一些东西:
-- Fill each NULL MaritalStatus with the same person's next non-NULL value
-- (by Year); when no later value exists, fall back to the previous one.
-- Fixes over the posted version:
--   * adds the leading WITH keyword, without which the CTE list does not parse;
--   * COALESCE now prefers the NEXT known value over the PREVIOUS one, as the
--     requirement states (the posted order returned the previous value first).
WITH cte AS (
    -- Number rows per person, separately for NULL and non-NULL statuses,
    -- newest year first: among non-NULL rows, rn = 1 is the latest year.
    SELECT
        *,
        ROW_NUMBER() OVER (
            PARTITION BY
                Person,
                CASE WHEN MaritalStatus IS NULL THEN 0 ELSE 1 END
            ORDER BY Year DESC
        ) AS rn
    FROM mytable
),
cte2 AS (
    -- For every source row, find the rn of the neighbouring non-NULL rows.
    -- rn grows as Year decreases, therefore:
    --   maxrn = largest rn among LATER years    -> closest later known row
    --   minrn = smallest rn among EARLIER years -> closest earlier known row
    SELECT
        t1.Person,
        t1.Year,
        MAX(t2.rn) AS maxrn,
        MIN(t3.rn) AS minrn
    FROM mytable AS t1
    LEFT JOIN cte AS t2
        ON t2.Person = t1.Person
        AND t2.MaritalStatus IS NOT NULL
        AND t1.Year < t2.Year
    LEFT JOIN cte AS t3
        ON t3.Person = t1.Person
        AND t3.MaritalStatus IS NOT NULL
        AND t1.Year > t3.Year
    GROUP BY
        t1.Person,
        t1.Year
),
cte3 AS (
    SELECT
        t1.person,
        t1.year,
        -- Own value first, then the next known value, then the previous one.
        COALESCE(t1.maritalstatus, nxt.maritalstatus, prv.maritalstatus) AS maritalstatus
    FROM mytable AS t1
    LEFT JOIN cte2 AS t2
        ON t1.person = t2.person
        AND t1.year = t2.year
    -- nxt: closest later row with a known status (matched through maxrn)
    LEFT JOIN cte AS nxt
        ON t1.person = nxt.person
        AND nxt.maritalstatus IS NOT NULL
        AND t2.maxrn = nxt.rn
    -- prv: closest earlier row with a known status (matched through minrn)
    LEFT JOIN cte AS prv
        ON t1.person = prv.person
        AND prv.maritalstatus IS NOT NULL
        AND t2.minrn = prv.rn
)
SELECT * FROM cte3
我们可以试试下面的方法。在这里,我们应用 ROW_NUMBER,既按人进行分区,也按婚姻状况值是否为 NULL 进行分区。然后,我们使用每人最近的非 NULL 婚姻状况值来填补所有缺失(NULL)的婚姻状况值。
-- Replace every NULL MaritalStatus with the person's most recent (highest
-- Year) non-NULL MaritalStatus; rows that already have a value keep it.
WITH ranked AS (
    -- Rank rows newest-first within each (Person, is-NULL flag) group, so the
    -- non-NULL row with rn = 1 is that person's latest known status.
    SELECT
        src.Person,
        src.MaritalStatus,
        ROW_NUMBER() OVER (
            PARTITION BY
                src.Person,
                CASE WHEN src.MaritalStatus IS NULL THEN 0 ELSE 1 END
            ORDER BY src.Year DESC
        ) AS rn
    FROM mytable AS src
)
SELECT
    base.Person,
    base.Year,
    COALESCE(base.MaritalStatus, latest.MaritalStatus) AS MaritalStatus
FROM mytable AS base
LEFT JOIN ranked AS latest
    ON latest.Person = base.Person
    AND latest.rn = 1
    AND latest.MaritalStatus IS NOT NULL;
您可以只使用窗口函数来完成此操作。关键思路是先找出每个人最早有婚姻状况的年份,然后将该年的婚姻状况填充到这个人的所有行上:
-- Spread each person's earliest known marital status across all of that
-- person's rows, using window functions only.
SELECT
    s.*,
    -- Only the row of the first known year contributes a value to MAX.
    MAX(CASE WHEN s.year = s.first_year_ms THEN s.MaritalStatus END)
        OVER (PARTITION BY s.person) AS first_marital_status
FROM (
    SELECT
        src.*,
        -- Earliest year in which this person has a non-NULL status.
        MIN(CASE WHEN src.MaritalStatus IS NOT NULL THEN src.year END)
            OVER (PARTITION BY src.person) AS first_year_ms
    FROM t AS src
) AS s
ORDER BY
    s.person,
    s.year;
一个更简单的方法可能是使用横向连接:
-- For every row of t, attach the same person's earliest non-NULL marital
-- status; OUTER APPLY keeps rows for people who have no known status at all.
SELECT *
FROM t
OUTER APPLY (
    SELECT TOP (1)
        known.maritalstatus
    FROM t AS known
    WHERE known.maritalstatus IS NOT NULL
        AND known.person = t.person
    ORDER BY known.year ASC
) AS first_known;
如果在 (person, maritalstatus, year) 上建有索引,这可能是最快的方法。
Here 是一个 db<>fiddle.
编辑:
您仍然可以使用 window 函数执行此操作:
-- Next-known-else-previous-known marital status using window functions only.
SELECT
    g.*,
    COALESCE(
        -- Rows sharing grp_after: one known status plus the NULL rows of the
        -- years just before it, so MAX yields the next known status.
        MAX(g.MaritalStatus) OVER (PARTITION BY g.person, g.grp_after),
        -- Fallback when nothing is known later: the previous known status.
        MAX(g.MaritalStatus) OVER (PARTITION BY g.person, g.grp_before)
    ) AS next_marital_status
FROM (
    SELECT
        src.*,
        -- Running count of non-NULL statuses, walking from the newest year back.
        COUNT(src.MaritalStatus)
            OVER (PARTITION BY src.person ORDER BY src.year DESC) AS grp_after,
        -- Running count of non-NULL statuses, walking from the oldest year on.
        COUNT(src.MaritalStatus)
            OVER (PARTITION BY src.person ORDER BY src.year ASC) AS grp_before
    FROM t AS src
) AS g
ORDER BY
    g.person,
    g.year;
或使用 apply:
-- For every row of t, attach the nearest known marital status of the same
-- person: the current or closest later year first, otherwise the closest
-- earlier year.
SELECT *
FROM t
OUTER APPLY (
    SELECT TOP (1)
        cand.maritalstatus
    FROM t AS cand
    WHERE cand.maritalstatus IS NOT NULL
        AND cand.person = t.person
    ORDER BY
        -- 1) candidates at or after the current year sort before earlier ones
        CASE WHEN cand.year >= t.year THEN 1 ELSE 2 END,
        -- 2) among those, the closest year first (NULL for earlier candidates)
        CASE WHEN cand.year >= t.year THEN cand.year END ASC,
        -- 3) among earlier candidates, the most recent year first
        cand.year DESC
) AS nearest
Here 是 SQL Fiddle