连接两个表时对缺失值进行插值
Interpolate missing values when joining two tables
我有两个数据密度不同的表,希望把它们连接(join)起来,并对低频表中的值进行插值,以填补其中的空缺。
除了 lag/lead 之外,我想不到别的处理办法,而且低频表中各行之间的间隔是不规则的。
下面是我的设置:
-- Setup for the high-frequency series: one LOSS reading per MD.
-- Drop any copy left over from an earlier run in this session so the
-- script can be executed repeatedly without an "object already exists" error.
IF OBJECT_ID('tempdb..#HighFreq') IS NOT NULL
    DROP TABLE #HighFreq;

CREATE TABLE #HighFreq
(
    MD   INT NOT NULL PRIMARY KEY, -- join key; the interpolation query expects one row per MD
    LOSS FLOAT
);

-- Explicit column list so the insert does not depend on column order.
INSERT INTO #HighFreq (MD, LOSS)
VALUES
(6710,0.5)
,(6711,0.6)
,(6712,0.6)
,(6713,0.5)
,(6714,0.5)
,(6715,0.4)
,(6716,0.9)
,(6717,0.9)
,(6718,0.9)
,(6719,1)
,(6720,0.8)
,(6721,0.9)
,(6722,0.7)
,(6723,0.7)
,(6724,0.7)
,(6725,0.7);
-- Setup for the low-frequency series: X and Y are known only at a few,
-- irregularly spaced MD values.
-- Drop any copy left over from an earlier run in this session so the
-- script can be executed repeatedly without an "object already exists" error.
IF OBJECT_ID('tempdb..#LowFreq') IS NOT NULL
    DROP TABLE #LowFreq;

CREATE TABLE #LowFreq
(
    MD INT NOT NULL PRIMARY KEY, -- join key; the interpolation query expects one row per MD
    X  FLOAT,
    Y  FLOAT
);

-- Explicit column list so the insert does not depend on column order.
INSERT INTO #LowFreq (MD, X, Y)
VALUES
(6710,12,1000)
,(6711,8,1001)
,(6718,10,1007)
,(6724,8,1013)
,(6730,11,1028);
我希望我的输出看起来像这样:
这是一种使用递归 CTE 和窗口函数的方法。递归 CTE 根据两个表中出现的 md 值,生成从最小值到最大值的连续 md 列表。然后使用"间隙与岛"(gaps-and-islands)技术,把 #LowFreq 中相邻的 "missing" 记录归入同一组。最后在外部查询中进行插值:在组内第一个(也是唯一一个)非空值与下一个非空值之间按比例推算各行的值。
-- Joins #HighFreq and #LowFreq on md and linearly interpolates the #LowFreq
-- columns x and y for every md at which #LowFreq has no row.
-- Output columns: md, loss, x, y (one row per consecutive md), ordered by md.
with md_series as (
    -- Anchor: a single row with the smallest and largest md found in either table.
    select
        min(coalesce(h.md, l.md)) as md,
        max(coalesce(h.md, l.md)) as md_max
    from #HighFreq as h
    full join #LowFreq as l
        on l.md = h.md
    union all
    -- Recursive step: emit the next consecutive md until md_max is reached.
    select
        md + 1,
        md_max
    from md_series
    where md < md_max
),
grouped as (
    select
        s.md,
        h.loss,
        l.x,
        l.y,
        -- Running count of #LowFreq rows seen so far: each #LowFreq row opens
        -- a new group that also contains the gap rows following it.
        sum(case when l.md is null then 0 else 1 end) over (order by s.md) as grp,
        -- Value of the next row; non-null only on the last row of a gap,
        -- where it holds the next known #LowFreq value.
        lead(l.x) over (order by s.md) as lead_x,
        lead(l.y) over (order by s.md) as lead_y
    from md_series as s
    left join #HighFreq as h
        on h.md = s.md
    left join #LowFreq as l
        on l.md = s.md
)
select
    md,
    loss,
    -- Known value when present; otherwise
    --   start + (next - start) * (position in group) / (group size).
    -- min() over the group picks up the single non-null start / next value.
    coalesce(
        x,
        min(x) over (partition by grp)
            + (min(lead_x) over (partition by grp) - min(x) over (partition by grp))
            * (row_number() over (partition by grp order by md) - 1)
            / count(*) over (partition by grp)
    ) as x,
    coalesce(
        y,
        min(y) over (partition by grp)
            + (min(lead_y) over (partition by grp) - min(y) over (partition by grp))
            * (row_number() over (partition by grp order by md) - 1)
            / count(*) over (partition by grp)
    ) as y
from grouped
-- Without an explicit ORDER BY the row order of the result is not guaranteed.
order by md
-- The default recursion limit is 100 levels, which fails once the md range
-- spans more than 101 values; the recursion is bounded by md < md_max.
option (maxrecursion 0);
md | loss | x | y
---: | ---: | ---------------: | ---------------:
6710 | 0.5 | 12 | 1000
6711 | 0.6 | 8 | 1001
6712 | 0.6 | 8.28571428571429 | 1001.85714285714
6713 | 0.5 | 8.57142857142857 | 1002.71428571429
6714 | 0.5 | 8.85714285714286 | 1003.57142857143
6715 | 0.4 | 9.14285714285714 | 1004.42857142857
6716 | 0.9 | 9.42857142857143 | 1005.28571428571
6717 | 0.9 | 9.71428571428571 | 1006.14285714286
6718 | 0.9 | 10 | 1007
6719 | 1 | 9.66666666666667 | 1008
6720 | 0.8 | 9.33333333333333 | 1009
6721 | 0.9 | 9 | 1010
6722 | 0.7 | 8.66666666666667 | 1011
6723 | 0.7 | 8.33333333333333 | 1012
6724 | 0.7 | 8 | 1013
6725 | 0.7 | 8.5 | 1015.5
6726 | null | 9 | 1018
6727 | null | 9.5 | 1020.5
6728 | null | 10 | 1023
6729 | null | 10.5 | 1025.5
6730 | null | 11 | 1028
我有两个数据密度不同的表,希望把它们连接(join)起来,并对低频表中的值进行插值,以填补其中的空缺。
除了 lag/lead 之外,我想不到别的处理办法,而且低频表中各行之间的间隔是不规则的。
下面是我的设置:
-- Setup for the high-frequency series: one LOSS reading per MD.
-- Drop any copy left over from an earlier run in this session so the
-- script can be executed repeatedly without an "object already exists" error.
IF OBJECT_ID('tempdb..#HighFreq') IS NOT NULL
    DROP TABLE #HighFreq;

CREATE TABLE #HighFreq
(
    MD   INT NOT NULL PRIMARY KEY, -- join key; the interpolation query expects one row per MD
    LOSS FLOAT
);

-- Explicit column list so the insert does not depend on column order.
INSERT INTO #HighFreq (MD, LOSS)
VALUES
(6710,0.5)
,(6711,0.6)
,(6712,0.6)
,(6713,0.5)
,(6714,0.5)
,(6715,0.4)
,(6716,0.9)
,(6717,0.9)
,(6718,0.9)
,(6719,1)
,(6720,0.8)
,(6721,0.9)
,(6722,0.7)
,(6723,0.7)
,(6724,0.7)
,(6725,0.7);
-- Setup for the low-frequency series: X and Y are known only at a few,
-- irregularly spaced MD values.
-- Drop any copy left over from an earlier run in this session so the
-- script can be executed repeatedly without an "object already exists" error.
IF OBJECT_ID('tempdb..#LowFreq') IS NOT NULL
    DROP TABLE #LowFreq;

CREATE TABLE #LowFreq
(
    MD INT NOT NULL PRIMARY KEY, -- join key; the interpolation query expects one row per MD
    X  FLOAT,
    Y  FLOAT
);

-- Explicit column list so the insert does not depend on column order.
INSERT INTO #LowFreq (MD, X, Y)
VALUES
(6710,12,1000)
,(6711,8,1001)
,(6718,10,1007)
,(6724,8,1013)
,(6730,11,1028);
我希望我的输出看起来像这样:
这是一种使用递归 CTE 和窗口函数的方法。递归 CTE 根据两个表中出现的 md 值,生成从最小值到最大值的连续 md 列表。然后使用"间隙与岛"(gaps-and-islands)技术,把 #LowFreq 中相邻的 "missing" 记录归入同一组。最后在外部查询中进行插值:在组内第一个(也是唯一一个)非空值与下一个非空值之间按比例推算各行的值。
-- Joins #HighFreq and #LowFreq on md and linearly interpolates the #LowFreq
-- columns x and y for every md at which #LowFreq has no row.
-- Output columns: md, loss, x, y (one row per consecutive md), ordered by md.
with md_series as (
    -- Anchor: a single row with the smallest and largest md found in either table.
    select
        min(coalesce(h.md, l.md)) as md,
        max(coalesce(h.md, l.md)) as md_max
    from #HighFreq as h
    full join #LowFreq as l
        on l.md = h.md
    union all
    -- Recursive step: emit the next consecutive md until md_max is reached.
    select
        md + 1,
        md_max
    from md_series
    where md < md_max
),
grouped as (
    select
        s.md,
        h.loss,
        l.x,
        l.y,
        -- Running count of #LowFreq rows seen so far: each #LowFreq row opens
        -- a new group that also contains the gap rows following it.
        sum(case when l.md is null then 0 else 1 end) over (order by s.md) as grp,
        -- Value of the next row; non-null only on the last row of a gap,
        -- where it holds the next known #LowFreq value.
        lead(l.x) over (order by s.md) as lead_x,
        lead(l.y) over (order by s.md) as lead_y
    from md_series as s
    left join #HighFreq as h
        on h.md = s.md
    left join #LowFreq as l
        on l.md = s.md
)
select
    md,
    loss,
    -- Known value when present; otherwise
    --   start + (next - start) * (position in group) / (group size).
    -- min() over the group picks up the single non-null start / next value.
    coalesce(
        x,
        min(x) over (partition by grp)
            + (min(lead_x) over (partition by grp) - min(x) over (partition by grp))
            * (row_number() over (partition by grp order by md) - 1)
            / count(*) over (partition by grp)
    ) as x,
    coalesce(
        y,
        min(y) over (partition by grp)
            + (min(lead_y) over (partition by grp) - min(y) over (partition by grp))
            * (row_number() over (partition by grp order by md) - 1)
            / count(*) over (partition by grp)
    ) as y
from grouped
-- Without an explicit ORDER BY the row order of the result is not guaranteed.
order by md
-- The default recursion limit is 100 levels, which fails once the md range
-- spans more than 101 values; the recursion is bounded by md < md_max.
option (maxrecursion 0);
md | loss | x | y
---: | ---: | ---------------: | ---------------:
6710 | 0.5 | 12 | 1000
6711 | 0.6 | 8 | 1001
6712 | 0.6 | 8.28571428571429 | 1001.85714285714
6713 | 0.5 | 8.57142857142857 | 1002.71428571429
6714 | 0.5 | 8.85714285714286 | 1003.57142857143
6715 | 0.4 | 9.14285714285714 | 1004.42857142857
6716 | 0.9 | 9.42857142857143 | 1005.28571428571
6717 | 0.9 | 9.71428571428571 | 1006.14285714286
6718 | 0.9 | 10 | 1007
6719 | 1 | 9.66666666666667 | 1008
6720 | 0.8 | 9.33333333333333 | 1009
6721 | 0.9 | 9 | 1010
6722 | 0.7 | 8.66666666666667 | 1011
6723 | 0.7 | 8.33333333333333 | 1012
6724 | 0.7 | 8 | 1013
6725 | 0.7 | 8.5 | 1015.5
6726 | null | 9 | 1018
6727 | null | 9.5 | 1020.5
6728 | null | 10 | 1023
6729 | null | 10.5 | 1025.5
6730 | null | 11 | 1028