SQL:比较列以获取集合的精确匹配
SQL: Compare Columns for Exact Match of sets
我有一个 table 看起来像这样:
RN ID1 ID2 Name Source
1 W76544 945297 1_W_HO HO
2 W76544 945297 1_W_INT Int
1 W76547 945299 3_W_HO HO
2 W76547 945678 3_W_INT Int
1 W76561 NULL Dev_U_W AD
2 W76561 207283 Dev_W_HO HO
3 W76561 207283 Dev_W_INT Int
1 W76562 207284 Dev_R_HO HO
2 W76562 207284 Dev_R_INT Int
3 W76562 NULL Dev_U_R AD
1 W76563 NULL Prd_U_W AD
2 W76563 NULL Prd_W_HO HO
3 W76563 NULL Prd_W_INT Int
我正在想办法确定 ID1 和 ID2 组之间的精确匹配。例如,此示例是完全匹配:
RN ID1 ID2 Name Source
1 *W76544 945297* 1_W_HO HO
2 *W76544 945297* 1_W_INT Int
我希望结果如下所示:
RN ID1 ID2 Matched Name Source
1 W76544 945297 Yes 1_W_HO HO
2 W76544 945297 Yes 1_W_INT Int
1 W76547 945299 No 3_W_HO HO
2 W76547 945678 No 3_W_INT Int
1 W76561 NULL No Dev_U_W AD
2 W76561 207283 No Dev_W_HO HO
3 W76561 207283 No Dev_W_INT Int
1 W76562 207284 No Dev_R_HO HO
2 W76562 207284 No Dev_R_INT Int
3 W76562 NULL No Dev_U_R AD
1 W76563 NULL Empty Prd_U_W AD
2 W76563 NULL Empty Prd_W_HO HO
3 W76563 NULL Empty Prd_W_INT Int
澄清:当具有相同 ID1 的所有行都对应同一个非空的 ID2 时,Matched = 'Yes'。当具有相同 ID1 的行对应的 ID2 不完全相同,或者其中一部分行有 ID2 而其余行的 ID2 为空时,Matched = 'No'。当具有相同 ID1 的所有行的 ID2 均为空时,Matched = 'Empty'。
P.S. RN 是按 ID1 分区并排序得到的 row_number。
谢谢!!!
@BaconBits,这似乎与您的查询不符:
1 W10151820 NULL No DEV_U_W AD
2 W10151820 212405 Yes DEV_W_HO HO
3 W10151820 212405 Yes DEV_W_INTL Int
你能做这样的事情吗:
-- Label every row with how its ID1 group relates to ID2.
--   'Empty' : no row of the ID1 group carries an ID2.
--   'Yes'   : the group has more than one row and every row carries the same
--             non-NULL ID2, i.e. the (ID1, ID2) partition spans the whole
--             ID1 partition.
--   'No'    : anything else (differing ID2 values, or some rows missing ID2).
-- Comparing the (ID1, ID2) partition size with the ID1 partition size keeps a
-- partially matched group such as (NULL, 207283, 207283) from being reported
-- as 'Yes' merely because two of its rows agree.
SELECT
    m.RN,
    m.ID1,
    m.ID2,
    CASE
        -- COUNT(col) skips NULLs, so 0 means the group has no ID2 at all.
        WHEN COUNT(m.ID2) OVER (PARTITION BY m.ID1) = 0 THEN 'Empty'
        WHEN COUNT(*) OVER (PARTITION BY m.ID1, m.ID2) > 1
             AND COUNT(*) OVER (PARTITION BY m.ID1, m.ID2)
                 = COUNT(*) OVER (PARTITION BY m.ID1) THEN 'Yes'
        ELSE 'No'
    END AS "Matched",
    m.Name,
    m.Source
FROM MyTable m
丑陋至极,但我认为这应该可行。
我认为使用解析函数可能更容易:
-- Classify each (id1, id2) row as 'Yes', 'No' or 'Empty' with window aggregates.
-- NOTE(review): `table` is the answer's placeholder; substitute the real table name.
select id1, id2,
       (case when cnt_1 = cnt_2              -- as many rows share this id1 as share this id2
              and min_1_2 = max_1_2         -- a single id1 value occurs for this id2
              and min_2_1 = max_2_1         -- a single non-NULL id2 value occurs for this id1
              and cnt_1 = cnt_2_notnull     -- no row of the id1 group is missing id2
             then 'Yes'
             when cnt_2_notnull > 0         -- some id2 present, but not an exact match
             then 'No'
             else 'Empty'                   -- the id1 group has no id2 at all
        end) as flag
from (select t.id1, t.id2,
             -- count(id2) skips NULLs: number of rows in the id1 group that have an id2
             count(id2) over (partition by id1) as cnt_2_notnull,
             count(*) over (partition by id1) as cnt_1,
             count(*) over (partition by id2) as cnt_2,
             min(id1) over (partition by id2) as min_1_2,
             max(id1) over (partition by id2) as max_1_2,
             min(id2) over (partition by id1) as min_2_1,
             max(id2) over (partition by id1) as max_2_1
      from table t
     ) t;
逻辑是统计每个维度(id1 和 id2)上的行数,并比较每个维度上的最小值和最大值。
为了让查询今后保持简洁、轻量且易于理解,我会使用 CTE 来计算 Matched 列,然后将结果与原表连接起来。
工作示例:link
-- One verdict per ID1, computed in a CTE and joined back onto the detail rows.
with CTE_CheckID as (
    select
        ID1,
        case
            -- coalesce(id2, id1) substitutes id1 wherever id2 is NULL, so a group
            -- that mixes NULL and non-NULL id2 yields more than one distinct value.
            -- NOTE(review): relies on id1 and id2 being comparable types and on
            -- id1 never equalling an id2 value - confirm against the schema.
            when count(distinct ID2) = 1
                 and count(distinct coalesce(id2, id1)) = 1 then 'Yes'
            when count(distinct ID2) = 0
                 and count(distinct coalesce(id2, id1)) = 1 then 'Empty'
            else 'No'
        end as [Matched]
    from TABLE1
    group by ID1
)
select
    t1.RN,
    t1.ID1,
    t1.ID2,
    cID.Matched,
    t1.name,
    t1.Source
from table1 as t1
left join CTE_CheckID as cID
    on t1.ID1 = cID.ID1
试试这个:
-- Demo fixture: id is the grouping key, c is the nullable value being compared.
create table t (id int, c char(1));

insert into t (id, c)
values
    (1, 'a'),
    (1, 'a'),
    (2, 'b'),
    (2, null),
    (3, null),
    (3, null),
    (4, 'c'),
    (4, 'd');

-- Per-id tallies: all rows, rows that have a value, and distinct values.
with cte as (
    select
        id,
        count(*)          as total_rows,
        count(c)          as filled_rows,
        count(distinct c) as distinct_vals
    from t
    group by id
)
select
    t.id,
    t.c,
    ca.m
from t
cross apply (
    select
        case
            -- no value anywhere in the group
            when cte.filled_rows = 0 and cte.distinct_vals = 0 then 'empty'
            -- every row has a value and they are all the same
            when cte.total_rows = cte.filled_rows and cte.distinct_vals = 1 then 'yes'
            else 'no'
        end as m
    from cte
    where cte.id = t.id
) as ca
输出:
id c m
1 a yes
1 a yes
2 b no
2 (null) no
3 (null) empty
3 (null) empty
4 c no
4 d no
这个怎么样?你能把 'YourTableNameHere' 换成你的 table 名字试试吗?
-- Label each row 'Yes' / 'No' / 'Empty' for its ID1 group.
-- Replace YourTableNameHere with the actual table name.
WITH
    -- A: row count of every (ID1, ID2) combination that occurs more than once.
    --    GROUP BY places the NULL ID2 rows of one ID1 into a single group.
    A AS (
        SELECT
            X.ID1, X.ID2,
            COUNT(1) AS Hits
        FROM YourTableNameHere X
        GROUP BY X.ID1, X.ID2
        HAVING COUNT(1) > 1
    ),
    -- B: per ID1 (groups of more than one row only), how many rows have an ID2
    --    (Hits) and how many do not (Nulls).
    B AS (
        SELECT
            X.ID1,
            SUM(CASE WHEN X.ID2 IS NULL THEN 0 ELSE 1 END) AS Hits,
            SUM(CASE WHEN X.ID2 IS NULL THEN 1 ELSE 0 END) AS Nulls
        FROM YourTableNameHere X
        GROUP BY X.ID1
        HAVING COUNT(1) > 1
    )
SELECT
    X.RN, X.ID1, X.ID2,
    CASE
        -- 'Empty' only when no row of the ID1 group has an ID2. Testing
        -- A.Hits = B.Nulls instead would also fire for the NULL rows of a mixed
        -- group such as (a, NULL, NULL).
        WHEN B.Hits = 0 THEN 'Empty'
        -- 'Yes' only when no row lacks an ID2 and this row's (ID1, ID2)
        -- combination accounts for every row of the group.
        WHEN B.Nulls = 0 AND A.Hits = B.Hits THEN 'Yes'
        -- Also reached when A or B has no row for X (NULL comparisons are not true).
        ELSE 'No'
    END AS [Matched],
    X.[Name], X.[Source]
FROM
    YourTableNameHere X
    LEFT JOIN A
        ON
            A.ID1 = X.ID1
            -- NULL-safe equality so NULL ID2 rows find their own A group
            AND (A.ID2 = X.ID2 OR (A.ID2 IS NULL AND X.ID2 IS NULL))
    LEFT JOIN B ON B.ID1 = X.ID1 ;
我有一个 table 看起来像这样:
RN ID1 ID2 Name Source
1 W76544 945297 1_W_HO HO
2 W76544 945297 1_W_INT Int
1 W76547 945299 3_W_HO HO
2 W76547 945678 3_W_INT Int
1 W76561 NULL Dev_U_W AD
2 W76561 207283 Dev_W_HO HO
3 W76561 207283 Dev_W_INT Int
1 W76562 207284 Dev_R_HO HO
2 W76562 207284 Dev_R_INT Int
3 W76562 NULL Dev_U_R AD
1 W76563 NULL Prd_U_W AD
2 W76563 NULL Prd_W_HO HO
3 W76563 NULL Prd_W_INT Int
我正在想办法确定 ID1 和 ID2 组之间的精确匹配。例如,此示例是完全匹配:
RN ID1 ID2 Name Source
1 *W76544 945297* 1_W_HO HO
2 *W76544 945297* 1_W_INT Int
我希望结果如下所示:
RN ID1 ID2 Matched Name Source
1 W76544 945297 Yes 1_W_HO HO
2 W76544 945297 Yes 1_W_INT Int
1 W76547 945299 No 3_W_HO HO
2 W76547 945678 No 3_W_INT Int
1 W76561 NULL No Dev_U_W AD
2 W76561 207283 No Dev_W_HO HO
3 W76561 207283 No Dev_W_INT Int
1 W76562 207284 No Dev_R_HO HO
2 W76562 207284 No Dev_R_INT Int
3 W76562 NULL No Dev_U_R AD
1 W76563 NULL Empty Prd_U_W AD
2 W76563 NULL Empty Prd_W_HO HO
3 W76563 NULL Empty Prd_W_INT Int
澄清:当具有相同 ID1 的所有行都对应同一个非空的 ID2 时,Matched = 'Yes'。当具有相同 ID1 的行对应的 ID2 不完全相同,或者其中一部分行有 ID2 而其余行的 ID2 为空时,Matched = 'No'。当具有相同 ID1 的所有行的 ID2 均为空时,Matched = 'Empty'。
P.S. RN 是按 ID1 分区并排序得到的 row_number。
谢谢!!!
@BaconBits,这似乎与您的查询不符:
1 W10151820 NULL No DEV_U_W AD
2 W10151820 212405 Yes DEV_W_HO HO
3 W10151820 212405 Yes DEV_W_INTL Int
你能做这样的事情吗:
-- Label every row with how its ID1 group relates to ID2.
--   'Empty' : no row of the ID1 group carries an ID2.
--   'Yes'   : the group has more than one row and every row carries the same
--             non-NULL ID2, i.e. the (ID1, ID2) partition spans the whole
--             ID1 partition.
--   'No'    : anything else (differing ID2 values, or some rows missing ID2).
-- Comparing the (ID1, ID2) partition size with the ID1 partition size keeps a
-- partially matched group such as (NULL, 207283, 207283) from being reported
-- as 'Yes' merely because two of its rows agree.
SELECT
    m.RN,
    m.ID1,
    m.ID2,
    CASE
        -- COUNT(col) skips NULLs, so 0 means the group has no ID2 at all.
        WHEN COUNT(m.ID2) OVER (PARTITION BY m.ID1) = 0 THEN 'Empty'
        WHEN COUNT(*) OVER (PARTITION BY m.ID1, m.ID2) > 1
             AND COUNT(*) OVER (PARTITION BY m.ID1, m.ID2)
                 = COUNT(*) OVER (PARTITION BY m.ID1) THEN 'Yes'
        ELSE 'No'
    END AS "Matched",
    m.Name,
    m.Source
FROM MyTable m
丑陋至极,但我认为这应该可行。
我认为使用解析函数可能更容易:
-- Classify each (id1, id2) row as 'Yes', 'No' or 'Empty' with window aggregates.
-- NOTE(review): `table` is the answer's placeholder; substitute the real table name.
select id1, id2,
       (case when cnt_1 = cnt_2              -- as many rows share this id1 as share this id2
              and min_1_2 = max_1_2         -- a single id1 value occurs for this id2
              and min_2_1 = max_2_1         -- a single non-NULL id2 value occurs for this id1
              and cnt_1 = cnt_2_notnull     -- no row of the id1 group is missing id2
             then 'Yes'
             when cnt_2_notnull > 0         -- some id2 present, but not an exact match
             then 'No'
             else 'Empty'                   -- the id1 group has no id2 at all
        end) as flag
from (select t.id1, t.id2,
             -- count(id2) skips NULLs: number of rows in the id1 group that have an id2
             count(id2) over (partition by id1) as cnt_2_notnull,
             count(*) over (partition by id1) as cnt_1,
             count(*) over (partition by id2) as cnt_2,
             min(id1) over (partition by id2) as min_1_2,
             max(id1) over (partition by id2) as max_1_2,
             min(id2) over (partition by id1) as min_2_1,
             max(id2) over (partition by id1) as max_2_1
      from table t
     ) t;
逻辑是统计每个维度(id1 和 id2)上的行数,并比较每个维度上的最小值和最大值。
为了让查询今后保持简洁、轻量且易于理解,我会使用 CTE 来计算 Matched 列,然后将结果与原表连接起来。
工作示例:link
-- One verdict per ID1, computed in a CTE and joined back onto the detail rows.
with CTE_CheckID as (
    select
        ID1,
        case
            -- coalesce(id2, id1) substitutes id1 wherever id2 is NULL, so a group
            -- that mixes NULL and non-NULL id2 yields more than one distinct value.
            -- NOTE(review): relies on id1 and id2 being comparable types and on
            -- id1 never equalling an id2 value - confirm against the schema.
            when count(distinct ID2) = 1
                 and count(distinct coalesce(id2, id1)) = 1 then 'Yes'
            when count(distinct ID2) = 0
                 and count(distinct coalesce(id2, id1)) = 1 then 'Empty'
            else 'No'
        end as [Matched]
    from TABLE1
    group by ID1
)
select
    t1.RN,
    t1.ID1,
    t1.ID2,
    cID.Matched,
    t1.name,
    t1.Source
from table1 as t1
left join CTE_CheckID as cID
    on t1.ID1 = cID.ID1
试试这个:
-- Demo fixture: id is the grouping key, c is the nullable value being compared.
create table t (id int, c char(1));

insert into t (id, c)
values
    (1, 'a'),
    (1, 'a'),
    (2, 'b'),
    (2, null),
    (3, null),
    (3, null),
    (4, 'c'),
    (4, 'd');

-- Per-id tallies: all rows, rows that have a value, and distinct values.
with cte as (
    select
        id,
        count(*)          as total_rows,
        count(c)          as filled_rows,
        count(distinct c) as distinct_vals
    from t
    group by id
)
select
    t.id,
    t.c,
    ca.m
from t
cross apply (
    select
        case
            -- no value anywhere in the group
            when cte.filled_rows = 0 and cte.distinct_vals = 0 then 'empty'
            -- every row has a value and they are all the same
            when cte.total_rows = cte.filled_rows and cte.distinct_vals = 1 then 'yes'
            else 'no'
        end as m
    from cte
    where cte.id = t.id
) as ca
输出:
id c m
1 a yes
1 a yes
2 b no
2 (null) no
3 (null) empty
3 (null) empty
4 c no
4 d no
这个怎么样?你能把 'YourTableNameHere' 换成你的 table 名字试试吗?
-- Label each row 'Yes' / 'No' / 'Empty' for its ID1 group.
-- Replace YourTableNameHere with the actual table name.
WITH
    -- A: row count of every (ID1, ID2) combination that occurs more than once.
    --    GROUP BY places the NULL ID2 rows of one ID1 into a single group.
    A AS (
        SELECT
            X.ID1, X.ID2,
            COUNT(1) AS Hits
        FROM YourTableNameHere X
        GROUP BY X.ID1, X.ID2
        HAVING COUNT(1) > 1
    ),
    -- B: per ID1 (groups of more than one row only), how many rows have an ID2
    --    (Hits) and how many do not (Nulls).
    B AS (
        SELECT
            X.ID1,
            SUM(CASE WHEN X.ID2 IS NULL THEN 0 ELSE 1 END) AS Hits,
            SUM(CASE WHEN X.ID2 IS NULL THEN 1 ELSE 0 END) AS Nulls
        FROM YourTableNameHere X
        GROUP BY X.ID1
        HAVING COUNT(1) > 1
    )
SELECT
    X.RN, X.ID1, X.ID2,
    CASE
        -- 'Empty' only when no row of the ID1 group has an ID2. Testing
        -- A.Hits = B.Nulls instead would also fire for the NULL rows of a mixed
        -- group such as (a, NULL, NULL).
        WHEN B.Hits = 0 THEN 'Empty'
        -- 'Yes' only when no row lacks an ID2 and this row's (ID1, ID2)
        -- combination accounts for every row of the group.
        WHEN B.Nulls = 0 AND A.Hits = B.Hits THEN 'Yes'
        -- Also reached when A or B has no row for X (NULL comparisons are not true).
        ELSE 'No'
    END AS [Matched],
    X.[Name], X.[Source]
FROM
    YourTableNameHere X
    LEFT JOIN A
        ON
            A.ID1 = X.ID1
            -- NULL-safe equality so NULL ID2 rows find their own A group
            AND (A.ID2 = X.ID2 OR (A.ID2 IS NULL AND X.ID2 IS NULL))
    LEFT JOIN B ON B.ID1 = X.ID1 ;