匹配 2 列中的重复值和 1 列中的 1 个唯一值 mysql
Match duplicate values from 2 columns and 1 unique value from 1 column mysql
我有这个table,这是我的db Fiddle
-- Fixture table for the question: every column, including the
-- numeric-looking ID, is stored as text.
CREATE TABLE table1 (
    ID   VARCHAR(100),
    Val  VARCHAR(100),
    Val2 VARCHAR(100),
    Val3 VARCHAR(100)
);

-- Sample 1 data set (ten rows).
INSERT INTO table1 (ID, Val, Val2, Val3)
VALUES
    ('1',  '100', '200', '90'),
    ('2',  '100', '200', '10'),
    ('3',  '100', '200', '20'),
    ('4',  '20',  '100', '55'),
    ('5',  '20',  '100', '10'),
    -- IDs 6 and 7 share all three values
    ('6',  '112', '100', '20'),
    ('7',  '112', '100', '20'),
    -- ID 8 is the only row of its (Val, Val2) pair
    ('8',  '90',  '200', '90'),
    ('9',  '30',  '90',  '180'),
    ('10', '30',  '90',  '29');
我想要这个条件下的结果
- Val 必须是重复的 AND
- Val2 必须重复 AND
得到这些重复行之后,还需要检查它们的 val3:在上一步聚合得到的重复组内,val3 的值必须是唯一的
我试过这个查询
-- Asker's attempt (kept as posted; it produces the unwanted two-row result).
-- Intent: keep rows whose (Val, Val2) pair also occurs on another row and
-- whose pair contains no repeated Val3.
SELECT
t1.*
FROM
table1 t1
WHERE
-- Condition 1: a different row (other ID) shares this row's Val and Val2.
EXISTS (
SELECT
1
FROM
table1
WHERE
ID <> t1.ID
AND Val = t1.Val
AND Val2 = t1.Val2
)
-- Condition 2: no row of the same (Val, Val2) pair carries a Val3 that is in
-- the list built below.
AND NOT EXISTS (
SELECT
1
FROM
table1
WHERE
Val = t1.Val
AND Val2 = t1.Val2
AND Val3 IN (
-- NOTE(review): this list is grouped by Val3 over the whole table, not per
-- (Val, Val2) pair, so a Val3 that repeats anywhere in table1 rejects every
-- pair containing it. With the sample 1 rows only IDs 9 and 10 survive.
SELECT Val3
FROM table1
GROUP BY Val3
HAVING count( * ) > 1
)
)
我希望结果是这样的
ID Val Val2 Val3
1 100 200 90
2 100 200 10
3 100 200 20
4 20 100 55
5 20 100 10
9 30 90 180
10 30 90 29
但是我得到了这样的结果
ID Val Val2 Val3
9 30 90 180
10 30 90 29
示例 2
-- Sample 2 data set: the sample 1 rows plus ID 19, which repeats ID 3's
-- (Val, Val2, Val3) values.
-- NOTE(review): presumably loaded into an empty table1; confirm.
INSERT INTO table1 (ID, Val, Val2, Val3)
VALUES
    ('1',  '100', '200', '90'),
    ('2',  '100', '200', '10'),
    ('3',  '100', '200', '20'),
    ('19', '100', '200', '20'),
    ('4',  '20',  '100', '55'),
    ('5',  '20',  '100', '10'),
    ('6',  '112', '100', '20'),
    ('7',  '112', '100', '20'),
    ('8',  '90',  '200', '90'),
    ('9',  '30',  '90',  '180'),
    ('10', '30',  '90',  '29');
预期结果2
ID Val Val2 Val3
1 100 200 90
2 100 200 10
4 20 100 55
5 20 100 10
9 30 90 180
10 30 90 29
示例 3
-- Sample 3 data set: Val3 now holds non-numeric text.
-- IDs 1, 2 and 3 share the same three values; ID 19 shares only their
-- Val and Val2.
-- NOTE(review): presumably loaded into an empty table1; confirm.
INSERT INTO table1 (ID, Val, Val2, Val3)
VALUES
    ('1',  '100', '200', 'aa'),
    ('2',  '100', '200', 'aa'),
    ('3',  '100', '200', 'aa'),
    ('19', '100', '200', 'ab'),
    ('4',  '20',  '100', 'SD2'),
    ('5',  '20',  '100', 'SD1'),
    ('6',  '112', '100', 'aa'),
    ('7',  '112', '100', 'ab'),
    ('8',  '90',  '200', 'aa'),
    ('9',  '30',  '90',  'SF2'),
    ('10', '30',  '90',  'SF1');
预期结果3
ID Val Val2 Val3
4 20 100 SD2
5 20 100 SD1
6 112 100 aa
7 112 100 ab
9 30 90 SF2
10 30 90 SF1
有些人可能对样本 3 感到困惑,所以这里是样本 3 的注释:
在这种情况下,示例 3 中的 ID 19 在 val 和 val2 两列上与 id 1、2、3 的值相同(100 和 200),但是这些 id(1、2、3)在 val3 中的值都是 aa,所以必须排除 id 1、2、3,因为它们不满足最后一个条件,即 (val, val2, val3) 必须唯一。ID 19 本身的 val3 是唯一的,但是在 val 和 val2 上与它重复的行(id 1、2、3)已经被排除,这使得 id 19 在 val 和 val2 两列上不再有重复值,因此它也要被排除。如果示例 3 中还有另一条数据,如 ('200', '100', '200', 'ae'),则 id 19 将包含在结果中,因为除了 id 1、2、3 之外,它还有别的重复行。
对于示例 3,如果 table1 中的数据是这样的,则 ID 19 将被包含在内
示例 3(另一种情况)
-- Sample 3 variant: the sample 3 rows plus ID 200, a second row in the
-- ('100', '200') pair whose Val3 is not repeated.
-- NOTE(review): presumably loaded into an empty table1; confirm.
INSERT INTO table1 (ID, Val, Val2, Val3)
VALUES
    ('1',   '100', '200', 'aa'),
    ('2',   '100', '200', 'aa'),
    ('3',   '100', '200', 'aa'),
    ('19',  '100', '200', 'ab'),
    ('200', '100', '200', 'ae'),
    ('4',   '20',  '100', 'SD2'),
    ('5',   '20',  '100', 'SD1'),
    ('6',   '112', '100', 'aa'),
    ('7',   '112', '100', 'ab'),
    ('8',   '90',  '200', 'aa'),
    ('9',   '30',  '90',  'SF2'),
    ('10',  '30',  '90',  'SF1');
预期的结果会是这样
ID Val Val2 Val3
4 20 100 SD2
5 20 100 SD1
19 100 200 ab
200 100 200 ae
6 112 100 aa
7 112 100 ab
9 30 90 SF2
10 30 90 SF1
据我了解您的问题,您想要 (val, val2)
元组不唯一且 (val, val2, val3)
是唯一的行。
这是一种通过使用相关子查询过滤数据集来表达这一点的方法:
-- Returns the rows whose (val, val2, val3) triple occurs exactly once AND
-- whose (val, val2) pair contains at least one other row that also has a
-- once-only triple.
-- Counting every row of the pair instead would also return rows such as
-- sample 3's ID 19, whose only pair-mates are the repeated 'aa' rows that are
-- themselves rejected; expected result 3 leaves that row out.
select t1.*
from table1 t1
where
    -- this row's own triple occurs exactly once
    (
        select count(*)
        from table1 t2
        where t2.val = t1.val and t2.val2 = t1.val2 and t2.val3 = t1.val3
    ) = 1
    -- another row of the same pair, with a different val3, is also unique
    and exists (
        select 1
        from table1 t2
        where t2.val = t1.val
          and t2.val2 = t1.val2
          and t2.val3 <> t1.val3
          and (
              select count(*)
              from table1 t3
              where t3.val = t2.val and t3.val2 = t2.val2 and t3.val3 = t2.val3
          ) = 1
    )
-- id is declared as VARCHAR, so this sort is lexicographic ('10' before '2')
order by id
为了性能,考虑在 (val, val2, val3) 上建立索引(索引中列的顺序在这里很重要)。
如果你有幸运行的是 MySQL 8.0,这可以使用窗口函数(window function)更简单、更高效地表达:
-- MySQL 8.0+ (window functions).
-- Returns the rows whose (val, val2, val3) triple occurs exactly once AND
-- whose (val, val2) pair holds more than one such once-only row.
-- Counting all rows of the pair instead would keep rows such as sample 3's
-- ID 19, whose pair-mates are all rejected duplicates; expected result 3
-- leaves that row out.
select id, val, val2, val3
from (
    select
        s.*,
        -- number of rows in this (val, val2) pair whose triple is unique;
        -- computed one level up because window functions cannot be nested
        sum(case when cnt_triple = 1 then 1 else 0 end)
            over (partition by val, val2) as cnt_unique_in_pair
    from (
        select
            t1.*,
            count(*) over (partition by val, val2, val3) as cnt_triple
        from table1 t1
    ) s
) t
where cnt_triple = 1 and cnt_unique_in_pair > 1
将 table 加入符合您条件的查询:
-- Starts from (val, val2) pairs that occur more than once, requires two
-- different val3 values that each occur once within the pair, then fetches
-- the table1 rows carrying either of those val3 values.
-- DISTINCT removes the repeats produced because the two "unique triple"
-- derived tables are identical and match each other in both orders.
SELECT DISTINCT r.*
FROM (
    SELECT val, val2
    FROM table1
    GROUP BY val, val2
    HAVING COUNT(*) > 1
) AS dup_pair
INNER JOIN (
    SELECT val, val2, val3
    FROM table1
    GROUP BY val, val2, val3
    HAVING COUNT(*) = 1
) AS uniq_a
    ON uniq_a.val = dup_pair.val
   AND uniq_a.val2 = dup_pair.val2
INNER JOIN (
    SELECT val, val2, val3
    FROM table1
    GROUP BY val, val2, val3
    HAVING COUNT(*) = 1
) AS uniq_b
    ON uniq_b.val = dup_pair.val
   AND uniq_b.val2 = dup_pair.val2
   AND uniq_b.val3 <> uniq_a.val3
INNER JOIN table1 AS r
    ON uniq_a.val = r.val
   AND uniq_a.val2 = r.val2
   AND r.val3 IN (uniq_a.val3, uniq_b.val3)
正如 @GMB 在他的回答中简明指出的那样,您需要的是 (val, val2) 元组不唯一、且 (val, val2, val3) 唯一的行。
下面的查询可以做到这一点:
-- Keeps table1 rows whose (val, val2, val3) triple occurs once and whose
-- (val, val2) pair holds more than one such row.
SELECT r.*
FROM table1 AS r
INNER JOIN (
    -- (val, val2) pairs with more than one distinct id among the rows whose
    -- triple occurs once
    SELECT src.val, src.val2
    FROM table1 AS src
    INNER JOIN (
        SELECT val, val2, val3
        FROM table1
        GROUP BY val, val2, val3
        HAVING COUNT(val3) = 1
    ) AS uniq
        ON src.val = uniq.val
       AND src.val2 = uniq.val2
       AND src.val3 = uniq.val3
    GROUP BY src.val, src.val2
    HAVING COUNT(DISTINCT src.id) > 1
) AS good_pair
    ON good_pair.val = r.val
   AND good_pair.val2 = r.val2
INNER JOIN (
    -- triples that occur exactly once; restricts the output to those rows
    SELECT val, val2, val3
    FROM table1
    GROUP BY val, val2, val3
    HAVING COUNT(val3) = 1
) AS uniq_row
    ON r.val = uniq_row.val
   AND r.val2 = uniq_row.val2
   AND r.val3 = uniq_row.val3
我有这个table,这是我的db Fiddle
-- Fixture table for the question: every column, including the
-- numeric-looking ID, is stored as text.
CREATE TABLE table1 (
    ID   VARCHAR(100),
    Val  VARCHAR(100),
    Val2 VARCHAR(100),
    Val3 VARCHAR(100)
);

-- Sample 1 data set (ten rows).
INSERT INTO table1 (ID, Val, Val2, Val3)
VALUES
    ('1',  '100', '200', '90'),
    ('2',  '100', '200', '10'),
    ('3',  '100', '200', '20'),
    ('4',  '20',  '100', '55'),
    ('5',  '20',  '100', '10'),
    -- IDs 6 and 7 share all three values
    ('6',  '112', '100', '20'),
    ('7',  '112', '100', '20'),
    -- ID 8 is the only row of its (Val, Val2) pair
    ('8',  '90',  '200', '90'),
    ('9',  '30',  '90',  '180'),
    ('10', '30',  '90',  '29');
我想要这个条件下的结果
- Val 必须是重复的 AND
- Val2 必须重复 AND
得到这些重复行之后,还需要检查它们的 val3:在上一步聚合得到的重复组内,val3 的值必须是唯一的
我试过这个查询
-- Asker's attempt (kept as posted; it produces the unwanted two-row result).
-- Intent: keep rows whose (Val, Val2) pair also occurs on another row and
-- whose pair contains no repeated Val3.
SELECT
t1.*
FROM
table1 t1
WHERE
-- Condition 1: a different row (other ID) shares this row's Val and Val2.
EXISTS (
SELECT
1
FROM
table1
WHERE
ID <> t1.ID
AND Val = t1.Val
AND Val2 = t1.Val2
)
-- Condition 2: no row of the same (Val, Val2) pair carries a Val3 that is in
-- the list built below.
AND NOT EXISTS (
SELECT
1
FROM
table1
WHERE
Val = t1.Val
AND Val2 = t1.Val2
AND Val3 IN (
-- NOTE(review): this list is grouped by Val3 over the whole table, not per
-- (Val, Val2) pair, so a Val3 that repeats anywhere in table1 rejects every
-- pair containing it. With the sample 1 rows only IDs 9 and 10 survive.
SELECT Val3
FROM table1
GROUP BY Val3
HAVING count( * ) > 1
)
)
我希望结果是这样的
ID Val Val2 Val3
1 100 200 90
2 100 200 10
3 100 200 20
4 20 100 55
5 20 100 10
9 30 90 180
10 30 90 29
但是我得到了这样的结果
ID Val Val2 Val3
9 30 90 180
10 30 90 29
示例 2
-- Sample 2 data set: the sample 1 rows plus ID 19, which repeats ID 3's
-- (Val, Val2, Val3) values.
-- NOTE(review): presumably loaded into an empty table1; confirm.
INSERT INTO table1 (ID, Val, Val2, Val3)
VALUES
    ('1',  '100', '200', '90'),
    ('2',  '100', '200', '10'),
    ('3',  '100', '200', '20'),
    ('19', '100', '200', '20'),
    ('4',  '20',  '100', '55'),
    ('5',  '20',  '100', '10'),
    ('6',  '112', '100', '20'),
    ('7',  '112', '100', '20'),
    ('8',  '90',  '200', '90'),
    ('9',  '30',  '90',  '180'),
    ('10', '30',  '90',  '29');
预期结果2
ID Val Val2 Val3
1 100 200 90
2 100 200 10
4 20 100 55
5 20 100 10
9 30 90 180
10 30 90 29
示例 3
-- Sample 3 data set: Val3 now holds non-numeric text.
-- IDs 1, 2 and 3 share the same three values; ID 19 shares only their
-- Val and Val2.
-- NOTE(review): presumably loaded into an empty table1; confirm.
INSERT INTO table1 (ID, Val, Val2, Val3)
VALUES
    ('1',  '100', '200', 'aa'),
    ('2',  '100', '200', 'aa'),
    ('3',  '100', '200', 'aa'),
    ('19', '100', '200', 'ab'),
    ('4',  '20',  '100', 'SD2'),
    ('5',  '20',  '100', 'SD1'),
    ('6',  '112', '100', 'aa'),
    ('7',  '112', '100', 'ab'),
    ('8',  '90',  '200', 'aa'),
    ('9',  '30',  '90',  'SF2'),
    ('10', '30',  '90',  'SF1');
预期结果3
ID Val Val2 Val3
4 20 100 SD2
5 20 100 SD1
6 112 100 aa
7 112 100 ab
9 30 90 SF2
10 30 90 SF1
有些人可能对样本 3 感到困惑,所以这里是样本 3 的注释:
在这种情况下,示例 3 中的 ID 19 在 val 和 val2 两列上与 id 1、2、3 的值相同(100 和 200),但是这些 id(1、2、3)在 val3 中的值都是 aa,所以必须排除 id 1、2、3,因为它们不满足最后一个条件,即 (val, val2, val3) 必须唯一。ID 19 本身的 val3 是唯一的,但是在 val 和 val2 上与它重复的行(id 1、2、3)已经被排除,这使得 id 19 在 val 和 val2 两列上不再有重复值,因此它也要被排除。如果示例 3 中还有另一条数据,如 ('200', '100', '200', 'ae'),则 id 19 将包含在结果中,因为除了 id 1、2、3 之外,它还有别的重复行。
对于示例 3,如果 table1 中的数据是这样的,则 ID 19 将被包含在内
示例 3(另一种情况)
-- Sample 3 variant: the sample 3 rows plus ID 200, a second row in the
-- ('100', '200') pair whose Val3 is not repeated.
-- NOTE(review): presumably loaded into an empty table1; confirm.
INSERT INTO table1 (ID, Val, Val2, Val3)
VALUES
    ('1',   '100', '200', 'aa'),
    ('2',   '100', '200', 'aa'),
    ('3',   '100', '200', 'aa'),
    ('19',  '100', '200', 'ab'),
    ('200', '100', '200', 'ae'),
    ('4',   '20',  '100', 'SD2'),
    ('5',   '20',  '100', 'SD1'),
    ('6',   '112', '100', 'aa'),
    ('7',   '112', '100', 'ab'),
    ('8',   '90',  '200', 'aa'),
    ('9',   '30',  '90',  'SF2'),
    ('10',  '30',  '90',  'SF1');
预期的结果会是这样
ID Val Val2 Val3
4 20 100 SD2
5 20 100 SD1
19 100 200 ab
200 100 200 ae
6 112 100 aa
7 112 100 ab
9 30 90 SF2
10 30 90 SF1
据我了解您的问题,您想要 (val, val2)
元组不唯一且 (val, val2, val3)
是唯一的行。
这是一种通过使用相关子查询过滤数据集来表达这一点的方法:
-- Returns the rows whose (val, val2, val3) triple occurs exactly once AND
-- whose (val, val2) pair contains at least one other row that also has a
-- once-only triple.
-- Counting every row of the pair instead would also return rows such as
-- sample 3's ID 19, whose only pair-mates are the repeated 'aa' rows that are
-- themselves rejected; expected result 3 leaves that row out.
select t1.*
from table1 t1
where
    -- this row's own triple occurs exactly once
    (
        select count(*)
        from table1 t2
        where t2.val = t1.val and t2.val2 = t1.val2 and t2.val3 = t1.val3
    ) = 1
    -- another row of the same pair, with a different val3, is also unique
    and exists (
        select 1
        from table1 t2
        where t2.val = t1.val
          and t2.val2 = t1.val2
          and t2.val3 <> t1.val3
          and (
              select count(*)
              from table1 t3
              where t3.val = t2.val and t3.val2 = t2.val2 and t3.val3 = t2.val3
          ) = 1
    )
-- id is declared as VARCHAR, so this sort is lexicographic ('10' before '2')
order by id
为了性能,考虑在 (val, val2, val3) 上建立索引(索引中列的顺序在这里很重要)。
如果你有幸运行的是 MySQL 8.0,这可以使用窗口函数(window function)更简单、更高效地表达:
-- MySQL 8.0+ (window functions).
-- Returns the rows whose (val, val2, val3) triple occurs exactly once AND
-- whose (val, val2) pair holds more than one such once-only row.
-- Counting all rows of the pair instead would keep rows such as sample 3's
-- ID 19, whose pair-mates are all rejected duplicates; expected result 3
-- leaves that row out.
select id, val, val2, val3
from (
    select
        s.*,
        -- number of rows in this (val, val2) pair whose triple is unique;
        -- computed one level up because window functions cannot be nested
        sum(case when cnt_triple = 1 then 1 else 0 end)
            over (partition by val, val2) as cnt_unique_in_pair
    from (
        select
            t1.*,
            count(*) over (partition by val, val2, val3) as cnt_triple
        from table1 t1
    ) s
) t
where cnt_triple = 1 and cnt_unique_in_pair > 1
将 table 加入符合您条件的查询:
-- Starts from (val, val2) pairs that occur more than once, requires two
-- different val3 values that each occur once within the pair, then fetches
-- the table1 rows carrying either of those val3 values.
-- DISTINCT removes the repeats produced because the two "unique triple"
-- derived tables are identical and match each other in both orders.
SELECT DISTINCT r.*
FROM (
    SELECT val, val2
    FROM table1
    GROUP BY val, val2
    HAVING COUNT(*) > 1
) AS dup_pair
INNER JOIN (
    SELECT val, val2, val3
    FROM table1
    GROUP BY val, val2, val3
    HAVING COUNT(*) = 1
) AS uniq_a
    ON uniq_a.val = dup_pair.val
   AND uniq_a.val2 = dup_pair.val2
INNER JOIN (
    SELECT val, val2, val3
    FROM table1
    GROUP BY val, val2, val3
    HAVING COUNT(*) = 1
) AS uniq_b
    ON uniq_b.val = dup_pair.val
   AND uniq_b.val2 = dup_pair.val2
   AND uniq_b.val3 <> uniq_a.val3
INNER JOIN table1 AS r
    ON uniq_a.val = r.val
   AND uniq_a.val2 = r.val2
   AND r.val3 IN (uniq_a.val3, uniq_b.val3)
正如 @GMB 在他的回答中简明指出的那样,您需要的是 (val, val2) 元组不唯一、且 (val, val2, val3) 唯一的行。
下面的查询可以做到这一点:
-- Keeps table1 rows whose (val, val2, val3) triple occurs once and whose
-- (val, val2) pair holds more than one such row.
SELECT r.*
FROM table1 AS r
INNER JOIN (
    -- (val, val2) pairs with more than one distinct id among the rows whose
    -- triple occurs once
    SELECT src.val, src.val2
    FROM table1 AS src
    INNER JOIN (
        SELECT val, val2, val3
        FROM table1
        GROUP BY val, val2, val3
        HAVING COUNT(val3) = 1
    ) AS uniq
        ON src.val = uniq.val
       AND src.val2 = uniq.val2
       AND src.val3 = uniq.val3
    GROUP BY src.val, src.val2
    HAVING COUNT(DISTINCT src.id) > 1
) AS good_pair
    ON good_pair.val = r.val
   AND good_pair.val2 = r.val2
INNER JOIN (
    -- triples that occur exactly once; restricts the output to those rows
    SELECT val, val2, val3
    FROM table1
    GROUP BY val, val2, val3
    HAVING COUNT(val3) = 1
) AS uniq_row
    ON r.val = uniq_row.val
   AND r.val2 = uniq_row.val2
   AND r.val3 = uniq_row.val3