为什么 SQL HAVING 条件下的 COUNT 不能计算出正确的值?
Why COUNT in the SQL HAVING condition doesn't calculate correct value?
当我解决这个问题时HackerRank problem
我发现我的解决方案符合条件
COUNT(total) = 1
无效:
SELECT H.hacker_id AS hacker_id, H.name AS name, COUNT(C.challenge_id) AS total
FROM Hackers H
JOIN Challenges C ON H.hacker_id = C.hacker_id
GROUP BY H.hacker_id, H.name
HAVING
COUNT(total) = 1 OR -- HERE
OR
total = (
SELECT MAX(amount) FROM (
SELECT COUNT(challenge_id) AS amount FROM Challenges
GROUP BY hacker_id
) t
)
ORDER BY total DESC, hacker_id;
输出:
5120 Julia 50
18425 Anna 50
20023 Brian 50
33625 Jason 50
41805 Benjamin 50
52462 Nicholas 50
64036 Craig 50
69471 Michelle 50
77173 Mildred 50
94278 Dennis 50
96009 Russell 50
96716 Emily 50
2689 Joe 1
3432 Linda 1
5827 Kelly 1
6011 Robin 1
7537 David 1
7961 Michelle 1
9642 Joseph 1
9901 Lawrence 1
10975 Christine 1
11715 Louise 1
13075 John 1
13905 Evelyn 1
14307 David 1
14726 Emily 1
15109 Dorothy 1
17282 Norma 1
17311 Andrew 1
17663 Benjamin 1
17846 Alan 1
18709 James 1
19032 Andrew 1
19781 Jesse 1
19962 Patricia 1
20190 Aaron 1
21350 Bobby 1
21821 Carol 1
21916 Clarence 1
22396 Wayne 1
22455 Carolyn 1
23812 Jerry 1
24047 Elizabeth 1
25684 Alan 1
25990 Mildred 1
26802 Paul 1
27797 Helen 1
28766 Paul 1
29242 Jennifer 1
29841 Charles 1
30677 Keith 1
30778 Jose 1
30908 Helen 1
31770 Ashley 1
32364 Julia 1
32735 Cheryl 1
33273 Sara 1
33489 Denise 1
37092 Jennifer 1
37764 Jimmy 1
38540 Katherine 1
42467 Ernest 1
43240 Diana 1
43398 Steve 1
43595 Adam 1
45685 Bobby 1
46144 Sharon 1
46468 Timothy 1
46604 Christina 1
47156 Kelly 1
47921 Lillian 1
50560 Brian 1
52350 Teresa 1
53451 Jeffrey 1
53597 Rose 1
53768 Douglas 1
54015 Carolyn 1
54510 Paula 1
55415 Amy 1
56039 Teresa 1
56103 Kelly 1
56338 Jose 1
57195 Beverly 1
57873 Diana 1
58086 Debra 1
58167 David 1
58543 Rachel 1
59871 Martin 1
59895 Martha 1
60177 Brian 1
61093 Mark 1
61102 Kenneth 1
61206 Lillian 1
61769 Marie 1
63263 Dorothy 1
63684 Randy 1
63730 Sarah 1
63803 Carolyn 1
63961 Anna 1
64341 Virginia 1
64882 Roy 1
68178 Gloria 1
70499 Dennis 1
72321 Julie 1
72763 Julie 1
73267 Jeremy 1
73676 Linda 1
74320 Pamela 1
78615 Kathryn 1
79612 Tina 1
81652 Albert 1
83308 Roy 1
84739 Alan 1
84938 Judy 1
85094 Matthew 1
86142 Douglas 1
87040 Craig 1
87885 Gregory 1
88069 Jean 1
88083 Anna 1
88084 Alan 1
88858 Bruce 1
89514 Jeffrey 1
89903 Katherine 1
90276 Joyce 1
90369 Christina 1
91620 Debra 1
92239 Shirley 1
92920 Louis 1
94337 Lillian 1
94676 Patrick 1
94746 Adam 1
96521 Christine 1
96773 Angela 1
97338 Amy 1
98785 Rose 1
99101 Timothy 1
99165 Nancy 1
但这行得通:
SELECT H.hacker_id AS hacker_id, H.name AS name, COUNT(C.challenge_id) AS total
FROM Hackers H
JOIN Challenges C ON H.hacker_id = C.hacker_id
GROUP BY H.hacker_id, H.name
HAVING
--
total IN
(SELECT t0.total
FROM
(SELECT count(*) AS total
FROM challenges
GROUP BY hacker_id) t0
GROUP BY t0.total
HAVING count(t0.total) = 1)
-- instead of 'COUNT(total) = 1'
OR
total = (
SELECT MAX(amount) FROM (
SELECT COUNT(challenge_id) AS amount FROM Challenges
GROUP BY hacker_id
) t
)
ORDER BY total DESC, hacker_id;
输出:
5120 Julia 50
18425 Anna 50
20023 Brian 50
33625 Jason 50
41805 Benjamin 50
52462 Nicholas 50
64036 Craig 50
69471 Michelle 50
77173 Mildred 50
94278 Dennis 50
96009 Russell 50
96716 Emily 50
72866 Eugene 42
37068 Patrick 41
12766 Jacqueline 40
86280 Beverly 37
19835 Joyce 36
38316 Walter 35
29483 Jeffrey 34
23428 Arthur 33
95437 George 32
46963 Barbara 31
87524 Norma 30
84085 Johnny 29
39582 Maria 28
65843 Thomas 27
5443 Paul 26
52965 Bobby 25
77105 Diana 24
33787 Susan 23
45855 Clarence 22
33177 Jane 21
7302 Victor 20
54461 Janet 19
42277 Sara 18
99388 Mary 16
31426 Carlos 15
95010 Victor 14
27071 Gerald 10
90267 Edward 9
72609 Bobby 8
那么,为什么我不能只使用这个
COUNT(total) = 1
而不是这个大条件:
total IN
(SELECT t0.total
FROM
(SELECT count(*) AS total
FROM challenges
GROUP BY hacker_id) t0
GROUP BY t0.total
HAVING count(t0.total) = 1)
因为在您的上下文中 total 只是聚合结果的别名,而不是值。一般来说,您必须重复 COUNT(C.challenge_id)
部分,但这对您没有帮助,因为 COUNT(COUNT(C.challenge_id)) = 1
显然是错误的。
有一些解决方案,例如将聚合数据集结果放入临时 table/table 变量或使用 CTE,这就是子查询起作用的原因。
免责声明:不要检查您的查询是否正确,但 IN 听起来很难看,使用 CTE 可能是更好的方法。
下面是一个使用 MS SQL 的示例解决方案,抱歉,现在不要 MySql 运行 nut 不要使用 CTE =)
create table dbo.HACKER
(
hacker_id int,
name varchar(100)
)
GO
create table dbo.CHALLENGE
(
challenge_id int,
hacker_id int
)
GO
insert into dbo.HACKER
(hacker_id,name)
values
(5077,'Rose')
,(21283,'Angela')
,(62743,'Frank')
,(88255,'Patrick')
,(96196,'Lisa')
insert into dbo.CHALLENGE
(challenge_id, hacker_id)
values
(61654,5077)
,(58302,21283)
GO
--drop table #Temp
--drop table #totalsToExclude
select hk.hacker_id, hk.name, x.total
into #Temp
from dbo.HACKER hk
join (select ch.hacker_id, count(*) as total from dbo.CHALLENGE ch group by ch.hacker_id) as x
on x.hacker_id = hk.hacker_id
select * from #Temp
declare @maxTotal as int =(select max(total) from #temp)
select @maxTotal
select t.total, count(*) as [Count_total]
into #totalsToExclude
from #temp t
group by t.total
having(count(*) >1)
delete tx from #totalsToExclude tx where tx.total = @maxTotal
select * from #totalsToExclude
select * from #Temp t
where t.total not in (select t.total from #totalsToExclude)
order by t.total desc, t.hacker_id
当我解决这个问题时HackerRank problem
我发现我的解决方案符合条件
COUNT(total) = 1
无效:
SELECT H.hacker_id AS hacker_id, H.name AS name, COUNT(C.challenge_id) AS total
FROM Hackers H
JOIN Challenges C ON H.hacker_id = C.hacker_id
GROUP BY H.hacker_id, H.name
HAVING
COUNT(total) = 1 OR -- HERE
OR
total = (
SELECT MAX(amount) FROM (
SELECT COUNT(challenge_id) AS amount FROM Challenges
GROUP BY hacker_id
) t
)
ORDER BY total DESC, hacker_id;
输出:
5120 Julia 50
18425 Anna 50
20023 Brian 50
33625 Jason 50
41805 Benjamin 50
52462 Nicholas 50
64036 Craig 50
69471 Michelle 50
77173 Mildred 50
94278 Dennis 50
96009 Russell 50
96716 Emily 50
2689 Joe 1
3432 Linda 1
5827 Kelly 1
6011 Robin 1
7537 David 1
7961 Michelle 1
9642 Joseph 1
9901 Lawrence 1
10975 Christine 1
11715 Louise 1
13075 John 1
13905 Evelyn 1
14307 David 1
14726 Emily 1
15109 Dorothy 1
17282 Norma 1
17311 Andrew 1
17663 Benjamin 1
17846 Alan 1
18709 James 1
19032 Andrew 1
19781 Jesse 1
19962 Patricia 1
20190 Aaron 1
21350 Bobby 1
21821 Carol 1
21916 Clarence 1
22396 Wayne 1
22455 Carolyn 1
23812 Jerry 1
24047 Elizabeth 1
25684 Alan 1
25990 Mildred 1
26802 Paul 1
27797 Helen 1
28766 Paul 1
29242 Jennifer 1
29841 Charles 1
30677 Keith 1
30778 Jose 1
30908 Helen 1
31770 Ashley 1
32364 Julia 1
32735 Cheryl 1
33273 Sara 1
33489 Denise 1
37092 Jennifer 1
37764 Jimmy 1
38540 Katherine 1
42467 Ernest 1
43240 Diana 1
43398 Steve 1
43595 Adam 1
45685 Bobby 1
46144 Sharon 1
46468 Timothy 1
46604 Christina 1
47156 Kelly 1
47921 Lillian 1
50560 Brian 1
52350 Teresa 1
53451 Jeffrey 1
53597 Rose 1
53768 Douglas 1
54015 Carolyn 1
54510 Paula 1
55415 Amy 1
56039 Teresa 1
56103 Kelly 1
56338 Jose 1
57195 Beverly 1
57873 Diana 1
58086 Debra 1
58167 David 1
58543 Rachel 1
59871 Martin 1
59895 Martha 1
60177 Brian 1
61093 Mark 1
61102 Kenneth 1
61206 Lillian 1
61769 Marie 1
63263 Dorothy 1
63684 Randy 1
63730 Sarah 1
63803 Carolyn 1
63961 Anna 1
64341 Virginia 1
64882 Roy 1
68178 Gloria 1
70499 Dennis 1
72321 Julie 1
72763 Julie 1
73267 Jeremy 1
73676 Linda 1
74320 Pamela 1
78615 Kathryn 1
79612 Tina 1
81652 Albert 1
83308 Roy 1
84739 Alan 1
84938 Judy 1
85094 Matthew 1
86142 Douglas 1
87040 Craig 1
87885 Gregory 1
88069 Jean 1
88083 Anna 1
88084 Alan 1
88858 Bruce 1
89514 Jeffrey 1
89903 Katherine 1
90276 Joyce 1
90369 Christina 1
91620 Debra 1
92239 Shirley 1
92920 Louis 1
94337 Lillian 1
94676 Patrick 1
94746 Adam 1
96521 Christine 1
96773 Angela 1
97338 Amy 1
98785 Rose 1
99101 Timothy 1
99165 Nancy 1
但这行得通:
SELECT H.hacker_id AS hacker_id, H.name AS name, COUNT(C.challenge_id) AS total
FROM Hackers H
JOIN Challenges C ON H.hacker_id = C.hacker_id
GROUP BY H.hacker_id, H.name
HAVING
--
total IN
(SELECT t0.total
FROM
(SELECT count(*) AS total
FROM challenges
GROUP BY hacker_id) t0
GROUP BY t0.total
HAVING count(t0.total) = 1)
-- instead of 'COUNT(total) = 1'
OR
total = (
SELECT MAX(amount) FROM (
SELECT COUNT(challenge_id) AS amount FROM Challenges
GROUP BY hacker_id
) t
)
ORDER BY total DESC, hacker_id;
输出:
5120 Julia 50
18425 Anna 50
20023 Brian 50
33625 Jason 50
41805 Benjamin 50
52462 Nicholas 50
64036 Craig 50
69471 Michelle 50
77173 Mildred 50
94278 Dennis 50
96009 Russell 50
96716 Emily 50
72866 Eugene 42
37068 Patrick 41
12766 Jacqueline 40
86280 Beverly 37
19835 Joyce 36
38316 Walter 35
29483 Jeffrey 34
23428 Arthur 33
95437 George 32
46963 Barbara 31
87524 Norma 30
84085 Johnny 29
39582 Maria 28
65843 Thomas 27
5443 Paul 26
52965 Bobby 25
77105 Diana 24
33787 Susan 23
45855 Clarence 22
33177 Jane 21
7302 Victor 20
54461 Janet 19
42277 Sara 18
99388 Mary 16
31426 Carlos 15
95010 Victor 14
27071 Gerald 10
90267 Edward 9
72609 Bobby 8
那么,为什么我不能只使用这个
COUNT(total) = 1
而不是这个大条件:
total IN
(SELECT t0.total
FROM
(SELECT count(*) AS total
FROM challenges
GROUP BY hacker_id) t0
GROUP BY t0.total
HAVING count(t0.total) = 1)
因为在您的上下文中 total 只是聚合结果的别名,而不是值。一般来说,您必须重复 COUNT(C.challenge_id)
部分,但这对您没有帮助,因为 COUNT(COUNT(C.challenge_id)) = 1
显然是错误的。
有一些解决方案,例如将聚合数据集结果放入临时 table/table 变量或使用 CTE,这就是子查询起作用的原因。
免责声明:不要检查您的查询是否正确,但 IN 听起来很难看,使用 CTE 可能是更好的方法。
下面是一个使用 MS SQL 的示例解决方案,抱歉,现在不要 MySql 运行 nut 不要使用 CTE =)
create table dbo.HACKER
(
hacker_id int,
name varchar(100)
)
GO
create table dbo.CHALLENGE
(
challenge_id int,
hacker_id int
)
GO
insert into dbo.HACKER
(hacker_id,name)
values
(5077,'Rose')
,(21283,'Angela')
,(62743,'Frank')
,(88255,'Patrick')
,(96196,'Lisa')
insert into dbo.CHALLENGE
(challenge_id, hacker_id)
values
(61654,5077)
,(58302,21283)
GO
--drop table #Temp
--drop table #totalsToExclude
select hk.hacker_id, hk.name, x.total
into #Temp
from dbo.HACKER hk
join (select ch.hacker_id, count(*) as total from dbo.CHALLENGE ch group by ch.hacker_id) as x
on x.hacker_id = hk.hacker_id
select * from #Temp
declare @maxTotal as int =(select max(total) from #temp)
select @maxTotal
select t.total, count(*) as [Count_total]
into #totalsToExclude
from #temp t
group by t.total
having(count(*) >1)
delete tx from #totalsToExclude tx where tx.total = @maxTotal
select * from #totalsToExclude
select * from #Temp t
where t.total not in (select t.total from #totalsToExclude)
order by t.total desc, t.hacker_id