查询按某列分组后聚合值最大的行
Select rows grouped by a column having max aggregate
给定以下数据集,我如何找出在最多的 ApplicationID 中
获得 "Accepted" 决定的推荐人电子邮件地址?
-- Fixture for the question: eight applications, three reference e-mails per
-- application, and exactly one final decision per application.
-- With this data ref10@test.org (applications 1 and 8) and ref11@test.org
-- (applications 1 and 3) each appear on two accepted applications.

-- One row per (application, reference e-mail) pair.
CREATE TABLE IF NOT EXISTS `EmailReferences` (
    `ApplicationID` INT         NOT NULL,
    `Email`         VARCHAR(45) NOT NULL,
    PRIMARY KEY (`ApplicationID`, `Email`)
);

-- One decision per application.
CREATE TABLE IF NOT EXISTS `FinalDecision` (
    `ApplicationID` INT                        NOT NULL,
    `Decision`      ENUM('Accepted', 'Denied') NOT NULL,
    PRIMARY KEY (`ApplicationID`)
);

INSERT INTO EmailReferences
    (ApplicationID, Email)
VALUES
    (1, 'ref10@test.org'), (1, 'ref11@test.org'), (1, 'ref12@test.org'),
    (2, 'ref20@test.org'), (2, 'ref21@test.org'), (2, 'ref22@test.org'),
    (3, 'ref11@test.org'), (3, 'ref31@test.org'), (3, 'ref32@test.org'),
    (4, 'ref40@test.org'), (4, 'ref41@test.org'), (4, 'ref42@test.org'),
    (5, 'ref50@test.org'), (5, 'ref51@test.org'), (5, 'ref52@test.org'),
    (6, 'ref60@test.org'), (6, 'ref11@test.org'), (6, 'ref62@test.org'),
    (7, 'ref70@test.org'), (7, 'ref71@test.org'), (7, 'ref72@test.org'),
    (8, 'ref10@test.org'), (8, 'ref81@test.org'), (8, 'ref82@test.org');

INSERT INTO FinalDecision
    (ApplicationID, Decision)
VALUES
    (1, 'Accepted'), (2, 'Denied'),
    (3, 'Accepted'), (4, 'Denied'),
    (5, 'Denied'),   (6, 'Denied'),
    (7, 'Denied'),   (8, 'Accepted');
Fiddle 相同:http://sqlfiddle.com/#!9/03bcf2/1
最初,我使用 LIMIT 1
和 ORDER BY CountDecision DESC
,像这样:
-- Asker's first attempt: count accepted applications per reference e-mail,
-- sort by that count and keep the top row.
-- LIMIT 1 returns a single row, so e-mails tied for the highest count are
-- not all returned.
SELECT
    refs.email,
    COUNT(decisions.Decision) AS CountDecision
FROM EmailReferences AS refs
INNER JOIN FinalDecision AS decisions
    ON decisions.ApplicationID = refs.ApplicationID
WHERE decisions.Decision = 'Accepted'
GROUP BY refs.email
ORDER BY CountDecision DESC
LIMIT 1;
然而,我突然想到,可能有多个电子邮件地址并列拥有最多的 "Accepted" 决定(即出现平局),而这些并列的地址会被 LIMIT
关键字过滤掉(这样措辞对吗?)。
然后我尝试了上述查询的变体,将 ORDER BY
和 LIMIT
行替换为:
HAVING MAX(CountDecision)
但我意识到这只是半个条件:MAX(CountDecision)
需要与某个值进行比较。我只是不知道该与什么比较。
任何指点将不胜感激。谢谢!
注意:这是作业。
更新: 明确地说,我正在尝试从 EmailReferences
中查找 Email
的值和计数。但是,我只想要具有 FinalDecision.Decision = 'Accepted'
的行(在匹配 ApplicationID
时)。根据我的数据,结果应该是:
Email | CountDecision
---------------+--------------
ref10@test.org | 2
ref11@test.org | 2
MySQL 目前仍然缺少窗口函数(window function),但等到版本 8 可用于生产环境后,这会变得更容易。因此,为了将来参考,或者对于像 MariaDB 这样已经支持窗口函数的数据库:
-- Fixture used by the window-function answer.
-- NOTE: application 3 lists 'ref30@test.org' in this fixture, so
-- ref11@test.org appears on only one accepted application (application 1)
-- and ref10@test.org alone has the highest count of two.

-- One row per (application, reference e-mail) pair.
CREATE TABLE IF NOT EXISTS `EmailReferences` (
    `ApplicationID` INT         NOT NULL,
    `Email`         VARCHAR(45) NOT NULL,
    PRIMARY KEY (`ApplicationID`, `Email`)
);

-- One decision per application.
CREATE TABLE IF NOT EXISTS `FinalDecision` (
    `ApplicationID` INT                        NOT NULL,
    `Decision`      ENUM('Accepted', 'Denied') NOT NULL,
    PRIMARY KEY (`ApplicationID`)
);

INSERT INTO EmailReferences
    (ApplicationID, Email)
VALUES
    (1, 'ref10@test.org'), (1, 'ref11@test.org'), (1, 'ref12@test.org'),
    (2, 'ref20@test.org'), (2, 'ref21@test.org'), (2, 'ref22@test.org'),
    (3, 'ref30@test.org'), (3, 'ref31@test.org'), (3, 'ref32@test.org'),
    (4, 'ref40@test.org'), (4, 'ref41@test.org'), (4, 'ref42@test.org'),
    (5, 'ref50@test.org'), (5, 'ref51@test.org'), (5, 'ref52@test.org'),
    (6, 'ref60@test.org'), (6, 'ref11@test.org'), (6, 'ref62@test.org'),
    (7, 'ref70@test.org'), (7, 'ref71@test.org'), (7, 'ref72@test.org'),
    (8, 'ref10@test.org'), (8, 'ref81@test.org'), (8, 'ref82@test.org');

INSERT INTO FinalDecision
    (ApplicationID, Decision)
VALUES
    (1, 'Accepted'), (2, 'Denied'),
    (3, 'Accepted'), (4, 'Denied'),
    (5, 'Denied'),   (6, 'Denied'),
    (7, 'Denied'),   (8, 'Accepted');
-- Count accepted applications per reference e-mail, attach the overall
-- maximum of those counts to every row with a window function, then keep the
-- rows whose own count equals that maximum, so tied e-mails are all returned.
WITH accepted_counts AS (
    SELECT
        refs.email,
        COUNT(decisions.Decision) AS CountDecision,
        -- window MAX over the grouped rows: the highest per-e-mail count
        MAX(COUNT(decisions.Decision)) OVER () AS maxCountDecision
    FROM EmailReferences AS refs
    INNER JOIN FinalDecision AS decisions
        ON decisions.ApplicationID = refs.ApplicationID
    WHERE decisions.Decision = 'Accepted'
    GROUP BY refs.email
)
SELECT
    email,
    CountDecision
FROM accepted_counts
WHERE CountDecision = maxCountDecision;
email | CountDecision
:------------- | ------------:
ref10@test.org | 2
dbfiddle here
例如...
-- Return every reference e-mail tied for the highest number of accepted
-- applications, without window functions.
-- Table names are spelled exactly as in the CREATE TABLE statements: MySQL
-- table names are case-sensitive on case-sensitive file systems
-- (lower_case_table_names = 0), so `emailreferences` would not resolve to
-- `EmailReferences` there.
SELECT per_email.email,
       per_email.total
  FROM
     ( -- accepted-application count for each reference e-mail
       SELECT er.email,
              COUNT(*) AS total
         FROM EmailReferences AS er
        INNER JOIN FinalDecision AS fd
           ON fd.ApplicationID = er.ApplicationID
        WHERE fd.Decision = 'Accepted'
        GROUP BY er.email
     ) AS per_email
 INNER JOIN
     ( -- the single highest of those counts; LIMIT 1 is safe here because
       -- only the count value is selected, not the e-mail
       SELECT COUNT(*) AS total
         FROM EmailReferences AS er
        INNER JOIN FinalDecision AS fd
           ON fd.ApplicationID = er.ApplicationID
        WHERE fd.Decision = 'Accepted'
        GROUP BY er.email
        ORDER BY total DESC
        LIMIT 1
     ) AS best
    ON best.total = per_email.total;
给定以下数据集,我如何找出在最多的 ApplicationID 中
获得 "Accepted" 决定的推荐人电子邮件地址?
-- Fixture for the question: eight applications, three reference e-mails per
-- application, and exactly one final decision per application.
-- With this data ref10@test.org (applications 1 and 8) and ref11@test.org
-- (applications 1 and 3) each appear on two accepted applications.

-- One row per (application, reference e-mail) pair.
CREATE TABLE IF NOT EXISTS `EmailReferences` (
    `ApplicationID` INT         NOT NULL,
    `Email`         VARCHAR(45) NOT NULL,
    PRIMARY KEY (`ApplicationID`, `Email`)
);

-- One decision per application.
CREATE TABLE IF NOT EXISTS `FinalDecision` (
    `ApplicationID` INT                        NOT NULL,
    `Decision`      ENUM('Accepted', 'Denied') NOT NULL,
    PRIMARY KEY (`ApplicationID`)
);

INSERT INTO EmailReferences
    (ApplicationID, Email)
VALUES
    (1, 'ref10@test.org'), (1, 'ref11@test.org'), (1, 'ref12@test.org'),
    (2, 'ref20@test.org'), (2, 'ref21@test.org'), (2, 'ref22@test.org'),
    (3, 'ref11@test.org'), (3, 'ref31@test.org'), (3, 'ref32@test.org'),
    (4, 'ref40@test.org'), (4, 'ref41@test.org'), (4, 'ref42@test.org'),
    (5, 'ref50@test.org'), (5, 'ref51@test.org'), (5, 'ref52@test.org'),
    (6, 'ref60@test.org'), (6, 'ref11@test.org'), (6, 'ref62@test.org'),
    (7, 'ref70@test.org'), (7, 'ref71@test.org'), (7, 'ref72@test.org'),
    (8, 'ref10@test.org'), (8, 'ref81@test.org'), (8, 'ref82@test.org');

INSERT INTO FinalDecision
    (ApplicationID, Decision)
VALUES
    (1, 'Accepted'), (2, 'Denied'),
    (3, 'Accepted'), (4, 'Denied'),
    (5, 'Denied'),   (6, 'Denied'),
    (7, 'Denied'),   (8, 'Accepted');
Fiddle 相同:http://sqlfiddle.com/#!9/03bcf2/1
最初,我使用 LIMIT 1
和 ORDER BY CountDecision DESC
,像这样:
-- Asker's first attempt: count accepted applications per reference e-mail,
-- sort by that count and keep the top row.
-- LIMIT 1 returns a single row, so e-mails tied for the highest count are
-- not all returned.
SELECT
    refs.email,
    COUNT(decisions.Decision) AS CountDecision
FROM EmailReferences AS refs
INNER JOIN FinalDecision AS decisions
    ON decisions.ApplicationID = refs.ApplicationID
WHERE decisions.Decision = 'Accepted'
GROUP BY refs.email
ORDER BY CountDecision DESC
LIMIT 1;
然而,我突然想到,可能有多个电子邮件地址并列拥有最多的 "Accepted" 决定(即出现平局),而这些并列的地址会被 LIMIT
关键字过滤掉(这样措辞对吗?)。
然后我尝试了上述查询的变体,将 ORDER BY
和 LIMIT
行替换为:
HAVING MAX(CountDecision)
但我意识到这只是半个条件:MAX(CountDecision)
需要与某个值进行比较。我只是不知道该与什么比较。
任何指点将不胜感激。谢谢!
注意:这是作业。
更新: 明确地说,我正在尝试从 EmailReferences
中查找 Email
的值和计数。但是,我只想要具有 FinalDecision.Decision = 'Accepted'
的行(在匹配 ApplicationID
时)。根据我的数据,结果应该是:
Email | CountDecision
---------------+--------------
ref10@test.org | 2
ref11@test.org | 2
MySQL 目前仍然缺少窗口函数(window function),但等到版本 8 可用于生产环境后,这会变得更容易。因此,为了将来参考,或者对于像 MariaDB 这样已经支持窗口函数的数据库:
-- Fixture used by the window-function answer.
-- NOTE: application 3 lists 'ref30@test.org' in this fixture, so
-- ref11@test.org appears on only one accepted application (application 1)
-- and ref10@test.org alone has the highest count of two.

-- One row per (application, reference e-mail) pair.
CREATE TABLE IF NOT EXISTS `EmailReferences` (
    `ApplicationID` INT         NOT NULL,
    `Email`         VARCHAR(45) NOT NULL,
    PRIMARY KEY (`ApplicationID`, `Email`)
);

-- One decision per application.
CREATE TABLE IF NOT EXISTS `FinalDecision` (
    `ApplicationID` INT                        NOT NULL,
    `Decision`      ENUM('Accepted', 'Denied') NOT NULL,
    PRIMARY KEY (`ApplicationID`)
);

INSERT INTO EmailReferences
    (ApplicationID, Email)
VALUES
    (1, 'ref10@test.org'), (1, 'ref11@test.org'), (1, 'ref12@test.org'),
    (2, 'ref20@test.org'), (2, 'ref21@test.org'), (2, 'ref22@test.org'),
    (3, 'ref30@test.org'), (3, 'ref31@test.org'), (3, 'ref32@test.org'),
    (4, 'ref40@test.org'), (4, 'ref41@test.org'), (4, 'ref42@test.org'),
    (5, 'ref50@test.org'), (5, 'ref51@test.org'), (5, 'ref52@test.org'),
    (6, 'ref60@test.org'), (6, 'ref11@test.org'), (6, 'ref62@test.org'),
    (7, 'ref70@test.org'), (7, 'ref71@test.org'), (7, 'ref72@test.org'),
    (8, 'ref10@test.org'), (8, 'ref81@test.org'), (8, 'ref82@test.org');

INSERT INTO FinalDecision
    (ApplicationID, Decision)
VALUES
    (1, 'Accepted'), (2, 'Denied'),
    (3, 'Accepted'), (4, 'Denied'),
    (5, 'Denied'),   (6, 'Denied'),
    (7, 'Denied'),   (8, 'Accepted');
-- Count accepted applications per reference e-mail, attach the overall
-- maximum of those counts to every row with a window function, then keep the
-- rows whose own count equals that maximum, so tied e-mails are all returned.
WITH accepted_counts AS (
    SELECT
        refs.email,
        COUNT(decisions.Decision) AS CountDecision,
        -- window MAX over the grouped rows: the highest per-e-mail count
        MAX(COUNT(decisions.Decision)) OVER () AS maxCountDecision
    FROM EmailReferences AS refs
    INNER JOIN FinalDecision AS decisions
        ON decisions.ApplicationID = refs.ApplicationID
    WHERE decisions.Decision = 'Accepted'
    GROUP BY refs.email
)
SELECT
    email,
    CountDecision
FROM accepted_counts
WHERE CountDecision = maxCountDecision;
email | CountDecision
:------------- | ------------:
ref10@test.org | 2
dbfiddle here
例如...
-- Return every reference e-mail tied for the highest number of accepted
-- applications, without window functions.
-- Table names are spelled exactly as in the CREATE TABLE statements: MySQL
-- table names are case-sensitive on case-sensitive file systems
-- (lower_case_table_names = 0), so `emailreferences` would not resolve to
-- `EmailReferences` there.
SELECT per_email.email,
       per_email.total
  FROM
     ( -- accepted-application count for each reference e-mail
       SELECT er.email,
              COUNT(*) AS total
         FROM EmailReferences AS er
        INNER JOIN FinalDecision AS fd
           ON fd.ApplicationID = er.ApplicationID
        WHERE fd.Decision = 'Accepted'
        GROUP BY er.email
     ) AS per_email
 INNER JOIN
     ( -- the single highest of those counts; LIMIT 1 is safe here because
       -- only the count value is selected, not the e-mail
       SELECT COUNT(*) AS total
         FROM EmailReferences AS er
        INNER JOIN FinalDecision AS fd
           ON fd.ApplicationID = er.ApplicationID
        WHERE fd.Decision = 'Accepted'
        GROUP BY er.email
        ORDER BY total DESC
        LIMIT 1
     ) AS best
    ON best.total = per_email.total;