条件聚合查询
Conditional aggregate queries
我见过其他解决方案。好奇这种解决这些问题的方法是否有效。
表格:
attendance_events : date | student_id | attendance
all_students : student_id | school_id | grade_level | date_of_birth | hometown
百分之几的学生在生日那天上学?
-- Percentage of students who attended school on their birthday.
--
-- agg_join: one row per (student, attendance event). The LEFT JOIN keeps
-- students that have no attendance events at all, so they still count in
-- the denominator. The join column is student_id on both tables.
WITH agg_join AS (
    SELECT
        att.date          AS dates,
        att.attendance    AS attendance,
        als.date_of_birth AS DOB,
        als.student_id    AS student_id
    FROM all_students AS als
    LEFT JOIN attendance_events AS att
        ON att.student_id = als.student_id
),
-- totals: aggregate once so the final SELECT can reference the counts by
-- name (aliases cannot be reused inside the SELECT list that defines them).
totals AS (
    SELECT
        COUNT(DISTINCT student_id) AS total_students,
        -- A birthday recurs every year, so compare month and day only;
        -- comparing full dates would only match the day of birth itself.
        -- Note: a Feb 29 birthday only matches in leap years.
        COUNT(DISTINCT CASE
            WHEN attendance = TRUE
             AND EXTRACT(MONTH FROM dates) = EXTRACT(MONTH FROM DOB)
             AND EXTRACT(DAY FROM dates) = EXTRACT(DAY FROM DOB)
            THEN student_id
        END) AS count_of_DOBS
    FROM agg_join
)
SELECT
    total_students,
    count_of_DOBS,
    -- 100.0 forces numeric (not integer) division; NULLIF avoids a
    -- division-by-zero error when there are no students.
    100.0 * count_of_DOBS / NULLIF(total_students, 0) AS percent_of_student
FROM totals;
从昨天到今天,哪个年级的出勤率下降幅度最大?
-- Grade level with the largest drop in attendance from yesterday to today.
--
-- agg_join: one row per attendance event with the student's grade level.
WITH agg_join AS (
    SELECT
        att.date        AS dates,
        att.attendance  AS attendance,
        als.grade_level AS grade
    FROM attendance_events AS att
    INNER JOIN all_students AS als
        ON att.student_id = als.student_id
),
-- per_grade: conditional counts must be aggregated (SUM) because the query
-- groups by grade; a bare CASE would be neither grouped nor aggregated.
per_grade AS (
    SELECT
        grade,
        SUM(CASE WHEN dates = CURRENT_DATE - 1 AND attendance = TRUE THEN 1 ELSE 0 END) AS yesterday_att,
        SUM(CASE WHEN dates = CURRENT_DATE AND attendance = TRUE THEN 1 ELSE 0 END) AS Today_att
    FROM agg_join
    GROUP BY grade
)
SELECT
    grade,
    yesterday_att,
    Today_att,
    -- Positive DIFF = attendance dropped; same value as (Today_att - yesterday_att) * -1.
    yesterday_att - Today_att AS DIFF
FROM per_grade
-- grade is a tie-breaker so LIMIT 1 returns a deterministic row.
ORDER BY DIFF DESC, grade
LIMIT 1;
百分之几的学生在生日那天上学?
-- Percentage of all students that have at least one attendance event whose
-- f_mmdd() value equals the f_mmdd() of their date of birth.
-- f_mmdd() is a custom function defined outside this snippet
-- (presumably a year-independent month/day key - confirm against its definition).
SELECT 100.0 * count(*) FILTER (WHERE flagged.has_birthday_event) / count(*) AS percentage
FROM (
    -- One row per student with a boolean "has a matching event" flag.
    SELECT EXISTS (
               SELECT 1
               FROM attendance_events AS a
               WHERE a.student_id = s.student_id
                 AND f_mmdd(a.date) = f_mmdd(s.date_of_birth)
           ) AS has_birthday_event
    FROM all_students AS s
) AS flagged;
自定义函数 f_mmdd() 的定义见:
- How do you do date math that ignores the year?
参见:
- Convert numeric result to percentage with decimal digits
关于聚合 FILTER 子句:
- Aggregate columns with additional (distinct) filters
从昨天到今天,哪个年级的出勤率下降幅度最大?
-- Grade level with the largest drop in attendance-event count from
-- yesterday to today.
SELECT
    s.grade_level,
    count(*) FILTER (WHERE a.date = CURRENT_DATE - 1) AS yesterday_attendance,
    count(*) FILTER (WHERE a.date = CURRENT_DATE)     AS today_attendance
FROM attendance_events AS a
INNER JOIN all_students AS s USING (student_id)
WHERE a.date IN (CURRENT_DATE, CURRENT_DATE - 1)  -- logically redundant, but lets the planner skip older rows
GROUP BY s.grade_level
-- Output-column aliases may not be used inside an ORDER BY expression in
-- PostgreSQL, so the aggregates are repeated here.
-- Ascending (today - yesterday) is equivalent to (yesterday - today) DESC.
-- grade_level is a tie-breaker so LIMIT 1 is deterministic.
ORDER BY
    count(*) FILTER (WHERE a.date = CURRENT_DATE)
      - count(*) FILTER (WHERE a.date = CURRENT_DATE - 1),
    s.grade_level
LIMIT 1;
WHERE a.date IN (CURRENT_DATE, CURRENT_DATE -1)
在逻辑上是多余的,但查询速度更快。
如果这些不是你自己解出来的,请仔细阅读并尝试理解它们为什么有效。
我见过其他解决方案。好奇这种解决这些问题的方法是否有效。
表格:
attendance_events : date | student_id | attendance
all_students : student_id | school_id | grade_level | date_of_birth | hometown
百分之几的学生在生日那天上学?
-- Percentage of students who attended school on their birthday.
--
-- agg_join: one row per (student, attendance event). The LEFT JOIN keeps
-- students that have no attendance events at all, so they still count in
-- the denominator. The join column is student_id on both tables.
WITH agg_join AS (
    SELECT
        att.date          AS dates,
        att.attendance    AS attendance,
        als.date_of_birth AS DOB,
        als.student_id    AS student_id
    FROM all_students AS als
    LEFT JOIN attendance_events AS att
        ON att.student_id = als.student_id
),
-- totals: aggregate once so the final SELECT can reference the counts by
-- name (aliases cannot be reused inside the SELECT list that defines them).
totals AS (
    SELECT
        COUNT(DISTINCT student_id) AS total_students,
        -- A birthday recurs every year, so compare month and day only;
        -- comparing full dates would only match the day of birth itself.
        -- Note: a Feb 29 birthday only matches in leap years.
        COUNT(DISTINCT CASE
            WHEN attendance = TRUE
             AND EXTRACT(MONTH FROM dates) = EXTRACT(MONTH FROM DOB)
             AND EXTRACT(DAY FROM dates) = EXTRACT(DAY FROM DOB)
            THEN student_id
        END) AS count_of_DOBS
    FROM agg_join
)
SELECT
    total_students,
    count_of_DOBS,
    -- 100.0 forces numeric (not integer) division; NULLIF avoids a
    -- division-by-zero error when there are no students.
    100.0 * count_of_DOBS / NULLIF(total_students, 0) AS percent_of_student
FROM totals;
从昨天到今天,哪个年级的出勤率下降幅度最大?
-- Grade level with the largest drop in attendance from yesterday to today.
--
-- agg_join: one row per attendance event with the student's grade level.
WITH agg_join AS (
    SELECT
        att.date        AS dates,
        att.attendance  AS attendance,
        als.grade_level AS grade
    FROM attendance_events AS att
    INNER JOIN all_students AS als
        ON att.student_id = als.student_id
),
-- per_grade: conditional counts must be aggregated (SUM) because the query
-- groups by grade; a bare CASE would be neither grouped nor aggregated.
per_grade AS (
    SELECT
        grade,
        SUM(CASE WHEN dates = CURRENT_DATE - 1 AND attendance = TRUE THEN 1 ELSE 0 END) AS yesterday_att,
        SUM(CASE WHEN dates = CURRENT_DATE AND attendance = TRUE THEN 1 ELSE 0 END) AS Today_att
    FROM agg_join
    GROUP BY grade
)
SELECT
    grade,
    yesterday_att,
    Today_att,
    -- Positive DIFF = attendance dropped; same value as (Today_att - yesterday_att) * -1.
    yesterday_att - Today_att AS DIFF
FROM per_grade
-- grade is a tie-breaker so LIMIT 1 returns a deterministic row.
ORDER BY DIFF DESC, grade
LIMIT 1;
百分之几的学生在生日那天上学?
-- Percentage of all students that have at least one attendance event whose
-- f_mmdd() value equals the f_mmdd() of their date of birth.
-- f_mmdd() is a custom function defined outside this snippet
-- (presumably a year-independent month/day key - confirm against its definition).
SELECT 100.0 * count(*) FILTER (WHERE flagged.has_birthday_event) / count(*) AS percentage
FROM (
    -- One row per student with a boolean "has a matching event" flag.
    SELECT EXISTS (
               SELECT 1
               FROM attendance_events AS a
               WHERE a.student_id = s.student_id
                 AND f_mmdd(a.date) = f_mmdd(s.date_of_birth)
           ) AS has_birthday_event
    FROM all_students AS s
) AS flagged;
自定义函数 f_mmdd() 的定义见:
- How do you do date math that ignores the year?
参见:
- Convert numeric result to percentage with decimal digits
关于聚合 FILTER 子句:
- Aggregate columns with additional (distinct) filters
从昨天到今天,哪个年级的出勤率下降幅度最大?
-- Grade level with the largest drop in attendance-event count from
-- yesterday to today.
SELECT
    s.grade_level,
    count(*) FILTER (WHERE a.date = CURRENT_DATE - 1) AS yesterday_attendance,
    count(*) FILTER (WHERE a.date = CURRENT_DATE)     AS today_attendance
FROM attendance_events AS a
INNER JOIN all_students AS s USING (student_id)
WHERE a.date IN (CURRENT_DATE, CURRENT_DATE - 1)  -- logically redundant, but lets the planner skip older rows
GROUP BY s.grade_level
-- Output-column aliases may not be used inside an ORDER BY expression in
-- PostgreSQL, so the aggregates are repeated here.
-- Ascending (today - yesterday) is equivalent to (yesterday - today) DESC.
-- grade_level is a tie-breaker so LIMIT 1 is deterministic.
ORDER BY
    count(*) FILTER (WHERE a.date = CURRENT_DATE)
      - count(*) FILTER (WHERE a.date = CURRENT_DATE - 1),
    s.grade_level
LIMIT 1;
WHERE a.date IN (CURRENT_DATE, CURRENT_DATE -1)
在逻辑上是多余的,但查询速度更快。
如果这些不是你自己解出来的,请仔细阅读并尝试理解它们为什么有效。