Postgres SQL 查询按性别分组。如何显示行不存在的值?
Postgres SQL query grouped by gender. How to show values where a row does not exist?
我有一个查询,用于计算每天发送的消息总数,并按性别分组。
如果某天没有消息发送给男性,则不会有男性行。
在这种情况下,如何为男性插入计数值 0?
查询:
-- Counts client_report rows with sms_status = 'SUCCESS' for customer 25,
-- grouped by calendar day and gender, within 2017-07-03 .. 2017-08-01.
-- A gender that has no rows on a given day produces no output row for it.
WITH days AS (
    -- Today and the previous 365 days, formatted as 'YYYY-MM-DD' text.
    SELECT to_char(date_trunc('day', current_date - day_offset), 'YYYY-MM-DD') AS date
    FROM generate_series(0, 365, 1) AS day_offset
),
sent AS (
    -- Successfully sent reports together with the recipient's profile data.
    SELECT
        id,
        customer_id,
        client_report.insert_time,
        profile.gender,
        profile.house_income,
        profile.address AS postcode,
        profile.age AS age_group,
        profile.is_employed,
        profile.is_married,
        profile.no_children,
        profile.no_cars,
        profile.shopping_frequency
    FROM common.client_report
    INNER JOIN common.profile
        ON profile.uuid = client_report.profile_id
    WHERE sms_status = 'SUCCESS'
)
SELECT
    days.date AS dated,
    count(sent.id),
    sent.gender
FROM days
LEFT JOIN sent
    ON to_char(date_trunc('day', sent.insert_time), 'YYYY-MM-DD') = days.date
WHERE sent.customer_id = 25::int
  AND days.date::date BETWEEN '2017-07-03'::date AND '2017-08-01'::date
GROUP BY days.date, sent.gender
ORDER BY days.date ASC
我希望结果一致,比如:
day, count, gender
2017-07-01, 10, Female
2017-07-01, 5, Male
2017-07-02, 0, Female
2017-07-02, 8, Male
您应该对日期列表和性别列表做 CROSS JOIN。
类似于:
-- Daily count of successfully sent messages for customer 25, split by gender.
-- Every (day, gender) pair inside the reporting window yields a row; pairs
-- without any matching message report a count of 0.
-- NOTE(review): the day list is relative to current_date (today and the
-- previous 365 days), so the fixed 2017 window below only returns rows while
-- it still falls inside that range -- adjust one or the other as needed.
SELECT
    d.date AS dated,
    count(se.id),
    d.gender
FROM (
    -- All (day, gender) combinations, so that empty pairs still appear.
    -- NOTE(review): the literals must match the spelling/case stored in
    -- profile.gender -- confirm against the data.
    SELECT
        to_char(date_trunc('day', (current_date - offs)), 'YYYY-MM-DD') AS date,
        gender
    FROM generate_series(0, 365, 1) AS offs
    CROSS JOIN (VALUES ('Male'), ('Female')) AS genders(gender)
) AS d
LEFT JOIN (
    -- Only the columns the outer query actually uses.
    SELECT
        id,
        client_report.insert_time,
        profile.gender
    FROM common.client_report
    INNER JOIN common.profile
        ON client_report.profile_id = profile.uuid
    WHERE sms_status = 'SUCCESS'
      -- The customer filter belongs here, not in the outer WHERE: there it
      -- would reject the NULL-extended rows produced by the LEFT JOIN and
      -- silently drop every zero-count (day, gender) pair.
      AND customer_id = 25
) AS se
    ON d.date = to_char(date_trunc('day', se.insert_time), 'YYYY-MM-DD')
    AND d.gender = se.gender
-- Only columns of the preserved side (d) are filtered after the join.
WHERE d.date::date BETWEEN '2017-07-03'::date AND '2017-08-01'::date
GROUP BY d.date, d.gender
ORDER BY d.date ASC, d.gender ASC
为了简单起见,我们假设您创建了一个 genders 表:
-- Lookup table holding every gender value that should appear in the report.
CREATE TABLE genders (
    gender text PRIMARY KEY
);

-- Seed the two values used by the example.
INSERT INTO genders (gender) VALUES ('MALE');
INSERT INTO genders (gender) VALUES ('FEMALE');
如果您想得到所有日期/性别组合的结果,需要先对所有可能的日期和性别做 CROSS JOIN,然后再 LEFT JOIN 实际包含统计结果的数据:
-- Per-day, per-gender count of matching client_report rows for customer 25
-- over a fixed 8-day window; (day, gender) pairs without data report 0.
WITH calendar AS (
    -- One row per day from 2017-07-03 through 2017-07-10 (inclusive).
    -- For a rolling window, use now() instead of the date literal and
    -- e.g. '1 year' instead of '7 days'.
    SELECT day_ts::date AS cal_date
    FROM generate_series(
        date '2017-07-10' - interval '7 days',
        date '2017-07-10',
        interval '1 day'
    ) AS day_ts
),
counts AS (
    -- The actual data: only (date, gender) pairs that have rows show up here.
    SELECT
        date,
        gender,
        count(gender) AS gender_count
    FROM client_report
    INNER JOIN profile
        ON profile.uuid = client_report.profile_id
    WHERE customer_id = 25
      AND sms_status = 'SUCCESS'
    GROUP BY date, gender
)
SELECT
    calendar.cal_date AS date,
    coalesce(counts.gender_count, 0) AS gender_count,
    genders.gender
FROM genders
-- Virtual table with every date/gender combination.
CROSS JOIN calendar
LEFT JOIN counts
    ON counts.date = calendar.cal_date
    AND counts.gender = genders.gender
ORDER BY calendar.cal_date, genders.gender ;
您会得到如下答案:
date | gender_count | gender
:--------- | -----------: | :-----
2017-07-03 | 1 | FEMALE
2017-07-03 | 2 | MALE
2017-07-04 | 2 | FEMALE
2017-07-04 | 0 | MALE
2017-07-05 | 0 | FEMALE
2017-07-05 | 0 | MALE
2017-07-06 | 0 | FEMALE
2017-07-06 | 2 | MALE
2017-07-07 | 2 | FEMALE
2017-07-07 | 0 | MALE
2017-07-08 | 0 | FEMALE
2017-07-08 | 1 | MALE
2017-07-09 | 0 | FEMALE
2017-07-09 | 0 | MALE
2017-07-10 | 0 | FEMALE
2017-07-10 | 0 | MALE
请注意,我没有用一整年,而是只用了 8 天。
您可以在此处的 dbfiddle 中查看带有示例数据的完整示例。
注意:我忽略了 insert_time,只使用了 date 列,因为我不清楚这两列各自的确切含义。我认为您的问题重点在于如何使用 CROSS JOIN,其余细节与此无关。
我有一个查询,用于计算每天发送的消息总数,并按性别分组。
如果某天没有消息发送给男性,则不会有男性行。
在这种情况下,如何为男性插入计数值 0?
查询:
-- Counts client_report rows with sms_status = 'SUCCESS' for customer 25,
-- grouped by calendar day and gender, within 2017-07-03 .. 2017-08-01.
-- A gender that has no rows on a given day produces no output row for it.
WITH days AS (
    -- Today and the previous 365 days, formatted as 'YYYY-MM-DD' text.
    SELECT to_char(date_trunc('day', current_date - day_offset), 'YYYY-MM-DD') AS date
    FROM generate_series(0, 365, 1) AS day_offset
),
sent AS (
    -- Successfully sent reports together with the recipient's profile data.
    SELECT
        id,
        customer_id,
        client_report.insert_time,
        profile.gender,
        profile.house_income,
        profile.address AS postcode,
        profile.age AS age_group,
        profile.is_employed,
        profile.is_married,
        profile.no_children,
        profile.no_cars,
        profile.shopping_frequency
    FROM common.client_report
    INNER JOIN common.profile
        ON profile.uuid = client_report.profile_id
    WHERE sms_status = 'SUCCESS'
)
SELECT
    days.date AS dated,
    count(sent.id),
    sent.gender
FROM days
LEFT JOIN sent
    ON to_char(date_trunc('day', sent.insert_time), 'YYYY-MM-DD') = days.date
WHERE sent.customer_id = 25::int
  AND days.date::date BETWEEN '2017-07-03'::date AND '2017-08-01'::date
GROUP BY days.date, sent.gender
ORDER BY days.date ASC
我希望结果一致,比如:
day, count, gender
2017-07-01, 10, Female
2017-07-01, 5, Male
2017-07-02, 0, Female
2017-07-02, 8, Male
您应该对日期列表和性别列表做 CROSS JOIN。
类似于:
-- Daily count of successfully sent messages for customer 25, split by gender.
-- Every (day, gender) pair inside the reporting window yields a row; pairs
-- without any matching message report a count of 0.
-- NOTE(review): the day list is relative to current_date (today and the
-- previous 365 days), so the fixed 2017 window below only returns rows while
-- it still falls inside that range -- adjust one or the other as needed.
SELECT
    d.date AS dated,
    count(se.id),
    d.gender
FROM (
    -- All (day, gender) combinations, so that empty pairs still appear.
    -- NOTE(review): the literals must match the spelling/case stored in
    -- profile.gender -- confirm against the data.
    SELECT
        to_char(date_trunc('day', (current_date - offs)), 'YYYY-MM-DD') AS date,
        gender
    FROM generate_series(0, 365, 1) AS offs
    CROSS JOIN (VALUES ('Male'), ('Female')) AS genders(gender)
) AS d
LEFT JOIN (
    -- Only the columns the outer query actually uses.
    SELECT
        id,
        client_report.insert_time,
        profile.gender
    FROM common.client_report
    INNER JOIN common.profile
        ON client_report.profile_id = profile.uuid
    WHERE sms_status = 'SUCCESS'
      -- The customer filter belongs here, not in the outer WHERE: there it
      -- would reject the NULL-extended rows produced by the LEFT JOIN and
      -- silently drop every zero-count (day, gender) pair.
      AND customer_id = 25
) AS se
    ON d.date = to_char(date_trunc('day', se.insert_time), 'YYYY-MM-DD')
    AND d.gender = se.gender
-- Only columns of the preserved side (d) are filtered after the join.
WHERE d.date::date BETWEEN '2017-07-03'::date AND '2017-08-01'::date
GROUP BY d.date, d.gender
ORDER BY d.date ASC, d.gender ASC
为了简单起见,我们假设您创建了一个 genders 表:
-- Lookup table holding every gender value that should appear in the report.
CREATE TABLE genders (
    gender text PRIMARY KEY
);

-- Seed the two values used by the example.
INSERT INTO genders (gender) VALUES ('MALE');
INSERT INTO genders (gender) VALUES ('FEMALE');
如果您想得到所有日期/性别组合的结果,需要先对所有可能的日期和性别做 CROSS JOIN,然后再 LEFT JOIN 实际包含统计结果的数据:
-- Per-day, per-gender count of matching client_report rows for customer 25
-- over a fixed 8-day window; (day, gender) pairs without data report 0.
WITH calendar AS (
    -- One row per day from 2017-07-03 through 2017-07-10 (inclusive).
    -- For a rolling window, use now() instead of the date literal and
    -- e.g. '1 year' instead of '7 days'.
    SELECT day_ts::date AS cal_date
    FROM generate_series(
        date '2017-07-10' - interval '7 days',
        date '2017-07-10',
        interval '1 day'
    ) AS day_ts
),
counts AS (
    -- The actual data: only (date, gender) pairs that have rows show up here.
    SELECT
        date,
        gender,
        count(gender) AS gender_count
    FROM client_report
    INNER JOIN profile
        ON profile.uuid = client_report.profile_id
    WHERE customer_id = 25
      AND sms_status = 'SUCCESS'
    GROUP BY date, gender
)
SELECT
    calendar.cal_date AS date,
    coalesce(counts.gender_count, 0) AS gender_count,
    genders.gender
FROM genders
-- Virtual table with every date/gender combination.
CROSS JOIN calendar
LEFT JOIN counts
    ON counts.date = calendar.cal_date
    AND counts.gender = genders.gender
ORDER BY calendar.cal_date, genders.gender ;
您会得到如下答案:
date | gender_count | gender
:--------- | -----------: | :-----
2017-07-03 | 1 | FEMALE
2017-07-03 | 2 | MALE
2017-07-04 | 2 | FEMALE
2017-07-04 | 0 | MALE
2017-07-05 | 0 | FEMALE
2017-07-05 | 0 | MALE
2017-07-06 | 0 | FEMALE
2017-07-06 | 2 | MALE
2017-07-07 | 2 | FEMALE
2017-07-07 | 0 | MALE
2017-07-08 | 0 | FEMALE
2017-07-08 | 1 | MALE
2017-07-09 | 0 | FEMALE
2017-07-09 | 0 | MALE
2017-07-10 | 0 | FEMALE
2017-07-10 | 0 | MALE
请注意,我没有用一整年,而是只用了 8 天。
您可以在此处的 dbfiddle 中查看带有示例数据的完整示例。
注意:我忽略了 insert_time,只使用了 date 列,因为我不清楚这两列各自的确切含义。我认为您的问题重点在于如何使用 CROSS JOIN,其余细节与此无关。