旋转聚合:得到错误的结果
Pivot the aggregate: getting wrong result
Table 1:
-- Surgery dates: one row per patient in this sample data set.
CREATE TABLE Main (
    patient      VARCHAR(5),
    surgery_date date
);

-- Sample surgeries for four patients.
INSERT INTO Main (patient, surgery_date)
VALUES
    ('P01', '2018-01-01'),
    ('P02', '2019-01-02'),
    ('P03', '2019-01-03'),
    ('P04', '2020-07-07');
Table 2:
-- bpm readings: one row per patient and reading date.
CREATE TABLE Additional (
    patient     VARCHAR(5),
    record_date date,
    bpm         integer
);

-- Sample readings; a patient can have several readings in one calendar month.
INSERT INTO Additional (patient, record_date, bpm)
VALUES
    ('P01', '2018-01-01', 60),
    ('P01', '2018-02-01', 49),
    ('P01', '2018-02-10', 52),
    ('P01', '2018-03-10', 57),
    ('P01', '2018-04-10', 57),
    ('P02', '2019-01-02', 46),
    ('P02', '2019-02-02', 55),
    ('P02', '2019-03-02', 55),
    ('P03', '2019-01-03', 60),
    ('P03', '2019-02-03', 49),
    ('P03', '2019-03-03', 49),
    ('P04', '2021-07-07', 71),
    ('P04', '2021-08-07', 49),
    ('P04', '2021-09-07', 49);
第一个表包含手术日期,第二个表包含术后的 bpm 读数……我想得到:在术后第一个和第二个日历月中,平均 bpm(按日历月计算的平均值)> 50 的患者所占的百分比。(需要忽略术后第二个日历月之后的所有数据)
这是我的尝试:
-- Asker's attempt, kept with its behaviour unchanged.
-- Each join matches a reading only on one exact date (the surgery date for
-- b1, the surgery date plus one month for b2), and the sums are multiplied
-- by 100 without being divided by a row count.
SELECT
    FORMAT(a.surgery_date, 'yyyy-MM') AS [Month],
    COUNT(*) AS [New_Surgery],
    100 * SUM(CASE WHEN b1.bpm > 50 THEN 1 ELSE 0 END) AS [Month1_percentage],
    100 * SUM(CASE WHEN b2.bpm > 50 THEN 1 ELSE 0 END) AS [Month2_percentage]
FROM Main AS a
LEFT JOIN Additional AS b1
    ON  b1.patient = a.patient
    AND b1.record_date = a.surgery_date
LEFT JOIN Additional AS b2
    ON  b2.patient = a.patient
    AND b2.record_date = DATEADD(month, 1, a.surgery_date)
GROUP BY FORMAT(a.surgery_date, 'yyyy-MM')
但这并没有给我应有的效果。
我的期望是:
Month New_Surgery Month1_percentage Month2_percentage
2018-01 1 100 100
2019-01 2 50 50
2020-07 1 100 0
您的 JOIN 条件只会返回日期恰好等于手术日期、或恰好等于手术日期后 1 个月的记录。我已将其改为包含第一个月内的所有读数和第二个月内的所有读数。此外,您忘记把满足条件的行数之和除以总行数,因此我添加了 /COUNT(*)。
由于数据集很小,结果非常简单:0、50 或 100%。
-- b1: readings dated no later than one month after surgery (no lower bound).
-- b2: readings dated after one month and up to two months after surgery.
-- Both joins produce one row per matching reading, so COUNT(*) counts joined
-- rows rather than patients; the division is integer division.
SELECT
    FORMAT(a.surgery_date, 'yyyy-MM') AS [Month],
    COUNT(*) AS [New_Surgery],
    (100 * SUM(CASE WHEN b1.bpm > 50 THEN 1 ELSE 0 END)) / COUNT(*) AS [Month1_percentage],
    (100 * SUM(CASE WHEN b2.bpm > 50 THEN 1 ELSE 0 END)) / COUNT(*) AS [Month2_percentage]
FROM Main AS a
LEFT JOIN Additional AS b1
    ON  b1.patient = a.patient
    AND b1.record_date <= DATEADD(month, 1, a.surgery_date)
LEFT JOIN Additional AS b2
    ON  b2.patient = a.patient
    AND b2.record_date >  DATEADD(month, 1, a.surgery_date)
    AND b2.record_date <= DATEADD(month, 2, a.surgery_date)
GROUP BY FORMAT(a.surgery_date, 'yyyy-MM')
这给出:
Month | New_Surgery | Month1_percentage | Month2_percentage
:------ | ----------: | ----------------: | ----------------:
2018-01 | 2 | 50 | 100
2019-01 | 4 | 50 | 50
2020-07 | 1 | 0 | 0
我觉得我把这个问题弄得过于复杂了:我用了几个 CTE 把它拆成几个阶段,先把行限制为仅需要的那些,然后对 bpm 列做透视,最后再计算百分比;不过它确实能返回您想要的结果。
对于这些结果而言,您的 Main 表是多余的,因为所需的数据都已存在于 Additional 表中(这里假设每位患者最早的 record_date 就是手术日期);如果还需要检索此处未列出的其他列,当然可以再连接 Main 表。
-- Per surgery month: number of patients, and the percentage of patients whose
-- calendar-month average bpm is above 50 in the surgery month (Month1) and in
-- the following calendar month (Month2).
-- The earliest record_date of each patient is used as the surgery date.
with d as (
    -- Tag every reading with the patient's first reading date and with the
    -- patient's average bpm for that reading's calendar month.
    select patient, record_date, bpm,
        First_Value(record_date) over(partition by patient order by record_date) Sdate,
        -- bpm is an integer column; * 1.0 keeps the fractional part of the
        -- average (e.g. 50.5 instead of 50) so "> 50" can be tested directly.
        Avg(bpm * 1.0) over(partition by patient, Year(record_date), Month(record_date)) av
    from Additional
), m as (
    -- Keep only the first two calendar months and pivot the average into
    -- m1 / m2. DateDiff(month, ...) counts month boundaries crossed, so
    -- 0 is the surgery month and 1 the next month, also across a year end
    -- (comparing Month() values alone breaks for December -> January).
    select patient, record_date, Format(sdate,'yyyy-MM') [Month],
        case when DateDiff(month, sdate, record_date) = 0 and av > 50 then av end m1,
        case when DateDiff(month, sdate, record_date) = 1 and av > 50 then av end m2
    from d
    where DateDiff(month, sdate, record_date) < 2
), p as (
    -- One row per surgery month and reading month. Grouping by [Month] keeps
    -- patients with different surgery months apart, and counting distinct
    -- patients keeps a patient with several readings from being counted twice.
    select [Month],
        Count(distinct case when m1 is not null then patient end) * 100.0 / Count(distinct patient) M1Percent,
        Count(distinct case when m2 is not null then patient end) * 100.0 / Count(distinct patient) M2Percent,
        Count(distinct patient) [New_Surgery]
    from m
    group by [Month], Year(record_date), Month(record_date)
)
-- Collapse the two reading-month rows of each surgery month into one row;
-- M1Percent is 0 on the second-month row and M2Percent is 0 on the first.
select [month], Max([New_Surgery]) [New_Surgery],
    Max(M1Percent) Month1_Percentage,
    Max(M2Percent) Month2_Percentage
from p
group by [Month]
order by [Month]
Table 1:
-- Surgery dates: one row per patient in this sample data set.
CREATE TABLE Main (
    patient      VARCHAR(5),
    surgery_date date
);

-- Sample surgeries for four patients.
INSERT INTO Main (patient, surgery_date)
VALUES
    ('P01', '2018-01-01'),
    ('P02', '2019-01-02'),
    ('P03', '2019-01-03'),
    ('P04', '2020-07-07');
Table 2:
-- bpm readings: one row per patient and reading date.
CREATE TABLE Additional (
    patient     VARCHAR(5),
    record_date date,
    bpm         integer
);

-- Sample readings; a patient can have several readings in one calendar month.
INSERT INTO Additional (patient, record_date, bpm)
VALUES
    ('P01', '2018-01-01', 60),
    ('P01', '2018-02-01', 49),
    ('P01', '2018-02-10', 52),
    ('P01', '2018-03-10', 57),
    ('P01', '2018-04-10', 57),
    ('P02', '2019-01-02', 46),
    ('P02', '2019-02-02', 55),
    ('P02', '2019-03-02', 55),
    ('P03', '2019-01-03', 60),
    ('P03', '2019-02-03', 49),
    ('P03', '2019-03-03', 49),
    ('P04', '2021-07-07', 71),
    ('P04', '2021-08-07', 49),
    ('P04', '2021-09-07', 49);
第一个表包含手术日期,第二个表包含术后的 bpm 读数……我想得到:在术后第一个和第二个日历月中,平均 bpm(按日历月计算的平均值)> 50 的患者所占的百分比。(需要忽略术后第二个日历月之后的所有数据)
这是我的尝试:
-- Asker's attempt, kept with its behaviour unchanged.
-- Each join matches a reading only on one exact date (the surgery date for
-- b1, the surgery date plus one month for b2), and the sums are multiplied
-- by 100 without being divided by a row count.
SELECT
    FORMAT(a.surgery_date, 'yyyy-MM') AS [Month],
    COUNT(*) AS [New_Surgery],
    100 * SUM(CASE WHEN b1.bpm > 50 THEN 1 ELSE 0 END) AS [Month1_percentage],
    100 * SUM(CASE WHEN b2.bpm > 50 THEN 1 ELSE 0 END) AS [Month2_percentage]
FROM Main AS a
LEFT JOIN Additional AS b1
    ON  b1.patient = a.patient
    AND b1.record_date = a.surgery_date
LEFT JOIN Additional AS b2
    ON  b2.patient = a.patient
    AND b2.record_date = DATEADD(month, 1, a.surgery_date)
GROUP BY FORMAT(a.surgery_date, 'yyyy-MM')
但这并没有给我应有的效果。
我的期望是:
Month New_Surgery Month1_percentage Month2_percentage
2018-01 1 100 100
2019-01 2 50 50
2020-07 1 100 0
您的 JOIN 条件只会返回日期恰好等于手术日期、或恰好等于手术日期后 1 个月的记录。我已将其改为包含第一个月内的所有读数和第二个月内的所有读数。此外,您忘记把满足条件的行数之和除以总行数,因此我添加了 /COUNT(*)。由于数据集很小,结果非常简单:0、50 或 100%。
-- b1: readings dated no later than one month after surgery (no lower bound).
-- b2: readings dated after one month and up to two months after surgery.
-- Both joins produce one row per matching reading, so COUNT(*) counts joined
-- rows rather than patients; the division is integer division.
SELECT
    FORMAT(a.surgery_date, 'yyyy-MM') AS [Month],
    COUNT(*) AS [New_Surgery],
    (100 * SUM(CASE WHEN b1.bpm > 50 THEN 1 ELSE 0 END)) / COUNT(*) AS [Month1_percentage],
    (100 * SUM(CASE WHEN b2.bpm > 50 THEN 1 ELSE 0 END)) / COUNT(*) AS [Month2_percentage]
FROM Main AS a
LEFT JOIN Additional AS b1
    ON  b1.patient = a.patient
    AND b1.record_date <= DATEADD(month, 1, a.surgery_date)
LEFT JOIN Additional AS b2
    ON  b2.patient = a.patient
    AND b2.record_date >  DATEADD(month, 1, a.surgery_date)
    AND b2.record_date <= DATEADD(month, 2, a.surgery_date)
GROUP BY FORMAT(a.surgery_date, 'yyyy-MM')
这给出:
Month | New_Surgery | Month1_percentage | Month2_percentage
:------ | ----------: | ----------------: | ----------------:
2018-01 | 2 | 50 | 100
2019-01 | 4 | 50 | 50
2020-07 | 1 | 0 | 0
我觉得我把这个问题弄得过于复杂了:我用了几个 CTE 把它拆成几个阶段,先把行限制为仅需要的那些,然后对 bpm 列做透视,最后再计算百分比;不过它确实能返回您想要的结果。
对于这些结果而言,您的 Main 表是多余的,因为所需的数据都已存在于 Additional 表中(这里假设每位患者最早的 record_date 就是手术日期);如果还需要检索此处未列出的其他列,当然可以再连接 Main 表。
-- Per surgery month: number of patients, and the percentage of patients whose
-- calendar-month average bpm is above 50 in the surgery month (Month1) and in
-- the following calendar month (Month2).
-- The earliest record_date of each patient is used as the surgery date.
with d as (
    -- Tag every reading with the patient's first reading date and with the
    -- patient's average bpm for that reading's calendar month.
    select patient, record_date, bpm,
        First_Value(record_date) over(partition by patient order by record_date) Sdate,
        -- bpm is an integer column; * 1.0 keeps the fractional part of the
        -- average (e.g. 50.5 instead of 50) so "> 50" can be tested directly.
        Avg(bpm * 1.0) over(partition by patient, Year(record_date), Month(record_date)) av
    from Additional
), m as (
    -- Keep only the first two calendar months and pivot the average into
    -- m1 / m2. DateDiff(month, ...) counts month boundaries crossed, so
    -- 0 is the surgery month and 1 the next month, also across a year end
    -- (comparing Month() values alone breaks for December -> January).
    select patient, record_date, Format(sdate,'yyyy-MM') [Month],
        case when DateDiff(month, sdate, record_date) = 0 and av > 50 then av end m1,
        case when DateDiff(month, sdate, record_date) = 1 and av > 50 then av end m2
    from d
    where DateDiff(month, sdate, record_date) < 2
), p as (
    -- One row per surgery month and reading month. Grouping by [Month] keeps
    -- patients with different surgery months apart, and counting distinct
    -- patients keeps a patient with several readings from being counted twice.
    select [Month],
        Count(distinct case when m1 is not null then patient end) * 100.0 / Count(distinct patient) M1Percent,
        Count(distinct case when m2 is not null then patient end) * 100.0 / Count(distinct patient) M2Percent,
        Count(distinct patient) [New_Surgery]
    from m
    group by [Month], Year(record_date), Month(record_date)
)
-- Collapse the two reading-month rows of each surgery month into one row;
-- M1Percent is 0 on the second-month row and M2Percent is 0 on the first.
select [month], Max([New_Surgery]) [New_Surgery],
    Max(M1Percent) Month1_Percentage,
    Max(M2Percent) Month2_Percentage
from p
group by [Month]
order by [Month]