已知每个人的出生日期和死亡日期,是否有 SQL 查询可以统计特定年份的在世人数?
Is there an sql query to count the number of people in a particular year, knowing the date of birth and the date of death of each person?
我有一张表 (table),其中包含姓名、出生日期和死亡日期 (1900-2000)。我需要知道某个时期内每年的人口数量,例如,1940年人口为23亿,1941年为24亿,1942年为22亿,以此类推,直到1950年。
我使用的是 SAS Enterprise Guide,因此代码看起来可能与标准 SQL 略有不同。至少我想看到这样的结果:
~
人数 |年
2.300.000.000 |1940
2.400.000.000 |1941
.....................
/* Asker's draft attempt, kept verbatim; it is pseudo-code and not runnable as written. */
select
/* NOTE(review): the trailing comma after count(name) leaves a dangling select list before FROM. */
count(name),
from db
/* NOTE(review): the date literals are pseudo-code (SAS would need e.g. '01JAN1940'd) and `=<` is not the
   standard SQL `<=` operator. As written, the filter keeps people born before 1940 who died during 1940,
   which is not the same as the people alive in 1940. */
where bd<1jan1940 and dd>=1jan1940 and dd=<31dec1940
/* NOTE(review): no `month` column is selected or otherwise visible for table db; confirm the intended grouping. */
group by month
首先你需要知道1899年底的初始人口,假设是20亿。然后逐年加上当年的出生人数并减去当年的死亡人数。(您必须访问该表两次才能做到这一点,一次统计出生,一次统计死亡。)使用 SUM OVER
计算累计总数 (running total)。
我不确定您实际使用的是哪个 DBMS,但这是非常标准的 SQL:
-- Population at the end of each year, computed as a running total on top of
-- an assumed starting population of 2,000,000,000 at the end of 1899.
--   births: number of rows in db per birth year (bd)
--   deaths: number of rows in db per death year (dd)
select
    yr,
    2000000000
        + sum(coalesce(births.cnt, 0) - coalesce(deaths.cnt, 0)) over (order by yr) as population
from
(
    select extract(year from bd) as yr, count(*) as cnt
    from db
    where bd is not null
    group by extract(year from bd)
) births
-- FULL OUTER JOIN: a year that has births but no deaths (or the reverse) must
-- still contribute to the running total; an inner join silently drops it.
full outer join
(
    select extract(year from dd) as yr, count(*) as cnt
    from db
    where dd is not null  -- rows without a death date have no death year
    group by extract(year from dd)
) deaths using (yr)
order by yr;
/* Build 10,000 synthetic people with a random date of birth. About one in
   ten also gets a date of death; everyone else gets a missing dod. */
data dob_data;
    do i = 1 to 10000;
        /* integer draw used only to decide whether a death date is generated */
        num = ceil(rand('UNIFORM',0,10));
        /* birth date: a random whole number of days after 01JAN1899 */
        dob = '01JAN1899'd + ceil(rand('UNIFORM',1,36865));
        /* death date, when present: a random whole number of days after dob */
        if num = 1 then dod = dob + ceil(rand('UNIFORM',1,36865));
        else dod = .;
        output;
    end;
    drop num;
    format dob dod date9.;
run;
/* One row per calendar year from 1900 to 2000, carrying that year's first
   day (soy) and last day (eoy). */
data calendar;
    do i = 0 to 100;
        year = 1900 + i;
        soy = mdy(1, 1, year);    /* 1 January of the year */
        eoy = mdy(12, 31, year);  /* 31 December of the year */
        output;
    end;
    format soy eoy date9.;
run;
/* Per-year head counts. Every row of dob_data is paired with every row of
   calendar (a deliberate Cartesian product) and classified against that
   year's first day (soy) and last day (eoy). A missing DOD is replaced by
   31DEC2200 before comparing. Passed is accumulated as a negative number. */
proc sql;
    create table pop as
    select
        cal.year,
        sum(case
                when ppl.DOB < cal.soy
                 and coalesce(ppl.DOD, '31DEC2200'd) >= cal.soy then 1
                else 0
            end) as Alive_At_Start,
        sum(case
                when ppl.DOB >= cal.soy and ppl.DOB <= cal.eoy then 1
                else 0
            end) as Born_During,
        sum(case
                when coalesce(ppl.DOD, '31DEC2200'd) >= cal.soy
                 and coalesce(ppl.DOD, '31DEC2200'd) <= cal.eoy then -1
                else 0
            end) as Passed,
        sum(case
                when ppl.DOB <= cal.eoy
                 and coalesce(ppl.DOD, '31DEC2200'd) > cal.eoy then 1
                else 0
            end) as Alive_At_End
    from dob_data as ppl
    cross join calendar as cal
    group by cal.year;
quit;
我有一张表 (table),其中包含姓名、出生日期和死亡日期 (1900-2000)。我需要知道某个时期内每年的人口数量,例如,1940年人口为23亿,1941年为24亿,1942年为22亿,以此类推,直到1950年。
我使用的是 SAS Enterprise Guide,因此代码看起来可能与标准 SQL 略有不同。至少我想看到这样的结果:
人数 |年
2.300.000.000 |1940
2.400.000.000 |1941
.....................
/* Asker's draft attempt, kept verbatim; it is pseudo-code and not runnable as written. */
select
/* NOTE(review): the trailing comma after count(name) leaves a dangling select list before FROM. */
count(name),
from db
/* NOTE(review): the date literals are pseudo-code (SAS would need e.g. '01JAN1940'd) and `=<` is not the
   standard SQL `<=` operator. As written, the filter keeps people born before 1940 who died during 1940,
   which is not the same as the people alive in 1940. */
where bd<1jan1940 and dd>=1jan1940 and dd=<31dec1940
/* NOTE(review): no `month` column is selected or otherwise visible for table db; confirm the intended grouping. */
group by month
首先你需要知道1899年底的初始人口,假设是20亿。然后逐年加上当年的出生人数并减去当年的死亡人数。(您必须访问该表两次才能做到这一点,一次统计出生,一次统计死亡。)使用 SUM OVER
计算累计总数 (running total)。
我不确定您实际使用的是哪个 DBMS,但这是非常标准的 SQL:
-- Population at the end of each year, computed as a running total on top of
-- an assumed starting population of 2,000,000,000 at the end of 1899.
--   births: number of rows in db per birth year (bd)
--   deaths: number of rows in db per death year (dd)
select
    yr,
    2000000000
        + sum(coalesce(births.cnt, 0) - coalesce(deaths.cnt, 0)) over (order by yr) as population
from
(
    select extract(year from bd) as yr, count(*) as cnt
    from db
    where bd is not null
    group by extract(year from bd)
) births
-- FULL OUTER JOIN: a year that has births but no deaths (or the reverse) must
-- still contribute to the running total; an inner join silently drops it.
full outer join
(
    select extract(year from dd) as yr, count(*) as cnt
    from db
    where dd is not null  -- rows without a death date have no death year
    group by extract(year from dd)
) deaths using (yr)
order by yr;
/* Build 10,000 synthetic people with a random date of birth. About one in
   ten also gets a date of death; everyone else gets a missing dod. */
data dob_data;
    do i = 1 to 10000;
        /* integer draw used only to decide whether a death date is generated */
        num = ceil(rand('UNIFORM',0,10));
        /* birth date: a random whole number of days after 01JAN1899 */
        dob = '01JAN1899'd + ceil(rand('UNIFORM',1,36865));
        /* death date, when present: a random whole number of days after dob */
        if num = 1 then dod = dob + ceil(rand('UNIFORM',1,36865));
        else dod = .;
        output;
    end;
    drop num;
    format dob dod date9.;
run;
/* One row per calendar year from 1900 to 2000, carrying that year's first
   day (soy) and last day (eoy). */
data calendar;
    do i = 0 to 100;
        year = 1900 + i;
        soy = mdy(1, 1, year);    /* 1 January of the year */
        eoy = mdy(12, 31, year);  /* 31 December of the year */
        output;
    end;
    format soy eoy date9.;
run;
/* Per-year head counts. Every row of dob_data is paired with every row of
   calendar (a deliberate Cartesian product) and classified against that
   year's first day (soy) and last day (eoy). A missing DOD is replaced by
   31DEC2200 before comparing. Passed is accumulated as a negative number. */
proc sql;
    create table pop as
    select
        cal.year,
        sum(case
                when ppl.DOB < cal.soy
                 and coalesce(ppl.DOD, '31DEC2200'd) >= cal.soy then 1
                else 0
            end) as Alive_At_Start,
        sum(case
                when ppl.DOB >= cal.soy and ppl.DOB <= cal.eoy then 1
                else 0
            end) as Born_During,
        sum(case
                when coalesce(ppl.DOD, '31DEC2200'd) >= cal.soy
                 and coalesce(ppl.DOD, '31DEC2200'd) <= cal.eoy then -1
                else 0
            end) as Passed,
        sum(case
                when ppl.DOB <= cal.eoy
                 and coalesce(ppl.DOD, '31DEC2200'd) > cal.eoy then 1
                else 0
            end) as Alive_At_End
    from dob_data as ppl
    cross join calendar as cal
    group by cal.year;
quit;