在有条件的时间范围内滚动 ID 计数
Rolling count of IDs over time frame with condition
我有如下数据集:
/* Sample data: one row per (ID, mydate) observation.
   ID     - numeric identifier
   mydate - observation date, stored as a SAS date and displayed as DATE9.
   type   - numeric flag (0 or 1 in the sample rows) */
data work.have;
  /* The colon modifier reads mydate with the DATE9. informat while still
     scanning for the next non-blank field, so the step does not depend on
     the date starting exactly one blank after ID. */
  input ID mydate :date9. type;
  format mydate date9.;
  datalines;
1 11NOV2020 1
1 22OCT2020 1
1 24SEP2020 0
1 26SEP2019 1
2 13OCT2021 1
2 11SEP2020 1
;
run;
- 我想以每一行的 mydate 为基准,按 ID 统计该 ID 在此前 12 个月内出现的次数。
- 计数从 0 开始。
- 仅当此前 12 个月内存在观测,且这些观测的 type = 1 时,计数才加 1。
我想要的结果应该是这样的:
ID mydate type Count
1 11NOV2020 1 1
1 22OCT2020 1 0
1 24SEP2020 0 1
1 26SEP2019 1 0
2 13OCT2021 1 0
2 11SEP2020 1 0
例如,ID = 1 的第一行计数为 1,因为在它之前的 12 个月内只有一条 type = 1 的观测。
到目前为止,我尝试过像下面这样把表与自身连接(self-join),但没有得到正确的结果。
/* Asker's attempt (kept verbatim; reported as not producing the desired result).
   Self-joins work.have so that each (ID, mydate) row (t1) is paired with the
   type = 1 rows of the same ID (t2) whose month distance to t1 is 0 through 12. */
proc sql;
create table work.want
as select
t1.ID
,t1.mydate
/* count(t2.id) also counts t1's own row when that row has type = 1
   (intck = 0), hence the "- 1"; the two-argument max() floors the result at 0.
   NOTE(review): when t1.type = 0 the row never matches itself, so the "- 1"
   removes a genuine earlier observation (24SEP2020 yields 0 instead of 1).
   NOTE(review): type is not selected, so the result lacks the type column. */
,max(count(t2.id)-1,0) as Count
from work.have as t1
left join
work.have as t2
/* NOTE(review): intck('month', ...) with its default method counts month
   boundaries crossed rather than elapsed months, so this is not an exact
   12-month look-back window. */
on t1.id = t2.id and 0 <= intck('month', t2.mydate, t1.mydate) <= 12 and t2.Type = 1
group by 1,2
order by 1,2 desc;
Quit;
用 PROC SQL 或 DATA 步给出的答案我都乐意接受。
提前致谢!
我的建议是这样的,相信一定能满足你的需求。
/* Sort ascending by date within ID so that every earlier observation of an ID
   has already been seen when the current row is processed. */
proc sort data=work.have;
  by ID mydate;
run;

/* For each row, count = number of earlier type = 1 rows of the same ID dated
   within the year before mydate (window start inclusive, mydate itself
   excluded). Writes outdset with the input variables plus count. */
data outdset;
  set work.have;
  by ID;

  /* History of the current ID. _temporary_ arrays keep their values across
     iterations and add no variables to the output data set.
     100 is the maximum number of rows supported per ID. */
  array seen_dates{100} _temporary_;
  array seen_types{100} _temporary_;
  retain n_seen 0;

  /* A new ID starts with an empty history. */
  if first.ID then n_seen = 0;

  /* Same calendar day one year earlier. */
  window_start = intnx('year', mydate, -1, 's');

  /* The count is recomputed for every row, never carried over. */
  count = 0;

  /* Walk backwards from the most recent earlier row. Dates ascend, so the
     first date before the window means all remaining ones are too old. */
  do i = n_seen to 1 by -1;
    if seen_dates{i} < window_start then leave;
    if seen_types{i} = 1 and seen_dates{i} < mydate then count = count + 1;
  end;

  /* Remember the current row for later rows of the same ID. */
  if n_seen >= dim(seen_dates) then do;
    putlog 'ERROR: more rows for ' ID= 'than the history arrays can hold - enlarge them.';
    abort;
  end;
  n_seen = n_seen + 1;
  seen_dates{n_seen} = mydate;
  seen_types{n_seen} = type;

  drop n_seen window_start i;
run;
我没有在 SAS 上实际运行过这段代码;如果它能解决问题,请告诉我。
试试这个
/* Sample data: one row per (ID, mydate) observation.
   ID     - numeric identifier
   mydate - observation date, stored as a SAS date and displayed as DATE9.
   type   - numeric flag (0 or 1 in the sample rows) */
data work.have;
  /* The colon modifier reads mydate with the DATE9. informat while still
     scanning for the next non-blank field, so the step does not depend on
     the date starting exactly one blank after ID. */
  input ID mydate :date9. type;
  format mydate date9.;
  datalines;
1 11NOV2020 1
1 22OCT2020 1
1 24SEP2020 0
1 26SEP2019 1
2 13OCT2021 1
2 11SEP2020 1
;
run;
/* For every row of HAVE, count the type = 1 rows of the same ID dated inside
   the year before that row: from the same calendar day one year earlier
   (inclusive) up to, but not including, the row's own date. */
proc sql;
  create table want as
  select cur.*,
         (select count(*)
            from have as prev
           where prev.id = cur.id
             and prev.type = 1
             and prev.mydate >= intnx('year', cur.mydate, -1, 's')
             and prev.mydate < cur.mydate
         ) as count
    from have as cur;
quit;
我有如下数据集:
/* Sample data: one row per (ID, mydate) observation.
   ID     - numeric identifier
   mydate - observation date, stored as a SAS date and displayed as DATE9.
   type   - numeric flag (0 or 1 in the sample rows) */
data work.have;
  /* The colon modifier reads mydate with the DATE9. informat while still
     scanning for the next non-blank field, so the step does not depend on
     the date starting exactly one blank after ID. */
  input ID mydate :date9. type;
  format mydate date9.;
  datalines;
1 11NOV2020 1
1 22OCT2020 1
1 24SEP2020 0
1 26SEP2019 1
2 13OCT2021 1
2 11SEP2020 1
;
run;
- 我想以每一行的 mydate 为基准,按 ID 统计该 ID 在此前 12 个月内出现的次数。
- 计数从 0 开始。
- 仅当此前 12 个月内存在观测,且这些观测的 type = 1 时,计数才加 1。
我想要的结果应该是这样的:
ID mydate type Count
1 11NOV2020 1 1
1 22OCT2020 1 0
1 24SEP2020 0 1
1 26SEP2019 1 0
2 13OCT2021 1 0
2 11SEP2020 1 0
例如,ID = 1 的第一行计数为 1,因为在它之前的 12 个月内只有一条 type = 1 的观测。
到目前为止,我尝试过像下面这样把表与自身连接(self-join),但没有得到正确的结果。
/* Asker's attempt (kept verbatim; reported as not producing the desired result).
   Self-joins work.have so that each (ID, mydate) row (t1) is paired with the
   type = 1 rows of the same ID (t2) whose month distance to t1 is 0 through 12. */
proc sql;
create table work.want
as select
t1.ID
,t1.mydate
/* count(t2.id) also counts t1's own row when that row has type = 1
   (intck = 0), hence the "- 1"; the two-argument max() floors the result at 0.
   NOTE(review): when t1.type = 0 the row never matches itself, so the "- 1"
   removes a genuine earlier observation (24SEP2020 yields 0 instead of 1).
   NOTE(review): type is not selected, so the result lacks the type column. */
,max(count(t2.id)-1,0) as Count
from work.have as t1
left join
work.have as t2
/* NOTE(review): intck('month', ...) with its default method counts month
   boundaries crossed rather than elapsed months, so this is not an exact
   12-month look-back window. */
on t1.id = t2.id and 0 <= intck('month', t2.mydate, t1.mydate) <= 12 and t2.Type = 1
group by 1,2
order by 1,2 desc;
Quit;
用 PROC SQL 或 DATA 步给出的答案我都乐意接受。
提前致谢!
我的建议是这样的,相信一定能满足你的需求。
/* Sort ascending by date within ID so that every earlier observation of an ID
   has already been seen when the current row is processed. */
proc sort data=work.have;
  by ID mydate;
run;

/* For each row, count = number of earlier type = 1 rows of the same ID dated
   within the year before mydate (window start inclusive, mydate itself
   excluded). Writes outdset with the input variables plus count. */
data outdset;
  set work.have;
  by ID;

  /* History of the current ID. _temporary_ arrays keep their values across
     iterations and add no variables to the output data set.
     100 is the maximum number of rows supported per ID. */
  array seen_dates{100} _temporary_;
  array seen_types{100} _temporary_;
  retain n_seen 0;

  /* A new ID starts with an empty history. */
  if first.ID then n_seen = 0;

  /* Same calendar day one year earlier. */
  window_start = intnx('year', mydate, -1, 's');

  /* The count is recomputed for every row, never carried over. */
  count = 0;

  /* Walk backwards from the most recent earlier row. Dates ascend, so the
     first date before the window means all remaining ones are too old. */
  do i = n_seen to 1 by -1;
    if seen_dates{i} < window_start then leave;
    if seen_types{i} = 1 and seen_dates{i} < mydate then count = count + 1;
  end;

  /* Remember the current row for later rows of the same ID. */
  if n_seen >= dim(seen_dates) then do;
    putlog 'ERROR: more rows for ' ID= 'than the history arrays can hold - enlarge them.';
    abort;
  end;
  n_seen = n_seen + 1;
  seen_dates{n_seen} = mydate;
  seen_types{n_seen} = type;

  drop n_seen window_start i;
run;
我没有在 SAS 上实际运行过这段代码;如果它能解决问题,请告诉我。
试试这个
/* Sample data: one row per (ID, mydate) observation.
   ID     - numeric identifier
   mydate - observation date, stored as a SAS date and displayed as DATE9.
   type   - numeric flag (0 or 1 in the sample rows) */
data work.have;
  /* The colon modifier reads mydate with the DATE9. informat while still
     scanning for the next non-blank field, so the step does not depend on
     the date starting exactly one blank after ID. */
  input ID mydate :date9. type;
  format mydate date9.;
  datalines;
1 11NOV2020 1
1 22OCT2020 1
1 24SEP2020 0
1 26SEP2019 1
2 13OCT2021 1
2 11SEP2020 1
;
run;
/* For every row of HAVE, count the type = 1 rows of the same ID dated inside
   the year before that row: from the same calendar day one year earlier
   (inclusive) up to, but not including, the row's own date. */
proc sql;
  create table want as
  select cur.*,
         (select count(*)
            from have as prev
           where prev.id = cur.id
             and prev.type = 1
             and prev.mydate >= intnx('year', cur.mydate, -1, 's')
             and prev.mydate < cur.mydate
         ) as count
    from have as cur;
quit;