将列名与非零值连接起来
Concatenate column names with non zero values
有一张表,包含 ID、事件和日期三列。
当前脚本按id、event_type和日期统计事件,转置然后按当天事件类型的组合标记每个ID。
我担心的是,一旦出现新的事件类型,这段脚本就会失效。
我希望找到一种方法,将所有非零计数的列名连接起来。理想情况下,此方法不涉及对事件进行硬编码。
/* Build the sample events table: one row per id, event date, event type and event code. */
data test;
    infile datalines delimiter=':' truncover;
    /* Character informats need the $ prefix. The widths fit the two-character sample codes; widen them if longer codes can occur. */
    informat id 10. event_dt ddmmyy10. event_type $2. event $2.;
    input id event_dt event_type event;
    datalines;
1:01-03-2017:BB:b1
1:01-03-2017:AA:A2
1:02-03-2017:CC:C1
2:01-03-2017:CC:C2
3:03-03-2017:BB:b2
4:02-03-2017:AA:A1
;
run;
/* Count the distinct events per id, event date and event type. */
proc sql;
    create table test2 as
    select
        ID,
        event_dt format=worddate.,
        event_type,
        count(distinct event) as event_count
    from test
    /* Grouping by every non-aggregated column already yields unique rows, so DISTINCT is not needed. */
    group by ID, event_dt, event_type
    /* GROUP BY alone does not guarantee row order; the BY-group steps that read test2 need it sorted. */
    order by ID, event_dt, event_type;
quit;
/* Pivot long to wide: one row per id and event date, one column per event type holding its event count. */
proc transpose data=test2
               out=test3(drop=_name_);
    by id event_dt;
    /* Spelled out for clarity; event_count is the only numeric column not used in BY. */
    var event_count;
    id event_type;
run;
/* Fill the gaps left by the pivot: REPONLY replaces missing values with the MISSING= value (0) and does no standardization. */
proc stdize data=test3
            out=test3z
            missing=0
            reponly;
run;
/* Tag each id/date row by which event-type columns are non-zero, then count ids per date and tag. */
/* NOTE(review): the AA/BB/CC columns are hard-coded, so a new event type is not reflected in the tag. */
proc sql;
    create table test4 as
    select
        event_dt,
        case
            when AA = 0 and BB = 0 and CC = 0 then 'No Event'
            when AA = 0 and BB = 0 and CC > 0 then 'CC only'
            when AA = 0 and BB > 0 and CC = 0 then 'BB only'
            when AA = 0 and BB > 0 and CC > 0 then 'BB & CC'
            when AA > 0 and BB = 0 and CC = 0 then 'AA only'
            when AA > 0 and BB = 0 and CC > 0 then 'AA & CC'
            when AA > 0 and BB > 0 and CC = 0 then 'AA & BB'
            when AA > 0 and BB > 0 and CC > 0 then 'AA & BB & CC'
            else 'Other'
        end as tag,
        count(id) as ID_COUNT
    from test3z
    /* CALCULATED reuses the tag expression from the SELECT list instead of repeating the whole CASE. */
    group by event_dt, calculated tag;
quit;
谢谢!
Ben
有人可能会建议:先转置,再用数组处理。
不妨考虑下面这个替代方案:不再通过处理一行中显式列出的各列来赋标签值,
而是在遍历 id/日期 分组时计算标签值,然后排序,并在遍历 日期/标签 分组时统计 id 的数量。
/* Same aggregation as in the question: count the distinct events per id, event date and event type. */
proc sql;
    create table test2 as
    select
        ID,
        event_dt format=worddate.,
        event_type,
        count(distinct event) as event_count
    from test
    /* Grouping by every non-aggregated column already yields unique rows, so DISTINCT is not needed. */
    group by ID, event_dt, event_type
    /* GROUP BY alone does not guarantee row order; the BY-group DATA step that reads test2 needs it sorted. */
    order by ID, event_dt, event_type;
quit;
/* Compute the tag value (called type_list): the comma-separated event types seen for each id and event date. */
data test2a;
    /* Explicit character length. 200 is an arbitrary cap; raise it if the combined list can get longer. */
    length type_list $200;
    /* One pass of this loop consumes one id/date group; the implicit output after it writes one row per group. */
    do _n_ = 1 by 1 until (last.event_dt);
        set test2;
        by ID event_dt;
        /* CATX skips blank arguments, so the first event type gets no leading comma. */
        type_list = catx(',', type_list, event_type);
    end;
    keep id event_dt type_list;
run;
/* Re-order test2a in place by date and type list so equal combinations on the same day are adjacent. */
proc sort data=test2a out=test2a;
    by event_dt type_list;
run;
/* Count the number of ids that share the same type list on each event day. */
data want;
    /* Reset the counter at the start of every date/type_list group. */
    id_count = 0;
    do until (last.type_list);
        set test2a;
        by event_dt type_list;
        id_count = id_count + 1;
    end;
    /* The implicit output here writes one row per group, carrying the final count. */
run;
您可以添加额外的逻辑并使用 tranwrd 函数,
将 type_list(逗号分隔的列表)转换为更详细的表示形式(例如包含 "and"、"&" 或 "only" 等字样)。
有一张表,包含 ID、事件和日期三列。
当前脚本按id、event_type和日期统计事件,转置然后按当天事件类型的组合标记每个ID。
我担心的是,一旦出现新的事件类型,这段脚本就会失效。
我希望找到一种方法,将所有非零计数的列名连接起来。理想情况下,此方法不涉及对事件进行硬编码。
/* Build the sample events table: one row per id, event date, event type and event code. */
data test;
    infile datalines delimiter=':' truncover;
    /* Character informats need the $ prefix. The widths fit the two-character sample codes; widen them if longer codes can occur. */
    informat id 10. event_dt ddmmyy10. event_type $2. event $2.;
    input id event_dt event_type event;
    datalines;
1:01-03-2017:BB:b1
1:01-03-2017:AA:A2
1:02-03-2017:CC:C1
2:01-03-2017:CC:C2
3:03-03-2017:BB:b2
4:02-03-2017:AA:A1
;
run;
/* Count the distinct events per id, event date and event type. */
proc sql;
    create table test2 as
    select
        ID,
        event_dt format=worddate.,
        event_type,
        count(distinct event) as event_count
    from test
    /* Grouping by every non-aggregated column already yields unique rows, so DISTINCT is not needed. */
    group by ID, event_dt, event_type
    /* GROUP BY alone does not guarantee row order; the BY-group steps that read test2 need it sorted. */
    order by ID, event_dt, event_type;
quit;
/* Pivot long to wide: one row per id and event date, one column per event type holding its event count. */
proc transpose data=test2
               out=test3(drop=_name_);
    by id event_dt;
    /* Spelled out for clarity; event_count is the only numeric column not used in BY. */
    var event_count;
    id event_type;
run;
/* Fill the gaps left by the pivot: REPONLY replaces missing values with the MISSING= value (0) and does no standardization. */
proc stdize data=test3
            out=test3z
            missing=0
            reponly;
run;
/* Tag each id/date row by which event-type columns are non-zero, then count ids per date and tag. */
/* NOTE(review): the AA/BB/CC columns are hard-coded, so a new event type is not reflected in the tag. */
proc sql;
    create table test4 as
    select
        event_dt,
        case
            when AA = 0 and BB = 0 and CC = 0 then 'No Event'
            when AA = 0 and BB = 0 and CC > 0 then 'CC only'
            when AA = 0 and BB > 0 and CC = 0 then 'BB only'
            when AA = 0 and BB > 0 and CC > 0 then 'BB & CC'
            when AA > 0 and BB = 0 and CC = 0 then 'AA only'
            when AA > 0 and BB = 0 and CC > 0 then 'AA & CC'
            when AA > 0 and BB > 0 and CC = 0 then 'AA & BB'
            when AA > 0 and BB > 0 and CC > 0 then 'AA & BB & CC'
            else 'Other'
        end as tag,
        count(id) as ID_COUNT
    from test3z
    /* CALCULATED reuses the tag expression from the SELECT list instead of repeating the whole CASE. */
    group by event_dt, calculated tag;
quit;
谢谢!Ben
有人可能会建议:先转置,再用数组处理。
不妨考虑下面这个替代方案:不再通过处理一行中显式列出的各列来赋标签值,
而是在遍历 id/日期 分组时计算标签值,然后排序,并在遍历 日期/标签 分组时统计 id 的数量。
/* Same aggregation as in the question: count the distinct events per id, event date and event type. */
proc sql;
    create table test2 as
    select
        ID,
        event_dt format=worddate.,
        event_type,
        count(distinct event) as event_count
    from test
    /* Grouping by every non-aggregated column already yields unique rows, so DISTINCT is not needed. */
    group by ID, event_dt, event_type
    /* GROUP BY alone does not guarantee row order; the BY-group DATA step that reads test2 needs it sorted. */
    order by ID, event_dt, event_type;
quit;
/* Compute the tag value (called type_list): the comma-separated event types seen for each id and event date. */
data test2a;
    /* Explicit character length. 200 is an arbitrary cap; raise it if the combined list can get longer. */
    length type_list $200;
    /* One pass of this loop consumes one id/date group; the implicit output after it writes one row per group. */
    do _n_ = 1 by 1 until (last.event_dt);
        set test2;
        by ID event_dt;
        /* CATX skips blank arguments, so the first event type gets no leading comma. */
        type_list = catx(',', type_list, event_type);
    end;
    keep id event_dt type_list;
run;
/* Re-order test2a in place by date and type list so equal combinations on the same day are adjacent. */
proc sort data=test2a out=test2a;
    by event_dt type_list;
run;
/* Count the number of ids that share the same type list on each event day. */
data want;
    /* Reset the counter at the start of every date/type_list group. */
    id_count = 0;
    do until (last.type_list);
        set test2a;
        by event_dt type_list;
        id_count = id_count + 1;
    end;
    /* The implicit output here writes one row per group, carrying the final count. */
run;
您可以添加额外的逻辑并使用 tranwrd 函数,
将 type_list(逗号分隔的列表)转换为更详细的表示形式(例如包含 "and"、"&" 或 "only" 等字样)。