将年份变量分配给 ID
Assign Year Variable to ID
我使用以下代码为每个 ID 分配年份 2017、2018、2019、2020:
/* Sort Have so the BY-group logic below sees each ID as one contiguous group. */
proc sort data=Have;
  by ID;
run;

/* Emit four rows (Vintage 2017 through 2020) the first time each ID appears. */
data Have2;
  set Have(keep=id);
  by id;
  if first.id then do;
    do Vintage = 2017 to 2020;
      output;
    end;
  end;
run;

proc sort data=have2;
  by id;
run;

/* Match-merge the year rows back onto Have by ID. When Have holds more rows
   for an ID than Have2 does, the extra rows keep the last Vintage read. */
data have3;
  merge have2 have;
  by id;
run;
使得数据集看起来像这样:
/* Illustration of have3: each ID (1-4) paired with every Vintage 2017-2020. */
data have3;
input ID Vintage;
datalines;
1 2017
1 2018
1 2019
1 2020
2 2017
2 2018
2 2019
2 2020
3 2017
3 2018
3 2019
3 2020
4 2017
4 2018
4 2019
4 2020
;
run;
现在的问题是我正在处理一个看起来像这样的数据集
/* Sample input: eight rows per ID, four of Type A followed by four of Type L. */
data newdata;
/* Type holds the letters A and L, so it must be read as character ($);
   a numeric read would log invalid-data notes and store missing values. */
input ID Type $;
datalines;
1 A
1 A
1 A
1 A
1 L
1 L
1 L
1 L
2 A
2 A
2 A
2 A
2 L
2 L
2 L
2 L
;
run;
现在 Vintage 被赋值为 2017,2018,2019,2020,2020,2020,2020……(每个 ID 在 2020 之后一直重复 2020)
有没有一种方法可以按 ID 和类型附加到年份上,使上面的数据看起来像这样
/* Desired result: within each (ID, Type) group the rows carry Vintage 2017-2020. */
data want;
/* Type is a letter code, so it needs the $ modifier to be read as character. */
input ID Type $ Vintage;
datalines;
1 A 2017
1 A 2018
1 A 2019
1 A 2020
1 L 2017
1 L 2018
1 L 2019
1 L 2020
2 A 2017
2 A 2018
2 A 2019
2 A 2020
2 L 2017
2 L 2018
2 L 2019
2 L 2020
;
run;
TIA
您可以对 type 执行与 vintage 和 id 完全相同的操作:
/* Expand each (id, vintage) pair of have3 into one row per type. */
data want;
  set have3;
  by id vintage;
  /* Subsetting IF: skip repeated (id, vintage) rows so each pair expands once. */
  if first.vintage;
  do type = "A", "L";
    output;
  end;
run;

/* Re-order so rows are grouped by type within each id. */
proc sort data=want;
  by id type vintage;
run;
SQL 解决方案需要一个 DISTINCT 子句来删除重复项:newdata 中有重复记录,交叉联接会因此产生重复行。
/* Pair every (id, vintage) row of have3 with each distinct type recorded
   for that id in newdata. */
proc sql noprint;
  create table want as
  select a.id, b.type, a.vintage
  from have3 as a
  inner join
       /* De-duplicate on the two columns actually used, so extra columns in
          newdata cannot let repeated (id, type) pairs through. */
       (select distinct id, type from newdata) as b
    on a.id = b.id
  order by a.id, b.type, a.vintage;
quit;
/* Number the rows of each (id, Type) group 2017, 2018, ... in input order.
   The BY statement requires newdata to be sorted by id and Type. */
data want;
  set newdata;
  by id Type;
  /* Spelled-out form of a sum statement: retained counter starting at 0. */
  retain vintage 0;
  if first.Type then vintage = 2017;
  else vintage = sum(vintage, 1);
run;
因为您要进行大量交叉连接,只需为每一列创建一个包含其唯一值(distinct)的表,然后在 PROC SQL 中将它们连接在一起:
/* Lookup table of ID values 1 through 4, one row each. */
data ids;
  do ID = 1 to 4;
    output;
  end;
run;
/* Lookup table of vintage years 2017 through 2020, one row each. */
data vintages;
  do vintage = 2017 to 2020;
    output;
  end;
run;
/* Lookup table of type codes A and L. The explicit length of 8 matches the
   default that list input with $ assigns to a character variable. */
data types;
  length type $8;
  do type = "A", "L";
    output;
  end;
run;
/* Build the combinations with explicit cross joins of the lookup tables. */
proc sql noprint;
  /* Every id paired with every type. */
  create table have3 as
  select a.id, b.type
  from ids as a
  cross join types as b;

  /* Every id x type x vintage combination, sorted by id, type, vintage. */
  create table want as
  select a.id, b.type, c.vintage
  from ids as a
  cross join types as b
  cross join vintages as c
  order by a.id, b.type, c.vintage;
quit;
我使用以下代码为每个 ID 分配年份 2017、2018、2019、2020:
/* Sort Have so the BY-group logic below sees each ID as one contiguous group. */
proc sort data=Have;
  by ID;
run;

/* Emit four rows (Vintage 2017 through 2020) the first time each ID appears. */
data Have2;
  set Have(keep=id);
  by id;
  if first.id then do;
    do Vintage = 2017 to 2020;
      output;
    end;
  end;
run;

proc sort data=have2;
  by id;
run;

/* Match-merge the year rows back onto Have by ID. When Have holds more rows
   for an ID than Have2 does, the extra rows keep the last Vintage read. */
data have3;
  merge have2 have;
  by id;
run;
使得数据集看起来像这样:
/* Illustration of have3: each ID (1-4) paired with every Vintage 2017-2020. */
data have3;
input ID Vintage;
datalines;
1 2017
1 2018
1 2019
1 2020
2 2017
2 2018
2 2019
2 2020
3 2017
3 2018
3 2019
3 2020
4 2017
4 2018
4 2019
4 2020
;
run;
现在的问题是我正在处理一个看起来像这样的数据集
/* Sample input: eight rows per ID, four of Type A followed by four of Type L. */
data newdata;
/* Type holds the letters A and L, so it must be read as character ($);
   a numeric read would log invalid-data notes and store missing values. */
input ID Type $;
datalines;
1 A
1 A
1 A
1 A
1 L
1 L
1 L
1 L
2 A
2 A
2 A
2 A
2 L
2 L
2 L
2 L
;
run;
现在 Vintage 被赋值为 2017,2018,2019,2020,2020,2020,2020……(每个 ID 在 2020 之后一直重复 2020)
有没有一种方法可以按 ID 和类型附加到年份上,使上面的数据看起来像这样
/* Desired result: within each (ID, Type) group the rows carry Vintage 2017-2020. */
data want;
/* Type is a letter code, so it needs the $ modifier to be read as character. */
input ID Type $ Vintage;
datalines;
1 A 2017
1 A 2018
1 A 2019
1 A 2020
1 L 2017
1 L 2018
1 L 2019
1 L 2020
2 A 2017
2 A 2018
2 A 2019
2 A 2020
2 L 2017
2 L 2018
2 L 2019
2 L 2020
;
run;
TIA
您可以对 type 执行与 vintage 和 id 完全相同的操作:
/* Expand each (id, vintage) pair of have3 into one row per type. */
data want;
  set have3;
  by id vintage;
  /* Subsetting IF: skip repeated (id, vintage) rows so each pair expands once. */
  if first.vintage;
  do type = "A", "L";
    output;
  end;
run;

/* Re-order so rows are grouped by type within each id. */
proc sort data=want;
  by id type vintage;
run;
SQL 解决方案需要一个 DISTINCT 子句来删除重复项:newdata 中有重复记录,交叉联接会因此产生重复行。
/* Pair every (id, vintage) row of have3 with each distinct type recorded
   for that id in newdata. */
proc sql noprint;
  create table want as
  select a.id, b.type, a.vintage
  from have3 as a
  inner join
       /* De-duplicate on the two columns actually used, so extra columns in
          newdata cannot let repeated (id, type) pairs through. */
       (select distinct id, type from newdata) as b
    on a.id = b.id
  order by a.id, b.type, a.vintage;
quit;
/* Number the rows of each (id, Type) group 2017, 2018, ... in input order.
   The BY statement requires newdata to be sorted by id and Type. */
data want;
  set newdata;
  by id Type;
  /* Spelled-out form of a sum statement: retained counter starting at 0. */
  retain vintage 0;
  if first.Type then vintage = 2017;
  else vintage = sum(vintage, 1);
run;
因为您要进行大量交叉连接,只需为每一列创建具有不同值的表,然后在 PROC SQL
中将它们连接在一起
/* Lookup table of ID values, one row each. */
data ids;
input ID ;
datalines;
1
2
3
4
;
run;
/* Lookup table of vintage years 2017 through 2020, one row each. */
data vintages;
  do vintage = 2017 to 2020;
    output;
  end;
run;
/* Lookup table of type codes A and L. The explicit length of 8 matches the
   default that list input with $ assigns to a character variable. */
data types;
  length type $8;
  do type = "A", "L";
    output;
  end;
run;
/* Build the combinations with explicit cross joins of the lookup tables. */
proc sql noprint;
  /* Every id paired with every type. */
  create table have3 as
  select a.id, b.type
  from ids as a
  cross join types as b;

  /* Every id x type x vintage combination, sorted by id, type, vintage. */
  create table want as
  select a.id, b.type, c.vintage
  from ids as a
  cross join types as b
  cross join vintages as c
  order by a.id, b.type, c.vintage;
quit;