根据查找表(look-up table)中的值创建 SAS 变量
Create SAS variable based on values in look-up table
我在数据集 "dat" 中有两个变量(varx 和 vary),需要创建最终分数:首先对 varx 和 vary 进行分类,然后根据查找表 "lookup" 取得对应的分数。
我已经完成了分类部分,但卡在了下一步:如何让 SAS 用我创建的类别(即 "varxcat" 和 "varycat")作为 "lookup" 的索引,为每条观测取出所需的值,并将其放入 "dat" 中的最终分数变量(称为 "score")。
在 R(我通常在其中编写代码)中,这可以通过 for 循环之类的东西轻松完成。 SAS中有类似的东西吗? (我不是必须使用 "varxcat" 和 "varycat",只需要最终创建 "score"。)
/* Sample input: one observation per subject, holding an ID and the two
   raw measures that will later be categorised and scored. */
data dat;
  infile datalines;
  input ID $ varx vary;
  cards;
1 1 1
2 4 5
3 11 12
4 23 14
5 24 20
;
/* Score grid in wide form. Columns are the x categories (1-10, 11-20, 21-30).
   The first row is for y 1-10, the second for y 11-20 and the third for
   y 21-30, so a subject whose x score is in category 1 and whose y score is
   in category 3 gets a final score of 107. */
data lookup;
  infile datalines;
  input x01to10 x11to20 x21to30;
  cards;
21 52 73
84 95 96
107 118 149
;
/* Add category variables to DAT (rewritten in place):
     <= 10 -> 1, 11-20 -> 2, 21-30 -> 3.
   Values above 30 match no branch and leave the category missing. A missing
   input compares lower than any number in SAS, so it lands in category 1. */
data dat;
  set dat;

  select;
    when (varx <= 10)      varxcat = 1;
    when (10 < varx <= 20) varxcat = 2;
    when (20 < varx <= 30) varxcat = 3;
    otherwise;
  end;

  select;
    when (vary <= 10)      varycat = 1;
    when (10 < vary <= 20) varycat = 2;
    when (20 < vary <= 30) varycat = 3;
    otherwise;
  end;
run;
希望 "dat" 最终如下所示:
/* Desired result: the original DAT rows with the looked-up final score
   appended as SCORE. */
data dat;
  infile datalines;
  input ID $ varx vary score;
  cards;
1 1 1 21
2 4 5 21
3 11 12 95
4 23 14 96
5 24 20 96
;
通过查找表映射数据值,本质上是一次左连接操作。SAS 有很多方法可以实现左连接,包括:
- SQL
- 合并
- 哈希对象
- 数组(直接寻址)
- 格式(Format)
- 输入格式(Informat)
下面给出四种方式:SQL、Merge、Array 和 Hash。从 var* 到类别(category)的映射,是通过类似 int(value/10) 的取整函数映射完成的:
/* Test input for the look-up examples: an ID and the two raw measures.
   ID 6 (varx=5, vary=29) is in the first x category and the third y
   category, so its score should be 107.
   This note lives here rather than on the data line because text inside
   DATALINES is read as data, not treated as a comment. */
data have;
  input ID $ varx vary;
  datalines;
1 1 1
2 4 5
3 11 12
4 23 14
5 24 20
6 5 29
;
/* Score grid in long form: one observation per cell, keyed by the zero-based
   pair (index_y, index_x). Each data line is one y category; the values on
   it run across the x categories. */
data lookup;
  infile datalines;
  do index_y = 0 to 2;        /* grid row    */
    do index_x = 0 to 2;      /* grid column */
      /* the double trailing @ holds the line so values are read one by one */
      input lookup_value @@;
      output;
    end;
  end;
  stop;                       /* all nine cells are read in a single pass */
  cards;
21 52 73
84 95 96
107 118 149
;
*------------------- SQL;
/* Left-join HAVE to LOOKUP on zero-based category indexes and keep ID plus
   the looked-up SCORE, ordered by ID.

   Category index: <= 10 -> 0, 11-20 -> 1, 21-30 -> 2.
   ceil(v/10) - 1, floored at 0, is used rather than int(v/10) so that exact
   multiples of 10 fall in the lower category, as the <= 10, <= 20, <= 30
   boundaries require (vary = 20 -> index 1, so ID 5 scores 96, not 149).
   A missing input keeps a missing index. Missing inputs and values above 30
   match no LOOKUP row, so SCORE is missing for them. */
proc sql;
  create table want as
  select
    h.id,
    l.lookup_value as score
  from
    ( select
        id,
        case when missing(varx) then .
             else max(ceil(varx / 10) - 1, 0) end as index_x,
        case when missing(vary) then .
             else max(ceil(vary / 10) - 1, 0) end as index_y
      from
        have
    ) as h
  left join
    lookup as l
  on
    h.index_x = l.index_x
    and
    h.index_y = l.index_y
  order by
    h.id
  ;
quit;
*------------------- MERGE;
/* Left join through a BY-group MERGE, producing WANT2 (ID, SCORE) sorted by ID.

   Category index: <= 10 -> 0, 11-20 -> 1, 21-30 -> 2.
   ceil(v/10) - 1, floored at 0, is used rather than int(v/10) so that exact
   multiples of 10 fall in the lower category, as the <= 10, <= 20, <= 30
   boundaries require (vary = 20 -> index 1, so ID 5 scores 96, not 149).
   A missing input keeps a missing index. Missing inputs and values above 30
   match no LOOKUP row, so SCORE is missing for them. */

/* Step 1: add the category keys. The composite index is intended to let the
   BY statement below read HAVE2 in key order without sorting it. */
data have2(index=(myindexname=(xcat ycat)));
  set have;
  xcat = ifn(missing(varx), ., max(ceil(varx / 10) - 1, 0));
  ycat = ifn(missing(vary), ., max(ceil(vary / 10) - 1, 0));
run;

/* Step 2: LOOKUP must be in BY order for the merge. */
proc sort data=lookup;
  by index_x index_y;
run;

/* Ask for informational log messages (these include notes on index usage). */
options msglevel=i;

/* Step 3: match on the renamed keys and keep every HAVE2 row. */
data want2(keep=id lookup_value rename=(lookup_value=score));
  merge
    have2(rename=(xcat=index_x ycat=index_y) in=left)
    lookup
  ;
  by index_x index_y;
  if left;   /* drop LOOKUP cells that no HAVE2 row refers to */
run;

/* Step 4: the merge leaves rows in key order, so restore ID order. */
proc sort data=want2;
  by id;
run;
*------------------- ARRAY DIRECT ADDRESSING;
/* Load LOOKUP once into a temporary 3x3 array addressed as
   [index_x, index_y], then score each HAVE row by direct subscripting.

   Category index: <= 10 -> 0, 11-20 -> 1, 21-30 -> 2.
   ceil(v/10) - 1, floored at 0, is used rather than the raw quotient v/10 so
   that the subscripts are whole numbers and exact multiples of 10 fall in
   the lower category, as the <= 10, <= 20, <= 30 boundaries require
   (vary = 20 -> index 1, so ID 5 scores 96, not 149). */
data want3;
  array lookup [0:2,0:2] _temporary_;

  /* First iteration only: read the whole LOOKUP table into the array. */
  if _n_ = 1 then do until (endlookup);
    set lookup end=endlookup;
    lookup[index_x,index_y] = lookup_value;
  end;

  set have;
  xcat = ifn(missing(varx), ., max(ceil(varx / 10) - 1, 0));
  ycat = ifn(missing(vary), ., max(ceil(vary / 10) - 1, 0));

  /* Only subscript inside the array bounds. Missing inputs and values above
     30 leave SCORE missing instead of stopping the step with an
     out-of-range subscript error. */
  if 0 <= xcat <= 2 and 0 <= ycat <= 2 then
    score = lookup[xcat,ycat];

  keep id score;
run;
*------------------- HASH LOOKUP;
/* Load LOOKUP into a hash object keyed by (index_x, index_y), then score
   each HAVE row with a FIND call.

   Category index: <= 10 -> 0, 11-20 -> 1, 21-30 -> 2.
   ceil(v/10) - 1, floored at 0, is used rather than int(v/10) so that exact
   multiples of 10 fall in the lower category, as the <= 10, <= 20, <= 30
   boundaries require (vary = 20 -> index 1, so ID 5 scores 96, not 149).
   A missing input keeps a missing key. Missing inputs and values above 30
   are not found, so SCORE stays missing for them. */
data want4;
  /* Never executed: only defines the LOOKUP variables at compile time so the
     hash object has host variables for its keys and data. */
  if 0 then set lookup;

  if _n_ = 1 then do;
    declare hash lookup(dataset:'lookup');
    lookup.defineKey('index_x', 'index_y');
    lookup.defineData('lookup_value');
    lookup.defineDone();
  end;

  set have;
  /* Both keys are assigned on every row. They come from a SET statement and
     are therefore retained, so they must never carry over from a previous row. */
  index_x = ifn(missing(varx), ., max(ceil(varx / 10) - 1, 0));
  index_y = ifn(missing(vary), ., max(ceil(vary / 10) - 1, 0));

  /* FIND returns 0 on a hit and copies the stored value into LOOKUP_VALUE. */
  if (lookup.find() = 0) then
    score = lookup_value;

  keep id score;
run;
我在数据集 "dat" 中有两个变量(varx 和 vary),需要创建最终分数:首先对 varx 和 vary 进行分类,然后根据查找表 "lookup" 取得对应的分数。
我已经完成了分类部分,但卡在了下一步:如何让 SAS 用我创建的类别(即 "varxcat" 和 "varycat")作为 "lookup" 的索引,为每条观测取出所需的值,并将其放入 "dat" 中的最终分数变量(称为 "score")。
在 R(我通常用它编写代码)中,这可以通过 for 循环之类的东西轻松完成。SAS 中有类似的东西吗?(我不是必须使用 "varxcat" 和 "varycat",只需要最终创建 "score"。)
/* Sample input: one observation per subject, holding an ID and the two
   raw measures that will later be categorised and scored. */
data dat;
  infile datalines;
  input ID $ varx vary;
  cards;
1 1 1
2 4 5
3 11 12
4 23 14
5 24 20
;
/* Score grid in wide form. Columns are the x categories (1-10, 11-20, 21-30).
   The first row is for y 1-10, the second for y 11-20 and the third for
   y 21-30, so a subject whose x score is in category 1 and whose y score is
   in category 3 gets a final score of 107. */
data lookup;
  infile datalines;
  input x01to10 x11to20 x21to30;
  cards;
21 52 73
84 95 96
107 118 149
;
/* Add category variables to DAT (rewritten in place):
     <= 10 -> 1, 11-20 -> 2, 21-30 -> 3.
   Values above 30 match no branch and leave the category missing. A missing
   input compares lower than any number in SAS, so it lands in category 1. */
data dat;
  set dat;

  select;
    when (varx <= 10)      varxcat = 1;
    when (10 < varx <= 20) varxcat = 2;
    when (20 < varx <= 30) varxcat = 3;
    otherwise;
  end;

  select;
    when (vary <= 10)      varycat = 1;
    when (10 < vary <= 20) varycat = 2;
    when (20 < vary <= 30) varycat = 3;
    otherwise;
  end;
run;
希望 "dat" 最终如下所示:
/* Desired result: the original DAT rows with the looked-up final score
   appended as SCORE. */
data dat;
  infile datalines;
  input ID $ varx vary score;
  cards;
1 1 1 21
2 4 5 21
3 11 12 95
4 23 14 96
5 24 20 96
;
通过查找表映射数据值,本质上是一次左连接操作。SAS 有很多方法可以实现左连接,包括:
- SQL
- 合并
- 哈希对象
- 数组(直接寻址)
- 格式(Format)
- 输入格式(Informat)
下面给出四种方式:SQL、Merge、Array 和 Hash。从 var* 到类别(category)的映射,是通过类似 int(value/10) 的取整函数映射完成的:
/* Test input for the look-up examples: an ID and the two raw measures.
   ID 6 (varx=5, vary=29) is in the first x category and the third y
   category, so its score should be 107.
   This note lives here rather than on the data line because text inside
   DATALINES is read as data, not treated as a comment. */
data have;
  input ID $ varx vary;
  datalines;
1 1 1
2 4 5
3 11 12
4 23 14
5 24 20
6 5 29
;
/* Score grid in long form: one observation per cell, keyed by the zero-based
   pair (index_y, index_x). Each data line is one y category; the values on
   it run across the x categories. */
data lookup;
  infile datalines;
  do index_y = 0 to 2;        /* grid row    */
    do index_x = 0 to 2;      /* grid column */
      /* the double trailing @ holds the line so values are read one by one */
      input lookup_value @@;
      output;
    end;
  end;
  stop;                       /* all nine cells are read in a single pass */
  cards;
21 52 73
84 95 96
107 118 149
;
*------------------- SQL;
/* Left-join HAVE to LOOKUP on zero-based category indexes and keep ID plus
   the looked-up SCORE, ordered by ID.

   Category index: <= 10 -> 0, 11-20 -> 1, 21-30 -> 2.
   ceil(v/10) - 1, floored at 0, is used rather than int(v/10) so that exact
   multiples of 10 fall in the lower category, as the <= 10, <= 20, <= 30
   boundaries require (vary = 20 -> index 1, so ID 5 scores 96, not 149).
   A missing input keeps a missing index. Missing inputs and values above 30
   match no LOOKUP row, so SCORE is missing for them. */
proc sql;
  create table want as
  select
    h.id,
    l.lookup_value as score
  from
    ( select
        id,
        case when missing(varx) then .
             else max(ceil(varx / 10) - 1, 0) end as index_x,
        case when missing(vary) then .
             else max(ceil(vary / 10) - 1, 0) end as index_y
      from
        have
    ) as h
  left join
    lookup as l
  on
    h.index_x = l.index_x
    and
    h.index_y = l.index_y
  order by
    h.id
  ;
quit;
*------------------- MERGE;
/* Left join through a BY-group MERGE, producing WANT2 (ID, SCORE) sorted by ID.

   Category index: <= 10 -> 0, 11-20 -> 1, 21-30 -> 2.
   ceil(v/10) - 1, floored at 0, is used rather than int(v/10) so that exact
   multiples of 10 fall in the lower category, as the <= 10, <= 20, <= 30
   boundaries require (vary = 20 -> index 1, so ID 5 scores 96, not 149).
   A missing input keeps a missing index. Missing inputs and values above 30
   match no LOOKUP row, so SCORE is missing for them. */

/* Step 1: add the category keys. The composite index is intended to let the
   BY statement below read HAVE2 in key order without sorting it. */
data have2(index=(myindexname=(xcat ycat)));
  set have;
  xcat = ifn(missing(varx), ., max(ceil(varx / 10) - 1, 0));
  ycat = ifn(missing(vary), ., max(ceil(vary / 10) - 1, 0));
run;

/* Step 2: LOOKUP must be in BY order for the merge. */
proc sort data=lookup;
  by index_x index_y;
run;

/* Ask for informational log messages (these include notes on index usage). */
options msglevel=i;

/* Step 3: match on the renamed keys and keep every HAVE2 row. */
data want2(keep=id lookup_value rename=(lookup_value=score));
  merge
    have2(rename=(xcat=index_x ycat=index_y) in=left)
    lookup
  ;
  by index_x index_y;
  if left;   /* drop LOOKUP cells that no HAVE2 row refers to */
run;

/* Step 4: the merge leaves rows in key order, so restore ID order. */
proc sort data=want2;
  by id;
run;
*------------------- ARRAY DIRECT ADDRESSING;
/* Load LOOKUP once into a temporary 3x3 array addressed as
   [index_x, index_y], then score each HAVE row by direct subscripting.

   Category index: <= 10 -> 0, 11-20 -> 1, 21-30 -> 2.
   ceil(v/10) - 1, floored at 0, is used rather than the raw quotient v/10 so
   that the subscripts are whole numbers and exact multiples of 10 fall in
   the lower category, as the <= 10, <= 20, <= 30 boundaries require
   (vary = 20 -> index 1, so ID 5 scores 96, not 149). */
data want3;
  array lookup [0:2,0:2] _temporary_;

  /* First iteration only: read the whole LOOKUP table into the array. */
  if _n_ = 1 then do until (endlookup);
    set lookup end=endlookup;
    lookup[index_x,index_y] = lookup_value;
  end;

  set have;
  xcat = ifn(missing(varx), ., max(ceil(varx / 10) - 1, 0));
  ycat = ifn(missing(vary), ., max(ceil(vary / 10) - 1, 0));

  /* Only subscript inside the array bounds. Missing inputs and values above
     30 leave SCORE missing instead of stopping the step with an
     out-of-range subscript error. */
  if 0 <= xcat <= 2 and 0 <= ycat <= 2 then
    score = lookup[xcat,ycat];

  keep id score;
run;
*------------------- HASH LOOKUP;
/* Load LOOKUP into a hash object keyed by (index_x, index_y), then score
   each HAVE row with a FIND call.

   Category index: <= 10 -> 0, 11-20 -> 1, 21-30 -> 2.
   ceil(v/10) - 1, floored at 0, is used rather than int(v/10) so that exact
   multiples of 10 fall in the lower category, as the <= 10, <= 20, <= 30
   boundaries require (vary = 20 -> index 1, so ID 5 scores 96, not 149).
   A missing input keeps a missing key. Missing inputs and values above 30
   are not found, so SCORE stays missing for them. */
data want4;
  /* Never executed: only defines the LOOKUP variables at compile time so the
     hash object has host variables for its keys and data. */
  if 0 then set lookup;

  if _n_ = 1 then do;
    declare hash lookup(dataset:'lookup');
    lookup.defineKey('index_x', 'index_y');
    lookup.defineData('lookup_value');
    lookup.defineDone();
  end;

  set have;
  /* Both keys are assigned on every row. They come from a SET statement and
     are therefore retained, so they must never carry over from a previous row. */
  index_x = ifn(missing(varx), ., max(ceil(varx / 10) - 1, 0));
  index_y = ifn(missing(vary), ., max(ceil(vary / 10) - 1, 0));

  /* FIND returns 0 on a hit and copies the stored value into LOOKUP_VALUE. */
  if (lookup.find() = 0) then
    score = lookup_value;

  keep id score;
run;