创建特定的 PROC JSON 层次结构
Creating specific PROC JSON hierarchy
我正在尝试使用 SAS Enterprise Guide (EG) 创建以下格式的 JSON 文件:
{
"schema": "EMAIL_SHA26",
"data": ["1516e67afa2d9c3874c3e9874bdb41c4", "1a7e5f59b3f0dfe6ea152cb65aedb0d2"]
}
我已经很接近了,但是我生成的 JSON 文件中的括号太多了。这是我目前得到的:
{
"schema": "EMAIL_SHA26",
"data": [
[
"1516e67afa2d9c3874c3e9874bdb41c4"
],
[
"1a7e5f59b3f0dfe6ea152cb65aedb0d2"
]
]
}
这是我使用的代码:
/* Asker's attempt: write the fixed key/value pair and the "data" key, then  */
/* export the data set inside an explicitly opened array.                    */
/* NOTE(review): the fileref `users` is not assigned in this snippet;        */
/* presumably it is defined earlier in the asker's program.                  */
PROC JSON
OUT = users NOSASTAGS PRETTY;
WRITE VALUES "schema";
WRITE VALUES "EMAIL_SHA26";
WRITE VALUES "data";
WRITE OPEN ARRAY;
/* With NOKEYS every observation is exported as its own array of values,     */
/* which is why each value ends up wrapped in its own pair of brackets.      */
EXPORT users_ds / NOKEYS;
WRITE CLOSE;
RUN;
"users_ds"数据集有一列有2条数据记录。有什么办法可以防止它在数据集中的每个值周围加上括号?此外,知道散列电子邮件列表可能多达 10,000 条记录,是否可以实现我想要的输出?
如有任何帮助,我们将不胜感激。
您得到的输出表明有多行,每一行都有不同的电子邮件。
要获得您想要的输出,您需要将所有这些电子邮件连接成一个长字符串,这会产生问题,因为 SAS 限制了每个字符变量的长度。
这是一个解决方法:使用 CALL EXECUTE 逐行生成 PROC JSON 语句,从而输出您想要的 JSON:
/* Sample input: one character column holding one hashed e-mail per row. */
data have;
  data = "1516e67afa2d9c3874c3e9874bdb41c4"; output;
  data = "1a7e5f59b3f0dfe6ea152cb65aedb0d2"; output;
run;

/*
 * Build a PROC JSON step with CALL EXECUTE, one WRITE VALUES statement per
 * input row. The queued statements run after this DATA step has finished.
 */
data _null_;
  set have end=lastrec;

  /* First row: start the step, write the fixed keys and open the "data" array. */
  if _N_ = 1 then do;
    call execute("PROC JSON OUT = 'want.json' NOSASTAGS PRETTY;");
    call execute("WRITE VALUES 'schema';");
    call execute("WRITE VALUES 'EMAIL_SHA26';");
    call execute("WRITE VALUES 'data';");
    call execute("WRITE OPEN ARRAY;");
  end;

  /* STRIP removes padding blanks; QUOTE wraps the value in double quotes and */
  /* doubles any embedded double quote so the generated statement stays valid. */
  call execute('WRITE VALUES ' || quote(strip(data)) || ';');

  /* Last row: close the array and end the generated step. Without RUN; the   */
  /* PROC JSON step has no step boundary of its own and would only execute    */
  /* when some later code happens to supply one.                              */
  if lastrec then call execute("WRITE CLOSE; RUN;");
run;
这会产生:
{
"schema": "EMAIL_SHA26",
"data": [
"1516e67afa2d9c3874c3e9874bdb41c4",
"1a7e5f59b3f0dfe6ea152cb65aedb0d2"
]
}
EXPORT 默认将数据集输出为由 name:value 对构成的对象数组。使用 NOKEYS 选项时,输出结构是数组的数组,其中每个内部数组是一行观测的值数组。
为了得到某一列的单一值数组,您可以先将该列转置为单行数据集,然后再导出。在 EXPORT 之前,您不必使用 WRITE OPEN ARRAY。
/* Demo table: ten rows, each with a generated UUID and an age. */
data have;
  do row = 1 to 10;
    userid = uuidgen();
    age    = row + 19;
    output;
  end;
run;

/* Pivot the USERID column so that all of its values sit in one observation. */
proc transpose
    data = have
    out  = have_1_row (drop=_name_);
  var userid;
run;

/* Destination file for the JSON output. */
filename users "C:\temp\users.json";

/* A one-row data set exported with NOKEYS yields a single flat array. */
proc json out=users pretty nosastags;
  write values "schema" "EMAIL_SHA26";
  write values "data";
  export have_1_row / nokeys;
run;
生成的 JSON:
{
"schema": "EMAIL_SHA26",
"data": [
"6ebd89fa-b6bc-4c14-b094-43792d202ad7",
"ec53dd59-1290-47d7-b437-0c754349434c",
"17332882-58ca-4c09-a599-2048d58460d0",
"d5b57a19-ff73-4deb-bfc7-62ebc19d719e",
"9d2758b2-e128-45df-8589-99cd7204c1ab",
"a13bcba7-742f-4a01-bd56-dc12f4190d3e",
"5f853bf3-9597-4c94-9b57-a54d3de190c3",
"0edbd2d8-bd5d-46be-aaa7-ac208df4ba62",
"07347e73-7efa-4e9c-8242-5a9c85f07b56",
"03976b1b-513f-41ee-92d5-d23c8d3d4918"
]
}
如果想把多个列分别导出为值数组,可以考虑使用 DOSUBL 调用一个宏来旁路执行转置;转置生成的单行数据集随后用于宏代码生成的 EXPORT 语句:
/*
 * transpose_column: emits a PROC TRANSPOSE step that turns one column of a
 * data set into a single-row data set.
 *   data=    input data set
 *   column=  variable named in the VAR statement
 *   out=     output data set; only variables whose names start with COL are kept
 */
%macro transpose_column(data=, column=, out=);
%* generate code that will transpose a single column into a single row data set;
proc transpose data=&data out=&out(keep=col:);
var &column;
run;
%mend;
/*
 * export_column_as_array: meant to be called inside a PROC JSON step.
 * Side-runs the transpose_column code for one column through DOSUBL, then
 * emits a WRITE VALUES statement with the column name followed by an
 * EXPORT ... / NOKEYS of the transposed data set.
 *   data=    input data set
 *   column=  column to export; its name is written ahead of the array
 * NOTE(review): the name of the intermediate data set is an underscore plus
 * the value returned by MONOTONIC; this assumes every call yields a new
 * number so that names do not collide - confirm.
 */
%macro export_column_as_array (data=, column=);
%local rc out;
%let out = _%sysfunc(monotonic());
%* Invoke DOSUBL to side-run macro generated proc transpose code;
%let rc = %sysfunc(
DOSUBL(
%transpose_column(data=&data, column=&column, out=&out)
)
);
%* rc holds the DOSUBL return code and is not checked afterwards;
%* use the output data set created by the side-run code;
WRITE VALUES "&column";
EXPORT &out / NOKEYS;
%mend;
/* Demo table: ten rows carrying a generated UUID, an age and a date. */
data have;
  do row = 1 to 10;
    userid = uuidgen();
    age    = row + 19;
    date   = today() - row;
    output;
  end;

  /* Display format for DATE. */
  format date yymmdd10.;
run;
/* Destination file for the generated JSON. */
filename users "C:\temp\users.json" ;

/* MPRINT echoes the statements generated by macros and MLOGIC traces macro */
/* execution. MTRACE is not a SAS system option and makes the OPTIONS       */
/* statement fail with an unrecognized-option error.                        */
options mprint mlogic;

/* Write the fixed key/value pair, then one named value array per column. */
proc json out = users nosastags pretty;
  WRITE VALUES "schema";
  WRITE VALUES "EMAIL_SHA26";
  %export_column_as_array(data=have,column=userid);
  %export_column_as_array(data=have,column=age);
  %export_column_as_array(data=have,column=date);
run;
quit;
我正在尝试使用 SAS Enterprise Guide (EG) 创建以下格式的 JSON 文件:
{
"schema": "EMAIL_SHA26",
"data": ["1516e67afa2d9c3874c3e9874bdb41c4", "1a7e5f59b3f0dfe6ea152cb65aedb0d2"]
}
我已经很接近了,但是我生成的 JSON 文件中的括号太多了。这是我目前得到的:
{
"schema": "EMAIL_SHA26",
"data": [
[
"1516e67afa2d9c3874c3e9874bdb41c4"
],
[
"1a7e5f59b3f0dfe6ea152cb65aedb0d2"
]
]
}
这是我使用的代码:
/* Asker's attempt: write the fixed key/value pair and the "data" key, then  */
/* export the data set inside an explicitly opened array.                    */
/* NOTE(review): the fileref `users` is not assigned in this snippet;        */
/* presumably it is defined earlier in the asker's program.                  */
PROC JSON
OUT = users NOSASTAGS PRETTY;
WRITE VALUES "schema";
WRITE VALUES "EMAIL_SHA26";
WRITE VALUES "data";
WRITE OPEN ARRAY;
/* With NOKEYS every observation is exported as its own array of values,     */
/* which is why each value ends up wrapped in its own pair of brackets.      */
EXPORT users_ds / NOKEYS;
WRITE CLOSE;
RUN;
"users_ds"数据集有一列有2条数据记录。有什么办法可以防止它在数据集中的每个值周围加上括号?此外,知道散列电子邮件列表可能多达 10,000 条记录,是否可以实现我想要的输出?
如有任何帮助,我们将不胜感激。
您得到的输出表明有多行,每一行都有不同的电子邮件。
要获得您想要的输出,您需要将所有这些电子邮件连接成一个长字符串,这会产生问题,因为 SAS 限制了每个字符变量的长度。
这是一个解决方法:使用 CALL EXECUTE 逐行生成 PROC JSON 语句,从而输出您想要的 JSON:
/* Sample input: one character column holding one hashed e-mail per row. */
data have;
  data = "1516e67afa2d9c3874c3e9874bdb41c4"; output;
  data = "1a7e5f59b3f0dfe6ea152cb65aedb0d2"; output;
run;

/*
 * Build a PROC JSON step with CALL EXECUTE, one WRITE VALUES statement per
 * input row. The queued statements run after this DATA step has finished.
 */
data _null_;
  set have end=lastrec;

  /* First row: start the step, write the fixed keys and open the "data" array. */
  if _N_ = 1 then do;
    call execute("PROC JSON OUT = 'want.json' NOSASTAGS PRETTY;");
    call execute("WRITE VALUES 'schema';");
    call execute("WRITE VALUES 'EMAIL_SHA26';");
    call execute("WRITE VALUES 'data';");
    call execute("WRITE OPEN ARRAY;");
  end;

  /* STRIP removes padding blanks; QUOTE wraps the value in double quotes and */
  /* doubles any embedded double quote so the generated statement stays valid. */
  call execute('WRITE VALUES ' || quote(strip(data)) || ';');

  /* Last row: close the array and end the generated step. Without RUN; the   */
  /* PROC JSON step has no step boundary of its own and would only execute    */
  /* when some later code happens to supply one.                              */
  if lastrec then call execute("WRITE CLOSE; RUN;");
run;
这会产生:
{
"schema": "EMAIL_SHA26",
"data": [
"1516e67afa2d9c3874c3e9874bdb41c4",
"1a7e5f59b3f0dfe6ea152cb65aedb0d2"
]
}
EXPORT 默认将数据集输出为由 name:value 对构成的对象数组。使用 NOKEYS 选项时,输出结构是数组的数组,其中每个内部数组是一行观测的值数组。
为了得到某一列的单一值数组,您可以先将该列转置为单行数据集,然后再导出。在 EXPORT 之前,您不必使用 WRITE OPEN ARRAY。
/* Demo table: ten rows, each with a generated UUID and an age. */
data have;
  do row = 1 to 10;
    userid = uuidgen();
    age    = row + 19;
    output;
  end;
run;

/* Pivot the USERID column so that all of its values sit in one observation. */
proc transpose
    data = have
    out  = have_1_row (drop=_name_);
  var userid;
run;

/* Destination file for the JSON output. */
filename users "C:\temp\users.json";

/* A one-row data set exported with NOKEYS yields a single flat array. */
proc json out=users pretty nosastags;
  write values "schema" "EMAIL_SHA26";
  write values "data";
  export have_1_row / nokeys;
run;
生成的 JSON:
{
"schema": "EMAIL_SHA26",
"data": [
"6ebd89fa-b6bc-4c14-b094-43792d202ad7",
"ec53dd59-1290-47d7-b437-0c754349434c",
"17332882-58ca-4c09-a599-2048d58460d0",
"d5b57a19-ff73-4deb-bfc7-62ebc19d719e",
"9d2758b2-e128-45df-8589-99cd7204c1ab",
"a13bcba7-742f-4a01-bd56-dc12f4190d3e",
"5f853bf3-9597-4c94-9b57-a54d3de190c3",
"0edbd2d8-bd5d-46be-aaa7-ac208df4ba62",
"07347e73-7efa-4e9c-8242-5a9c85f07b56",
"03976b1b-513f-41ee-92d5-d23c8d3d4918"
]
}
如果想把多个列分别导出为值数组,可以考虑使用 DOSUBL 调用一个宏来旁路执行转置;转置生成的单行数据集随后用于宏代码生成的 EXPORT 语句:
/*
 * transpose_column: emits a PROC TRANSPOSE step that turns one column of a
 * data set into a single-row data set.
 *   data=    input data set
 *   column=  variable named in the VAR statement
 *   out=     output data set; only variables whose names start with COL are kept
 */
%macro transpose_column(data=, column=, out=);
%* generate code that will transpose a single column into a single row data set;
proc transpose data=&data out=&out(keep=col:);
var &column;
run;
%mend;
/*
 * export_column_as_array: meant to be called inside a PROC JSON step.
 * Side-runs the transpose_column code for one column through DOSUBL, then
 * emits a WRITE VALUES statement with the column name followed by an
 * EXPORT ... / NOKEYS of the transposed data set.
 *   data=    input data set
 *   column=  column to export; its name is written ahead of the array
 * NOTE(review): the name of the intermediate data set is an underscore plus
 * the value returned by MONOTONIC; this assumes every call yields a new
 * number so that names do not collide - confirm.
 */
%macro export_column_as_array (data=, column=);
%local rc out;
%let out = _%sysfunc(monotonic());
%* Invoke DOSUBL to side-run macro generated proc transpose code;
%let rc = %sysfunc(
DOSUBL(
%transpose_column(data=&data, column=&column, out=&out)
)
);
%* rc holds the DOSUBL return code and is not checked afterwards;
%* use the output data set created by the side-run code;
WRITE VALUES "&column";
EXPORT &out / NOKEYS;
%mend;
/* Demo table: ten rows carrying a generated UUID, an age and a date. */
data have;
  do row = 1 to 10;
    userid = uuidgen();
    age    = row + 19;
    date   = today() - row;
    output;
  end;

  /* Display format for DATE. */
  format date yymmdd10.;
run;
/* Destination file for the generated JSON. */
filename users "C:\temp\users.json" ;

/* MPRINT echoes the statements generated by macros and MLOGIC traces macro */
/* execution. MTRACE is not a SAS system option and makes the OPTIONS       */
/* statement fail with an unrecognized-option error.                        */
options mprint mlogic;

/* Write the fixed key/value pair, then one named value array per column. */
proc json out = users nosastags pretty;
  WRITE VALUES "schema";
  WRITE VALUES "EMAIL_SHA26";
  %export_column_as_array(data=have,column=userid);
  %export_column_as_array(data=have,column=age);
  %export_column_as_array(data=have,column=date);
run;
quit;