创建特定的 PROC JSON 层次结构

Creating specific PROC JSON hierarchy

我正在尝试使用 SAS Enterprise Guide (EG) 创建以下格式的 JSON 文件:

{
"schema": "EMAIL_SHA26",
"data": ["1516e67afa2d9c3874c3e9874bdb41c4", "1a7e5f59b3f0dfe6ea152cb65aedb0d2"]
}

我已经很接近了,但是我生成的 JSON 文件中的括号太多了。这是我目前得到的:

{
"schema": "EMAIL_SHA26",
"data": [
  [
   "1516e67afa2d9c3874c3e9874bdb41c4"
  ],
  [
   "1a7e5f59b3f0dfe6ea152cb65aedb0d2"
  ]
 ]
}

这是我使用的代码:

/* Attempt from the question: write the "schema" key/value pair and the
   "data" key, then export users_ds inside an explicitly opened array. */
PROC JSON 
    OUT = users NOSASTAGS PRETTY;
    WRITE VALUES "schema";
    WRITE VALUES "EMAIL_SHA26";
    WRITE VALUES "data";
    /* This explicitly opened array becomes the outer pair of brackets. */
    WRITE OPEN ARRAY;
    /* With NOKEYS every row of users_ds is written as its own array of
       values, which is where the extra inner brackets come from. */
    EXPORT users_ds / NOKEYS;
    WRITE CLOSE;
RUN;

"users_ds"数据集有一列有2条数据记录。有什么办法可以防止它在数据集中的每个值周围加上括号?此外,知道散列电子邮件列表可能多达 10,000 条记录,是否可以实现我想要的输出?

如有任何帮助,我们将不胜感激。

您得到的输出表明有多行,每一行都有不同的电子邮件。

要获得您想要的输出,您需要将所有这些电子邮件连接成一个长字符串,这会产生问题,因为 SAS 限制了每个字符变量的长度。

这是一个解决方法:它使用 CALL EXECUTE 为每一行数据生成一条 PROC JSON 语句,从而得到您想要的 JSON:

/* Sample input: one character column named data holding two hashed e-mails. */
data have;
  length data $32;
  do data = "1516e67afa2d9c3874c3e9874bdb41c4",
            "1a7e5f59b3f0dfe6ea152cb65aedb0d2";
    output;
  end;
run;

/* Generate a PROC JSON step with CALL EXECUTE: one WRITE VALUES statement
   per row of HAVE, so that every value lands in a single flat "data" array.
   The statements are queued and run once this DATA step has finished. */
data _null_;
 set have end=lastrec;

 /* First row only: emit the step header and open the "data" array. */
 if _N_ = 1 then do;
   call execute(
   "PROC JSON  OUT = 'want.json' NOSASTAGS PRETTY; 
    WRITE VALUES 'schema';
    WRITE VALUES 'EMAIL_SHA26';
    WRITE VALUES 'data';
    WRITE OPEN ARRAY;
   ");

 end;

 /* Every row: QUOTE() wraps the stripped value in double quotes and doubles
    any embedded double quote, so the generated literal is always well formed. */
 call execute('WRITE VALUES ' || quote(strip(data)) || ';');

 /* Last row: close the array and end the generated step explicitly with RUN,
    so it does not depend on whatever step boundary happens to follow. */
 if lastrec then call execute("WRITE CLOSE; RUN;");

run;

这会产生:

{
  "schema": "EMAIL_SHA26",
  "data": [
    "1516e67afa2d9c3874c3e9874bdb41c4",
    "1a7e5f59b3f0dfe6ea152cb65aedb0d2"
  ]
}

EXPORT 默认将数据集输出为对象数组,每个对象由该行的 name:value 对组成。使用 NOKEYS 时,输出变为数组的数组,其中每个内部数组是一行的值数组。

为了获取列的单个值数组,您可以将该列转置为单行数据集并将其导出。在 EXPORT 之前,您不必 OPEN ARRAY:

/* Sample data: 10 rows, each with a row counter, a generated UUID and an age. */
data have;
  do row = 1 to 10;
    userid = uuidgen();   /* new UUID string for every row */
    age = 19 + row;       /* ages 20 through 29 */
    output;
  end;
run;

/* Pivot the userid column into one row: each value becomes its own column. */
proc transpose
    data = have
    out  = have_1_row (drop = _name_);
  var userid;
run;

/* Destination fileref for the JSON output. */
filename users "C:\temp\users.json";

/* Exporting the one-row data set with NOKEYS supplies the array for "data";
   no explicit WRITE OPEN ARRAY is used. */
proc json out=users nosastags pretty;
    write values "schema" "EMAIL_SHA26";
    write values "data";
    export have_1_row / nokeys;
run;

生成的 JSON:

{
  "schema": "EMAIL_SHA26",
  "data": [
    "6ebd89fa-b6bc-4c14-b094-43792d202ad7",
    "ec53dd59-1290-47d7-b437-0c754349434c",
    "17332882-58ca-4c09-a599-2048d58460d0",
    "d5b57a19-ff73-4deb-bfc7-62ebc19d719e",
    "9d2758b2-e128-45df-8589-99cd7204c1ab",
    "a13bcba7-742f-4a01-bd56-dc12f4190d3e",
    "5f853bf3-9597-4c94-9b57-a54d3de190c3",
    "0edbd2d8-bd5d-46be-aaa7-ac208df4ba62",
    "07347e73-7efa-4e9c-8242-5a9c85f07b56",
    "03976b1b-513f-41ee-92d5-d23c8d3d4918"
  ]
}

如果想要把多个列分别导出为值数组,可以考虑用 DOSUBL 调用一个宏:该宏先运行转置,生成单行数据集,随后由宏生成的 EXPORT 语句使用这个数据集:

%macro transpose_column(data=, column=, out=);
  %* emit a PROC TRANSPOSE step that pivots one column of the input data set
     into a one-row output data set, keeping only the COLn variables;
  proc transpose data=&data. out=&out.(keep=col:);
    var &column.;
  run;
%mend;

%macro export_column_as_array (data=, column=);
  %* Writes the column name as a key followed by the column values as one
     flat array. Generates PROC JSON statements only, so it has to be called
     inside a PROC JSON step;
  %local side_rc row_ds;

  %* name of the one-row data set, an underscore followed by a counter value;
  %let row_ds = _%sysfunc(monotonic());

  %* side-run the generated PROC TRANSPOSE through DOSUBL so the one-row
     data set exists before the EXPORT statement below is reached;
  %let side_rc = %sysfunc(
    DOSUBL(
      %transpose_column(data=&data., column=&column., out=&row_ds.)
    )
  );

  %* key first, then the single row exported without keys;
  WRITE VALUES "&column.";
  EXPORT &row_ds. / NOKEYS;
%mend;

/* Sample data: 10 rows with a row counter, a generated UUID, an age and a date. */
data have;
  do row = 1 to 10;
    userid = uuidgen();    /* new UUID string for every row */
    age = 19 + row;        /* ages 20 through 29 */
    date = today() - row;  /* 1 to 10 days before the current date */
    output;
  end;
  format date yymmdd10.;
run;

/* Destination fileref for the JSON output. */
filename users "C:\temp\users.json" ;

/* MPRINT writes the macro-generated statements to the log.
   NOTE(review): MTRACE is presumably a legacy macro-tracing option;
   confirm it is accepted by the SAS release in use. */
options mprint mtrace;

/* Each macro call below generates a WRITE VALUES statement for the key and
   an EXPORT statement for a one-row data set that the macro creates through
   DOSUBL while this step is being read. */
proc json out = users nosastags pretty;
    WRITE VALUES "schema";
    WRITE VALUES "EMAIL_SHA26";
    /* one key plus array of values per column */
    %export_column_as_array(data=have,column=userid);
    %export_column_as_array(data=have,column=age);
    %export_column_as_array(data=have,column=date);
run;
quit;