将 PCRXFIND 与 DS2 结合使用
Using PCRXFIND with DS2
我想用 PCRXFIND 代替 prxmatch 来实现同样的功能,以提高速度;而且目前 prxmatch 的版本也无法正常工作。
我正在尝试在数据文件中查找文本。下面是一个可重现的例子:一个文件包含要被搜索的数据,另一个文件包含搜索词。我想使用正则表达式,这通常是用 prxmatch 完成的;但因为我在 DS2 内部工作,所以想改用 PCRXFIND。对数据集中的每个观测,我使用哈希迭代器遍历所有搜索词。
搜索结果不符合任何我能识别的规律。如有任何建议,我将不胜感激。我已将此问题发布到 SAS 社区,但没有得到答复。
/* DS2 PCRXFIND example: build the two input tables. */

/* Rows to be searched: a name and a department, both 8-byte character. */
data person;
   length name dept $ 8;
   input name dept;
   datalines;
John Sales
Mary Acctng
Joe Findme
Sue Hereiam
;
run;

/* Search terms: an index and the term text, both 8-byte character.
   MISSOVER leaves term missing when a line carries only the index. */
data searchterms;
   length s_index term $ 8;
   infile datalines missover;
   input s_index term;
   datalines;
1 Hereiam
2 Findme
3 Acc
;
run;

/* Show the structure and contents of the search-term table. */
proc contents data=searchterms;
run;

proc print data=searchterms;
run;
/* Question's original attempt: for each row of person, walk every search term
   held in hash h and write one search_results row per (person, term) pair,
   with c holding the prxmatch result. */
proc ds2;
data search_results (overwrite=yes);
dcl double rc c ;
declare char(8) s_index;
declare char(8) term;
declare char(11) name dept;
/* c_options is assigned in init() but never read; the patterns below hard-code '/i'. */
declare char(1) c_options;
declare char(20) search_term search_text;
/* Hash sourced from a query on searchterms; hi is an iterator over that hash. */
dcl package hash h(1, '{select s_index, term from searchterms}');
dcl package hiter hi('h');
method init();
c_options = 'i';
/* s_index is the hash key and term is its data item. */
rc = h.defineKey('s_index');
rc = h.defineData('term');
rc = h.defineDone();
end;
method run();
/* Second declaration of rc, local to run(); presumably the loop below uses this one. */
dcl double rc;
set {select name, dept from person};
rc = hi.first();
do while(rc=0);
/* NOTE(review): name and dept are fixed-width char(11), so the || concatenations
   presumably carry trailing blanks and can exceed the char(20) targets - confirm. */
c = prxmatch('/'||compress(term)||'/i',name||' '||dept);
search_term = '/'||compress(term)||'/i';
search_text = name||' '||dept;
/* NOTE(review): the iterator is advanced before output, so each row is written
   after hi.next() has run, while c and search_term were computed before it.
   This ordering looks like the source of the mismatched results - confirm. */
rc = hi.next();
output;
end;
end;
enddata;
run;
quit;
我没用过 DS2,所以不太了解那段代码,但您可以直接在 proc sql 的连接条件中完成匹配,类似下面这样:
/* Keep each person row whose dept contains at least one search term. */
proc sql;
   create table want as
   select distinct t1.*
   from person t1
   inner join searchterms t2
      /* trim() drops the term's trailing blanks before the substring test;
         the 'i' modifier ignores case, like the /i flag on the regexes
         used in the DS2 versions of this search. */
      on find(t1.dept, trim(t2.term), 'i') > 0;
quit;
下面的代码看起来工作正常。我不能 100% 确定你的代码为什么不起作用,但通常如果对字符串前后的空格处理不够小心,就会出现一些令人费解的结果。
我使用 CATS 和 CATX 进行连接,这似乎是处理字符串拼接最有用的方法。我还调整了 output 语句的位置,并使用了 c_options。
/* Rows to be searched: a name and a department, both 8-byte character. */
data person;
   length name dept $ 8;
   input name dept;
   datalines;
John Sales
Mary Acctng
Joe Findme
Sue Hereiam
;
run;

/* Search terms: an index and the term text, both 8-byte character.
   MISSOVER leaves term missing when a line carries only the index. */
data searchterms;
   length s_index term $ 8;
   infile datalines missover;
   input s_index term;
   datalines;
1 Hereiam
2 Findme
3 Acc
;
run;

/* Show the structure and contents of the search-term table. */
proc contents data=searchterms;
run;

proc print data=searchterms;
run;
/* Pair every person row with every search term and store in c the result of
   matching the term (as a regex with the c_options flag) against "name dept". */
proc ds2;
   data search_results (overwrite=yes);
      /* Global declarations; their order is left as it was because these
         variables are the columns of search_results. */
      declare double rc c;
      dcl char(8) s_index;
      dcl char(8) term;
      dcl char(11) name dept;
      dcl char(1) c_options;
      dcl char(20) search_term search_text;
      declare package hash h(1, 'searchterms');
      declare package hiter hi('h');

      /* Set the regex flag and finish defining the hash: key s_index, data term. */
      method init();
         c_options = 'i';
         rc = h.defineKey('s_index');
         rc = h.defineData('term');
         rc = h.defineDone();
      end;

      /* For the current person row, visit each hash entry and emit one row. */
      method run();
         declare double step_rc;   /* iterator return code, local to run() */
         set person;
         step_rc = hi.first();
         do while (step_rc = 0);
            /* CATX/CATS strip blanks from their arguments before joining them. */
            search_text = catx(' ', name, dept);
            search_term = cats('/', term, '/', c_options);
            c = prxmatch(search_term, search_text);
            /* Write the row before advancing, so term and c belong together. */
            output;
            step_rc = hi.next();
         end;
      end;
   enddata;
   run;
quit;

proc print data=search_results;
run;
在 SAS 9.4M5 中,为正则表达式匹配添加了一个新包,DS2 PCRXFIND Package。下面是使用该包和自定义列表对象包来完成您的任务的示例。
/* Count the search terms up front: init() in the results program reads exactly
   &termdim rows from searchterms, so the macro variable has to exist before
   PROC DS2 is submitted. */
proc sql noprint;
   select count(*) into :termdim trimmed from searchterms;
quit;

proc ds2;
   /* One node of a singly linked list: an id plus a PCRXFIND instance built
      from that term's regular expression. */
   package prxElement/overwrite=yes;
      declare package prxElement next;   /* not set here; last() treats a null next as the tail */
      declare int id;
      declare package pcrxfind prx;

      /* Constructor: store the id and create the PCRXFIND object from regex. */
      method prxElement( int id,
                         varchar(32767) character set utf8 regex);
         this.id = id;
         prx = _new_ pcrxfind(regex);
      end;
   endpackage;
   run;

   /* Minimal linked list of prxElement nodes with an element count. */
   package prxList/overwrite=yes;
      declare package prxElement head;
      declare int count;

      /* Constructor: start with an empty list. */
      method prxList();
         head = null;   /* null is an empty list */
         count = 0;
      end;

      /* Return the final node, or null when the list is empty. */
      method last() returns package prxElement;
         declare package prxElement item;
         item = head;
         /* Empty list: hand back the null head instead of dereferencing it. */
         if ( null(item) ) then return item;
         do while ( ^null(item.next) );
            item = item.next;
         end;
         return item;
      end;

      /* Append element to the end of the list and update count. */
      method add(package prxElement element);
         declare package prxElement joinTo;
         if ( ^null(head) ) then do;
            joinTo = last();
            joinTo.next = element;
            count = count+1;
         end;
         else do;
            head = element;
            count = 1;
         end;
      end;
   endpackage;
   run;

   /* Emit one row per (person, search term) pair; rc is 1 when the term's
      regex matches "name dept" and 0 otherwise. */
   data results(overwrite=yes);
      declare double rc;
      drop term;
      declare package pcrxfind prx;
      declare package prxElement item;
      declare package prxList prxList;

      /* Build the list once: one PCRXFIND object per search term, so the
         objects are created here rather than once per person row. */
      method init();
         declare int i;
         prxList=_new_ prxList();
         do i=1 to &termdim;
            set searchterms;
            /* NOTE(review): the data step that creates searchterms reads
               s_index as character, while the constructor takes an int and
               run() assigns the int back to s_index. This relies on implicit
               type conversion - confirm the values round-trip as intended. */
            item=_new_ prxElement(s_index,cats('/',term,'/i'));
            prxList.add(item);
         end;
      end;

      /* For the current person row, walk the list and test each regex. */
      method run();
         declare int i;
         set person;
         item = prxList.head;
         do i=1 to prxList.count;
            prx = item.prx;
            s_index = item.id;
            rc = (prx.Match(catx(' ',name,dept)))>0 ;
            output;
            item=item.next;
         end;
      end;
   enddata;
   run;
quit;
我想用 PCRXFIND 代替 prxmatch 来实现同样的功能,以提高速度;而且目前 prxmatch 的版本也无法正常工作。
我正在尝试在数据文件中查找文本。下面是一个可重现的例子:一个文件包含要被搜索的数据,另一个文件包含搜索词。我想使用正则表达式,这通常是用 prxmatch 完成的;但因为我在 DS2 内部工作,所以想改用 PCRXFIND。对数据集中的每个观测,我使用哈希迭代器遍历所有搜索词。
搜索结果不符合任何我能识别的规律。如有任何建议,我将不胜感激。我已将此问题发布到 SAS 社区,但没有得到答复。
/* DS2 PCRXFIND example: build the two input tables. */

/* Rows to be searched: a name and a department, both 8-byte character. */
data person;
   length name dept $ 8;
   input name dept;
   datalines;
John Sales
Mary Acctng
Joe Findme
Sue Hereiam
;
run;

/* Search terms: an index and the term text, both 8-byte character.
   MISSOVER leaves term missing when a line carries only the index. */
data searchterms;
   length s_index term $ 8;
   infile datalines missover;
   input s_index term;
   datalines;
1 Hereiam
2 Findme
3 Acc
;
run;

/* Show the structure and contents of the search-term table. */
proc contents data=searchterms;
run;

proc print data=searchterms;
run;
/* Question's original attempt: for each row of person, walk every search term
   held in hash h and write one search_results row per (person, term) pair,
   with c holding the prxmatch result. */
proc ds2;
data search_results (overwrite=yes);
dcl double rc c ;
declare char(8) s_index;
declare char(8) term;
declare char(11) name dept;
/* c_options is assigned in init() but never read; the patterns below hard-code '/i'. */
declare char(1) c_options;
declare char(20) search_term search_text;
/* Hash sourced from a query on searchterms; hi is an iterator over that hash. */
dcl package hash h(1, '{select s_index, term from searchterms}');
dcl package hiter hi('h');
method init();
c_options = 'i';
/* s_index is the hash key and term is its data item. */
rc = h.defineKey('s_index');
rc = h.defineData('term');
rc = h.defineDone();
end;
method run();
/* Second declaration of rc, local to run(); presumably the loop below uses this one. */
dcl double rc;
set {select name, dept from person};
rc = hi.first();
do while(rc=0);
/* NOTE(review): name and dept are fixed-width char(11), so the || concatenations
   presumably carry trailing blanks and can exceed the char(20) targets - confirm. */
c = prxmatch('/'||compress(term)||'/i',name||' '||dept);
search_term = '/'||compress(term)||'/i';
search_text = name||' '||dept;
/* NOTE(review): the iterator is advanced before output, so each row is written
   after hi.next() has run, while c and search_term were computed before it.
   This ordering looks like the source of the mismatched results - confirm. */
rc = hi.next();
output;
end;
end;
enddata;
run;
quit;
我没用过 DS2,所以不太了解那段代码,但您可以直接在 proc sql 的连接条件中完成匹配,类似下面这样:
/* Keep each person row whose dept contains at least one search term. */
proc sql;
   create table want as
   select distinct t1.*
   from person t1
   inner join searchterms t2
      /* trim() drops the term's trailing blanks before the substring test;
         the 'i' modifier ignores case, like the /i flag on the regexes
         used in the DS2 versions of this search. */
      on find(t1.dept, trim(t2.term), 'i') > 0;
quit;
下面的代码看起来工作正常。我不能 100% 确定你的代码为什么不起作用,但通常如果对字符串前后的空格处理不够小心,就会出现一些令人费解的结果。
我使用 CATS 和 CATX 进行连接,这似乎是处理字符串拼接最有用的方法。我还调整了 output 语句的位置,并使用了 c_options。
/* Rows to be searched: a name and a department, both 8-byte character. */
data person;
   length name dept $ 8;
   input name dept;
   datalines;
John Sales
Mary Acctng
Joe Findme
Sue Hereiam
;
run;

/* Search terms: an index and the term text, both 8-byte character.
   MISSOVER leaves term missing when a line carries only the index. */
data searchterms;
   length s_index term $ 8;
   infile datalines missover;
   input s_index term;
   datalines;
1 Hereiam
2 Findme
3 Acc
;
run;

/* Show the structure and contents of the search-term table. */
proc contents data=searchterms;
run;

proc print data=searchterms;
run;
/* Pair every person row with every search term and store in c the result of
   matching the term (as a regex with the c_options flag) against "name dept". */
proc ds2;
   data search_results (overwrite=yes);
      /* Global declarations; their order is left as it was because these
         variables are the columns of search_results. */
      declare double rc c;
      dcl char(8) s_index;
      dcl char(8) term;
      dcl char(11) name dept;
      dcl char(1) c_options;
      dcl char(20) search_term search_text;
      declare package hash h(1, 'searchterms');
      declare package hiter hi('h');

      /* Set the regex flag and finish defining the hash: key s_index, data term. */
      method init();
         c_options = 'i';
         rc = h.defineKey('s_index');
         rc = h.defineData('term');
         rc = h.defineDone();
      end;

      /* For the current person row, visit each hash entry and emit one row. */
      method run();
         declare double step_rc;   /* iterator return code, local to run() */
         set person;
         step_rc = hi.first();
         do while (step_rc = 0);
            /* CATX/CATS strip blanks from their arguments before joining them. */
            search_text = catx(' ', name, dept);
            search_term = cats('/', term, '/', c_options);
            c = prxmatch(search_term, search_text);
            /* Write the row before advancing, so term and c belong together. */
            output;
            step_rc = hi.next();
         end;
      end;
   enddata;
   run;
quit;

proc print data=search_results;
run;
在 SAS 9.4M5 中,为正则表达式匹配添加了一个新包,DS2 PCRXFIND Package。下面是使用该包和自定义列表对象包来完成您的任务的示例。
/* Count the search terms up front: init() in the results program reads exactly
   &termdim rows from searchterms, so the macro variable has to exist before
   PROC DS2 is submitted. */
proc sql noprint;
   select count(*) into :termdim trimmed from searchterms;
quit;

proc ds2;
   /* One node of a singly linked list: an id plus a PCRXFIND instance built
      from that term's regular expression. */
   package prxElement/overwrite=yes;
      declare package prxElement next;   /* not set here; last() treats a null next as the tail */
      declare int id;
      declare package pcrxfind prx;

      /* Constructor: store the id and create the PCRXFIND object from regex. */
      method prxElement( int id,
                         varchar(32767) character set utf8 regex);
         this.id = id;
         prx = _new_ pcrxfind(regex);
      end;
   endpackage;
   run;

   /* Minimal linked list of prxElement nodes with an element count. */
   package prxList/overwrite=yes;
      declare package prxElement head;
      declare int count;

      /* Constructor: start with an empty list. */
      method prxList();
         head = null;   /* null is an empty list */
         count = 0;
      end;

      /* Return the final node, or null when the list is empty. */
      method last() returns package prxElement;
         declare package prxElement item;
         item = head;
         /* Empty list: hand back the null head instead of dereferencing it. */
         if ( null(item) ) then return item;
         do while ( ^null(item.next) );
            item = item.next;
         end;
         return item;
      end;

      /* Append element to the end of the list and update count. */
      method add(package prxElement element);
         declare package prxElement joinTo;
         if ( ^null(head) ) then do;
            joinTo = last();
            joinTo.next = element;
            count = count+1;
         end;
         else do;
            head = element;
            count = 1;
         end;
      end;
   endpackage;
   run;

   /* Emit one row per (person, search term) pair; rc is 1 when the term's
      regex matches "name dept" and 0 otherwise. */
   data results(overwrite=yes);
      declare double rc;
      drop term;
      declare package pcrxfind prx;
      declare package prxElement item;
      declare package prxList prxList;

      /* Build the list once: one PCRXFIND object per search term, so the
         objects are created here rather than once per person row. */
      method init();
         declare int i;
         prxList=_new_ prxList();
         do i=1 to &termdim;
            set searchterms;
            /* NOTE(review): the data step that creates searchterms reads
               s_index as character, while the constructor takes an int and
               run() assigns the int back to s_index. This relies on implicit
               type conversion - confirm the values round-trip as intended. */
            item=_new_ prxElement(s_index,cats('/',term,'/i'));
            prxList.add(item);
         end;
      end;

      /* For the current person row, walk the list and test each regex. */
      method run();
         declare int i;
         set person;
         item = prxList.head;
         do i=1 to prxList.count;
            prx = item.prx;
            s_index = item.id;
            rc = (prx.Match(catx(' ',name,dept)))>0 ;
            output;
            item=item.next;
         end;
      end;
   enddata;
   run;
quit;