无法在 UDF 标量中评估雪花不受支持的子查询类型

Snowflake Unsupported subquery type cannot be evaluated in UDF scalar

我正在尝试创建一个函数,它将电子邮件数组作为输入,returns 电子邮件中的哈希用户名数组。 为此,我创建了这个 UDF:

-- pseudonymize_email(email ARRAY) -> ARRAY
-- Flattens the input array and, for every element, replaces everything up to
-- and including the last '@' (the pattern '.+\@' is greedy) with hash(value)
-- followed by '@'; the rewritten elements are collected again with array_agg.
-- Note that hash() is applied to the whole element, not only to the part in
-- front of the '@'.
-- NOTE(review): array_agg is used without WITHIN GROUP (ORDER BY ...), so the
-- output order presumably is not guaranteed to follow the input order - confirm.
-- NOTE: as reported further down, calling this function once per row of a
-- table fails with "Unsupported subquery type cannot be evaluated".
CREATE OR REPLACE FUNCTION pseudonymize_email(email ARRAY) RETURNS ARRAY
LANGUAGE SQL STRICT IMMUTABLE
AS $$
    SELECT array_agg(regexp_replace(value,'.+\@', concat(hash(value), '@'))) as email
    FROM LATERAL flatten(input => email) as f
$$;

下面的例子可以正常运行

-- Stand-alone check of the pseudonymization expression on a literal array.
-- Produces one row with a single ARRAY column named email.
SELECT
    ARRAY_AGG(
        REGEXP_REPLACE(f.value, '.+\@', CONCAT(HASH(f.value), '@'))
    ) AS email
FROM LATERAL FLATTEN(
    INPUT => ARRAY_CONSTRUCT('toto@gmail.com', 'hello@yahoo.com')
) AS f

这 returns 恰好是一列和一个值。

但是,当在正常的 select 语句中使用查询时,例如

-- Calls the SQL UDF once per row of a one-row inline table.
WITH test_table AS (
    SELECT
        1 AS col1,
        ARRAY_CONSTRUCT('toto@gmail.com', 'hello@yahoo.com') AS col2
)
SELECT
    t.col1,
    t.col2,
    pseudonymize_email(t.col2) AS hashed_emails
FROM test_table AS t

我收到以下错误:无法计算不支持的子查询类型

知道如何解决这个问题吗?

因此,鉴于您不能按您创建的性质对每行进行函数调用,一种选择是将其推送到 CTE 中,或者只将 email_hash 的正文作为您的 select .但通常当人们试图这样做时,他们试图隐藏复杂性或重用逻辑。

-- Inline alternative to the SQL UDF: flatten each row's array, rewrite every
-- element, and aggregate the results back into one array per source row.
-- NOTE: FLATTEN is called without OUTER => TRUE, so rows whose col2 is an
-- empty array or NULL produce no flattened rows and are absent from the output.
WITH test_table(col1, col2) AS (
    SELECT 1, array_construct('toto@gmail.com', 'hello@yahoo.com')
), email_hash AS (
    SELECT
        t.col1,
        t.col2,
        -- f.index is the element's position in the flattened array; ordering
        -- by it keeps the output elements in input order. Without WITHIN GROUP
        -- the element order produced by ARRAY_AGG is not guaranteed.
        ARRAY_AGG(
            REGEXP_REPLACE(f.value, '.+\@', CONCAT(HASH(f.value), '@'))
        ) WITHIN GROUP (ORDER BY f.index) AS email
    FROM test_table AS t,
        -- Lateral table-function join: FLATTEN is evaluated for each row of t.
        TABLE(FLATTEN(input => t.col2)) AS f
    -- Named grouping columns instead of positions, so reordering the SELECT
    -- list cannot silently change the grouping.
    GROUP BY t.col1, t.col2
)
SELECT
    col1,
    col2,
    email AS hashed_emails
FROM email_hash

使用 SQL UDF,有时 Snowflake 会尝试内联它们但没有成功。

另一种方法是编写 JavaScript UDF - 在这种情况下效果很好:

-- pseudonymize_email_JS(email ARRAY) -> ARRAY
-- JavaScript UDF that replaces, in every string element, the text in front of
-- the last '@' with the hex MD5 digest of that text.
-- Elements that are not strings or that contain no '@' are returned unchanged.
CREATE OR REPLACE FUNCTION pseudonymize_email_JS(email ARRAY) RETURNS ARRAY
LANGUAGE JAVASCRIPT STRICT IMMUTABLE
AS $$

// Hex MD5 digest of the UTF-8 encoding of inputString.
// TODO: optimize by creating only once per VM
function md5(inputString) {
    var hc="0123456789abcdef";
    // Encodes a JavaScript (UTF-16) string as a "byte string" in which every
    // character code is one UTF-8 byte (0..255). sb() below treats each
    // character code as a single byte, so without this step any character
    // above U+007F would yield a non-standard digest. ASCII input is unchanged.
    function utf8(str) {
        var out="",k,cp,lo;
        for(k=0;k<str.length;k++) {
            cp=str.charCodeAt(k);
            // Combine a valid surrogate pair into one code point.
            if(cp>=0xD800&&cp<=0xDBFF&&k+1<str.length) {
                lo=str.charCodeAt(k+1);
                if(lo>=0xDC00&&lo<=0xDFFF) {cp=0x10000+((cp-0xD800)<<10)+(lo-0xDC00);k++;}
            }
            if(cp<0x80) out+=String.fromCharCode(cp);
            else if(cp<0x800) out+=String.fromCharCode(0xC0|(cp>>6),0x80|(cp&0x3F));
            else if(cp<0x10000) out+=String.fromCharCode(0xE0|(cp>>12),0x80|((cp>>6)&0x3F),0x80|(cp&0x3F));
            else out+=String.fromCharCode(0xF0|(cp>>18),0x80|((cp>>12)&0x3F),0x80|((cp>>6)&0x3F),0x80|(cp&0x3F));
        }
        return out;
    }
    // rh: 32-bit word -> 8 hex digits, least significant byte first.
    function rh(n) {var j,s="";for(j=0;j<=3;j++) s+=hc.charAt((n>>(j*8+4))&0x0F)+hc.charAt((n>>(j*8))&0x0F);return s;}
    // ad: 32-bit addition with wrap-around, done in two 16-bit halves.
    function ad(x,y) {var l=(x&0xFFFF)+(y&0xFFFF);var m=(x>>16)+(y>>16)+(l>>16);return (m<<16)|(l&0xFFFF);}
    // rl: rotate left by c bits.
    function rl(n,c)            {return (n<<c)|(n>>>(32-c));}
    // cm: common step shared by the four round functions ff/gg/hh/ii.
    function cm(q,a,b,x,s,t)    {return ad(rl(ad(ad(a,q),ad(x,t)),s),b);}
    function ff(a,b,c,d,x,s,t)  {return cm((b&c)|((~b)&d),a,b,x,s,t);}
    function gg(a,b,c,d,x,s,t)  {return cm((b&d)|(c&(~d)),a,b,x,s,t);}
    function hh(a,b,c,d,x,s,t)  {return cm(b^c^d,a,b,x,s,t);}
    function ii(a,b,c,d,x,s,t)  {return cm(c^(b|(~d)),a,b,x,s,t);}
    // sb: packs the byte string into 16-word blocks, appends the 0x80 padding
    // byte and stores the message length in bits in the second-to-last word.
    function sb(x) {
        var i;var nblk=((x.length+8)>>6)+1;var blks=new Array(nblk*16);for(i=0;i<nblk*16;i++) blks[i]=0;
        for(i=0;i<x.length;i++) blks[i>>2]|=x.charCodeAt(i)<<((i%4)*8);
        blks[i>>2]|=0x80<<((i%4)*8);blks[nblk*16-2]=x.length*8;return blks;
    }
    var i,x=sb(utf8(inputString)),a=1732584193,b=-271733879,c=-1732584194,d=271733878,olda,oldb,oldc,oldd;
    for(i=0;i<x.length;i+=16) {olda=a;oldb=b;oldc=c;oldd=d;
        a=ff(a,b,c,d,x[i+ 0], 7, -680876936);d=ff(d,a,b,c,x[i+ 1],12, -389564586);c=ff(c,d,a,b,x[i+ 2],17,  606105819);
        b=ff(b,c,d,a,x[i+ 3],22,-1044525330);a=ff(a,b,c,d,x[i+ 4], 7, -176418897);d=ff(d,a,b,c,x[i+ 5],12, 1200080426);
        c=ff(c,d,a,b,x[i+ 6],17,-1473231341);b=ff(b,c,d,a,x[i+ 7],22,  -45705983);a=ff(a,b,c,d,x[i+ 8], 7, 1770035416);
        d=ff(d,a,b,c,x[i+ 9],12,-1958414417);c=ff(c,d,a,b,x[i+10],17,     -42063);b=ff(b,c,d,a,x[i+11],22,-1990404162);
        a=ff(a,b,c,d,x[i+12], 7, 1804603682);d=ff(d,a,b,c,x[i+13],12,  -40341101);c=ff(c,d,a,b,x[i+14],17,-1502002290);
        b=ff(b,c,d,a,x[i+15],22, 1236535329);a=gg(a,b,c,d,x[i+ 1], 5, -165796510);d=gg(d,a,b,c,x[i+ 6], 9,-1069501632);
        c=gg(c,d,a,b,x[i+11],14,  643717713);b=gg(b,c,d,a,x[i+ 0],20, -373897302);a=gg(a,b,c,d,x[i+ 5], 5, -701558691);
        d=gg(d,a,b,c,x[i+10], 9,   38016083);c=gg(c,d,a,b,x[i+15],14, -660478335);b=gg(b,c,d,a,x[i+ 4],20, -405537848);
        a=gg(a,b,c,d,x[i+ 9], 5,  568446438);d=gg(d,a,b,c,x[i+14], 9,-1019803690);c=gg(c,d,a,b,x[i+ 3],14, -187363961);
        b=gg(b,c,d,a,x[i+ 8],20, 1163531501);a=gg(a,b,c,d,x[i+13], 5,-1444681467);d=gg(d,a,b,c,x[i+ 2], 9,  -51403784);
        c=gg(c,d,a,b,x[i+ 7],14, 1735328473);b=gg(b,c,d,a,x[i+12],20,-1926607734);a=hh(a,b,c,d,x[i+ 5], 4,    -378558);
        d=hh(d,a,b,c,x[i+ 8],11,-2022574463);c=hh(c,d,a,b,x[i+11],16, 1839030562);b=hh(b,c,d,a,x[i+14],23,  -35309556);
        a=hh(a,b,c,d,x[i+ 1], 4,-1530992060);d=hh(d,a,b,c,x[i+ 4],11, 1272893353);c=hh(c,d,a,b,x[i+ 7],16, -155497632);
        b=hh(b,c,d,a,x[i+10],23,-1094730640);a=hh(a,b,c,d,x[i+13], 4,  681279174);d=hh(d,a,b,c,x[i+ 0],11, -358537222);
        c=hh(c,d,a,b,x[i+ 3],16, -722521979);b=hh(b,c,d,a,x[i+ 6],23,   76029189);a=hh(a,b,c,d,x[i+ 9], 4, -640364487);
        d=hh(d,a,b,c,x[i+12],11, -421815835);c=hh(c,d,a,b,x[i+15],16,  530742520);b=hh(b,c,d,a,x[i+ 2],23, -995338651);
        a=ii(a,b,c,d,x[i+ 0], 6, -198630844);d=ii(d,a,b,c,x[i+ 7],10, 1126891415);c=ii(c,d,a,b,x[i+14],15,-1416354905);
        b=ii(b,c,d,a,x[i+ 5],21,  -57434055);a=ii(a,b,c,d,x[i+12], 6, 1700485571);d=ii(d,a,b,c,x[i+ 3],10,-1894986606);
        c=ii(c,d,a,b,x[i+10],15,   -1051523);b=ii(b,c,d,a,x[i+ 1],21,-2054922799);a=ii(a,b,c,d,x[i+ 8], 6, 1873313359);
        d=ii(d,a,b,c,x[i+15],10,  -30611744);c=ii(c,d,a,b,x[i+ 6],15,-1560198380);b=ii(b,c,d,a,x[i+13],21, 1309151649);
        a=ii(a,b,c,d,x[i+ 4], 6, -145523070);d=ii(d,a,b,c,x[i+11],10,-1120210379);c=ii(c,d,a,b,x[i+ 2],15,  718787259);
        b=ii(b,c,d,a,x[i+ 9],21, -343485551);a=ad(a,olda);b=ad(b,oldb);c=ad(c,oldc);d=ad(d,oldd);
    }
    return rh(a)+rh(b)+rh(c)+rh(d);
}

// The argument is referenced as EMAIL (upper case) inside the JavaScript body.
return EMAIL.map(function(x) {
    // Pass non-string elements (for example null) through instead of throwing.
    if (typeof x !== 'string') return x;
    // Greedy: the capture group holds everything in front of the last '@'.
    var regex = /(.*)@/;
    var match = regex.exec(x);
    // No '@' in the element: nothing to replace, so return it unchanged
    // rather than failing the whole query on match[1] of null.
    if (match === null) return x;
    return x.replace(regex, md5(match[1]) + '@');
});
$$;




-- One-row check of the JavaScript UDF against an inline table.
WITH test_table AS (
    SELECT
        1 AS col1,
        ARRAY_CONSTRUCT('toto@gmail.com', 'hello@yahoo.com') AS col2
)
SELECT
    t.col1,
    t.col2,
    pseudonymize_email_js(t.col2) AS hashed_emails
FROM test_table AS t;
COL1 COL2 HASHED_EMAILS
1 [ "toto@gmail.com", "hello@yahoo.com" ] [ "f71dbe52628a3f83a77ab494817525c6@gmail.com", "5d41402abc4b2a76b9719d911017c592@yahoo.com" ]