如何获取数组元素的类型?
How do I get the type of an array's elements?
我正在编写一个 多态 PL/pgSQL 函数来遍历数组。我对使用 FOREACH
很感兴趣,但是我不知道如何声明具有正确类型的临时变量。
下面是我的函数,更多信息请看第 4 行的注释:
CREATE OR REPLACE FUNCTION uniq(ary anyarray) RETURNS anyarray AS $$
DECLARE
ret ary%TYPE := '{}';
v ???; -- how do I get the element type of @ary@?
-- NOTE(review): "???" is a placeholder for the unknown element type, so this
-- declaration is not valid as written.
-- Intent of the function: collect the elements of ary into ret, skipping any
-- element that already compares equal to an element of ret.
BEGIN
-- A NULL input array yields NULL.
IF ary IS NULL THEN
return NULL;
END IF;
FOREACH v IN ARRAY ary LOOP
-- Append v only when it is not already present in ret.
IF NOT v = any(ret) THEN
ret = array_append(ret, v);
END IF;
END LOOP;
RETURN ret;
END;
$$ LANGUAGE plpgsql;
我不知道如何声明一个类型为 anyarray 参数的元素类型的变量(文档没有提到这种可能性)。
您可以将 FOR LOOP
与整数变量一起使用:
-- Return ary with duplicate elements removed, keeping the first occurrence of
-- each value in its original relative order.
--
-- Parameters:
--   ary  one-dimensional array of any element type.
-- Returns:
--   NULL for NULL input, '{}' for an empty array, otherwise the de-duplicated
--   array.
-- Note: NULL elements are skipped, because "NOT NULL = ANY(...)" evaluates to
-- NULL and the IF branch is not taken.
CREATE OR REPLACE FUNCTION uniq(ary anyarray) RETURNS anyarray AS $$
DECLARE
   ret ary%TYPE := '{}';
   i   int;
BEGIN
   IF ary IS NULL THEN
      RETURN NULL;
   END IF;

   -- array_lower() / array_upper() return NULL for an empty array, and a NULL
   -- bound makes the integer FOR loop raise an error. Fall back to the empty
   -- range 1 .. 0 so that an empty array simply produces '{}'.
   FOR i IN COALESCE(array_lower(ary, 1), 1) .. COALESCE(array_upper(ary, 1), 0) LOOP
      IF NOT ary[i] = ANY(ret) THEN
         ret := array_append(ret, ary[i]);
      END IF;
   END LOOP;

   RETURN ret;
END;
$$ LANGUAGE plpgsql;
然而,循环和变量可能不是必需的:
-- Return the distinct elements of arr as an array, using one aggregate query
-- instead of a loop. The original element order is not preserved.
--
-- Parameters:
--   arr  array of any element type.
-- Returns:
--   NULL for NULL input, '{}' for an empty array, otherwise an array holding
--   each distinct element once (at most one NULL element).
create or replace function uniq_without_loop(arr anyarray)
returns anyarray language plpgsql as $$
begin
    -- unnest() yields zero rows for both a NULL and an empty array, so the
    -- NULL case has to be handled before aggregating.
    if arr is null then
        return null;
    end if;

    return (
        -- array_agg() over zero rows is NULL; map that to an empty array so
        -- that empty input gives empty output.
        select coalesce(array_agg(distinct elem), '{}')
        from unnest(arr) as elem
    );
end $$;
保持数组顺序不变的上述函数版本:
-- Return the distinct elements of arr, each kept at the position of its first
-- occurrence, using one query instead of a loop.
--
-- Parameters:
--   arr  array of any element type.
-- Returns:
--   NULL for NULL input, '{}' for an empty array, otherwise the de-duplicated
--   array in first-occurrence order.
create or replace function unsorted_uniq_without_loop(arr anyarray)
returns anyarray language plpgsql as $$
begin
    if arr is null then
        return null;
    end if;

    return (
        -- Aggregate in order of first occurrence; array_agg() over zero rows
        -- is NULL, so map that to an empty array.
        select coalesce(array_agg(elem order by pos), '{}')
        from (
            -- pos numbers the unnested rows as they arrive.
            -- NOTE(review): this relies on unnest() emitting rows in array
            -- order; WITH ORDINALITY (Postgres 9.4+) would make that explicit.
            -- ORDER BY elem, pos makes DISTINCT ON keep the smallest pos per
            -- value instead of an arbitrary row.
            select distinct on (elem) elem, row_number() over () as pos
            from unnest(arr) as elem
            order by elem, pos
        ) first_seen
    );
end $$;
主要问题的答案
AFAIK,如果没有 "template" 变量或参数,您不能声明多态类型的变量 。
手册中 “Declaring Function Parameters” 一章的末尾有相关示例,但并未涵盖这个技巧:在函数定义中添加另一个数据类型为 ANYELEMENT 的 IN、INOUT 或 OUT 参数。它会自动解析为匹配的元素类型,并且可以直接(ab)用作函数体内的变量,或用作声明更多变量的模板:
-- Return ary with duplicate elements removed, keeping first occurrences.
--
-- Parameters:
--   ary  array of any element type.
--   v    helper parameter of the matching element type. It defaults to NULL so
--        callers can omit it; it exists so the body has a variable (and a
--        %TYPE template) of the array's element type.
-- Returns:
--   NULL for NULL input, otherwise the de-duplicated array.
-- Note: NULL elements are skipped, because "NOT NULL = ANY(...)" evaluates to
-- NULL and the IF branch is not taken.
CREATE OR REPLACE FUNCTION uniq1(ary ANYARRAY, v ANYELEMENT = NULL)
  RETURNS anyarray AS
$func$
DECLARE
   ret      ary%TYPE := '{}';
   some_var v%TYPE;   -- we could declare more variables now
                      -- but we don't need to
BEGIN
   IF ary IS NULL THEN
      RETURN NULL;
   END IF;

   FOREACH v IN ARRAY ary LOOP   -- instead, we can use v directly
      IF NOT v = ANY(ret) THEN
         ret := array_append(ret, v);
      END IF;
   END LOOP;

   RETURN ret;
END
$func$ LANGUAGE plpgsql;
相关:
- Can I make a plpgsql function return an integer without using a variable?
像这样复制类型只在 DECLARE 部分有效,并且与类型转换不同。手册中对此有说明。
分配一个默认值,所以添加的参数不必包含在函数调用中:ANYELEMENT
= NULL
调用(不变):
-- Example calls: only the array argument is passed.
SELECT uniq1(CAST('{1,2,1}' AS int[]));
SELECT uniq1(CAST('{foo,bar,bar}' AS text[]));
更好的函数
为了方便,我实际上会使用 OUT 参数并反转测试逻辑:
-- Return ary with duplicate non-NULL elements removed, keeping first
-- occurrences. The result is delivered through the OUT parameter ret.
--
-- Parameters:
--   ary   array of any element type.
--   elem  helper of the matching element type (defaults to NULL); used as the
--         loop variable.
--   ret   OUT: NULL for NULL input, otherwise the resulting array.
-- Note: a NULL element never compares equal to anything, so every NULL
-- element of ary is appended.
CREATE OR REPLACE FUNCTION uniq2(ary ANYARRAY, elem ANYELEMENT = NULL
                               , OUT ret ANYARRAY)
  RETURNS anyarray AS
$func$
BEGIN
   -- NULL in, NULL out: ret has not been assigned yet.
   IF ary IS NULL THEN
      RETURN;
   END IF;

   ret := '{}';  -- init

   FOREACH elem IN ARRAY ary LOOP
      -- Skip elements already collected; a NULL comparison result does not
      -- skip, so the element is appended.
      CONTINUE WHEN elem = ANY(ret);
      ret := array_append(ret, elem);
   END LOOP;
END
$func$ LANGUAGE plpgsql;
但这仍然没有涵盖所有包含 NULL 元素的情况。
可正常工作的函数
也适用于 NULL 元素:
-- Return ary with duplicate elements removed, keeping first occurrences.
-- NULL elements are treated as duplicates of each other, so at most one NULL
-- is kept, at the position of its first occurrence.
--
-- Parameters:
--   ary   array of any element type.
--   elem  helper of the matching element type (defaults to NULL); used as the
--         loop variable.
--   ret   OUT: NULL for NULL input, otherwise the de-duplicated array.
CREATE OR REPLACE FUNCTION uniq3(ary ANYARRAY, elem ANYELEMENT = NULL
                               , OUT ret ANYARRAY)
  RETURNS anyarray AS
$func$
DECLARE
   -- Tracks whether a NULL element has already been appended. A flag is used
   -- because comparing array_length() values breaks while ret is still empty:
   -- array_length('{}', 1) is NULL, so the comparison is never true.
   null_seen boolean := false;
BEGIN
   IF ary IS NULL THEN
      RETURN;
   END IF;

   ret := '{}';  -- init

   FOREACH elem IN ARRAY ary LOOP
      IF elem IS NULL THEN              -- special handling for NULL
         IF NOT null_seen THEN
            ret := array_append(ret, elem);
            null_seen := true;
         END IF;
      ELSIF (elem = ANY(ret)) IS NOT TRUE THEN
         -- The comparison is NULL (not TRUE) when ret holds a NULL and no
         -- matching value, so the element still counts as new.
         ret := array_append(ret, elem);
      END IF;
   END LOOP;
END
$func$ LANGUAGE plpgsql;
检查数组中的 NULL 有点麻烦:
- How to determine if NULL is contained in an array in Postgres?
所有这些函数都只是概念验证。我一个都不会用。相反:
使用纯 SQL 的更优方案
在 Postgres 9.4 中,使用 WITH ORDINALITY 来保留元素的原始顺序。
详细解释:
- PostgreSQL unnest() with element number
针对单个值的基本代码:
-- De-duplicate a literal array while keeping each value at the position of
-- its first occurrence.
WITH first_seen AS (
   -- One row per distinct element, carrying the lowest ordinality at which
   -- that element occurs.
   SELECT DISTINCT ON (u.elem)
          u.elem, u.i
   FROM   unnest('{1,2,1,NULL,4,NULL}'::int[]) WITH ORDINALITY AS u(elem, i)
   ORDER  BY u.elem, u.i
)
SELECT ARRAY (
   SELECT fs.elem
   FROM   first_seen AS fs
   ORDER  BY fs.i
) AS uniq;
Returns:
uniq
------------
{1,2,NULL,4}
关于DISTINCT ON
:
- Select first row in each GROUP BY group?
嵌入到查询中:
-- For every row of test, add a column arr holding the distinct elements of
-- t.arr in order of first occurrence.
SELECT t.*, a.arr
FROM   test AS t
CROSS  JOIN LATERAL (
   SELECT ARRAY (
      SELECT sub.elem
      FROM  (
         -- Lowest ordinality per distinct element.
         SELECT DISTINCT ON (u.elem)
                u.elem, u.i
         FROM   unnest(t.arr) WITH ORDINALITY AS u(elem, i)
         ORDER  BY u.elem, u.i
      ) AS sub
      ORDER  BY sub.i
   ) AS arr
) AS a;
这有一个小的边界情况:对于 NULL 数组,它返回一个空数组。要覆盖所有情况:
-- Same de-duplication per row of test, but a NULL t.arr yields NULL in the
-- added arr column instead of the empty array produced by ARRAY(...).
SELECT t.*
     , CASE WHEN t.arr IS NOT NULL THEN a.arr END AS arr
FROM   test AS t
CROSS  JOIN LATERAL (
   SELECT ARRAY (
      SELECT sub.elem
      FROM  (
         -- Lowest ordinality per distinct element.
         SELECT DISTINCT ON (u.elem)
                u.elem, u.ord
         FROM   unnest(t.arr) WITH ORDINALITY AS u(elem, ord)
         ORDER  BY u.elem, u.ord
      ) AS sub
      ORDER  BY sub.ord
   ) AS arr
) AS a;
或:
-- Same de-duplication per row of test. The lateral subquery is only joined
-- for rows whose arr is not NULL; for the others the LEFT JOIN leaves a.arr
-- NULL.
SELECT t.*, a.arr
FROM   test AS t
LEFT   JOIN LATERAL (
   SELECT ARRAY (
      SELECT sub.elem
      FROM  (
         -- Lowest ordinality per distinct element.
         SELECT DISTINCT ON (u.elem)
                u.elem, u.i
         FROM   unnest(t.arr) WITH ORDINALITY AS u(elem, i)
         ORDER  BY u.elem, u.i
      ) AS sub
      ORDER  BY sub.i
   ) AS arr
) AS a ON t.arr IS NOT NULL;
在 Postgres 9.3 或更早版本中,您可以用 generate_subscripts() 代替:
-- Per-row de-duplication without WITH ORDINALITY: generate_subscripts()
-- supplies the index of each element of t.arr.
SELECT t.*, a.arr
FROM   test AS t
CROSS  JOIN LATERAL (
   SELECT ARRAY (
      SELECT sub.elem
      FROM  (
         -- Lowest subscript per distinct element.
         SELECT DISTINCT ON (t.arr[idx])
                t.arr[idx] AS elem, idx
         FROM   generate_subscripts(t.arr, 1) AS idx
         ORDER  BY t.arr[idx], idx
      ) AS sub
      ORDER  BY sub.idx
   ) AS arr
) AS a;
我们在sqlfiddle中需要这个,目前只支持pg 9.3,所以WITH ORDINALITY
不可用:
我正在编写一个 多态 PL/pgSQL 函数来遍历数组。我对使用 FOREACH
很感兴趣,但是我不知道如何声明具有正确类型的临时变量。
下面是我的函数,更多信息请看第 4 行的注释:
CREATE OR REPLACE FUNCTION uniq(ary anyarray) RETURNS anyarray AS $$
DECLARE
ret ary%TYPE := '{}';
v ???; -- how do I get the element type of @ary@?
-- NOTE(review): "???" is a placeholder for the unknown element type, so this
-- declaration is not valid as written.
-- Intent of the function: collect the elements of ary into ret, skipping any
-- element that already compares equal to an element of ret.
BEGIN
-- A NULL input array yields NULL.
IF ary IS NULL THEN
return NULL;
END IF;
FOREACH v IN ARRAY ary LOOP
-- Append v only when it is not already present in ret.
IF NOT v = any(ret) THEN
ret = array_append(ret, v);
END IF;
END LOOP;
RETURN ret;
END;
$$ LANGUAGE plpgsql;
我不知道如何声明一个类型为 anyarray 参数的元素类型的变量(文档没有提到这种可能性)。
您可以将 FOR LOOP
与整数变量一起使用:
-- Return ary with duplicate elements removed, keeping the first occurrence of
-- each value in its original relative order.
--
-- Parameters:
--   ary  one-dimensional array of any element type.
-- Returns:
--   NULL for NULL input, '{}' for an empty array, otherwise the de-duplicated
--   array.
-- Note: NULL elements are skipped, because "NOT NULL = ANY(...)" evaluates to
-- NULL and the IF branch is not taken.
CREATE OR REPLACE FUNCTION uniq(ary anyarray) RETURNS anyarray AS $$
DECLARE
   ret ary%TYPE := '{}';
   i   int;
BEGIN
   IF ary IS NULL THEN
      RETURN NULL;
   END IF;

   -- array_lower() / array_upper() return NULL for an empty array, and a NULL
   -- bound makes the integer FOR loop raise an error. Fall back to the empty
   -- range 1 .. 0 so that an empty array simply produces '{}'.
   FOR i IN COALESCE(array_lower(ary, 1), 1) .. COALESCE(array_upper(ary, 1), 0) LOOP
      IF NOT ary[i] = ANY(ret) THEN
         ret := array_append(ret, ary[i]);
      END IF;
   END LOOP;

   RETURN ret;
END;
$$ LANGUAGE plpgsql;
然而,循环和变量可能不是必需的:
-- Return the distinct elements of arr as an array, using one aggregate query
-- instead of a loop. The original element order is not preserved.
--
-- Parameters:
--   arr  array of any element type.
-- Returns:
--   NULL for NULL input, '{}' for an empty array, otherwise an array holding
--   each distinct element once (at most one NULL element).
create or replace function uniq_without_loop(arr anyarray)
returns anyarray language plpgsql as $$
begin
    -- unnest() yields zero rows for both a NULL and an empty array, so the
    -- NULL case has to be handled before aggregating.
    if arr is null then
        return null;
    end if;

    return (
        -- array_agg() over zero rows is NULL; map that to an empty array so
        -- that empty input gives empty output.
        select coalesce(array_agg(distinct elem), '{}')
        from unnest(arr) as elem
    );
end $$;
保持数组顺序不变的上述函数版本:
-- Return the distinct elements of arr, each kept at the position of its first
-- occurrence, using one query instead of a loop.
--
-- Parameters:
--   arr  array of any element type.
-- Returns:
--   NULL for NULL input, '{}' for an empty array, otherwise the de-duplicated
--   array in first-occurrence order.
create or replace function unsorted_uniq_without_loop(arr anyarray)
returns anyarray language plpgsql as $$
begin
    if arr is null then
        return null;
    end if;

    return (
        -- Aggregate in order of first occurrence; array_agg() over zero rows
        -- is NULL, so map that to an empty array.
        select coalesce(array_agg(elem order by pos), '{}')
        from (
            -- pos numbers the unnested rows as they arrive.
            -- NOTE(review): this relies on unnest() emitting rows in array
            -- order; WITH ORDINALITY (Postgres 9.4+) would make that explicit.
            -- ORDER BY elem, pos makes DISTINCT ON keep the smallest pos per
            -- value instead of an arbitrary row.
            select distinct on (elem) elem, row_number() over () as pos
            from unnest(arr) as elem
            order by elem, pos
        ) first_seen
    );
end $$;
主要问题的答案
AFAIK,如果没有 "template" 变量或参数,您不能声明多态类型的变量 。
手册中 “Declaring Function Parameters” 一章的末尾有相关示例,但并未涵盖这个技巧:在函数定义中添加另一个数据类型为 ANYELEMENT 的 IN、INOUT 或 OUT 参数。它会自动解析为匹配的元素类型,并且可以直接(ab)用作函数体内的变量,或用作声明更多变量的模板:
-- Return ary with duplicate elements removed, keeping first occurrences.
--
-- Parameters:
--   ary  array of any element type.
--   v    helper parameter of the matching element type. It defaults to NULL so
--        callers can omit it; it exists so the body has a variable (and a
--        %TYPE template) of the array's element type.
-- Returns:
--   NULL for NULL input, otherwise the de-duplicated array.
-- Note: NULL elements are skipped, because "NOT NULL = ANY(...)" evaluates to
-- NULL and the IF branch is not taken.
CREATE OR REPLACE FUNCTION uniq1(ary ANYARRAY, v ANYELEMENT = NULL)
  RETURNS anyarray AS
$func$
DECLARE
   ret      ary%TYPE := '{}';
   some_var v%TYPE;   -- we could declare more variables now
                      -- but we don't need to
BEGIN
   IF ary IS NULL THEN
      RETURN NULL;
   END IF;

   FOREACH v IN ARRAY ary LOOP   -- instead, we can use v directly
      IF NOT v = ANY(ret) THEN
         ret := array_append(ret, v);
      END IF;
   END LOOP;

   RETURN ret;
END
$func$ LANGUAGE plpgsql;
相关:
- Can I make a plpgsql function return an integer without using a variable?
像这样复制类型只在 DECLARE 部分有效,并且与类型转换不同。手册中对此有说明。
分配一个默认值,所以添加的参数不必包含在函数调用中:ANYELEMENT
= NULL
调用(不变):
-- Example calls: only the array argument is passed.
SELECT uniq1(CAST('{1,2,1}' AS int[]));
SELECT uniq1(CAST('{foo,bar,bar}' AS text[]));
更好的函数
为了方便,我实际上会使用 OUT 参数并反转测试逻辑:
-- Return ary with duplicate non-NULL elements removed, keeping first
-- occurrences. The result is delivered through the OUT parameter ret.
--
-- Parameters:
--   ary   array of any element type.
--   elem  helper of the matching element type (defaults to NULL); used as the
--         loop variable.
--   ret   OUT: NULL for NULL input, otherwise the resulting array.
-- Note: a NULL element never compares equal to anything, so every NULL
-- element of ary is appended.
CREATE OR REPLACE FUNCTION uniq2(ary ANYARRAY, elem ANYELEMENT = NULL
                               , OUT ret ANYARRAY)
  RETURNS anyarray AS
$func$
BEGIN
   -- NULL in, NULL out: ret has not been assigned yet.
   IF ary IS NULL THEN
      RETURN;
   END IF;

   ret := '{}';  -- init

   FOREACH elem IN ARRAY ary LOOP
      -- Skip elements already collected; a NULL comparison result does not
      -- skip, so the element is appended.
      CONTINUE WHEN elem = ANY(ret);
      ret := array_append(ret, elem);
   END LOOP;
END
$func$ LANGUAGE plpgsql;
但这仍然没有涵盖所有包含 NULL 元素的情况。
可正常工作的函数
也适用于 NULL 元素:
-- Return ary with duplicate elements removed, keeping first occurrences.
-- NULL elements are treated as duplicates of each other, so at most one NULL
-- is kept, at the position of its first occurrence.
--
-- Parameters:
--   ary   array of any element type.
--   elem  helper of the matching element type (defaults to NULL); used as the
--         loop variable.
--   ret   OUT: NULL for NULL input, otherwise the de-duplicated array.
CREATE OR REPLACE FUNCTION uniq3(ary ANYARRAY, elem ANYELEMENT = NULL
                               , OUT ret ANYARRAY)
  RETURNS anyarray AS
$func$
DECLARE
   -- Tracks whether a NULL element has already been appended. A flag is used
   -- because comparing array_length() values breaks while ret is still empty:
   -- array_length('{}', 1) is NULL, so the comparison is never true.
   null_seen boolean := false;
BEGIN
   IF ary IS NULL THEN
      RETURN;
   END IF;

   ret := '{}';  -- init

   FOREACH elem IN ARRAY ary LOOP
      IF elem IS NULL THEN              -- special handling for NULL
         IF NOT null_seen THEN
            ret := array_append(ret, elem);
            null_seen := true;
         END IF;
      ELSIF (elem = ANY(ret)) IS NOT TRUE THEN
         -- The comparison is NULL (not TRUE) when ret holds a NULL and no
         -- matching value, so the element still counts as new.
         ret := array_append(ret, elem);
      END IF;
   END LOOP;
END
$func$ LANGUAGE plpgsql;
检查数组中的 NULL 有点麻烦:
- How to determine if NULL is contained in an array in Postgres?
所有这些函数都只是概念验证。我一个都不会用。相反:
使用纯 SQL 的更优方案
在 Postgres 9.4 中,使用 WITH ORDINALITY 来保留元素的原始顺序。
详细解释:
- PostgreSQL unnest() with element number
针对单个值的基本代码:
-- De-duplicate a literal array while keeping each value at the position of
-- its first occurrence.
WITH first_seen AS (
   -- One row per distinct element, carrying the lowest ordinality at which
   -- that element occurs.
   SELECT DISTINCT ON (u.elem)
          u.elem, u.i
   FROM   unnest('{1,2,1,NULL,4,NULL}'::int[]) WITH ORDINALITY AS u(elem, i)
   ORDER  BY u.elem, u.i
)
SELECT ARRAY (
   SELECT fs.elem
   FROM   first_seen AS fs
   ORDER  BY fs.i
) AS uniq;
Returns:
uniq
------------
{1,2,NULL,4}
关于DISTINCT ON
:
- Select first row in each GROUP BY group?
嵌入到查询中:
-- For every row of test, add a column arr holding the distinct elements of
-- t.arr in order of first occurrence.
SELECT t.*, a.arr
FROM   test AS t
CROSS  JOIN LATERAL (
   SELECT ARRAY (
      SELECT sub.elem
      FROM  (
         -- Lowest ordinality per distinct element.
         SELECT DISTINCT ON (u.elem)
                u.elem, u.i
         FROM   unnest(t.arr) WITH ORDINALITY AS u(elem, i)
         ORDER  BY u.elem, u.i
      ) AS sub
      ORDER  BY sub.i
   ) AS arr
) AS a;
这有一个小的边界情况:对于 NULL 数组,它返回一个空数组。要覆盖所有情况:
-- Same de-duplication per row of test, but a NULL t.arr yields NULL in the
-- added arr column instead of the empty array produced by ARRAY(...).
SELECT t.*
     , CASE WHEN t.arr IS NOT NULL THEN a.arr END AS arr
FROM   test AS t
CROSS  JOIN LATERAL (
   SELECT ARRAY (
      SELECT sub.elem
      FROM  (
         -- Lowest ordinality per distinct element.
         SELECT DISTINCT ON (u.elem)
                u.elem, u.ord
         FROM   unnest(t.arr) WITH ORDINALITY AS u(elem, ord)
         ORDER  BY u.elem, u.ord
      ) AS sub
      ORDER  BY sub.ord
   ) AS arr
) AS a;
或:
-- Same de-duplication per row of test. The lateral subquery is only joined
-- for rows whose arr is not NULL; for the others the LEFT JOIN leaves a.arr
-- NULL.
SELECT t.*, a.arr
FROM   test AS t
LEFT   JOIN LATERAL (
   SELECT ARRAY (
      SELECT sub.elem
      FROM  (
         -- Lowest ordinality per distinct element.
         SELECT DISTINCT ON (u.elem)
                u.elem, u.i
         FROM   unnest(t.arr) WITH ORDINALITY AS u(elem, i)
         ORDER  BY u.elem, u.i
      ) AS sub
      ORDER  BY sub.i
   ) AS arr
) AS a ON t.arr IS NOT NULL;
在 Postgres 9.3 或更早版本中,您可以改用 generate_subscripts():
-- Per-row de-duplication without WITH ORDINALITY: generate_subscripts()
-- supplies the index of each element of t.arr.
SELECT t.*, a.arr
FROM   test AS t
CROSS  JOIN LATERAL (
   SELECT ARRAY (
      SELECT sub.elem
      FROM  (
         -- Lowest subscript per distinct element.
         SELECT DISTINCT ON (t.arr[idx])
                t.arr[idx] AS elem, idx
         FROM   generate_subscripts(t.arr, 1) AS idx
         ORDER  BY t.arr[idx], idx
      ) AS sub
      ORDER  BY sub.idx
   ) AS arr
) AS a;
我们在sqlfiddle中需要这个,目前只支持pg 9.3,所以WITH ORDINALITY
不可用: