动态地将函数应用于 Postgres table 中的所有列
Apply function to all columns in a Postgres table dynamically
使用 Postgres 13.1,我想对 table 的所有列应用前向填充函数。前向填充功能在我之前的问题中有解释:
但是,在那种情况下,列和 table 都是写死的。我想把那段代码推广到任意 table,即:只需指定 table,前向填充就会应用于其中的每一列。
以这个table为例:
-- Sample data for the forward-fill ("gap fill") examples in this file.
-- The calls in this file pass 'id' as the partition column and 'row_num' as the
-- ordering column; str and val contain the NULL gaps to be filled.
CREATE TABLE example (
    row_num int,
    id      int,
    str     text,
    val     integer
);

-- Explicit column list: the insert no longer depends on the physical column order.
INSERT INTO example (row_num, id, str, val) VALUES
    (1,  1, '1a', NULL)
  , (2,  1, NULL, 1)
  , (3,  2, '2a', 2)
  , (4,  2, NULL, NULL)
  , (5,  3, NULL, NULL)
  , (6,  3, '3a', 31)
  , (7,  3, NULL, NULL)
  , (8,  3, NULL, 32)
  , (9,  3, '3b', NULL)
  , (10, 3, NULL, NULL)
;
我从下面这个可以正常工作的基础函数开始。调用时我传入几个名称作为参数。请注意,第一个参数是 table 名称而不是列名称。该函数接收 table 名称,构建一个包含所有列名称的数组,然后输出这些名称。
-- col_collect: return the column names of table `tbl` as a text array and emit one
-- NOTICE per column.
--
-- Parameters:
--   tbl      table name to look up in information_schema.columns
--   id       not used by this version (kept so the signature stays unchanged)
--   row_num  not used by this version (kept so the signature stays unchanged)
-- Returns: text[] of column names in table definition order; an empty array when no
--          table of that name is visible.
--
-- NOTE: the lookup filters on table_name only, so same-named tables in different
-- schemas all contribute their columns.
CREATE OR REPLACE FUNCTION col_collect(tbl text, id text, row_num text)
  RETURNS text[]
  LANGUAGE plpgsql AS
$func$
DECLARE
   tmp text[];
   col text;
BEGIN
   tmp := ARRAY(
      SELECT c.column_name::text
      FROM   information_schema.columns c
      WHERE  c.table_name = tbl
      ORDER  BY c.ordinal_position   -- deterministic order; previously undefined
   );

   FOREACH col IN ARRAY tmp
   LOOP
      RAISE NOTICE 'col: %', col;
   END LOOP;

   RETURN tmp;
END
$func$;
我想将我从之前的问题中得到的“前向填充”函数应用到 table 的每一列。 UPDATE
似乎是正确的方法。所以这是前面的函数,我用 execute
更新替换 raise notice
这样我就可以传入 table 名称:
-- col_collect (second attempt): for each column of `tbl`, concatenate and EXECUTE an
-- UPDATE statement that is meant to forward-fill that column. Returns nothing.
--
-- NOTE(review): the statement assembled in the loop is not valid PostgreSQL, which is
-- the syntax error discussed in the surrounding text:
--   * UPDATE has no WINDOW clause, and a SET expression cannot carry an "AS <alias>".
--   * The text calls gapfill(...), while the aggregate created in this file is
--     named gap_fill.
--   * '<col>.row_num' uses a column name where a table reference is expected, and
--     "row_num" is hard-coded there instead of using the row_num parameter.
--   * tbl, col, id and row_num are concatenated without quoting (no quote_ident() /
--     format('%I')), so unusual names would break or alter the statement.
-- The column list comes from information_schema.columns filtered by table_name only,
-- so it also contains the partition and ordering columns themselves.
create or replace function col_collect(tbl text, id text, row_num text)
returns void
language plpgsql as
$func$
declare
tmp text[];
col text;
begin
select array (
select column_name
from information_schema."columns" c
where table_name = tbl
) into tmp;
foreach col in array tmp
loop
execute 'update '||tbl||'
set '||col||' = gapfill('||col||') OVER w AS '||col||'
where '||tbl||'.row_num = '||col||'.row_num
window w as (PARTITION BY '||id||' ORDER BY '||row_num||')
returning *;';
end loop;
end
$func$;
-- Example call: table name first, then the partition column and the ordering column.
select col_collect('example','id','row_num')
上面的函数会报语法错误。我已经尝试了很多变体,但都失败了。SO 上有一些有用的相关答案(原文中的链接已在提取时丢失)。我尝试应用的聚合函数(用作 window 函数)是:
-- gap_fill_internal: state transition function for the gap_fill aggregate.
--
-- Parameters:
--   s  state so far (the value being carried forward)
--   v  current input value
-- Returns: v when it is not NULL, otherwise s.
--
-- Deliberately NOT declared STRICT: it must be called for NULL inputs so the
-- previous state can be returned. The result depends only on the arguments, so the
-- function is marked IMMUTABLE and PARALLEL SAFE instead of the default
-- VOLATILE / PARALLEL UNSAFE.
CREATE OR REPLACE FUNCTION gap_fill_internal(s anyelement, v anyelement)
  RETURNS anyelement
  LANGUAGE plpgsql
  IMMUTABLE
  PARALLEL SAFE AS
$func$
BEGIN
   RETURN COALESCE(v, s);  -- that's all!
END
$func$;
-- gap_fill: polymorphic aggregate whose state is the most recent non-NULL input
-- (the transition function returns COALESCE(input, state)). No initial condition is
-- given, so the state starts out NULL.
CREATE AGGREGATE gap_fill (anyelement) (
   STYPE = anyelement
 , SFUNC = gap_fill_internal
);
我的问题是:
- 这是一个好方法吗?如果是,我做错了什么;或
- 有更好的方法吗?
你问的可不是小事。您应该先熟练掌握 PL/pgSQL。我不建议初学者使用这种动态 SQL 查询——它的威力太大了。
话虽如此,让我们开始吧。系好安全带!
-- f_gap_fill_update: forward-fill NULLs in every nullable data column of a table,
-- in place, by generating and executing a single UPDATE.
--
-- Parameters:
--   _tbl      target table; regclass validates the name and renders it safely quoted
--   _id       name of the column to PARTITION BY
--   _row_num  name of the column to ORDER BY within each partition; also used to
--             join the computed rows back to the table, so it is presumably unique
--             -- verify for your data
-- Returns (OUT parameters):
--   nullable_columns  number of columns that are gap-filled
--   updated_rows      number of rows actually changed (ROW_COUNT of the UPDATE)
-- Raises: an exception when the table has no nullable column besides _id / _row_num.
--
-- Requires the aggregate gap_fill(anyelement).
CREATE OR REPLACE FUNCTION f_gap_fill_update(_tbl regclass, _id text, _row_num text, OUT nullable_columns int, OUT updated_rows int)
  LANGUAGE plpgsql AS
$func$
DECLARE
   _pk  text := quote_ident(_row_num);
   _sql text;
BEGIN
   SELECT INTO _sql, nullable_columns
          concat_ws(E'\n'
           , format('UPDATE %s t', _tbl)
           -- Explicit ROW(): "SET (c) = (u.c)" with a single column is rejected by
           -- PostgreSQL 10+, because "(u.c)" is a plain parenthesized expression.
           , 'SET   (' || string_agg(quote_ident(a.attname), ', ') || ')'
           , '  = ROW(' || string_agg('u.' || quote_ident(a.attname), ', ') || ')'
           , 'FROM  ('
           , '   SELECT ' || _pk
           , '        , ' || string_agg(format('gap_fill(%1$I) OVER w AS %1$I', a.attname), ', ')
           , '   FROM   ' || _tbl
           , format('   WINDOW w AS (PARTITION BY %I ORDER BY %s)', _id, _pk)
           , '   ) u'
           , format('WHERE t.%1$s = u.%1$s', _pk)
           -- Skip rows that would not change, to avoid empty updates.
           , 'AND   ROW(' || string_agg('t.' || quote_ident(a.attname), ', ') || ') IS DISTINCT FROM'
           , '      ROW(' || string_agg('u.' || quote_ident(a.attname), ', ') || ')'
          )
        , count(*)
   FROM  (
      SELECT a.attname
      FROM   pg_attribute a
      WHERE  a.attrelid = _tbl
      AND    a.attnum > 0               -- user columns only
      AND    NOT a.attisdropped
      AND    NOT a.attnotnull           -- NOT NULL columns cannot have gaps
      -- Never fill the partition / ordering columns themselves. Including a
      -- nullable ordering column made the subquery output it twice, so the join
      -- condition failed with an ambiguous column reference.
      AND    a.attname::text NOT IN (_id, _row_num)
      ORDER  BY a.attnum
      ) a;

   IF nullable_columns = 0 THEN
      RAISE EXCEPTION 'No nullable columns found in table >>%<<', _tbl;
   ELSIF _sql IS NULL THEN
      RAISE EXCEPTION 'SQL string is NULL. Should not occur!';
   END IF;

   -- RAISE NOTICE '%', _sql;         -- debug
   EXECUTE _sql;
   GET DIAGNOSTICS updated_rows = ROW_COUNT;
END
$func$;
调用示例:
-- Gap-fill table "example": partition by id, order by row_num.
SELECT nullable_columns
     , updated_rows
FROM   f_gap_fill_update('example', 'id', 'row_num');
db<>fiddle here
该函数采用了当前的最佳实践。
它会生成并执行以下形式的查询:
-- Illustration of the statement shape that f_gap_fill_update generates and executes.
-- tbl, str, val and col1 are placeholder names; row_num is the ordering / join column
-- and id the partition column.
UPDATE tbl t
SET (str, val, col1)
= (u.str, u.val, u.col1)
FROM (
SELECT row_num
, gap_fill(str) OVER w AS str, gap_fill(val) OVER w AS val
, gap_fill(col1) OVER w AS col1
FROM tbl
WINDOW w AS (PARTITION BY id ORDER BY row_num)
) u
WHERE t.row_num = u.row_num
-- Only rows whose filled values differ from the stored ones are written.
AND (t.str, t.val, t.col1) IS DISTINCT FROM
(u.str, u.val, u.col1)
使用 pg_catalog.pg_attribute
而不是信息架构。参见:
请注意最后的 WHERE
子句以防止(可能 昂贵)空更新。只会写入实际更改的行。参见:
- How do I (or can I) SELECT DISTINCT on multiple columns?
此外,甚至只会考虑可为空的列(未定义 NOT NULL
),以避免不必要的工作。
在 UPDATE
中使用 ROW
语法以保持代码简单。参见:
- SQL update fields of one table from fields of another one
函数返回两个整数值:nullable_columns 和 updated_rows,含义如其名称所示。
函数正确防御SQL注入。参见:
- Table name as a PostgreSQL function parameter
- SQL injection in Postgres functions vs prepared queries
- Calculate number of rows affected by batch query in PostgreSQL
以上函数执行更新,但不返回行。下面是一个基本演示,说明如何返回不同(多态)类型的行:
-- f_gap_fill_select: return all rows of a table with NULLs in its nullable data
-- columns forward-filled; the table itself is not modified.
--
-- Parameters:
--   _tbl_type  a (NULL) value of the table's row type, e.g. NULL::example; it only
--              conveys the table and the polymorphic return type
--   _id        name of the column to PARTITION BY
--   _row_num   name of the column to ORDER BY within each partition
-- Returns: SETOF the table's row type, columns in table definition order.
-- Raises: an exception if no SQL string could be built.
--
-- NOT NULL columns and the _id / _row_num columns are passed through unchanged.
-- Filling the key columns would fabricate key values for rows whose key is NULL,
-- and would be inconsistent with f_gap_fill_update.
--
-- Requires the aggregate gap_fill(anyelement).
CREATE OR REPLACE FUNCTION f_gap_fill_select(_tbl_type anyelement, _id text, _row_num text)
  RETURNS SETOF anyelement
  LANGUAGE plpgsql AS
$func$
DECLARE
   -- Resolve the table from the row type of the polymorphic argument.
   _tbl regclass := pg_typeof(_tbl_type)::text::regclass;
   _sql text;
BEGIN
   SELECT INTO _sql
          'SELECT ' || string_agg(CASE WHEN a.attnotnull
                                         OR a.attname::text IN (_id, _row_num)
                                       THEN format('%I', a.attname)
                                       ELSE format('gap_fill(%1$I) OVER w AS %1$I', a.attname)
                                  END
                                , ', ' ORDER BY a.attnum)
       || E'\nFROM ' || _tbl
       || format(E'\nWINDOW w AS (PARTITION BY %I ORDER BY %I)', _id, _row_num)
   FROM   pg_attribute a
   WHERE  a.attrelid = _tbl
   AND    a.attnum > 0               -- user columns only
   AND    NOT a.attisdropped;

   IF _sql IS NULL THEN
      RAISE EXCEPTION 'SQL string is NULL. Should not occur!';
   END IF;

   -- RAISE NOTICE '%', _sql;        -- debug
   RETURN QUERY EXECUTE _sql;
END
$func$;
调用(注意特殊语法!):
-- The first argument is a typed NULL: it only tells the function which table / row
-- type to use. The output columns depend on that type, hence the star.
SELECT filled.*
FROM   f_gap_fill_select(NULL::example, 'id', 'row_num') AS filled;
db<>fiddle here
关于 return 多态行类型:
- Refactor a PL/pgSQL function to return the output of various SELECT queries
使用 Postgres 13.1,我想对 table 的所有列应用前向填充函数。前向填充功能在我之前的问题中有解释:
但是,在那种情况下,列和 table 都是写死的。我想把那段代码推广到任意 table,即:只需指定 table,前向填充就会应用于其中的每一列。
以这个table为例:
-- Sample data for the forward-fill ("gap fill") examples in this file.
-- The calls in this file pass 'id' as the partition column and 'row_num' as the
-- ordering column; str and val contain the NULL gaps to be filled.
CREATE TABLE example (
    row_num int,
    id      int,
    str     text,
    val     integer
);

-- Explicit column list: the insert no longer depends on the physical column order.
INSERT INTO example (row_num, id, str, val) VALUES
    (1,  1, '1a', NULL)
  , (2,  1, NULL, 1)
  , (3,  2, '2a', 2)
  , (4,  2, NULL, NULL)
  , (5,  3, NULL, NULL)
  , (6,  3, '3a', 31)
  , (7,  3, NULL, NULL)
  , (8,  3, NULL, 32)
  , (9,  3, '3b', NULL)
  , (10, 3, NULL, NULL)
;
我从下面这个可以正常工作的基础函数开始。调用时我传入几个名称作为参数。请注意,第一个参数是 table 名称而不是列名称。该函数接收 table 名称,构建一个包含所有列名称的数组,然后输出这些名称。
-- col_collect: return the column names of table `tbl` as a text array and emit one
-- NOTICE per column.
--
-- Parameters:
--   tbl      table name to look up in information_schema.columns
--   id       not used by this version (kept so the signature stays unchanged)
--   row_num  not used by this version (kept so the signature stays unchanged)
-- Returns: text[] of column names in table definition order; an empty array when no
--          table of that name is visible.
--
-- NOTE: the lookup filters on table_name only, so same-named tables in different
-- schemas all contribute their columns.
CREATE OR REPLACE FUNCTION col_collect(tbl text, id text, row_num text)
  RETURNS text[]
  LANGUAGE plpgsql AS
$func$
DECLARE
   tmp text[];
   col text;
BEGIN
   tmp := ARRAY(
      SELECT c.column_name::text
      FROM   information_schema.columns c
      WHERE  c.table_name = tbl
      ORDER  BY c.ordinal_position   -- deterministic order; previously undefined
   );

   FOREACH col IN ARRAY tmp
   LOOP
      RAISE NOTICE 'col: %', col;
   END LOOP;

   RETURN tmp;
END
$func$;
我想将我从之前的问题中得到的“前向填充”函数应用到 table 的每一列。 UPDATE
似乎是正确的方法。所以这是前面的函数,我用 execute
更新替换 raise notice
这样我就可以传入 table 名称:
-- col_collect (second attempt): for each column of `tbl`, concatenate and EXECUTE an
-- UPDATE statement that is meant to forward-fill that column. Returns nothing.
--
-- NOTE(review): the statement assembled in the loop is not valid PostgreSQL, which is
-- the syntax error discussed in the surrounding text:
--   * UPDATE has no WINDOW clause, and a SET expression cannot carry an "AS <alias>".
--   * The text calls gapfill(...), while the aggregate created in this file is
--     named gap_fill.
--   * '<col>.row_num' uses a column name where a table reference is expected, and
--     "row_num" is hard-coded there instead of using the row_num parameter.
--   * tbl, col, id and row_num are concatenated without quoting (no quote_ident() /
--     format('%I')), so unusual names would break or alter the statement.
-- The column list comes from information_schema.columns filtered by table_name only,
-- so it also contains the partition and ordering columns themselves.
create or replace function col_collect(tbl text, id text, row_num text)
returns void
language plpgsql as
$func$
declare
tmp text[];
col text;
begin
select array (
select column_name
from information_schema."columns" c
where table_name = tbl
) into tmp;
foreach col in array tmp
loop
execute 'update '||tbl||'
set '||col||' = gapfill('||col||') OVER w AS '||col||'
where '||tbl||'.row_num = '||col||'.row_num
window w as (PARTITION BY '||id||' ORDER BY '||row_num||')
returning *;';
end loop;
end
$func$;
-- Example call: table name first, then the partition column and the ordering column.
select col_collect('example','id','row_num')
上面的函数会报语法错误。我已经尝试了很多变体,但都失败了。SO 上有一些有用的相关答案(原文中的链接已在提取时丢失)。我尝试应用的聚合函数(用作 window 函数)是:
-- gap_fill_internal: state transition function for the gap_fill aggregate.
--
-- Parameters:
--   s  state so far (the value being carried forward)
--   v  current input value
-- Returns: v when it is not NULL, otherwise s.
--
-- Deliberately NOT declared STRICT: it must be called for NULL inputs so the
-- previous state can be returned. The result depends only on the arguments, so the
-- function is marked IMMUTABLE and PARALLEL SAFE instead of the default
-- VOLATILE / PARALLEL UNSAFE.
CREATE OR REPLACE FUNCTION gap_fill_internal(s anyelement, v anyelement)
  RETURNS anyelement
  LANGUAGE plpgsql
  IMMUTABLE
  PARALLEL SAFE AS
$func$
BEGIN
   RETURN COALESCE(v, s);  -- that's all!
END
$func$;
-- gap_fill: polymorphic aggregate whose state is the most recent non-NULL input
-- (the transition function returns COALESCE(input, state)). No initial condition is
-- given, so the state starts out NULL.
CREATE AGGREGATE gap_fill (anyelement) (
   STYPE = anyelement
 , SFUNC = gap_fill_internal
);
我的问题是:
- 这是一个好方法吗?如果是,我做错了什么;或
- 有更好的方法吗?
你问的可不是小事。您应该先熟练掌握 PL/pgSQL。我不建议初学者使用这种动态 SQL 查询——它的威力太大了。
话虽如此,让我们开始吧。系好安全带!
-- f_gap_fill_update: forward-fill NULLs in every nullable data column of a table,
-- in place, by generating and executing a single UPDATE.
--
-- Parameters:
--   _tbl      target table; regclass validates the name and renders it safely quoted
--   _id       name of the column to PARTITION BY
--   _row_num  name of the column to ORDER BY within each partition; also used to
--             join the computed rows back to the table, so it is presumably unique
--             -- verify for your data
-- Returns (OUT parameters):
--   nullable_columns  number of columns that are gap-filled
--   updated_rows      number of rows actually changed (ROW_COUNT of the UPDATE)
-- Raises: an exception when the table has no nullable column besides _id / _row_num.
--
-- Requires the aggregate gap_fill(anyelement).
CREATE OR REPLACE FUNCTION f_gap_fill_update(_tbl regclass, _id text, _row_num text, OUT nullable_columns int, OUT updated_rows int)
  LANGUAGE plpgsql AS
$func$
DECLARE
   _pk  text := quote_ident(_row_num);
   _sql text;
BEGIN
   SELECT INTO _sql, nullable_columns
          concat_ws(E'\n'
           , format('UPDATE %s t', _tbl)
           -- Explicit ROW(): "SET (c) = (u.c)" with a single column is rejected by
           -- PostgreSQL 10+, because "(u.c)" is a plain parenthesized expression.
           , 'SET   (' || string_agg(quote_ident(a.attname), ', ') || ')'
           , '  = ROW(' || string_agg('u.' || quote_ident(a.attname), ', ') || ')'
           , 'FROM  ('
           , '   SELECT ' || _pk
           , '        , ' || string_agg(format('gap_fill(%1$I) OVER w AS %1$I', a.attname), ', ')
           , '   FROM   ' || _tbl
           , format('   WINDOW w AS (PARTITION BY %I ORDER BY %s)', _id, _pk)
           , '   ) u'
           , format('WHERE t.%1$s = u.%1$s', _pk)
           -- Skip rows that would not change, to avoid empty updates.
           , 'AND   ROW(' || string_agg('t.' || quote_ident(a.attname), ', ') || ') IS DISTINCT FROM'
           , '      ROW(' || string_agg('u.' || quote_ident(a.attname), ', ') || ')'
          )
        , count(*)
   FROM  (
      SELECT a.attname
      FROM   pg_attribute a
      WHERE  a.attrelid = _tbl
      AND    a.attnum > 0               -- user columns only
      AND    NOT a.attisdropped
      AND    NOT a.attnotnull           -- NOT NULL columns cannot have gaps
      -- Never fill the partition / ordering columns themselves. Including a
      -- nullable ordering column made the subquery output it twice, so the join
      -- condition failed with an ambiguous column reference.
      AND    a.attname::text NOT IN (_id, _row_num)
      ORDER  BY a.attnum
      ) a;

   IF nullable_columns = 0 THEN
      RAISE EXCEPTION 'No nullable columns found in table >>%<<', _tbl;
   ELSIF _sql IS NULL THEN
      RAISE EXCEPTION 'SQL string is NULL. Should not occur!';
   END IF;

   -- RAISE NOTICE '%', _sql;         -- debug
   EXECUTE _sql;
   GET DIAGNOSTICS updated_rows = ROW_COUNT;
END
$func$;
调用示例:
-- Gap-fill table "example": partition by id, order by row_num.
SELECT nullable_columns
     , updated_rows
FROM   f_gap_fill_update('example', 'id', 'row_num');
db<>fiddle here
该函数采用了当前的最佳实践。它会生成并执行以下形式的查询:
-- Illustration of the statement shape that f_gap_fill_update generates and executes.
-- tbl, str, val and col1 are placeholder names; row_num is the ordering / join column
-- and id the partition column.
UPDATE tbl t
SET (str, val, col1)
= (u.str, u.val, u.col1)
FROM (
SELECT row_num
, gap_fill(str) OVER w AS str, gap_fill(val) OVER w AS val
, gap_fill(col1) OVER w AS col1
FROM tbl
WINDOW w AS (PARTITION BY id ORDER BY row_num)
) u
WHERE t.row_num = u.row_num
-- Only rows whose filled values differ from the stored ones are written.
AND (t.str, t.val, t.col1) IS DISTINCT FROM
(u.str, u.val, u.col1)
使用 pg_catalog.pg_attribute
而不是信息架构。参见:
请注意最后的 WHERE
子句以防止(可能 昂贵)空更新。只会写入实际更改的行。参见:
- How do I (or can I) SELECT DISTINCT on multiple columns?
此外,甚至只会考虑可为空的列(未定义 NOT NULL
),以避免不必要的工作。
在 UPDATE
中使用 ROW
语法以保持代码简单。参见:
- SQL update fields of one table from fields of another one
函数返回两个整数值:nullable_columns 和 updated_rows,含义如其名称所示。
函数正确防御SQL注入。参见:
- Table name as a PostgreSQL function parameter
- SQL injection in Postgres functions vs prepared queries
- Calculate number of rows affected by batch query in PostgreSQL
以上函数执行更新,但不返回行。下面是一个基本演示,说明如何返回不同(多态)类型的行:
-- f_gap_fill_select: return all rows of a table with NULLs in its nullable data
-- columns forward-filled; the table itself is not modified.
--
-- Parameters:
--   _tbl_type  a (NULL) value of the table's row type, e.g. NULL::example; it only
--              conveys the table and the polymorphic return type
--   _id        name of the column to PARTITION BY
--   _row_num   name of the column to ORDER BY within each partition
-- Returns: SETOF the table's row type, columns in table definition order.
-- Raises: an exception if no SQL string could be built.
--
-- NOT NULL columns and the _id / _row_num columns are passed through unchanged.
-- Filling the key columns would fabricate key values for rows whose key is NULL,
-- and would be inconsistent with f_gap_fill_update.
--
-- Requires the aggregate gap_fill(anyelement).
CREATE OR REPLACE FUNCTION f_gap_fill_select(_tbl_type anyelement, _id text, _row_num text)
  RETURNS SETOF anyelement
  LANGUAGE plpgsql AS
$func$
DECLARE
   -- Resolve the table from the row type of the polymorphic argument.
   _tbl regclass := pg_typeof(_tbl_type)::text::regclass;
   _sql text;
BEGIN
   SELECT INTO _sql
          'SELECT ' || string_agg(CASE WHEN a.attnotnull
                                         OR a.attname::text IN (_id, _row_num)
                                       THEN format('%I', a.attname)
                                       ELSE format('gap_fill(%1$I) OVER w AS %1$I', a.attname)
                                  END
                                , ', ' ORDER BY a.attnum)
       || E'\nFROM ' || _tbl
       || format(E'\nWINDOW w AS (PARTITION BY %I ORDER BY %I)', _id, _row_num)
   FROM   pg_attribute a
   WHERE  a.attrelid = _tbl
   AND    a.attnum > 0               -- user columns only
   AND    NOT a.attisdropped;

   IF _sql IS NULL THEN
      RAISE EXCEPTION 'SQL string is NULL. Should not occur!';
   END IF;

   -- RAISE NOTICE '%', _sql;        -- debug
   RETURN QUERY EXECUTE _sql;
END
$func$;
调用(注意特殊语法!):
-- The first argument is a typed NULL: it only tells the function which table / row
-- type to use. The output columns depend on that type, hence the star.
SELECT filled.*
FROM   f_gap_fill_select(NULL::example, 'id', 'row_num') AS filled;
db<>fiddle here
关于 return 多态行类型:
- Refactor a PL/pgSQL function to return the output of various SELECT queries