在 Vertica 中连接字符串的聚合函数
aggregate function to concatenate strings in Vertica
在 Vertica 中有一张名为 test 的表,内容如下:
ID | name
1 | AA
2 | AB
2 | AC
3 | AD
3 | AE
3 | AF
如何使用聚合函数或如何编写查询来获取这样的数据(vertica 语法)?
ID | ag
1 | AA
2 | AB, AC
3 | AD, AE, AF
首先,您需要编译 agg_concatenate 所用的 UDx(用户自定义扩展)。
-- Shell commands
# Build the SDK's example Concatenate aggregate into a shared library that
# Vertica can load; the output Concatenate.so lands in the examples directory.
cd /opt/vertica/sdk/examples/AggregateFunctions/
g++ \
    -D HAVE_LONG_INT_64 \
    -I /opt/vertica/sdk/include \
    -Wall \
    -shared \
    -Wno-unused-value \
    -fPIC \
    -o Concatenate.so \
    Concatenate.cpp \
    /opt/vertica/sdk/include/Vertica.cpp
-- vsql commands
-- Register the compiled shared object as a Vertica library.
CREATE LIBRARY AggregateFunctionsConcatenate
    AS '/opt/vertica/sdk/examples/AggregateFunctions/Concatenate.so';

-- Expose the library's ConcatenateFactory as the SQL aggregate agg_concatenate.
CREATE AGGREGATE FUNCTION agg_concatenate
    AS LANGUAGE 'C++'
    NAME 'ConcatenateFactory'
    LIBRARY AggregateFunctionsConcatenate;
然后你可以这样查询:
-- One row per id with its names joined by ', '. Each name is suffixed with
-- ', ' before aggregation, then RTRIM removes the trailing ', ' from the result.
SELECT
    id,
    RTRIM(agg_concatenate(name || ', '), ', ') AS ag
FROM mytable
GROUP BY id
ORDER BY id
使用 rtrim 删除最后一个 ', '。
如果您需要让聚合结果按特定顺序拼接,可能需要先在内联视图或 WITH 子句中完成 select/排序。
-- Concatenate the names of each id without a UDx: number the names inside
-- each id group, then stitch positions 1..12 together with ', '.
-- Limitation: names beyond the 12th in a group are silently dropped; add more
-- COALESCE lines to raise that cap.
SELECT
    a.id,
    -- Position 1 always exists for a non-empty group, so it needs no COALESCE.
    MAX(CASE WHEN a.rn = 1 THEN a.name END)
        -- Missing positions yield NULL; COALESCE to '' so the || chain is not nulled out.
        || COALESCE(MAX(CASE WHEN a.rn = 2 THEN ', ' || a.name END), '')
        || COALESCE(MAX(CASE WHEN a.rn = 3 THEN ', ' || a.name END), '')
        || COALESCE(MAX(CASE WHEN a.rn = 4 THEN ', ' || a.name END), '')
        || COALESCE(MAX(CASE WHEN a.rn = 5 THEN ', ' || a.name END), '')
        || COALESCE(MAX(CASE WHEN a.rn = 6 THEN ', ' || a.name END), '')
        || COALESCE(MAX(CASE WHEN a.rn = 7 THEN ', ' || a.name END), '')
        || COALESCE(MAX(CASE WHEN a.rn = 8 THEN ', ' || a.name END), '')
        || COALESCE(MAX(CASE WHEN a.rn = 9 THEN ', ' || a.name END), '')
        || COALESCE(MAX(CASE WHEN a.rn = 10 THEN ', ' || a.name END), '')
        || COALESCE(MAX(CASE WHEN a.rn = 11 THEN ', ' || a.name END), '')
        || COALESCE(MAX(CASE WHEN a.rn = 12 THEN ', ' || a.name END), '') AS ag
FROM (
    -- Number rows per id (ordered by name) so rn is the name's position within
    -- its id group. Partitioning by name instead would give every distinct
    -- name rn = 1 and collapse each group to a single value.
    SELECT
        id,
        name,
        ROW_NUMBER() OVER (PARTITION BY id ORDER BY name) AS rn
    FROM test
) AS a
GROUP BY a.id
ORDER BY a.id;
另一种方法是使用 GitHub 上 strings 包中的 GROUP_CONCAT。
-- GROUP_CONCAT is invoked with an OVER clause: rows are partitioned by id and
-- the names within each partition are supplied in name order.
SELECT
    id,
    group_concat(name) OVER (PARTITION BY id ORDER BY name) AS ag
FROM mytable
但是,此方法存在一些限制:分析型 UDx 不允许在同一查询中包含其他聚合(若要加入更多数据,必须将其放入内联视图或 WITH 子句中)。
在 Vertica 中有一张名为 test 的表,内容如下:
ID | name
1 | AA
2 | AB
2 | AC
3 | AD
3 | AE
3 | AF
如何使用聚合函数或如何编写查询来获取这样的数据(vertica 语法)?
ID | ag
1 | AA
2 | AB, AC
3 | AD, AE, AF
首先,您需要编译 agg_concatenate 所用的 UDx(用户自定义扩展)。
-- Shell commands
# Build the SDK's example Concatenate aggregate into a shared library that
# Vertica can load; the output Concatenate.so lands in the examples directory.
cd /opt/vertica/sdk/examples/AggregateFunctions/
g++ \
    -D HAVE_LONG_INT_64 \
    -I /opt/vertica/sdk/include \
    -Wall \
    -shared \
    -Wno-unused-value \
    -fPIC \
    -o Concatenate.so \
    Concatenate.cpp \
    /opt/vertica/sdk/include/Vertica.cpp
-- vsql commands
-- Register the compiled shared object as a Vertica library.
CREATE LIBRARY AggregateFunctionsConcatenate
    AS '/opt/vertica/sdk/examples/AggregateFunctions/Concatenate.so';

-- Expose the library's ConcatenateFactory as the SQL aggregate agg_concatenate.
CREATE AGGREGATE FUNCTION agg_concatenate
    AS LANGUAGE 'C++'
    NAME 'ConcatenateFactory'
    LIBRARY AggregateFunctionsConcatenate;
然后你可以这样查询:
-- One row per id with its names joined by ', '. Each name is suffixed with
-- ', ' before aggregation, then RTRIM removes the trailing ', ' from the result.
SELECT
    id,
    RTRIM(agg_concatenate(name || ', '), ', ') AS ag
FROM mytable
GROUP BY id
ORDER BY id
使用 rtrim 删除最后一个 ', '。
如果您需要让聚合结果按特定顺序拼接,可能需要先在内联视图或 WITH 子句中完成 select/排序。
-- Concatenate the names of each id without a UDx: number the names inside
-- each id group, then stitch positions 1..12 together with ', '.
-- Limitation: names beyond the 12th in a group are silently dropped; add more
-- COALESCE lines to raise that cap.
SELECT
    a.id,
    -- Position 1 always exists for a non-empty group, so it needs no COALESCE.
    MAX(CASE WHEN a.rn = 1 THEN a.name END)
        -- Missing positions yield NULL; COALESCE to '' so the || chain is not nulled out.
        || COALESCE(MAX(CASE WHEN a.rn = 2 THEN ', ' || a.name END), '')
        || COALESCE(MAX(CASE WHEN a.rn = 3 THEN ', ' || a.name END), '')
        || COALESCE(MAX(CASE WHEN a.rn = 4 THEN ', ' || a.name END), '')
        || COALESCE(MAX(CASE WHEN a.rn = 5 THEN ', ' || a.name END), '')
        || COALESCE(MAX(CASE WHEN a.rn = 6 THEN ', ' || a.name END), '')
        || COALESCE(MAX(CASE WHEN a.rn = 7 THEN ', ' || a.name END), '')
        || COALESCE(MAX(CASE WHEN a.rn = 8 THEN ', ' || a.name END), '')
        || COALESCE(MAX(CASE WHEN a.rn = 9 THEN ', ' || a.name END), '')
        || COALESCE(MAX(CASE WHEN a.rn = 10 THEN ', ' || a.name END), '')
        || COALESCE(MAX(CASE WHEN a.rn = 11 THEN ', ' || a.name END), '')
        || COALESCE(MAX(CASE WHEN a.rn = 12 THEN ', ' || a.name END), '') AS ag
FROM (
    -- Number rows per id (ordered by name) so rn is the name's position within
    -- its id group. Partitioning by name instead would give every distinct
    -- name rn = 1 and collapse each group to a single value.
    SELECT
        id,
        name,
        ROW_NUMBER() OVER (PARTITION BY id ORDER BY name) AS rn
    FROM test
) AS a
GROUP BY a.id
ORDER BY a.id;
另一种方法是使用 GitHub 上 strings 包中的 GROUP_CONCAT。
-- GROUP_CONCAT is invoked with an OVER clause: rows are partitioned by id and
-- the names within each partition are supplied in name order.
SELECT
    id,
    group_concat(name) OVER (PARTITION BY id ORDER BY name) AS ag
FROM mytable
但是,此方法存在一些限制:分析型 UDx 不允许在同一查询中包含其他聚合(若要加入更多数据,必须将其放入内联视图或 WITH 子句中)。