Postgresql 按共同的数组元素分组
Postgresql Group by array elements in common
我有一张这样的表:
-- Sample schema: one row per person together with the foods they like.
CREATE TABLE preferences (
    name        varchar,
    preferences varchar[]
);

-- Fixture rows. John/Charlie/Lucy/Trudy are linked through shared items;
-- Beth shares nothing with anyone else.
INSERT INTO preferences (name, preferences)
VALUES
    ('John',    ARRAY['pizza', 'spaghetti']),
    ('Charlie', ARRAY['spaghetti', 'rice']),
    ('Lucy',    ARRAY['rice', 'potatoes']),
    ('Beth',    ARRAY['bread', 'cheese']),
    ('Trudy',   ARRAY['rice', 'milk']);
也就是说,表中的数据如下:
John {pizza, spaghetti}
Charlie {spaghetti, rice}
Lucy {rice, potatoes}
Beth {bread, cheese}
Trudy {rice, milk}
我想把所有偏好中存在共同元素的行分到同一组(即使只是通过其他人间接关联)。
所以在这个例子中,我希望最终得到:
{John,Charlie,Lucy,Trudy} {pizza,spaghetti,rice,potatoes,milk}
{Beth} {bread, cheese}
因为约翰的偏好与查理的偏好相交,而查理的偏好与露西和特鲁迪的偏好相交。
我已经有了这样的 array_intersection 函数:
-- Returns the elements that occur in both input arrays.
--   $1, $2 : arrays of the same element type
-- INTERSECT removes duplicates, and the order of the elements in the
-- returned array is not guaranteed. Returns an empty array when the
-- inputs have nothing in common.
CREATE OR REPLACE FUNCTION array_intersection(anyarray, anyarray)
RETURNS anyarray
language sql
as $FUNCTION$
    SELECT ARRAY(
        -- The parameters are unnamed, so they are referenced positionally.
        SELECT UNNEST($1)
        INTERSECT
        SELECT UNNEST($2)
    );
$FUNCTION$;
我也知道可以用 array_agg 函数来聚合数组,但我不知道如何把它们组合起来,得到我想要的分组,这正是我缺少的那一步。
这是一个典型的递归任务。你需要一个辅助函数来合并和排序两个数组:
-- Merges two arrays into a single array of their distinct elements,
-- sorted ascending.
--   arr1, arr2 : arrays of the same element type
-- Returns NULL when neither array contributes an element, because
-- array_agg over zero rows yields NULL.
create or replace function public.array_merge(arr1 anyarray, arr2 anyarray)
returns anyarray
language sql immutable
as $function$
    -- "union" already removes duplicate elements, so the aggregate only
    -- has to put them in order.
    select array_agg(merged.item order by merged.item)
    from (
        select unnest(arr1) as item
        union
        select unnest(arr2)
    ) as merged
$function$;
在递归查询中使用函数:
-- Groups people whose preference arrays overlap, directly or through a
-- chain of other people, and reports each group with its merged preferences.
with recursive cte(name, preferences) as (
    -- Anchor: every person starts with their own preferences.
    select p.name, p.preferences
    from preferences as p
    -- "union" (not "union all") discards rows that were already produced;
    -- since array_merge returns a sorted, de-duplicated array, the recursion
    -- stops once no person's array can grow any further.
    union
    -- Recursive step: extend a person's array with whatever has been
    -- accumulated so far for any other person it overlaps with.
    select p.name, array_merge(c.preferences, p.preferences)
    from cte as c
    inner join preferences as p
        on c.preferences && p.preferences
        and c.name <> p.name
)
select
    -- Ordered so the names array is deterministic.
    array_agg(s.name order by s.name) as names,
    s.preferences
from (
    -- Keep, per person, the largest array reached during the recursion.
    -- NOTE(review): this relies on the stored arrays not containing
    -- duplicate elements, otherwise an unmerged row could win on cardinality.
    select distinct on (name) name, preferences
    from cte
    order by name, cardinality(preferences) desc
) as s
group by s.preferences;
names | preferences
---------------------------+--------------------------------------
{Charlie,John,Lucy,Trudy} | {milk,pizza,potatoes,rice,spaghetti}
{Beth} | {bread,cheese}
(2 rows)
我有一张这样的表:
-- Sample schema: one row per person together with the foods they like.
CREATE TABLE preferences (
    name        varchar,
    preferences varchar[]
);

-- Fixture rows. John/Charlie/Lucy/Trudy are linked through shared items;
-- Beth shares nothing with anyone else.
INSERT INTO preferences (name, preferences)
VALUES
    ('John',    ARRAY['pizza', 'spaghetti']),
    ('Charlie', ARRAY['spaghetti', 'rice']),
    ('Lucy',    ARRAY['rice', 'potatoes']),
    ('Beth',    ARRAY['bread', 'cheese']),
    ('Trudy',   ARRAY['rice', 'milk']);
也就是说,表中的数据如下:
John {pizza, spaghetti}
Charlie {spaghetti, rice}
Lucy {rice, potatoes}
Beth {bread, cheese}
Trudy {rice, milk}
我想把所有偏好中存在共同元素的行分到同一组(即使只是通过其他人间接关联)。 所以在这个例子中,我希望最终得到:
{John,Charlie,Lucy,Trudy} {pizza,spaghetti,rice,potatoes,milk}
{Beth} {bread, cheese}
因为约翰的偏好与查理的偏好相交,而查理的偏好与露西和特鲁迪的偏好相交。
我已经有了这样的 array_intersection 函数:
-- Returns the elements that occur in both input arrays.
--   $1, $2 : arrays of the same element type
-- INTERSECT removes duplicates, and the order of the elements in the
-- returned array is not guaranteed. Returns an empty array when the
-- inputs have nothing in common.
CREATE OR REPLACE FUNCTION array_intersection(anyarray, anyarray)
RETURNS anyarray
language sql
as $FUNCTION$
    SELECT ARRAY(
        -- The parameters are unnamed, so they are referenced positionally.
        SELECT UNNEST($1)
        INTERSECT
        SELECT UNNEST($2)
    );
$FUNCTION$;
我也知道可以用 array_agg 函数来聚合数组,但我不知道如何把它们组合起来,得到我想要的分组,这正是我缺少的那一步。
这是一个典型的递归任务。你需要一个辅助函数来合并和排序两个数组:
-- Merges two arrays into a single array of their distinct elements,
-- sorted ascending.
--   arr1, arr2 : arrays of the same element type
-- Returns NULL when neither array contributes an element, because
-- array_agg over zero rows yields NULL.
create or replace function public.array_merge(arr1 anyarray, arr2 anyarray)
returns anyarray
language sql immutable
as $function$
    -- "union" already removes duplicate elements, so the aggregate only
    -- has to put them in order.
    select array_agg(merged.item order by merged.item)
    from (
        select unnest(arr1) as item
        union
        select unnest(arr2)
    ) as merged
$function$;
在递归查询中使用函数:
-- Groups people whose preference arrays overlap, directly or through a
-- chain of other people, and reports each group with its merged preferences.
with recursive cte(name, preferences) as (
    -- Anchor: every person starts with their own preferences.
    select p.name, p.preferences
    from preferences as p
    -- "union" (not "union all") discards rows that were already produced;
    -- since array_merge returns a sorted, de-duplicated array, the recursion
    -- stops once no person's array can grow any further.
    union
    -- Recursive step: extend a person's array with whatever has been
    -- accumulated so far for any other person it overlaps with.
    select p.name, array_merge(c.preferences, p.preferences)
    from cte as c
    inner join preferences as p
        on c.preferences && p.preferences
        and c.name <> p.name
)
select
    -- Ordered so the names array is deterministic.
    array_agg(s.name order by s.name) as names,
    s.preferences
from (
    -- Keep, per person, the largest array reached during the recursion.
    -- NOTE(review): this relies on the stored arrays not containing
    -- duplicate elements, otherwise an unmerged row could win on cardinality.
    select distinct on (name) name, preferences
    from cte
    order by name, cardinality(preferences) desc
) as s
group by s.preferences;
names | preferences
---------------------------+--------------------------------------
{Charlie,John,Lucy,Trudy} | {milk,pizza,potatoes,rice,spaghetti}
{Beth} | {bread,cheese}
(2 rows)