PostgreSQL:聚合列表中的名称组合
PostgreSQL: Aggregating combinations of names in list
我有一个看起来像这样的数据库结构:
Table "public.person"
Column | Type | Collation | Nullable | Default
--------+---------+-----------+----------+---------
id | integer | | not null |
Table "public.person_name"
Column | Type | Collation | Nullable | Default
--------+-------------------+-----------+----------+---------
person | integer | | not null |
name | character varying | | |
Foreign-key constraints:
"person_name_person_fkey" FOREIGN KEY (person) REFERENCES person(id)
Table "public.event"
Column | Type | Collation | Nullable | Default
--------+-------------------+-----------+----------+---------
id | integer | | not null |
name | character varying | | |
Table "public.attendee"
Column | Type | Collation | Nullable | Default
--------+---------+-----------+----------+---------
event | integer | | |
person | integer | | |
Foreign-key constraints:
"attendee_event_fkey" FOREIGN KEY (event) REFERENCES public.event(id)
"attendee_person_fkey" FOREIGN KEY (person) REFERENCES person(id)
一些示例数据:
person:
id
----
0
1
2
3
person_name:
person | name
--------+-----------
0 | Alex
0 | Alexander
1 | Barbara
1 | Barb
2 | Cecilia
3 | Dave
3 | David
event:
id | name
----+------------
0 | Wedding
1 | Party
2 | Funeral
attendee:
event | person
-------+--------
0 | 0
0 | 1
0 | 2
1 | 1
1 | 2
2 | 2
2 | 3
我想写一条 SELECT 语句,返回所有事件,并且针对每个事件中全部与会者昵称的每一种组合各输出一行,如下所示:
event_id | event_name | attendee_list
----------+------------+---------------
0 | Wedding | Alex, Barbara, Cecilia
0 | Wedding | Alexander, Barbara, Cecilia
0 | Wedding | Alex, Barb, Cecilia
0 | Wedding | Alexander, Barb, Cecilia
1 | Party | Barbara, Cecilia
1 | Party | Barb, Cecilia
2 | Funeral | Cecilia, Dave
2 | Funeral | Cecilia, David
我最初的直觉是将所有表连接在一起,按事件分组,然后使用 string_agg,但这样会把每个人的所有昵称都放进列表中(当然,因为它是对整个连接结果进行聚合)。我的第二次尝试是在子查询中 SELECT 与会者姓名,但子查询不能返回多行。我也尝试过使用数组进行聚合(如此处所述),但无法聚合维度不同的数组。最后,我尝试了此处描述的某种“递归魔法”,但发现很难把它套用到我的问题上,最终没能让它工作。
我想我已经借助之前链接的“递归魔法”弄明白了。难点在于我的真实数据稍微复杂一些:每个与会者在列表中都有一个“位置”,而这并不总是能与 r.id < t.id 约束配合使用。下面是一个适用于问题中示例数据的查询:
-- For each event, build every ordered sequence of attendee nicknames that
-- contains exactly one nickname per attendee. Because no ordering constraint
-- is placed on the ids, each nickname combination appears once per possible
-- attendee order.
WITH RECURSIVE recur AS (
    -- Seed: every (attendee, nickname) pair starts its own one-element chain.
    SELECT
        ARRAY[pn.person] AS persons,
        ARRAY[pn.name] AS names,
        a.event
    FROM attendee AS a
    INNER JOIN person_name AS pn
        ON pn.person = a.person
    UNION ALL
    -- Step: append a nickname of another attendee of the same event whose id
    -- is not yet present in the chain.
    SELECT
        r.persons || t.person,
        r.names || t.name,
        a.event
    FROM recur AS r
    INNER JOIN attendee AS a
        ON a.event = r.event
    INNER JOIN person_name AS t
        ON t.person = a.person
        AND t.person <> ALL (r.persons)
)
SELECT
    recur.event,
    recur.names
FROM recur
-- Keep only chains holding as many names as the event has attendee rows;
-- shorter chains are intermediate recursion states.
WHERE (
    SELECT count(*)
    FROM attendee AS a
    WHERE a.event = recur.event
) = cardinality(recur.names);
这个查询还会为与会者的每一种可能的排列顺序各返回一行,但我可以接受(如前所述,我的真实数据中有一个“位置”字段可以限制顺序)。如果只需要一种顺序,就必须在数据中的某处指定排序依据,例如加上 r.id < t.id 这样的条件即可。
下面是完成此操作的递归查询。我先构造一个人员 ID 数组,然后在递归的每个阶段,将下一个 ID 与 person_name 表进行连接:
-- Return one row per event for every combination of attendee nicknames,
-- rendered as a comma-separated list.
WITH RECURSIVE recur AS (
    -- Anchor: one row per event carrying the full list of attendee ids, an
    -- empty name list, and the 1-based position of the next id to resolve.
    SELECT
        event.id AS event_id,
        event.name AS event_name,
        -- ORDER BY inside the aggregate fixes the attendee order; without it
        -- the element order of array_agg is unspecified.
        array_agg(attendee.person ORDER BY attendee.person) AS person_id_list,
        ARRAY[]::text[] AS person_name_list,
        1 AS next_idx
    FROM event
    INNER JOIN attendee
        ON attendee.event = event.id
    GROUP BY event.id, event.name
    UNION ALL
    -- Step: resolve the id at position next_idx to each of that person's
    -- names, producing one output row per name, then advance the position.
    SELECT
        recur.event_id,
        recur.event_name,
        recur.person_id_list,
        -- person_name.name is character varying; cast it so the appended
        -- element matches the text[] accumulator declared in the anchor.
        recur.person_name_list || person_name.name::text,
        recur.next_idx + 1
    FROM recur
    INNER JOIN person_name
        ON person_name.person = recur.person_id_list[recur.next_idx]
    -- Stop once every position in the id list has been consumed.
    WHERE cardinality(recur.person_id_list) >= recur.next_idx
)
SELECT
    recur.event_id,
    recur.event_name,
    array_to_string(recur.person_name_list, ', ') AS attendee_list
FROM recur
-- Only rows whose position has moved past the end of the id list hold a
-- complete combination; the rest are intermediate recursion states.
WHERE cardinality(recur.person_id_list) < recur.next_idx
-- attendee_list is a tie-breaker so the row order is deterministic.
ORDER BY event_id, attendee_list;
我有一个看起来像这样的数据库结构:
Table "public.person"
Column | Type | Collation | Nullable | Default
--------+---------+-----------+----------+---------
id | integer | | not null |
Table "public.person_name"
Column | Type | Collation | Nullable | Default
--------+-------------------+-----------+----------+---------
person | integer | | not null |
name | character varying | | |
Foreign-key constraints:
"person_name_person_fkey" FOREIGN KEY (person) REFERENCES person(id)
Table "public.event"
Column | Type | Collation | Nullable | Default
--------+-------------------+-----------+----------+---------
id | integer | | not null |
name | character varying | | |
Table "public.attendee"
Column | Type | Collation | Nullable | Default
--------+---------+-----------+----------+---------
event | integer | | |
person | integer | | |
Foreign-key constraints:
"attendee_event_fkey" FOREIGN KEY (event) REFERENCES public.event(id)
"attendee_person_fkey" FOREIGN KEY (person) REFERENCES person(id)
一些示例数据:
person:
id
----
0
1
2
3
person_name:
person | name
--------+-----------
0 | Alex
0 | Alexander
1 | Barbara
1 | Barb
2 | Cecilia
3 | Dave
3 | David
event:
id | name
----+------------
0 | Wedding
1 | Party
2 | Funeral
attendee:
event | person
-------+--------
0 | 0
0 | 1
0 | 2
1 | 1
1 | 2
2 | 2
2 | 3
我想写一条 SELECT 语句,返回所有事件,并且针对每个事件中全部与会者昵称的每一种组合各输出一行,如下所示:
event_id | event_name | attendee_list
----------+------------+---------------
0 | Wedding | Alex, Barbara, Cecilia
0 | Wedding | Alexander, Barbara, Cecilia
0 | Wedding | Alex, Barb, Cecilia
0 | Wedding | Alexander, Barb, Cecilia
1 | Party | Barbara, Cecilia
1 | Party | Barb, Cecilia
2 | Funeral | Cecilia, Dave
2 | Funeral | Cecilia, David
我最初的直觉是将所有表连接在一起,按事件分组,然后使用 string_agg,但这样会把每个人的所有昵称都放进列表中(当然,因为它是对整个连接结果进行聚合)。我的第二次尝试是在子查询中 SELECT 与会者姓名,但子查询不能返回多行。我也尝试过使用数组进行聚合(如此处所述),但无法聚合维度不同的数组。最后,我尝试了此处描述的某种“递归魔法”,但发现很难把它套用到我的问题上,最终没能让它工作。
我想我已经借助之前链接的“递归魔法”弄明白了。难点在于我的真实数据稍微复杂一些:每个与会者在列表中都有一个“位置”,而这并不总是能与 r.id < t.id 约束配合使用。下面是一个适用于问题中示例数据的查询:
-- For each event, build every ordered sequence of attendee nicknames that
-- contains exactly one nickname per attendee. Because no ordering constraint
-- is placed on the ids, each nickname combination appears once per possible
-- attendee order.
WITH RECURSIVE recur AS (
    -- Seed: every (attendee, nickname) pair starts its own one-element chain.
    SELECT
        ARRAY[pn.person] AS persons,
        ARRAY[pn.name] AS names,
        a.event
    FROM attendee AS a
    INNER JOIN person_name AS pn
        ON pn.person = a.person
    UNION ALL
    -- Step: append a nickname of another attendee of the same event whose id
    -- is not yet present in the chain.
    SELECT
        r.persons || t.person,
        r.names || t.name,
        a.event
    FROM recur AS r
    INNER JOIN attendee AS a
        ON a.event = r.event
    INNER JOIN person_name AS t
        ON t.person = a.person
        AND t.person <> ALL (r.persons)
)
SELECT
    recur.event,
    recur.names
FROM recur
-- Keep only chains holding as many names as the event has attendee rows;
-- shorter chains are intermediate recursion states.
WHERE (
    SELECT count(*)
    FROM attendee AS a
    WHERE a.event = recur.event
) = cardinality(recur.names);
这个查询还会为与会者的每一种可能的排列顺序各返回一行,但我可以接受(如前所述,我的真实数据中有一个“位置”字段可以限制顺序)。如果只需要一种顺序,就必须在数据中的某处指定排序依据,例如加上 r.id < t.id 这样的条件即可。
下面是完成此操作的递归查询。我先构造一个人员 ID 数组,然后在递归的每个阶段,用 person_name 表
加入了下一个 ID
-- Return one row per event for every combination of attendee nicknames,
-- rendered as a comma-separated list.
WITH RECURSIVE recur AS (
    -- Anchor: one row per event carrying the full list of attendee ids, an
    -- empty name list, and the 1-based position of the next id to resolve.
    SELECT
        event.id AS event_id,
        event.name AS event_name,
        -- ORDER BY inside the aggregate fixes the attendee order; without it
        -- the element order of array_agg is unspecified.
        array_agg(attendee.person ORDER BY attendee.person) AS person_id_list,
        ARRAY[]::text[] AS person_name_list,
        1 AS next_idx
    FROM event
    INNER JOIN attendee
        ON attendee.event = event.id
    GROUP BY event.id, event.name
    UNION ALL
    -- Step: resolve the id at position next_idx to each of that person's
    -- names, producing one output row per name, then advance the position.
    SELECT
        recur.event_id,
        recur.event_name,
        recur.person_id_list,
        -- person_name.name is character varying; cast it so the appended
        -- element matches the text[] accumulator declared in the anchor.
        recur.person_name_list || person_name.name::text,
        recur.next_idx + 1
    FROM recur
    INNER JOIN person_name
        ON person_name.person = recur.person_id_list[recur.next_idx]
    -- Stop once every position in the id list has been consumed.
    WHERE cardinality(recur.person_id_list) >= recur.next_idx
)
SELECT
    recur.event_id,
    recur.event_name,
    array_to_string(recur.person_name_list, ', ') AS attendee_list
FROM recur
-- Only rows whose position has moved past the end of the id list hold a
-- complete combination; the rest are intermediate recursion states.
WHERE cardinality(recur.person_id_list) < recur.next_idx
-- attendee_list is a tie-breaker so the row order is deterministic.
ORDER BY event_id, attendee_list;