具有自引用连接条件的 Postgres 计数
Postgres count with self referential join condition
给定以下结构
-- Products. Each product belongs to a subcategory and may point at another
-- product through stack_id.
CREATE TABLE products (
    id integer NOT NULL,
    subcategory_id integer,
    -- Self-reference to products.id; NULL in the sample data for stack parents.
    stack_id integer
);
-- Subcategories that products are grouped under (matched on products.subcategory_id).
CREATE TABLE subcategories (
    id integer NOT NULL,
    name character varying(255)
);
其中 products.stack_id 是指向 products 表自身的自引用关系。
我想做的基本上是:按以下条件将产品与子类别连接并进行计数
products.subcategory_id = subcategories.id
但将计数限制为每个不同的堆栈组一次。
示例子类别表 (subcategories)
id name
1 subcategory_1
2 subcategory_2
3 subcategory_3
示例产品表 (products)
id subcategory_id stack_id
1 1 NULL
2 1 1
3 2 1
4 3 1
5 2 NULL
6 2 5
7 2 5
8 2 NULL
9 3 8
10 3 8
期望的示例输出
id name total
1 subcategory_1 1 (row 1)
2 subcategory_2 3 (row 1 + row 5 + row 8)
3 subcategory_3 2 (row 1 + 8)
输出解释
子类别 ID 1
如果我对产品进行简单的连接,我会得到产品 (1, 2)。我只想要不同父对象(stack_id 为空的产品)的数量,因此产品 1 计一次;产品 2 引用的产品 1 已被计数,所以不会增加计数。
子类别 ID 2
连接结果是 (3, 5, 6, 7, 8)。产品 3 的 stack_id 是 1,所以计 1 次。产品 5、6 和 7 都引用 5,所以合计 1 次。产品 8 计 1 次。
子类别 3
连接是 (4, 9, 10)。 4 引用 1,9 和 10 都引用 8。
更新
删除了额外的可能混淆的列,添加了示例数据和输出
如果引用的最大深度是一级,那么这个简单的查询就可以完成工作:
-- Count distinct stacks per subcategory when references are at most one level deep.
-- A product without a parent (stack_id is null) acts as the root of its own stack.
with stack_roots as (
    select distinct
        p.subcategory_id,
        coalesce(p.stack_id, p.id) as stack_id
    from products as p
)
select
    r.subcategory_id,
    s.name,
    count(*)
from stack_roots as r
inner join subcategories as s
    on s.id = r.subcategory_id
group by r.subcategory_id, s.name
order by r.subcategory_id, s.name;
subcategory_id | name | count
----------------+---------------+-------
1 | subcategory_1 | 1
2 | subcategory_2 | 3
3 | subcategory_3 | 2
(3 rows)
此递归查询也适用于比一层更深的引用:
-- For every product, build the chain of ids reached by following stack_id upwards.
-- stack lists the most distant ancestor reached first and the product's own id last.
with recursive pr(id, subcategory_id, stack_id, stack) as (
    -- Anchor: each product starts with a chain that contains only itself.
    select id, subcategory_id, stack_id, array[id]
    from products
    union
    -- Step: prepend the parent's id and continue from the parent's own stack_id.
    select pr.id, pr.subcategory_id, products.stack_id, pr.stack_id || pr.stack
    from pr
    join products on pr.stack_id = products.id
    -- Guard: stop when the parent is already in the chain, so cyclic stack_id data
    -- (e.g. a product that references itself) cannot recurse forever.
    where products.id <> all(pr.stack)
)
-- A product appears once per chain length; keep only its longest chain.
select distinct on (id) id, subcategory_id, stack
from pr
order by id, array_length(stack, 1) desc;
id | subcategory_id | stack
----+----------------+--------
1 | 1 | {1}
2 | 1 | {1,2}
3 | 2 | {1,3}
4 | 3 | {1,4}
5 | 2 | {5}
6 | 2 | {5,6}
7 | 2 | {5,7}
8 | 2 | {8}
9 | 3 | {8,9}
10 | 3 | {8,10}
(10 rows)
将上述结果集与子类别表连接:
-- Count distinct stack roots per subcategory, following stack_id to any depth.
with recursive pr(id, subcategory_id, stack_id, stack) as (
    -- Anchor: each product starts with a chain that contains only itself.
    select id, subcategory_id, stack_id, array[id]
    from products
    union
    -- Step: prepend the parent's id and continue from the parent's own stack_id.
    select pr.id, pr.subcategory_id, products.stack_id, pr.stack_id || pr.stack
    from pr
    join products on pr.stack_id = products.id
    -- Guard: stop when the parent is already in the chain, so cyclic stack_id data
    -- cannot recurse forever.
    where products.id <> all(pr.stack)
),
-- One row per product: its longest chain.
full_chain as (
    select distinct on (id) id, subcategory_id, stack
    from pr
    order by id, array_length(stack, 1) desc
),
-- One row per (subcategory, first chain element) pair, so each stack counts once.
stack_roots as (
    select distinct subcategory_id, stack[1] as root_id
    from full_chain
)
select
    r.subcategory_id,
    s.name,
    count(*)
from stack_roots as r
join subcategories as s
    on s.id = r.subcategory_id
group by r.subcategory_id, s.name
order by r.subcategory_id, s.name;
subcategory_id | name | count
----------------+---------------+-------
1 | subcategory_1 | 1
2 | subcategory_2 | 3
3 | subcategory_3 | 2
(3 rows)
给定以下结构
-- Products. Each product belongs to a subcategory and may point at another
-- product through stack_id.
CREATE TABLE products (
    id integer NOT NULL,
    subcategory_id integer,
    -- Self-reference to products.id; NULL in the sample data for stack parents.
    stack_id integer
);
-- Subcategories that products are grouped under (matched on products.subcategory_id).
CREATE TABLE subcategories (
    id integer NOT NULL,
    name character varying(255)
);
其中 products.stack_id 是指向 products 表自身的自引用关系。
我想做的基本上是:按以下条件将产品与子类别连接并进行计数
products.subcategory_id = subcategories.id
但将计数限制为每个不同的堆栈组一次。
示例子类别表 (subcategories)
id name
1 subcategory_1
2 subcategory_2
3 subcategory_3
示例产品表 (products)
id subcategory_id stack_id
1 1 NULL
2 1 1
3 2 1
4 3 1
5 2 NULL
6 2 5
7 2 5
8 2 NULL
9 3 8
10 3 8
期望的示例输出
id name total
1 subcategory_1 1 (row 1)
2 subcategory_2 3 (row 1 + row 5 + row 8)
3 subcategory_3 2 (row 1 + 8)
输出解释
子类别 ID 1
如果我对产品进行简单的连接,我会得到产品 (1, 2)。我只想要不同父对象(stack_id 为空的产品)的数量,因此产品 1 计一次;产品 2 引用的产品 1 已被计数,所以不会增加计数。
子类别 ID 2
连接结果是 (3, 5, 6, 7, 8)。产品 3 的 stack_id 是 1,所以计 1 次。产品 5、6 和 7 都引用 5,所以合计 1 次。产品 8 计 1 次。
子类别 3
连接是 (4, 9, 10)。 4 引用 1,9 和 10 都引用 8。
更新
删除了额外的可能混淆的列,添加了示例数据和输出
如果引用的最大深度是一级,那么这个简单的查询就可以完成工作:
-- Count distinct stacks per subcategory when references are at most one level deep.
-- A product without a parent (stack_id is null) acts as the root of its own stack.
with stack_roots as (
    select distinct
        p.subcategory_id,
        coalesce(p.stack_id, p.id) as stack_id
    from products as p
)
select
    r.subcategory_id,
    s.name,
    count(*)
from stack_roots as r
inner join subcategories as s
    on s.id = r.subcategory_id
group by r.subcategory_id, s.name
order by r.subcategory_id, s.name;
subcategory_id | name | count
----------------+---------------+-------
1 | subcategory_1 | 1
2 | subcategory_2 | 3
3 | subcategory_3 | 2
(3 rows)
此递归查询也适用于比一层更深的引用:
-- For every product, build the chain of ids reached by following stack_id upwards.
-- stack lists the most distant ancestor reached first and the product's own id last.
with recursive pr(id, subcategory_id, stack_id, stack) as (
    -- Anchor: each product starts with a chain that contains only itself.
    select id, subcategory_id, stack_id, array[id]
    from products
    union
    -- Step: prepend the parent's id and continue from the parent's own stack_id.
    select pr.id, pr.subcategory_id, products.stack_id, pr.stack_id || pr.stack
    from pr
    join products on pr.stack_id = products.id
    -- Guard: stop when the parent is already in the chain, so cyclic stack_id data
    -- (e.g. a product that references itself) cannot recurse forever.
    where products.id <> all(pr.stack)
)
-- A product appears once per chain length; keep only its longest chain.
select distinct on (id) id, subcategory_id, stack
from pr
order by id, array_length(stack, 1) desc;
id | subcategory_id | stack
----+----------------+--------
1 | 1 | {1}
2 | 1 | {1,2}
3 | 2 | {1,3}
4 | 3 | {1,4}
5 | 2 | {5}
6 | 2 | {5,6}
7 | 2 | {5,7}
8 | 2 | {8}
9 | 3 | {8,9}
10 | 3 | {8,10}
(10 rows)
将上述结果集与子类别表连接:
-- Count distinct stack roots per subcategory, following stack_id to any depth.
with recursive pr(id, subcategory_id, stack_id, stack) as (
    -- Anchor: each product starts with a chain that contains only itself.
    select id, subcategory_id, stack_id, array[id]
    from products
    union
    -- Step: prepend the parent's id and continue from the parent's own stack_id.
    select pr.id, pr.subcategory_id, products.stack_id, pr.stack_id || pr.stack
    from pr
    join products on pr.stack_id = products.id
    -- Guard: stop when the parent is already in the chain, so cyclic stack_id data
    -- cannot recurse forever.
    where products.id <> all(pr.stack)
),
-- One row per product: its longest chain.
full_chain as (
    select distinct on (id) id, subcategory_id, stack
    from pr
    order by id, array_length(stack, 1) desc
),
-- One row per (subcategory, first chain element) pair, so each stack counts once.
stack_roots as (
    select distinct subcategory_id, stack[1] as root_id
    from full_chain
)
select
    r.subcategory_id,
    s.name,
    count(*)
from stack_roots as r
join subcategories as s
    on s.id = r.subcategory_id
group by r.subcategory_id, s.name
order by r.subcategory_id, s.name;
subcategory_id | name | count
----------------+---------------+-------
1 | subcategory_1 | 1
2 | subcategory_2 | 3
3 | subcategory_3 | 2
(3 rows)