PostgreSQL 中的递归 JSON 生成
Recursive JSON generation in PostgreSQL
我在 PostgreSQL 9.5 服务器中有以下表格:
值得注意的是,location 结构在理论上可以无限递归。我需要从根 location 生成一条 JSON 消息,并递归到所有 sub-locations;每个 location 都有一些属性、一个 inventory 项目数组,以及一个子 locations 数组。
如何为此创建高性能查询?我正在查看各种 PostgreSQL JSON 函数、LATERAL 关键字、CTE,并且有点困惑。我用 non-recursive 查询完成了 JSON 输出,但不确定如何干净地处理递归。
这是一个示例输出:
{
"id": 1000,
"name": "By Location",
"type": "SITE",
"locations": [
{
"id": 1005,
"name": "Storage A",
"type": "STOR",
"locations": [ ...(same schema as parent)... ],
"inventories": [ ...(see below for schema)... ]
},
{
"id": 1017,
"name": "Storage B",
"type": "COLD",
"locations": [ ...(same schema as parent)... ],
"inventories": [...(see below for schema)... ]
}
],
"inventories": [
{
"id": 5340,
"product_id": 9120,
"name": "Product X",
"thumb": "https://example.com/api/images/nnnn.jpg",
"sort_order": 1,
"par_level": 3.5,
"created": 1452898800,
"updated": 1453071600,
"measures": [
{"id": 3498, "quantity": 2.25, "created": 1453071600, "updated": 1453071600},
{"id": 3456, "quantity": 3.25, "created": 1452898800, "updated": 1452898800}
]
}
]
}
让我们把它拆成几个部分。首先,您需要使用嵌套子查询来创建嵌套数组,公用表表达式(Common Table Expressions,CTE)在这里可能会有所帮助。
其他技巧是row_to_json和json_agg。
第一个问题是:row_to_json 需要以表(或 CTE)的行作为参数,才能返回带有正确键名的对象。
-- Passing the table alias as a whole-row value makes every row a JSON object
-- keyed by column name; json_agg collects those objects into one JSON array.
SELECT json_agg(loc)
FROM locations AS loc;
将返回一个 JSON 数组,其中每一行对应一个 JSON 对象。要仅使用某些字段,您需要创建一个类型并进行转换,或者使用 CTE 配合上述语法。在大多数情况下,我会使用 CTE。
所以你最终会得到这样的结果:
-- Pattern: one CTE per nested array. Each low-level CTE selects the fields
-- that should appear in the child JSON objects, plus the `parent` key that the
-- mid-level CTE joins on (so `parent` also shows up in each child object).
WITH lowlevel1 AS (
    SELECT a,
           b,
           c,
           parent
    FROM tab1
),
lowlevel2 AS (
    SELECT b,
           c,
           d,
           parent
    FROM tab2
),
-- One row per tab3 row, with its children aggregated into JSON arrays.
-- NOTE(review): joining two child sets in the same query pairs every
-- lowlevel1 row with every lowlevel2 row of the same parent, so array
-- elements repeat when both sides have more than one match; aggregate each
-- child set separately if that matters.
midlevel1 AS (
    SELECT tab3.e,
           tab3.f,
           tab3.g,
           json_agg(lowlevel1) AS lab1,
           json_agg(lowlevel2) AS lab2
    FROM tab3
    LEFT JOIN lowlevel1
        ON tab3.id = lowlevel1.parent
    LEFT JOIN lowlevel2
        ON tab3.id = lowlevel2.parent
    -- Every non-aggregated column must be grouped; tab3.id keeps distinct
    -- parent rows apart even when their e/f/g values coincide.
    GROUP BY tab3.id, tab3.e, tab3.f, tab3.g
)
SELECT row_to_json(midlevel1)
FROM midlevel1
或者在最后一行使用 json_agg(midlevel1) 而不是 row_to_json(midlevel1),以返回包含所有行的一个数组。
CTE 还支持使用 RECURSIVE 修饰符进行递归。但是,递归 CTE 返回的是一张扁平的结果表,而不是嵌套的 JSON 结构。因此,您可能需要显式地编写所需的每一层嵌套。
如果元素不存在,Postgres 会返回 null。例如,当某个 location 没有任何 sub-locations 时,会返回 "locations":[null]。要用更有意义的结果替换它,可以使用 case when <> then <> else '[]' end 或 if <> then <> else '[]' end。第一种是 "searched case" 形式,其中每个测试都是一个布尔表达式。(注意:后一种 if 写法并不是 PostgreSQL 的 SQL 表达式语法,在普通查询中应使用 CASE 或 COALESCE。)
我最终创建了三个函数。也许可以用更少的函数完成,但这些函数可以在其他查询中重用。基本思路是:JSON 输出中凡是需要值数组的地方,都由一个返回记录集的函数来处理,再用 json_agg() 对该记录集进行聚合。
CREATE OR REPLACE FUNCTION get_measures_by_inventory_as_json(invid UUID, del TIMESTAMP WITH TIME ZONE DEFAULT now())
RETURNS TABLE(inventory_id UUID, measure_json JSON)
AS $$
    -- Emits one (inventory_id, JSON object) row for each measure belonging to
    -- the inventory record `invid`.
    --   invid: inventory id whose measures are returned
    --   del:   cutoff; only measures with deleted >= del are kept (default now())
    -- Timestamps are emitted as whole epoch seconds.
    -- NOTE(review): a NULL `deleted` fails the >= comparison, so such rows are
    -- excluded; presumably live rows carry a future `deleted` value — confirm
    -- against the schema.
    SELECT
        msr.inventory_id,
        json_build_object(
            'id',       msr.id,
            'quantity', msr.quantity,
            'read',     TRUNC(EXTRACT(EPOCH FROM msr.read_date)),
            'created',  TRUNC(EXTRACT(EPOCH FROM msr.created)),
            'updated',  TRUNC(EXTRACT(EPOCH FROM msr.updated)),
            'deleted',  TRUNC(EXTRACT(EPOCH FROM msr.deleted))
        )
    FROM measure AS msr
    WHERE msr.deleted >= del
      AND msr.inventory_id = invid;
$$
LANGUAGE sql;
CREATE OR REPLACE FUNCTION get_inventories_by_location_as_json(locid UUID, del TIMESTAMP WITH TIME ZONE DEFAULT now())
RETURNS TABLE(location_id UUID, inventory_json JSON)
AS $$
    -- Returns one (location_id, JSON object) row per inventory item stored at
    -- location `locid`, including product info, unit codes and nested measures.
    --   locid: location whose inventory items are returned
    --   del:   cutoff; inventory and product rows need deleted >= del
    --          (default now()); the same cutoff is applied to the measures
    -- Timestamps are emitted as whole epoch seconds.
    SELECT
        i.location_id,
        json_build_object(
            'id', i.id,
            'product_id', p.id,
            'name', p.name,
            'mass_quantity', p.mass_quantity,
            'mass_unit', um.code,
            'count_unit', uc.code,
            'thumb', p.product_picture_uri,
            'sort_order', i.sort_order,
            'par_level', i.par_level,
            'created', TRUNC(EXTRACT(EPOCH FROM i.created)),
            'updated', TRUNC(EXTRACT(EPOCH FROM i.updated)),
            'deleted', TRUNC(EXTRACT(EPOCH FROM i.deleted)),
            -- Forward `del` so measures use the caller's cutoff rather than
            -- silently falling back to the helper's now() default.
            -- json_agg over zero rows is NULL, hence the '[]' fallback.
            'measures', COALESCE(
                (SELECT json_agg(ms.measure_json)
                 FROM get_measures_by_inventory_as_json(i.id, del) AS ms),
                '[]'::json)
        )
    FROM inventory i
    INNER JOIN product p ON i.product_id = p.id
    -- Units are optional: LEFT JOIN leaves the code NULL when no unit is set.
    LEFT JOIN unit um ON p.mass_unit_id = um.id
    LEFT JOIN unit uc ON p.count_unit_id = uc.id
    WHERE i.location_id = locid
      AND i.deleted >= del
      AND p.deleted >= del;
$$
LANGUAGE sql;
CREATE OR REPLACE FUNCTION get_inventories_recursive_as_json(locid UUID[], del TIMESTAMP WITH TIME ZONE DEFAULT now())
RETURNS JSON
AS $$
    -- Returns a JSON array with one object per location in `locid`; each
    -- object carries the location's id/name/type, its inventories, and the
    -- same structure for its child locations (built by calling itself).
    --   locid: ids of the locations to render at this level
    --   del:   cutoff; locations need deleted >= del (default now()); the same
    --          cutoff is forwarded to child levels and to the inventories
    -- Returns NULL when no location in `locid` qualifies.
    SELECT json_agg(nodes.loc)
    FROM (
        SELECT json_build_object(
                   'id', l.id,
                   'name', l.name,
                   'type', t.code,
                   -- For a leaf the LEFT JOIN yields a single NULL child id;
                   -- the recursive call then matches no rows and returns
                   -- NULL, which ends the recursion. COALESCE turns that
                   -- into an empty array instead of a JSON null.
                   'locations', COALESCE(
                       get_inventories_recursive_as_json(array_agg(c.id), del),
                       '[]'::json),
                   'inventories', COALESCE(
                       (SELECT json_agg(inv.inventory_json)
                        FROM get_inventories_by_location_as_json(l.id, del) AS inv),
                       '[]'::json)
               ) AS loc
        FROM location l
        LEFT JOIN location c ON l.id = c.parent_id
        INNER JOIN location_type t ON l.location_type_id = t.id
        WHERE l.id = ANY(locid)
          AND l.deleted >= del
        GROUP BY l.id, l.name, t.code
    ) AS nodes;
$$
LANGUAGE sql;
我曾尝试用 CTE 来完成它,那样本来会非常优雅,但我没能找到办法避开"无法在递归中使用聚合"相关的错误。
-- Unfinished attempt at building the location tree with a single recursive CTE.
-- As written, the UNION ALL branch is commented out, so `locations` never
-- references itself and only the placeholder SELECT below is evaluated.
-- No filter on the `deleted` columns is applied anywhere in this query.
WITH RECURSIVE locations AS (
-- inventories: one row per location_id with a JSON array of its inventory objects.
WITH inventories AS (
-- measures: one row per inventory_id with a JSON array of its measure objects.
WITH measures AS (
SELECT m.inventory_id, json_agg(json_build_object(
'id', m.id,
'quantity', m.quantity,
'read', TRUNC(EXTRACT(EPOCH FROM m.read_date)),
'created', TRUNC(EXTRACT(EPOCH FROM m.created)),
'updated', TRUNC(EXTRACT(EPOCH FROM m.updated)),
'deleted', TRUNC(EXTRACT(EPOCH FROM m.deleted))
)) as measures
FROM measure m
GROUP BY m.inventory_id
)
SELECT i.location_id, json_agg(json_build_object(
'id', i.id,
'product_id', p.id,
'name', p.name,
'mass_quantity', p.mass_quantity,
'mass_unit', um.code,
'count_unit', uc.code,
'thumb', p.product_picture_uri,
'sort_order', i.sort_order,
'par_level', i.par_level,
'created', TRUNC(EXTRACT(EPOCH FROM i.created)),
'updated', TRUNC(EXTRACT(EPOCH FROM i.updated)),
'deleted', TRUNC(EXTRACT(EPOCH FROM i.deleted)),
-- Inventory items without any measures get an empty array rather than NULL.
'measures', COALESCE(m.measures, '[]')
)) AS inventories
FROM inventory i
INNER JOIN product p ON i.product_id = p.id
LEFT JOIN unit um ON p.mass_unit_id = um.id
LEFT JOIN unit uc ON p.count_unit_id = uc.id
LEFT JOIN measures m ON i.id = m.inventory_id
GROUP BY i.location_id
)
-- Placeholder non-recursive term: emits only NULL columns, one row per
-- distinct l.parent_id; the joined tables contribute no output values.
SELECT null as id, null as name, null as type, null as inventories
FROM location l
INNER JOIN location_type t ON l.location_type_id = t.id
LEFT OUTER JOIN inventories i ON l.id = i.location_id
GROUP BY l.parent_id
-- Disabled recursive term. It selects six columns while the term above selects
-- four, and it applies json_agg to rows of `locations` itself.
-- NOTE(review): PostgreSQL rejects aggregate functions in a recursive term,
-- which is presumably why this branch was left commented out.
/*UNION ALL
SELECT p.id, p.parent_id, p.name, t.code, COALESCE(i.inventories, '[]')::jsonb AS inventories, json_agg(row_to_json(c.*))
FROM location p
INNER JOIN location_type t ON p.location_type_id = t.id
LEFT OUTER JOIN inventories i ON p.id = i.location_id
INNER JOIN locations c ON p.id = c.parent_id
GROUP BY p.id, p.name, t.code, COALESCE(i.inventories, '[]')::jsonb*/
)
SELECT * FROM locations
我在 PostgreSQL 9.5 服务器中有以下表格:
值得注意的是,location 结构在理论上可以无限递归。我需要从根 location 生成一条 JSON 消息,并递归到所有 sub-locations;每个 location 都有一些属性、一个 inventory 项目数组,以及一个子 locations 数组。
如何为此创建高性能查询?我正在查看各种 PostgreSQL JSON 函数、LATERAL 关键字、CTE,并且有点困惑。我用 non-recursive 查询完成了 JSON 输出,但不确定如何干净地处理递归。
这是一个示例输出:
{
"id": 1000,
"name": "By Location",
"type": "SITE",
"locations": [
{
"id": 1005,
"name": "Storage A",
"type": "STOR",
"locations": [ ...(same schema as parent)... ],
"inventories": [ ...(see below for schema)... ]
},
{
"id": 1017,
"name": "Storage B",
"type": "COLD",
"locations": [ ...(same schema as parent)... ],
"inventories": [...(see below for schema)... ]
}
],
"inventories": [
{
"id": 5340,
"product_id": 9120,
"name": "Product X",
"thumb": "https://example.com/api/images/nnnn.jpg",
"sort_order": 1,
"par_level": 3.5,
"created": 1452898800,
"updated": 1453071600,
"measures": [
{"id": 3498, "quantity": 2.25, "created": 1453071600, "updated": 1453071600},
{"id": 3456, "quantity": 3.25, "created": 1452898800, "updated": 1452898800}
]
}
]
}
让我们把它拆成几个部分。首先,您需要使用嵌套子查询来创建嵌套数组,公用表表达式(Common Table Expressions,CTE)在这里可能会有所帮助。
其他技巧是row_to_json和json_agg。
第一个问题是:row_to_json 需要以表(或 CTE)的行作为参数,才能返回带有正确键名的对象。
-- Passing the table alias as a whole-row value makes every row a JSON object
-- keyed by column name; json_agg collects those objects into one JSON array.
SELECT json_agg(loc)
FROM locations AS loc;
将返回一个 JSON 数组,其中每一行对应一个 JSON 对象。要仅使用某些字段,您需要创建一个类型并进行转换,或者使用 CTE 配合上述语法。在大多数情况下,我会使用 CTE。
所以你最终会得到这样的结果:
-- Pattern: one CTE per nested array. Each low-level CTE selects the fields
-- that should appear in the child JSON objects, plus the `parent` key that the
-- mid-level CTE joins on (so `parent` also shows up in each child object).
WITH lowlevel1 AS (
    SELECT a,
           b,
           c,
           parent
    FROM tab1
),
lowlevel2 AS (
    SELECT b,
           c,
           d,
           parent
    FROM tab2
),
-- One row per tab3 row, with its children aggregated into JSON arrays.
-- NOTE(review): joining two child sets in the same query pairs every
-- lowlevel1 row with every lowlevel2 row of the same parent, so array
-- elements repeat when both sides have more than one match; aggregate each
-- child set separately if that matters.
midlevel1 AS (
    SELECT tab3.e,
           tab3.f,
           tab3.g,
           json_agg(lowlevel1) AS lab1,
           json_agg(lowlevel2) AS lab2
    FROM tab3
    LEFT JOIN lowlevel1
        ON tab3.id = lowlevel1.parent
    LEFT JOIN lowlevel2
        ON tab3.id = lowlevel2.parent
    -- Every non-aggregated column must be grouped; tab3.id keeps distinct
    -- parent rows apart even when their e/f/g values coincide.
    GROUP BY tab3.id, tab3.e, tab3.f, tab3.g
)
SELECT row_to_json(midlevel1)
FROM midlevel1
或者在最后一行使用 json_agg(midlevel1) 而不是 row_to_json(midlevel1),以返回包含所有行的一个数组。
CTE 还支持使用 RECURSIVE 修饰符进行递归。但是,递归 CTE 返回的是一张扁平的结果表,而不是嵌套的 JSON 结构。因此,您可能需要显式地编写所需的每一层嵌套。
如果元素不存在,Postgres 会返回 null。例如,当某个 location 没有任何 sub-locations 时,会返回 "locations":[null]。要用更有意义的结果替换它,可以使用 case when <> then <> else '[]' end 或 if <> then <> else '[]' end。第一种是 "searched case" 形式,其中每个测试都是一个布尔表达式。(注意:后一种 if 写法并不是 PostgreSQL 的 SQL 表达式语法,在普通查询中应使用 CASE 或 COALESCE。)
我最终创建了三个函数。也许可以用更少的函数完成,但这些函数可以在其他查询中重用。基本思路是:JSON 输出中凡是需要值数组的地方,都由一个返回记录集的函数来处理,再用 json_agg() 对该记录集进行聚合。
CREATE OR REPLACE FUNCTION get_measures_by_inventory_as_json(invid UUID, del TIMESTAMP WITH TIME ZONE DEFAULT now())
RETURNS TABLE(inventory_id UUID, measure_json JSON)
AS $$
    -- Emits one (inventory_id, JSON object) row for each measure belonging to
    -- the inventory record `invid`.
    --   invid: inventory id whose measures are returned
    --   del:   cutoff; only measures with deleted >= del are kept (default now())
    -- Timestamps are emitted as whole epoch seconds.
    -- NOTE(review): a NULL `deleted` fails the >= comparison, so such rows are
    -- excluded; presumably live rows carry a future `deleted` value — confirm
    -- against the schema.
    SELECT
        msr.inventory_id,
        json_build_object(
            'id',       msr.id,
            'quantity', msr.quantity,
            'read',     TRUNC(EXTRACT(EPOCH FROM msr.read_date)),
            'created',  TRUNC(EXTRACT(EPOCH FROM msr.created)),
            'updated',  TRUNC(EXTRACT(EPOCH FROM msr.updated)),
            'deleted',  TRUNC(EXTRACT(EPOCH FROM msr.deleted))
        )
    FROM measure AS msr
    WHERE msr.deleted >= del
      AND msr.inventory_id = invid;
$$
LANGUAGE sql;
CREATE OR REPLACE FUNCTION get_inventories_by_location_as_json(locid UUID, del TIMESTAMP WITH TIME ZONE DEFAULT now())
RETURNS TABLE(location_id UUID, inventory_json JSON)
AS $$
    -- Returns one (location_id, JSON object) row per inventory item stored at
    -- location `locid`, including product info, unit codes and nested measures.
    --   locid: location whose inventory items are returned
    --   del:   cutoff; inventory and product rows need deleted >= del
    --          (default now()); the same cutoff is applied to the measures
    -- Timestamps are emitted as whole epoch seconds.
    SELECT
        i.location_id,
        json_build_object(
            'id', i.id,
            'product_id', p.id,
            'name', p.name,
            'mass_quantity', p.mass_quantity,
            'mass_unit', um.code,
            'count_unit', uc.code,
            'thumb', p.product_picture_uri,
            'sort_order', i.sort_order,
            'par_level', i.par_level,
            'created', TRUNC(EXTRACT(EPOCH FROM i.created)),
            'updated', TRUNC(EXTRACT(EPOCH FROM i.updated)),
            'deleted', TRUNC(EXTRACT(EPOCH FROM i.deleted)),
            -- Forward `del` so measures use the caller's cutoff rather than
            -- silently falling back to the helper's now() default.
            -- json_agg over zero rows is NULL, hence the '[]' fallback.
            'measures', COALESCE(
                (SELECT json_agg(ms.measure_json)
                 FROM get_measures_by_inventory_as_json(i.id, del) AS ms),
                '[]'::json)
        )
    FROM inventory i
    INNER JOIN product p ON i.product_id = p.id
    -- Units are optional: LEFT JOIN leaves the code NULL when no unit is set.
    LEFT JOIN unit um ON p.mass_unit_id = um.id
    LEFT JOIN unit uc ON p.count_unit_id = uc.id
    WHERE i.location_id = locid
      AND i.deleted >= del
      AND p.deleted >= del;
$$
LANGUAGE sql;
CREATE OR REPLACE FUNCTION get_inventories_recursive_as_json(locid UUID[], del TIMESTAMP WITH TIME ZONE DEFAULT now())
RETURNS JSON
AS $$
    -- Returns a JSON array with one object per location in `locid`; each
    -- object carries the location's id/name/type, its inventories, and the
    -- same structure for its child locations (built by calling itself).
    --   locid: ids of the locations to render at this level
    --   del:   cutoff; locations need deleted >= del (default now()); the same
    --          cutoff is forwarded to child levels and to the inventories
    -- Returns NULL when no location in `locid` qualifies.
    SELECT json_agg(nodes.loc)
    FROM (
        SELECT json_build_object(
                   'id', l.id,
                   'name', l.name,
                   'type', t.code,
                   -- For a leaf the LEFT JOIN yields a single NULL child id;
                   -- the recursive call then matches no rows and returns
                   -- NULL, which ends the recursion. COALESCE turns that
                   -- into an empty array instead of a JSON null.
                   'locations', COALESCE(
                       get_inventories_recursive_as_json(array_agg(c.id), del),
                       '[]'::json),
                   'inventories', COALESCE(
                       (SELECT json_agg(inv.inventory_json)
                        FROM get_inventories_by_location_as_json(l.id, del) AS inv),
                       '[]'::json)
               ) AS loc
        FROM location l
        LEFT JOIN location c ON l.id = c.parent_id
        INNER JOIN location_type t ON l.location_type_id = t.id
        WHERE l.id = ANY(locid)
          AND l.deleted >= del
        GROUP BY l.id, l.name, t.code
    ) AS nodes;
$$
LANGUAGE sql;
我曾尝试用 CTE 来完成它,那样本来会非常优雅,但我没能找到办法避开"无法在递归中使用聚合"相关的错误。
-- Unfinished attempt at building the location tree with a single recursive CTE.
-- As written, the UNION ALL branch is commented out, so `locations` never
-- references itself and only the placeholder SELECT below is evaluated.
-- No filter on the `deleted` columns is applied anywhere in this query.
WITH RECURSIVE locations AS (
-- inventories: one row per location_id with a JSON array of its inventory objects.
WITH inventories AS (
-- measures: one row per inventory_id with a JSON array of its measure objects.
WITH measures AS (
SELECT m.inventory_id, json_agg(json_build_object(
'id', m.id,
'quantity', m.quantity,
'read', TRUNC(EXTRACT(EPOCH FROM m.read_date)),
'created', TRUNC(EXTRACT(EPOCH FROM m.created)),
'updated', TRUNC(EXTRACT(EPOCH FROM m.updated)),
'deleted', TRUNC(EXTRACT(EPOCH FROM m.deleted))
)) as measures
FROM measure m
GROUP BY m.inventory_id
)
SELECT i.location_id, json_agg(json_build_object(
'id', i.id,
'product_id', p.id,
'name', p.name,
'mass_quantity', p.mass_quantity,
'mass_unit', um.code,
'count_unit', uc.code,
'thumb', p.product_picture_uri,
'sort_order', i.sort_order,
'par_level', i.par_level,
'created', TRUNC(EXTRACT(EPOCH FROM i.created)),
'updated', TRUNC(EXTRACT(EPOCH FROM i.updated)),
'deleted', TRUNC(EXTRACT(EPOCH FROM i.deleted)),
-- Inventory items without any measures get an empty array rather than NULL.
'measures', COALESCE(m.measures, '[]')
)) AS inventories
FROM inventory i
INNER JOIN product p ON i.product_id = p.id
LEFT JOIN unit um ON p.mass_unit_id = um.id
LEFT JOIN unit uc ON p.count_unit_id = uc.id
LEFT JOIN measures m ON i.id = m.inventory_id
GROUP BY i.location_id
)
-- Placeholder non-recursive term: emits only NULL columns, one row per
-- distinct l.parent_id; the joined tables contribute no output values.
SELECT null as id, null as name, null as type, null as inventories
FROM location l
INNER JOIN location_type t ON l.location_type_id = t.id
LEFT OUTER JOIN inventories i ON l.id = i.location_id
GROUP BY l.parent_id
-- Disabled recursive term. It selects six columns while the term above selects
-- four, and it applies json_agg to rows of `locations` itself.
-- NOTE(review): PostgreSQL rejects aggregate functions in a recursive term,
-- which is presumably why this branch was left commented out.
/*UNION ALL
SELECT p.id, p.parent_id, p.name, t.code, COALESCE(i.inventories, '[]')::jsonb AS inventories, json_agg(row_to_json(c.*))
FROM location p
INNER JOIN location_type t ON p.location_type_id = t.id
LEFT OUTER JOIN inventories i ON p.id = i.location_id
INNER JOIN locations c ON p.id = c.parent_id
GROUP BY p.id, p.name, t.code, COALESCE(i.inventories, '[]')::jsonb*/
)
SELECT * FROM locations