为什么我的子查询使用字符串时可以正常工作,而使用字段引用时却不行?
Why does my sub-query work with a string, but not a field reference?
我有一个(我认为)相当复杂的查询。该查询先获取我想要的记录,然后再获取这些记录中引用的所有数据。如果子查询的条件是一个字符串字面量,查询可以正常工作;但如果换成一个字段引用(其值完全相同),查询就无法工作。
// Query with string as conditional in lowest sub-query (4th line from the bottom)
-- Returns each entity document together with an `includes` array of related documents.
SELECT
e1.entity as entity
-- includes = [workspace entity, owner user] with NULLs dropped by ARRAY_COMPACT,
-- followed by the aggregated e3 entity documents.
,ARRAY_CAT(
ARRAY_COMPACT(
ARRAY_CONSTRUCT(
any_value(e2.entity),
any_value(u1.user)
)
)
,ARRAY_AGG(e3.entity)
) as includes
FROM ENTITIES e1
-- e2: the entity whose id equals e1's owner workspace.
LEFT JOIN ENTITIES e2 ON e1.entity:owner:workspace = e2.entity:id
-- u1: the user whose id equals e1's owner user.
LEFT JOIN USERS u1 ON e1.entity:owner:user = u1.user:id
-- e3: entities whose id appears in the flattened id list produced by the subquery below.
LEFT JOIN ENTITIES e3 ON e3.entity:id IN (
SELECT ee2.value FROM
table(FLATTEN( input=>
-- Each relationship id (array ids are first joined with ',') is concatenated into one
-- comma-separated string, which SPLIT turns back into a flat array for the outer FLATTEN.
-- The innermost subquery filters on a literal id, so it does not reference the outer query.
SELECT SPLIT(LISTAGG( CASE WHEN IS_ARRAY(ee1.value:id) THEN ARRAY_TO_STRING(ee1.value:id, ',') ELSE ee1.value:id END, ','), ',')
FROM table(FLATTEN( input => ( SELECT e4.entity:relationships:entities FROM ENTITIES e4 WHERE e4.entity:id = 'bd265f29-ca32-449a-b765-bb488e4d6b3c' ) )) ee1
)) ee2
)
GROUP BY e1.entity
以上产生:
"entity" 列:
https://jsonblob.com/6d98b587-8989-11e9-b738-a9487a0dac0b
"includes" 列:
https://jsonblob.com/068a8672-8988-11e9-b738-77f0e471310b
但是,如果我将 uuid 字符串 (bd265f29-ca32-449a-b765-bb488e4d6b3c) 更改为 e1.entity:id (如下),则会出现错误: SQL compilation error: Unsupported subquery type cannot be evaluated.
-- Same query as the literal-id version, except that the innermost subquery filters on
-- e1.entity:id from the outer query instead of a literal.
SELECT
e1.entity as entity
-- includes = [workspace entity, owner user] with NULLs dropped by ARRAY_COMPACT,
-- followed by the aggregated e3 entity documents.
,ARRAY_CAT(
ARRAY_COMPACT(
ARRAY_CONSTRUCT(
any_value(e2.entity),
any_value(u1.user)
)
)
,ARRAY_AGG(e3.entity)
) as includes
FROM ENTITIES e1
LEFT JOIN ENTITIES e2 ON e1.entity:owner:workspace = e2.entity:id
LEFT JOIN USERS u1 ON e1.entity:owner:user = u1.user:id
LEFT JOIN ENTITIES e3 ON e3.entity:id IN (
SELECT ee2.value FROM
table(FLATTEN( input=>
SELECT SPLIT(LISTAGG( CASE WHEN IS_ARRAY(ee1.value:id) THEN ARRAY_TO_STRING(ee1.value:id, ',') ELSE ee1.value:id END, ','), ',')
-- The WHERE clause below references e1 from the outer query, which makes this a correlated
-- subquery nested inside FLATTEN. This is the form reported to fail with
-- "SQL compilation error: Unsupported subquery type cannot be evaluated".
FROM table(FLATTEN( input => ( SELECT e4.entity:relationships:entities FROM ENTITIES e4 WHERE e4.entity:id = e1.entity:id ) )) ee1
)) ee2
)
GROUP BY e1.entity
我不知道为什么这一改动会导致错误。为什么我的子查询使用字符串时可以工作,而使用字段引用时却不行?
Snowflake 关于子查询的文档中包含如下限制:
Correlated scalar subqueries are currently supported only if they can be statically determined to return one row (e.g. if the SELECT list contains an aggregate function with no GROUP BY).
所以你可以试试:
-- The aggregate with no GROUP BY lets the correlated scalar subquery be statically
-- determined to return a single row.
(
    SELECT MAX(e4.entity:relationships:entities)
    FROM ENTITIES AS e4
    WHERE e4.entity:id = e1.entity:id
)
你试过像这样做类型转换 (cast) 吗?
e1.entity:id::string
Snowflake 文档提到:
Subqueries with a correlation inside of FLATTEN are currently unsupported.
你能不能简单地使用 e1.entity:relationships:entities 而不是子查询?
下面先用几个 CTE 提供示例数据,再把相关子查询的大部分工作提升出来,改写成 CTE 加连接。我在实体中放入了两种形式的关系数组,其中包括单个关系项带有多个 id 的情况,也就是您用 FLATTEN 处理的那种情形:
-- Stand-alone demo: sample data is supplied by CTEs, relationship ids are unnested into
-- (ent_id, rel_id) pairs, and plain joins replace the correlated subquery.
WITH users AS (
    SELECT PARSE_JSON('{"id":1}') AS user
),
entities AS (
    -- Relationship ids appear both as scalars and as an array ([10,11]).
    SELECT PARSE_JSON(column1) AS entity
    FROM VALUES
        ('{"id":10, "relationships":{"entities":[{"id":11},{"id":12}]}, "owner":{"user":1,"workspace":10}}'),
        ('{"id":11, "relationships":{"entities":[{"id":11}]}}'),
        ('{"id":12, "relationships":{"entities":[{"id":[10,11]}]}}')
),
ent1 AS (
    -- One row per (entity id, relationship element index); vals holds that element's
    -- id or ids as an array of strings.
    SELECT
        e4.entity:id AS ent_id,
        ee1.index,
        SPLIT(
            LISTAGG(
                IFF(IS_ARRAY(ee1.value:id), ARRAY_TO_STRING(ee1.value:id, ','), ee1.value:id),
                ','
            ),
            ','
        ) AS vals
    FROM ENTITIES AS e4,
        TABLE(FLATTEN(input => e4.entity:relationships:entities)) ee1
    GROUP BY e4.entity:id, ee1.index
),
ent_rels AS (
    -- One row per (entity id, related entity id).
    SELECT
        ee1.ent_id,
        ee2.value::NUMBER AS rel_id
    FROM ent1 AS ee1,
        TABLE(FLATTEN(input => ee1.vals)) ee2
)
SELECT
    e1.entity:id AS entity,
    e2.entity:id AS e2_entity,
    u1.user:id AS u1_user,
    e3.entity:id AS e3_entity
FROM ENTITIES AS e1
LEFT JOIN ENTITIES AS e2
    ON e1.entity:owner:workspace = e2.entity:id
LEFT JOIN USERS AS u1
    ON e1.entity:owner:user = u1.user:id
LEFT JOIN ent_rels AS er
    ON er.ent_id = e1.entity:id
LEFT JOIN ENTITIES AS e3
    ON e3.entity:id = er.rel_id
ORDER BY e1.entity:id;
这个 SQL 的 SELECT 输出与您原来的不同,但可以用来确认连接结果符合预期:
ENTITY E2_ENTITY U1_USER E3_ENTITY
10 10 1 11
10 10 1 12
11 null null 11
12 null null 10
12 null null 11
因此,把最后的 SELECT 换回您原来的形式即可:
-- Final SELECT in the asker's original shape, reading related ids from the ent_rels CTE
-- instead of a correlated subquery.
SELECT
    e1.entity AS entity,
    -- [workspace entity, owner user] with NULLs dropped, followed by the related entities.
    ARRAY_CAT(
        ARRAY_COMPACT(
            ARRAY_CONSTRUCT(
                ANY_VALUE(e2.entity),
                ANY_VALUE(u1.user)
            )
        ),
        ARRAY_AGG(e3.entity)
    ) AS includes
FROM ENTITIES AS e1
LEFT JOIN ENTITIES AS e2
    ON e1.entity:owner:workspace = e2.entity:id
LEFT JOIN USERS AS u1
    ON e1.entity:owner:user = u1.user:id
LEFT JOIN ent_rels AS er
    ON er.ent_id = e1.entity:id
LEFT JOIN ENTITIES AS e3
    ON e3.entity:id = er.rel_id
GROUP BY e1.entity
ORDER BY e1.entity:id;
此外,既然您是要展开两层嵌套来取得匹配的 ID,就可以不用 LISTAGG 和 SPLIT,直接按如下方式逐层 FLATTEN:
), ent1 AS (
    -- One row per relationship element; vals is that element's id (a single id or an array).
    SELECT
        e4.entity:id AS ent_id,
        ee1.value:id AS vals
    FROM ENTITIES AS e4,
        TABLE(FLATTEN(input => e4.entity:relationships:entities)) ee1
), ent_rels AS (
    -- OUTER => TRUE keeps a row even when vals expands to nothing; in that case
    -- ee2.value is NULL and COALESCE falls back to vals itself.
    SELECT
        ee1.ent_id,
        COALESCE(ee2.value, ee1.vals) AS rel_id
    FROM ent1 AS ee1,
        TABLE(FLATTEN(input => ee1.vals, OUTER => TRUE)) ee2
)
如果您愿意,也可以把这两步合并(嵌套)成一个 CTE:
, ent_rels AS (
    -- Two-level unnesting in one CTE: the inner query yields one row per relationship
    -- element, the outer FLATTEN expands its id(s); COALESCE falls back to vals when
    -- the outer FLATTEN produced no value.
    SELECT
        ee2.ent_id,
        COALESCE(ee3.value, ee2.vals) AS rel_id
    FROM (
        SELECT
            e1.entity:id AS ent_id,
            ee1.value:id AS vals
        FROM ENTITIES AS e1,
            TABLE(FLATTEN(input => e1.entity:relationships:entities)) ee1
    ) ee2,
        TABLE(FLATTEN(input => ee2.vals, OUTER => TRUE)) ee3
)
我有一个(我认为)相当复杂的查询。该查询先获取我想要的记录,然后再获取这些记录中引用的所有数据。如果子查询的条件是一个字符串字面量,查询可以正常工作;但如果换成一个字段引用(其值完全相同),查询就无法工作。
// Query with string as conditional in lowest sub-query (4th line from the bottom)
-- Returns each entity document together with an `includes` array of related documents.
SELECT
e1.entity as entity
-- includes = [workspace entity, owner user] with NULLs dropped by ARRAY_COMPACT,
-- followed by the aggregated e3 entity documents.
,ARRAY_CAT(
ARRAY_COMPACT(
ARRAY_CONSTRUCT(
any_value(e2.entity),
any_value(u1.user)
)
)
,ARRAY_AGG(e3.entity)
) as includes
FROM ENTITIES e1
-- e2: the entity whose id equals e1's owner workspace.
LEFT JOIN ENTITIES e2 ON e1.entity:owner:workspace = e2.entity:id
-- u1: the user whose id equals e1's owner user.
LEFT JOIN USERS u1 ON e1.entity:owner:user = u1.user:id
-- e3: entities whose id appears in the flattened id list produced by the subquery below.
LEFT JOIN ENTITIES e3 ON e3.entity:id IN (
SELECT ee2.value FROM
table(FLATTEN( input=>
-- Each relationship id (array ids are first joined with ',') is concatenated into one
-- comma-separated string, which SPLIT turns back into a flat array for the outer FLATTEN.
-- The innermost subquery filters on a literal id, so it does not reference the outer query.
SELECT SPLIT(LISTAGG( CASE WHEN IS_ARRAY(ee1.value:id) THEN ARRAY_TO_STRING(ee1.value:id, ',') ELSE ee1.value:id END, ','), ',')
FROM table(FLATTEN( input => ( SELECT e4.entity:relationships:entities FROM ENTITIES e4 WHERE e4.entity:id = 'bd265f29-ca32-449a-b765-bb488e4d6b3c' ) )) ee1
)) ee2
)
GROUP BY e1.entity
以上产生:
"entity" 列: https://jsonblob.com/6d98b587-8989-11e9-b738-a9487a0dac0b
"includes" 列: https://jsonblob.com/068a8672-8988-11e9-b738-77f0e471310b
但是,如果我将 uuid 字符串 (bd265f29-ca32-449a-b765-bb488e4d6b3c) 更改为 e1.entity:id (如下),则会出现错误: SQL compilation error: Unsupported subquery type cannot be evaluated.
-- Same query as the literal-id version, except that the innermost subquery filters on
-- e1.entity:id from the outer query instead of a literal.
SELECT
e1.entity as entity
-- includes = [workspace entity, owner user] with NULLs dropped by ARRAY_COMPACT,
-- followed by the aggregated e3 entity documents.
,ARRAY_CAT(
ARRAY_COMPACT(
ARRAY_CONSTRUCT(
any_value(e2.entity),
any_value(u1.user)
)
)
,ARRAY_AGG(e3.entity)
) as includes
FROM ENTITIES e1
LEFT JOIN ENTITIES e2 ON e1.entity:owner:workspace = e2.entity:id
LEFT JOIN USERS u1 ON e1.entity:owner:user = u1.user:id
LEFT JOIN ENTITIES e3 ON e3.entity:id IN (
SELECT ee2.value FROM
table(FLATTEN( input=>
SELECT SPLIT(LISTAGG( CASE WHEN IS_ARRAY(ee1.value:id) THEN ARRAY_TO_STRING(ee1.value:id, ',') ELSE ee1.value:id END, ','), ',')
-- The WHERE clause below references e1 from the outer query, which makes this a correlated
-- subquery nested inside FLATTEN. This is the form reported to fail with
-- "SQL compilation error: Unsupported subquery type cannot be evaluated".
FROM table(FLATTEN( input => ( SELECT e4.entity:relationships:entities FROM ENTITIES e4 WHERE e4.entity:id = e1.entity:id ) )) ee1
)) ee2
)
GROUP BY e1.entity
我不知道为什么这一改动会导致错误。为什么我的子查询使用字符串时可以工作,而使用字段引用时却不行?
Snowflake 关于子查询的文档中包含如下限制:
Correlated scalar subqueries are currently supported only if they can be statically determined to return one row (e.g. if the SELECT list contains an aggregate function with no GROUP BY).
所以你可以试试:
-- The aggregate with no GROUP BY lets the correlated scalar subquery be statically
-- determined to return a single row.
(
    SELECT MAX(e4.entity:relationships:entities)
    FROM ENTITIES AS e4
    WHERE e4.entity:id = e1.entity:id
)
你试过像这样做类型转换 (cast) 吗?
e1.entity:id::string
Snowflake 文档提到:
Subqueries with a correlation inside of FLATTEN are currently unsupported.
你能不能简单地使用 e1.entity:relationships:entities 而不是子查询?
下面先用几个 CTE 提供示例数据,再把相关子查询的大部分工作提升出来,改写成 CTE 加连接。我在实体中放入了两种形式的关系数组,其中包括单个关系项带有多个 id 的情况,也就是您用 FLATTEN 处理的那种情形:
-- Stand-alone demo: sample data is supplied by CTEs, relationship ids are unnested into
-- (ent_id, rel_id) pairs, and plain joins replace the correlated subquery.
WITH users AS (
    SELECT PARSE_JSON('{"id":1}') AS user
),
entities AS (
    -- Relationship ids appear both as scalars and as an array ([10,11]).
    SELECT PARSE_JSON(column1) AS entity
    FROM VALUES
        ('{"id":10, "relationships":{"entities":[{"id":11},{"id":12}]}, "owner":{"user":1,"workspace":10}}'),
        ('{"id":11, "relationships":{"entities":[{"id":11}]}}'),
        ('{"id":12, "relationships":{"entities":[{"id":[10,11]}]}}')
),
ent1 AS (
    -- One row per (entity id, relationship element index); vals holds that element's
    -- id or ids as an array of strings.
    SELECT
        e4.entity:id AS ent_id,
        ee1.index,
        SPLIT(
            LISTAGG(
                IFF(IS_ARRAY(ee1.value:id), ARRAY_TO_STRING(ee1.value:id, ','), ee1.value:id),
                ','
            ),
            ','
        ) AS vals
    FROM ENTITIES AS e4,
        TABLE(FLATTEN(input => e4.entity:relationships:entities)) ee1
    GROUP BY e4.entity:id, ee1.index
),
ent_rels AS (
    -- One row per (entity id, related entity id).
    SELECT
        ee1.ent_id,
        ee2.value::NUMBER AS rel_id
    FROM ent1 AS ee1,
        TABLE(FLATTEN(input => ee1.vals)) ee2
)
SELECT
    e1.entity:id AS entity,
    e2.entity:id AS e2_entity,
    u1.user:id AS u1_user,
    e3.entity:id AS e3_entity
FROM ENTITIES AS e1
LEFT JOIN ENTITIES AS e2
    ON e1.entity:owner:workspace = e2.entity:id
LEFT JOIN USERS AS u1
    ON e1.entity:owner:user = u1.user:id
LEFT JOIN ent_rels AS er
    ON er.ent_id = e1.entity:id
LEFT JOIN ENTITIES AS e3
    ON e3.entity:id = er.rel_id
ORDER BY e1.entity:id;
这个 SQL 的 SELECT 输出与您原来的不同,但可以用来确认连接结果符合预期:
ENTITY E2_ENTITY U1_USER E3_ENTITY
10 10 1 11
10 10 1 12
11 null null 11
12 null null 10
12 null null 11
因此,把最后的 SELECT 换回您原来的形式即可:
-- Final SELECT in the asker's original shape, reading related ids from the ent_rels CTE
-- instead of a correlated subquery.
SELECT
    e1.entity AS entity,
    -- [workspace entity, owner user] with NULLs dropped, followed by the related entities.
    ARRAY_CAT(
        ARRAY_COMPACT(
            ARRAY_CONSTRUCT(
                ANY_VALUE(e2.entity),
                ANY_VALUE(u1.user)
            )
        ),
        ARRAY_AGG(e3.entity)
    ) AS includes
FROM ENTITIES AS e1
LEFT JOIN ENTITIES AS e2
    ON e1.entity:owner:workspace = e2.entity:id
LEFT JOIN USERS AS u1
    ON e1.entity:owner:user = u1.user:id
LEFT JOIN ent_rels AS er
    ON er.ent_id = e1.entity:id
LEFT JOIN ENTITIES AS e3
    ON e3.entity:id = er.rel_id
GROUP BY e1.entity
ORDER BY e1.entity:id;
此外,既然您是要展开两层嵌套来取得匹配的 ID,就可以不用 LISTAGG 和 SPLIT,直接按如下方式逐层 FLATTEN:
), ent1 AS (
    -- One row per relationship element; vals is that element's id (a single id or an array).
    SELECT
        e4.entity:id AS ent_id,
        ee1.value:id AS vals
    FROM ENTITIES AS e4,
        TABLE(FLATTEN(input => e4.entity:relationships:entities)) ee1
), ent_rels AS (
    -- OUTER => TRUE keeps a row even when vals expands to nothing; in that case
    -- ee2.value is NULL and COALESCE falls back to vals itself.
    SELECT
        ee1.ent_id,
        COALESCE(ee2.value, ee1.vals) AS rel_id
    FROM ent1 AS ee1,
        TABLE(FLATTEN(input => ee1.vals, OUTER => TRUE)) ee2
)
如果您愿意,也可以把这两步合并(嵌套)成一个 CTE:
, ent_rels AS (
    -- Two-level unnesting in one CTE: the inner query yields one row per relationship
    -- element, the outer FLATTEN expands its id(s); COALESCE falls back to vals when
    -- the outer FLATTEN produced no value.
    SELECT
        ee2.ent_id,
        COALESCE(ee3.value, ee2.vals) AS rel_id
    FROM (
        SELECT
            e1.entity:id AS ent_id,
            ee1.value:id AS vals
        FROM ENTITIES AS e1,
            TABLE(FLATTEN(input => e1.entity:relationships:entities)) ee1
    ) ee2,
        TABLE(FLATTEN(input => ee2.vals, OUTER => TRUE)) ee3
)