SQL 查询 listAgg 和分组依据
SQL query to listAgg and group by
我写了一个新的查询,得到了如下所示的 listAgg 结果
</p>
<pre><code> Col A
1000016932,1000020056,1000020100,1000020144,1000020243
现在我想要的结果如下
col C col D
1000016932 1000020056
1000016932 1000020100
1000016932 1000020144
1000016932 1000020243
1000020056 1000020100
1000020056 1000020144 ...and so on
请注意,我无法对关卡进行硬编码,因为每个字符串的长度可以是任意的
-- Split one comma-separated string into tokens, then pair every token with
-- each token that appears later in the same string.
WITH table_1 (colA) AS (
    -- Sample input: a single LISTAGG-style string.
    SELECT '1000016932,1000020056,1000020100,1000020144,1000020243'
    FROM dual
),
tokens (lvl, token) AS (
    -- One row per token; lvl is the token's 1-based position in the string.
    SELECT
        LEVEL,
        REGEXP_SUBSTR(colA, '[^,]+', 1, LEVEL)
    FROM table_1
    CONNECT BY LEVEL <= REGEXP_COUNT(colA, ',') + 1
        AND PRIOR colA = colA               -- stay within the same source string
        AND PRIOR SYS_GUID() IS NOT NULL    -- idiom: makes each row unique so no CONNECT BY loop is detected
)
SELECT
    earlier.token AS token_1,
    later.token AS token_2
FROM tokens earlier
INNER JOIN tokens later
    ON later.lvl > earlier.lvl;
这假设逗号之间没有空值(您没有两个连续的逗号,它们之间没有任何内容,在序列中标记 "null")。
结果:
TOKEN_1 TOKEN_2
---------- ----------
1000016932 1000020056
1000016932 1000020100
1000016932 1000020144
1000016932 1000020243
1000020056 1000020100
1000020056 1000020144
1000020056 1000020243
1000020100 1000020144
1000020100 1000020243
1000020144 1000020243
允许输入多行 table(假设在初始 table 中有一个 row_id 列):
-- Same idea as the single-string version, but for several source rows:
-- tokens are only paired with later tokens from the same row_id.
WITH table_1 (row_id, colA) AS (
    -- Sample input: two rows, each holding its own comma-separated list.
    SELECT 101, '1000016932,1000020056,1000020100,1000020144,1000020243' FROM dual
    UNION ALL
    SELECT 102, '1000040042,1000045543,1000045664' FROM dual
),
tokens (lvl, row_id, token) AS (
    -- One row per (row_id, token); lvl is the token's position in its list.
    SELECT
        LEVEL,
        row_id,
        REGEXP_SUBSTR(colA, '[^,]+', 1, LEVEL)
    FROM table_1
    CONNECT BY LEVEL <= REGEXP_COUNT(colA, ',') + 1
        AND PRIOR row_id = row_id           -- never hop from one source row to another
        AND PRIOR SYS_GUID() IS NOT NULL    -- idiom: makes each row unique so no CONNECT BY loop is detected
)
SELECT
    earlier.row_id,
    earlier.token AS token_1,
    later.token AS token_2
FROM tokens earlier
INNER JOIN tokens later
    ON later.row_id = earlier.row_id
    AND later.lvl > earlier.lvl
ORDER BY row_id, token_1;
结果:
ROW_ID TOKEN_1 TOKEN_2
---------- ---------- ----------
101 1000016932 1000020144
101 1000016932 1000020056
101 1000016932 1000020100
101 1000016932 1000020243
101 1000020056 1000020243
101 1000020056 1000020100
101 1000020056 1000020144
101 1000020100 1000020243
101 1000020100 1000020144
101 1000020144 1000020243
102 1000040042 1000045543
102 1000040042 1000045664
102 1000045543 1000045664
如果我理解正确,您需要在顺序不重要的逗号分隔字符串中获取值对的所有组合,不包括相同的值对,如 (1,1)、(2,2)、等等
第一步是将字符串转换为行和 select 行号以及值 -
-- Step one: turn the comma-separated col_A into one row per value.
-- r   : running row number of the generated rows
-- val : text captured by subexpression 1 of '(.*?)(,|$)', i.e. the part
--       in front of the LEVEL-th delimiter (comma or end of string)
select
    rownum as r,
    regexp_substr(col_A, '(.*?)(,|$)', 1, level, null, 1) as val
from my_table
connect by level <= regexp_count(col_A, ',') + 1;
然后与自身进行交叉连接。然而,这会给你相同的一对两次。所以像 {(1,1), (1,2), (1,3), (2,1), (2,2), (2,3), (3,1), (3,2), (3,3)}
。为了消除重复项并以您想要的方式检索行 - 确保第二个 table 的行号大于第一个。这样你会得到 - {(1,2),(1,3),(2,3)}。
所以最终查询看起来像 -
-- Pair every value in the comma-separated string with each value that comes
-- after it. Pairs are selected by comparing row numbers (r), not the values
-- themselves, so the result does not depend on how the tokens sort as text
-- and repeated tokens are still paired.
WITH my_table
     AS (SELECT '1000016932,1000020056,1000020100,1000020144,1000020243'
                    AS col_A
           FROM DUAL),
     vals
     AS (    SELECT ROWNUM AS r,   -- row number of the generated token row
                    REGEXP_SUBSTR (col_A,
                                   '(.*?)(,|$)',
                                   1,
                                   LEVEL,
                                   NULL,
                                   1)
                        AS val
               FROM my_table
         CONNECT BY LEVEL <= REGEXP_COUNT (col_A, ',') + 1)
SELECT v_a.val AS col_B, v_b.val AS col_C
  FROM vals v_a
       -- Keep only pairs where the second row number is greater than the
       -- first: this removes (x, x) and the mirrored duplicate of each pair.
       INNER JOIN vals v_b
           ON v_b.r > v_a.r;
编辑:
因为可能有多行,所以最好有某种 ID 列,您可以使用它来将行连接在一起。所以在这个例子中 -
ID COL_A
1 1,2,3,4
2 5,6,7
您唯一需要做的就是在拆分逗号分隔的字符串时根据 ID select 唯一行。
-- Multi-row variant: split each row's list and pair values within the same id.
-- NOTE: val is the text returned by REGEXP_SUBSTR, so "v_b.val > v_a.val"
-- orders pairs by comparing the values themselves, not their positions.
with my_table as (
    select 1 as id, '1,2,3,4' as col_A from dual
    union all
    select 2, '5,6,7' from dual
),
vals as (
    -- DISTINCT keeps one row per (id, val) out of the rows CONNECT BY generates.
    select distinct
        id,
        regexp_substr(col_A, '(.*?)(,|$)', 1, level, null, 1) as val
    from my_table
    connect by level <= regexp_count(col_A, ',') + 1
)
select
    v_a.val as col_B,
    v_b.val as col_C
from vals v_a
inner join vals v_b
    on v_b.id = v_a.id
    and v_b.val > v_a.val;
编辑 2:
我意识到我在比较实际值,这是不正确的。它会强制所有值都是整数。这是一个允许整数或字符串的查询。
-- Pair every value with each value that follows it in the same row's list.
-- Works for numeric or text values alike, because pairs are chosen by the
-- value's position in the list (r) and never by comparing the values.
WITH my_table
     AS (SELECT 1 AS id, '1,2,3,4' AS col_A FROM DUAL
         UNION ALL
         SELECT 2, '5,6,7' FROM DUAL
         UNION ALL
         SELECT 3, 'a,b,c' FROM DUAL),
     vals
     AS (    SELECT id,
                    LEVEL AS r,   -- 1-based position of val within col_A
                    REGEXP_SUBSTR (col_A,
                                   '(.*?)(,|$)',
                                   1,
                                   LEVEL,
                                   NULL,
                                   1)
                        AS val
               FROM my_table
         CONNECT BY LEVEL <= REGEXP_COUNT (col_A, ',') + 1
                -- Stay on the same source row while descending; without this
                -- every level fans out across all rows of my_table and the
                -- duplicates have to be removed again with DISTINCT.
                AND PRIOR id = id
                -- Idiom: gives each row a unique value so Oracle does not
                -- report a CONNECT BY loop.
                AND PRIOR SYS_GUID () IS NOT NULL)
SELECT v_a.val AS col_B, v_b.val AS col_C
  FROM vals v_a
       INNER JOIN vals v_b
           ON v_b.id = v_a.id
          AND v_b.r > v_a.r;
I wrote a new query which gets me a listAgg result such as the below
...
Now what i want with that results is as follows
您想退后一步,因为这看起来像 XY problem。如果您有行中的数据,然后对其进行聚合,然后想将其再次散布到行中,那么您可以通过不首先进行聚合来更有效地进行操作。
假设您有 table:
-- Sample data: two groups (id 1 and id 2) with one value per row.
create table table_name (id, value) as
    select 1, 1 from dual
    union all
    select 1, 2 from dual
    union all
    select 2, 3 from dual
    union all
    select 2, 4 from dual
    union all
    select 2, 5 from dual;
现在您可以聚合它了:
-- Collapse each id's values into one comma-separated string, smallest first.
select
    id,
    listagg(value, ',') within group (order by value) as "VALUES"
from table_name
group by id;
这会给这个:
ID VALUES
---------- ------
1 1,2
2 3,4,5
然后再次开始将值拆分成行...
但是通过只执行自连接而不首先聚合来获得所有组合要简单得多:
-- All value pairs within each id, straight from the un-aggregated rows:
-- each row is joined to the rows of the same id that hold a larger value.
select
    lo.id,
    lo.value as value1,
    hi.value as value2
from table_name lo
inner join table_name hi
    on hi.id = lo.id
    and hi.value > lo.value
这会给你输出:
ID VALUE1 VALUE2
---------- ---------- ----------
1 1 2
2 3 4
2 4 5
2 3 5
我写了一个新的查询,得到了如下所示的 listAgg 结果
</p>
<pre><code> Col A
1000016932,1000020056,1000020100,1000020144,1000020243
现在我想要的结果如下
col C col D
1000016932 1000020056
1000016932 1000020100
1000016932 1000020144
1000016932 1000020243
1000020056 1000020100
1000020056 1000020144 ...and so on
请注意,我无法对关卡进行硬编码,因为每个字符串的长度可以是任意的
-- Split one comma-separated string into tokens, then pair every token with
-- each token that appears later in the same string.
WITH table_1 (colA) AS (
    -- Sample input: a single LISTAGG-style string.
    SELECT '1000016932,1000020056,1000020100,1000020144,1000020243'
    FROM dual
),
tokens (lvl, token) AS (
    -- One row per token; lvl is the token's 1-based position in the string.
    SELECT
        LEVEL,
        REGEXP_SUBSTR(colA, '[^,]+', 1, LEVEL)
    FROM table_1
    CONNECT BY LEVEL <= REGEXP_COUNT(colA, ',') + 1
        AND PRIOR colA = colA               -- stay within the same source string
        AND PRIOR SYS_GUID() IS NOT NULL    -- idiom: makes each row unique so no CONNECT BY loop is detected
)
SELECT
    earlier.token AS token_1,
    later.token AS token_2
FROM tokens earlier
INNER JOIN tokens later
    ON later.lvl > earlier.lvl;
这假设逗号之间没有空值(您没有两个连续的逗号,它们之间没有任何内容,在序列中标记 "null")。
结果:
TOKEN_1 TOKEN_2
---------- ----------
1000016932 1000020056
1000016932 1000020100
1000016932 1000020144
1000016932 1000020243
1000020056 1000020100
1000020056 1000020144
1000020056 1000020243
1000020100 1000020144
1000020100 1000020243
1000020144 1000020243
允许输入多行 table(假设在初始 table 中有一个 row_id 列):
-- Same idea as the single-string version, but for several source rows:
-- tokens are only paired with later tokens from the same row_id.
WITH table_1 (row_id, colA) AS (
    -- Sample input: two rows, each holding its own comma-separated list.
    SELECT 101, '1000016932,1000020056,1000020100,1000020144,1000020243' FROM dual
    UNION ALL
    SELECT 102, '1000040042,1000045543,1000045664' FROM dual
),
tokens (lvl, row_id, token) AS (
    -- One row per (row_id, token); lvl is the token's position in its list.
    SELECT
        LEVEL,
        row_id,
        REGEXP_SUBSTR(colA, '[^,]+', 1, LEVEL)
    FROM table_1
    CONNECT BY LEVEL <= REGEXP_COUNT(colA, ',') + 1
        AND PRIOR row_id = row_id           -- never hop from one source row to another
        AND PRIOR SYS_GUID() IS NOT NULL    -- idiom: makes each row unique so no CONNECT BY loop is detected
)
SELECT
    earlier.row_id,
    earlier.token AS token_1,
    later.token AS token_2
FROM tokens earlier
INNER JOIN tokens later
    ON later.row_id = earlier.row_id
    AND later.lvl > earlier.lvl
ORDER BY row_id, token_1;
结果:
ROW_ID TOKEN_1 TOKEN_2
---------- ---------- ----------
101 1000016932 1000020144
101 1000016932 1000020056
101 1000016932 1000020100
101 1000016932 1000020243
101 1000020056 1000020243
101 1000020056 1000020100
101 1000020056 1000020144
101 1000020100 1000020243
101 1000020100 1000020144
101 1000020144 1000020243
102 1000040042 1000045543
102 1000040042 1000045664
102 1000045543 1000045664
如果我理解正确,您需要在顺序不重要的逗号分隔字符串中获取值对的所有组合,不包括相同的值对,如 (1,1)、(2,2)、等等
第一步是将字符串转换为行和 select 行号以及值 -
-- Step one: turn the comma-separated col_A into one row per value.
-- r   : running row number of the generated rows
-- val : text captured by subexpression 1 of '(.*?)(,|$)', i.e. the part
--       in front of the LEVEL-th delimiter (comma or end of string)
select
    rownum as r,
    regexp_substr(col_A, '(.*?)(,|$)', 1, level, null, 1) as val
from my_table
connect by level <= regexp_count(col_A, ',') + 1;
然后与自身进行交叉连接。然而,这会给你相同的一对两次。所以像 {(1,1), (1,2), (1,3), (2,1), (2,2), (2,3), (3,1), (3,2), (3,3)}
。为了消除重复项并以您想要的方式检索行 - 确保第二个 table 的行号大于第一个。这样你会得到 - {(1,2),(1,3),(2,3)}。
所以最终查询看起来像 -
-- Pair every value in the comma-separated string with each value that comes
-- after it. Pairs are selected by comparing row numbers (r), not the values
-- themselves, so the result does not depend on how the tokens sort as text
-- and repeated tokens are still paired.
WITH my_table
     AS (SELECT '1000016932,1000020056,1000020100,1000020144,1000020243'
                    AS col_A
           FROM DUAL),
     vals
     AS (    SELECT ROWNUM AS r,   -- row number of the generated token row
                    REGEXP_SUBSTR (col_A,
                                   '(.*?)(,|$)',
                                   1,
                                   LEVEL,
                                   NULL,
                                   1)
                        AS val
               FROM my_table
         CONNECT BY LEVEL <= REGEXP_COUNT (col_A, ',') + 1)
SELECT v_a.val AS col_B, v_b.val AS col_C
  FROM vals v_a
       -- Keep only pairs where the second row number is greater than the
       -- first: this removes (x, x) and the mirrored duplicate of each pair.
       INNER JOIN vals v_b
           ON v_b.r > v_a.r;
编辑:
因为可能有多行,所以最好有某种 ID 列,您可以使用它来将行连接在一起。所以在这个例子中 -
ID COL_A
1 1,2,3,4
2 5,6,7
您唯一需要做的就是在拆分逗号分隔的字符串时根据 ID select 唯一行。
-- Multi-row variant: split each row's list and pair values within the same id.
-- NOTE: val is the text returned by REGEXP_SUBSTR, so "v_b.val > v_a.val"
-- orders pairs by comparing the values themselves, not their positions.
with my_table as (
    select 1 as id, '1,2,3,4' as col_A from dual
    union all
    select 2, '5,6,7' from dual
),
vals as (
    -- DISTINCT keeps one row per (id, val) out of the rows CONNECT BY generates.
    select distinct
        id,
        regexp_substr(col_A, '(.*?)(,|$)', 1, level, null, 1) as val
    from my_table
    connect by level <= regexp_count(col_A, ',') + 1
)
select
    v_a.val as col_B,
    v_b.val as col_C
from vals v_a
inner join vals v_b
    on v_b.id = v_a.id
    and v_b.val > v_a.val;
编辑 2:
我意识到我在比较实际值,这是不正确的。它会强制所有值都是整数。这是一个允许整数或字符串的查询。
-- Pair every value with each value that follows it in the same row's list.
-- Works for numeric or text values alike, because pairs are chosen by the
-- value's position in the list (r) and never by comparing the values.
WITH my_table
     AS (SELECT 1 AS id, '1,2,3,4' AS col_A FROM DUAL
         UNION ALL
         SELECT 2, '5,6,7' FROM DUAL
         UNION ALL
         SELECT 3, 'a,b,c' FROM DUAL),
     vals
     AS (    SELECT id,
                    LEVEL AS r,   -- 1-based position of val within col_A
                    REGEXP_SUBSTR (col_A,
                                   '(.*?)(,|$)',
                                   1,
                                   LEVEL,
                                   NULL,
                                   1)
                        AS val
               FROM my_table
         CONNECT BY LEVEL <= REGEXP_COUNT (col_A, ',') + 1
                -- Stay on the same source row while descending; without this
                -- every level fans out across all rows of my_table and the
                -- duplicates have to be removed again with DISTINCT.
                AND PRIOR id = id
                -- Idiom: gives each row a unique value so Oracle does not
                -- report a CONNECT BY loop.
                AND PRIOR SYS_GUID () IS NOT NULL)
SELECT v_a.val AS col_B, v_b.val AS col_C
  FROM vals v_a
       INNER JOIN vals v_b
           ON v_b.id = v_a.id
          AND v_b.r > v_a.r;
I wrote a new query which gets me a listAgg result such as the below
...
Now what i want with that results is as follows
您想退后一步,因为这看起来像 XY problem。如果您有行中的数据,然后对其进行聚合,然后想将其再次散布到行中,那么您可以通过不首先进行聚合来更有效地进行操作。
假设您有 table:
-- Sample data: two groups (id 1 and id 2) with one value per row.
create table table_name (id, value) as
    select 1, 1 from dual
    union all
    select 1, 2 from dual
    union all
    select 2, 3 from dual
    union all
    select 2, 4 from dual
    union all
    select 2, 5 from dual;
现在您可以聚合它了:
-- Collapse each id's values into one comma-separated string, smallest first.
select
    id,
    listagg(value, ',') within group (order by value) as "VALUES"
from table_name
group by id;
这会给这个:
ID VALUES
---------- ------
1 1,2
2 3,4,5
然后再次开始将值拆分成行...
但是通过只执行自连接而不首先聚合来获得所有组合要简单得多:
-- All value pairs within each id, straight from the un-aggregated rows:
-- each row is joined to the rows of the same id that hold a larger value.
select
    lo.id,
    lo.value as value1,
    hi.value as value2
from table_name lo
inner join table_name hi
    on hi.id = lo.id
    and hi.value > lo.value
这会给你输出:
ID VALUE1 VALUE2
---------- ---------- ----------
1 1 2
2 3 4
2 4 5
2 3 5