在 Oracle 中,如何将数据透视到多个列中?
In Oracle, how do I pivot data into multiple columns?
我有下面这张测试表,需要得到下面的结果表。我试过 PIVOT,但它似乎只有在同时具备一个描述列和一个值列时才起作用,而这里我们只有值。有些 site_id 可能只有 1 个值,但有些最多可以有 3 个其他的 site_id_alt。唯一的 site_id 超过 2800 个,因此把它们逐个手工写进 PIVOT 子句并不可行。
我在想某种分区可能会起作用,但不知道从哪里开始。
-- Sample input: one row per (alternate id, site id) pair.
-- NOTE: this DROP fails if the table does not exist yet; on a fresh schema
-- that error can be ignored.
DROP TABLE site_id_cd_test;

-- VARCHAR2 is the type Oracle recommends; VARCHAR is only a synonym for it.
CREATE TABLE site_id_cd_test
(
    site_id_alt VARCHAR2(30),
    site_id     VARCHAR2(30)
);

-- Column lists are spelled out because the table declares site_id_alt
-- BEFORE site_id, which is easy to get backwards with positional VALUES.
INSERT INTO site_id_cd_test (site_id_alt, site_id) VALUES ('1A1-071', '1A1-071');
INSERT INTO site_id_cd_test (site_id_alt, site_id) VALUES ('1A1-071O', '1A1-071');
INSERT INTO site_id_cd_test (site_id_alt, site_id) VALUES ('030256', '1A1-071');
INSERT INTO site_id_cd_test (site_id_alt, site_id) VALUES ('04268384', '1A1-071');
INSERT INTO site_id_cd_test (site_id_alt, site_id) VALUES ('04268383', '1A1-072');
-- Desired output: one row per site_id with its alternates side by side.
-- NOTE: this DROP fails if the table does not exist yet.
DROP TABLE site_id_cd_result;

-- VARCHAR2 is the type Oracle recommends; VARCHAR is only a synonym for it.
CREATE TABLE site_id_cd_result
(
    site_id        VARCHAR2(30),
    site_id_alt_01 VARCHAR2(30),
    site_id_alt_02 VARCHAR2(30),
    site_id_alt_03 VARCHAR2(30)
);

-- Explicit column lists keep these rows valid if columns are added later.
INSERT INTO site_id_cd_result (site_id, site_id_alt_01, site_id_alt_02, site_id_alt_03)
VALUES ('1A1-071', '1A1-071O', '030256', '04268384');
INSERT INTO site_id_cd_result (site_id, site_id_alt_01, site_id_alt_02, site_id_alt_03)
VALUES ('1A1-072', '04268383', NULL, NULL);
编辑 9/19/19:
根据 @MT0 的反馈,我们已经完成了大约 90%(按行号进行透视的效果很好)。但当 site_id 与 site_id_alt 相同时,该值会被放到单独的一行中(如下面的示例表所示)。理想情况下,我们希望这个相同的值与该 site_id 的其他值位于同一行。
下面示例表的说明:
site_id_cd_result 是按行号进行透视后得到的结果表;
site_id_cd_result_02 是加上 site_id <> site_id_alt 条件后得到的结果表。
-- Sample of the output obtained when pivoting on row number: the alternate
-- that equals site_id lands on a row of its own.
-- NOTE: this DROP fails if the table does not exist yet.
DROP TABLE site_id_cd_result;

-- VARCHAR2 is the type Oracle recommends; VARCHAR is only a synonym for it.
CREATE TABLE site_id_cd_result
(
    site_id        VARCHAR2(30),
    site_id_alt_01 VARCHAR2(30),
    site_id_alt_02 VARCHAR2(30),
    site_id_alt_03 VARCHAR2(30),
    site_id_alt_04 VARCHAR2(30)
);

-- Explicit column lists make the position of each NULL unambiguous.
INSERT INTO site_id_cd_result (site_id, site_id_alt_01, site_id_alt_02, site_id_alt_03, site_id_alt_04)
VALUES ('1A1-071', '1A1-071', NULL, NULL, NULL);
INSERT INTO site_id_cd_result (site_id, site_id_alt_01, site_id_alt_02, site_id_alt_03, site_id_alt_04)
VALUES ('1A1-071', NULL, '040777', '04253626', '1A1-071O');
INSERT INTO site_id_cd_result (site_id, site_id_alt_01, site_id_alt_02, site_id_alt_03, site_id_alt_04)
VALUES ('1A1-072', '04268383', '123546', NULL, NULL);
-- Sample of the output obtained with the site_id <> site_id_alt filter:
-- one row per site_id, matching value left out.
-- NOTE: this DROP fails if the table does not exist yet.
DROP TABLE site_id_cd_result_02;

-- VARCHAR2 is the type Oracle recommends; VARCHAR is only a synonym for it.
CREATE TABLE site_id_cd_result_02
(
    site_id        VARCHAR2(30),
    site_id_alt_01 VARCHAR2(30),
    site_id_alt_02 VARCHAR2(30),
    site_id_alt_03 VARCHAR2(30),
    site_id_alt_04 VARCHAR2(30)
);

-- Explicit column lists make the position of each NULL unambiguous.
INSERT INTO site_id_cd_result_02 (site_id, site_id_alt_01, site_id_alt_02, site_id_alt_03, site_id_alt_04)
VALUES ('1A1-071', '040777', '04253626', '1A1-071O', NULL);
INSERT INTO site_id_cd_result_02 (site_id, site_id_alt_01, site_id_alt_02, site_id_alt_03, site_id_alt_04)
VALUES ('1A1-072', '04268383', '123546', NULL, NULL);
好吧,这种写法有点笨拙,但好处是确实能用:
-- Step 1 (site_alts): glue every alternate id of a site into a single
-- '|'-delimited string, sorted by the alternate id itself.
-- Step 2: pull the n-th '|'-free token back out into its own column;
-- positions past the last token come back as NULL.
WITH site_alts AS (
    SELECT
        site_id,
        LISTAGG(site_id_alt, '|') WITHIN GROUP (ORDER BY site_id_alt) AS alt_list
    FROM site_id_cd_test
    GROUP BY site_id
)
SELECT
    site_id,
    REGEXP_SUBSTR(alt_list, '[^|]+', 1, 1)  AS alt1,
    REGEXP_SUBSTR(alt_list, '[^|]+', 1, 2)  AS alt2,
    REGEXP_SUBSTR(alt_list, '[^|]+', 1, 3)  AS alt3,
    REGEXP_SUBSTR(alt_list, '[^|]+', 1, 4)  AS alt4,
    REGEXP_SUBSTR(alt_list, '[^|]+', 1, 5)  AS alt5,
    REGEXP_SUBSTR(alt_list, '[^|]+', 1, 6)  AS alt6,
    REGEXP_SUBSTR(alt_list, '[^|]+', 1, 7)  AS alt7,
    REGEXP_SUBSTR(alt_list, '[^|]+', 1, 8)  AS alt8,
    REGEXP_SUBSTR(alt_list, '[^|]+', 1, 9)  AS alt9,
    REGEXP_SUBSTR(alt_list, '[^|]+', 1, 10) AS alt10
FROM site_alts;
基本上,您将所有替代项 LISTAGG 成一个大字符串,然后将子字符串分解为单独的字段。限制是替代项+分隔符的总长度不能超过您选择的版本中 VARCHAR2 的最大允许大小。尽管如此,我还是见过(并写过 :-) 更糟的。
使用 ROW_NUMBER() 分析函数,在每个 SITE_ID 内为每个 SITE_ID_ALT 分配一个列号,然后就可以进行 PIVOT:
查询:
-- Number each site's alternates 1..n, then PIVOT those numbers into the
-- site_id_alt_01..03 columns of site_id_cd_result.
INSERT INTO site_id_cd_result ( site_id, site_id_alt_01, site_id_alt_02, site_id_alt_03 )
SELECT site_id,
       site_id_alt_01,
       site_id_alt_02,
       site_id_alt_03
FROM   (
  -- Project only the columns PIVOT needs. Every column that is neither
  -- aggregated nor named in FOR becomes an implicit grouping key, so
  -- SELECT t.* would split one site over several rows as soon as the
  -- table carries any additional column.
  SELECT t.site_id,
         t.site_id_alt,
         -- ORDER BY ROWNUM follows the order in which rows happen to be
         -- read; it is not a guaranteed, repeatable ordering.
         ROW_NUMBER() OVER ( PARTITION BY t.site_id ORDER BY ROWNUM ) AS rn
  FROM   site_id_cd_test t
  -- WHERE t.site_id <> t.site_id_alt -- If you don't want the matching value
)
-- Alternates numbered above 3 have no target column and are dropped.
PIVOT ( MAX( site_id_alt ) FOR rn IN (
  1 AS site_id_alt_01,
  2 AS site_id_alt_02,
  3 AS site_id_alt_03
) );
结果:
SELECT * FROM site_id_cd_result;
SITE_ID | SITE_ID_ALT_01 | SITE_ID_ALT_02 | SITE_ID_ALT_03
:------ | :------------- | :------------- | :-------------
1A1-071 | 1A1-071 | 1A1-071O | 030256
1A1-072 | 04268383 | null | null
db<>fiddle here
终于弄明白了!在一位同事的帮助下,我们想到对 site_id 使用 GROUP BY,并用 MAX 函数来选取各列的值,这样每个 site_id 只保留一行数据,开头也不会出现空值。也许还有别的做法,但目前这样可行。这种做法确实需要建两张表,我没能把它们合并成一张。
-- Staging table for the pivoted rows: site_id plus up to five alternates.
-- NOTE: this DROP fails if the table does not exist yet.
DROP TABLE site_id_cd_result;

-- VARCHAR2 is the type Oracle recommends; VARCHAR is only a synonym for it.
CREATE TABLE site_id_cd_result
(
    site_id        VARCHAR2(30),
    site_id_alt_01 VARCHAR2(30),
    site_id_alt_02 VARCHAR2(30),
    site_id_alt_03 VARCHAR2(30),
    site_id_alt_04 VARCHAR2(30),
    site_id_alt_05 VARCHAR2(30)
);
-- Number each site's alternates 1..n, then PIVOT those numbers into the
-- site_id_alt_01..05 columns of site_id_cd_result.
INSERT INTO site_id_cd_result (
  site_id, site_id_alt_01, site_id_alt_02, site_id_alt_03, site_id_alt_04, site_id_alt_05)
SELECT site_id,
       site_id_alt_01,
       site_id_alt_02,
       site_id_alt_03,
       site_id_alt_04,
       site_id_alt_05
FROM   (
  -- Project only the columns PIVOT needs. Every column that is neither
  -- aggregated nor named in FOR becomes an implicit grouping key, so
  -- SELECT t.* yields several rows per site_id whenever the source table
  -- has any additional column whose value differs between rows.
  SELECT t.site_id,
         t.site_id_alt,
         -- ORDER BY ROWNUM follows the order in which rows happen to be
         -- read; it is not a guaranteed, repeatable ordering.
         ROW_NUMBER() OVER ( PARTITION BY t.site_id ORDER BY ROWNUM ) AS rn
  -- NOTE(review): site_id_cd is not created anywhere in this script (the
  -- sample table is site_id_cd_test) - presumably the author's real table; confirm.
  FROM   site_id_cd t
)
-- Only as many pivot columns as the target table has; the former
-- "6 AS site_id_alt_06" entry was never selected or stored.
PIVOT ( MAX( site_id_alt ) FOR rn IN (
  1 AS site_id_alt_01,
  2 AS site_id_alt_02,
  3 AS site_id_alt_03,
  4 AS site_id_alt_04,
  5 AS site_id_alt_05
) );
-- Rebuild site_id_cd_result_02 as one row per site_id taken from
-- site_id_cd_result. MAX skips NULLs, so each alt column keeps the greatest
-- non-NULL value found among that site's rows.
DROP TABLE site_id_cd_result_02;
CREATE TABLE site_id_cd_result_02 AS
SELECT
    site_id,
    MAX(site_id_alt_01) AS site_id_alt_01,
    MAX(site_id_alt_02) AS site_id_alt_02,
    MAX(site_id_alt_03) AS site_id_alt_03,
    MAX(site_id_alt_04) AS site_id_alt_04,
    MAX(site_id_alt_05) AS site_id_alt_05
FROM site_id_cd_result
GROUP BY site_id;
我有下面这张测试表,需要得到下面的结果表。我试过 PIVOT,但它似乎只有在同时具备一个描述列和一个值列时才起作用,而这里我们只有值。有些 site_id 可能只有 1 个值,但有些最多可以有 3 个其他的 site_id_alt。唯一的 site_id 超过 2800 个,因此把它们逐个手工写进 PIVOT 子句并不可行。
我在想某种分区可能会起作用,但不知道从哪里开始。
-- Sample input: one row per (alternate id, site id) pair.
-- NOTE: this DROP fails if the table does not exist yet; on a fresh schema
-- that error can be ignored.
DROP TABLE site_id_cd_test;

-- VARCHAR2 is the type Oracle recommends; VARCHAR is only a synonym for it.
CREATE TABLE site_id_cd_test
(
    site_id_alt VARCHAR2(30),
    site_id     VARCHAR2(30)
);

-- Column lists are spelled out because the table declares site_id_alt
-- BEFORE site_id, which is easy to get backwards with positional VALUES.
INSERT INTO site_id_cd_test (site_id_alt, site_id) VALUES ('1A1-071', '1A1-071');
INSERT INTO site_id_cd_test (site_id_alt, site_id) VALUES ('1A1-071O', '1A1-071');
INSERT INTO site_id_cd_test (site_id_alt, site_id) VALUES ('030256', '1A1-071');
INSERT INTO site_id_cd_test (site_id_alt, site_id) VALUES ('04268384', '1A1-071');
INSERT INTO site_id_cd_test (site_id_alt, site_id) VALUES ('04268383', '1A1-072');
-- Desired output: one row per site_id with its alternates side by side.
-- NOTE: this DROP fails if the table does not exist yet.
DROP TABLE site_id_cd_result;

-- VARCHAR2 is the type Oracle recommends; VARCHAR is only a synonym for it.
CREATE TABLE site_id_cd_result
(
    site_id        VARCHAR2(30),
    site_id_alt_01 VARCHAR2(30),
    site_id_alt_02 VARCHAR2(30),
    site_id_alt_03 VARCHAR2(30)
);

-- Explicit column lists keep these rows valid if columns are added later.
INSERT INTO site_id_cd_result (site_id, site_id_alt_01, site_id_alt_02, site_id_alt_03)
VALUES ('1A1-071', '1A1-071O', '030256', '04268384');
INSERT INTO site_id_cd_result (site_id, site_id_alt_01, site_id_alt_02, site_id_alt_03)
VALUES ('1A1-072', '04268383', NULL, NULL);
编辑 9/19/19:
根据 @MT0 的反馈,我们已经完成了大约 90%(按行号进行透视的效果很好)。但当 site_id 与 site_id_alt 相同时,该值会被放到单独的一行中(如下面的示例表所示)。理想情况下,我们希望这个相同的值与该 site_id 的其他值位于同一行。
下面示例表的说明:
site_id_cd_result 是按行号进行透视后得到的结果表;
site_id_cd_result_02 是加上 site_id <> site_id_alt 条件后得到的结果表。
drop table site_id_cd_result;
-- Sample of the output obtained when pivoting on row number: the alternate
-- that equals site_id lands on a row of its own.
-- VARCHAR2 is the type Oracle recommends; VARCHAR is only a synonym for it.
CREATE TABLE site_id_cd_result
(
    site_id        VARCHAR2(30),
    site_id_alt_01 VARCHAR2(30),
    site_id_alt_02 VARCHAR2(30),
    site_id_alt_03 VARCHAR2(30),
    site_id_alt_04 VARCHAR2(30)
);

-- Explicit column lists make the position of each NULL unambiguous.
INSERT INTO site_id_cd_result (site_id, site_id_alt_01, site_id_alt_02, site_id_alt_03, site_id_alt_04)
VALUES ('1A1-071', '1A1-071', NULL, NULL, NULL);
INSERT INTO site_id_cd_result (site_id, site_id_alt_01, site_id_alt_02, site_id_alt_03, site_id_alt_04)
VALUES ('1A1-071', NULL, '040777', '04253626', '1A1-071O');
INSERT INTO site_id_cd_result (site_id, site_id_alt_01, site_id_alt_02, site_id_alt_03, site_id_alt_04)
VALUES ('1A1-072', '04268383', '123546', NULL, NULL);
-- Sample of the output obtained with the site_id <> site_id_alt filter:
-- one row per site_id, matching value left out.
-- NOTE: this DROP fails if the table does not exist yet.
DROP TABLE site_id_cd_result_02;

-- VARCHAR2 is the type Oracle recommends; VARCHAR is only a synonym for it.
CREATE TABLE site_id_cd_result_02
(
    site_id        VARCHAR2(30),
    site_id_alt_01 VARCHAR2(30),
    site_id_alt_02 VARCHAR2(30),
    site_id_alt_03 VARCHAR2(30),
    site_id_alt_04 VARCHAR2(30)
);

-- Explicit column lists make the position of each NULL unambiguous.
INSERT INTO site_id_cd_result_02 (site_id, site_id_alt_01, site_id_alt_02, site_id_alt_03, site_id_alt_04)
VALUES ('1A1-071', '040777', '04253626', '1A1-071O', NULL);
INSERT INTO site_id_cd_result_02 (site_id, site_id_alt_01, site_id_alt_02, site_id_alt_03, site_id_alt_04)
VALUES ('1A1-072', '04268383', '123546', NULL, NULL);
好吧,这种写法有点笨拙,但好处是确实能用:
-- Step 1 (site_alts): glue every alternate id of a site into a single
-- '|'-delimited string, sorted by the alternate id itself.
-- Step 2: pull the n-th '|'-free token back out into its own column;
-- positions past the last token come back as NULL.
WITH site_alts AS (
    SELECT
        site_id,
        LISTAGG(site_id_alt, '|') WITHIN GROUP (ORDER BY site_id_alt) AS alt_list
    FROM site_id_cd_test
    GROUP BY site_id
)
SELECT
    site_id,
    REGEXP_SUBSTR(alt_list, '[^|]+', 1, 1)  AS alt1,
    REGEXP_SUBSTR(alt_list, '[^|]+', 1, 2)  AS alt2,
    REGEXP_SUBSTR(alt_list, '[^|]+', 1, 3)  AS alt3,
    REGEXP_SUBSTR(alt_list, '[^|]+', 1, 4)  AS alt4,
    REGEXP_SUBSTR(alt_list, '[^|]+', 1, 5)  AS alt5,
    REGEXP_SUBSTR(alt_list, '[^|]+', 1, 6)  AS alt6,
    REGEXP_SUBSTR(alt_list, '[^|]+', 1, 7)  AS alt7,
    REGEXP_SUBSTR(alt_list, '[^|]+', 1, 8)  AS alt8,
    REGEXP_SUBSTR(alt_list, '[^|]+', 1, 9)  AS alt9,
    REGEXP_SUBSTR(alt_list, '[^|]+', 1, 10) AS alt10
FROM site_alts;
基本上,您将所有替代项 LISTAGG 成一个大字符串,然后将子字符串分解为单独的字段。限制是替代项+分隔符的总长度不能超过您选择的版本中 VARCHAR2 的最大允许大小。尽管如此,我还是见过(并写过 :-) 更糟的。
使用 ROW_NUMBER() 分析函数,在每个 SITE_ID 内为每个 SITE_ID_ALT 分配一个列号,然后就可以进行 PIVOT:
查询:
-- Number each site's alternates 1..n, then PIVOT those numbers into the
-- site_id_alt_01..03 columns of site_id_cd_result.
INSERT INTO site_id_cd_result ( site_id, site_id_alt_01, site_id_alt_02, site_id_alt_03 )
SELECT site_id,
       site_id_alt_01,
       site_id_alt_02,
       site_id_alt_03
FROM   (
  -- Project only the columns PIVOT needs. Every column that is neither
  -- aggregated nor named in FOR becomes an implicit grouping key, so
  -- SELECT t.* would split one site over several rows as soon as the
  -- table carries any additional column.
  SELECT t.site_id,
         t.site_id_alt,
         -- ORDER BY ROWNUM follows the order in which rows happen to be
         -- read; it is not a guaranteed, repeatable ordering.
         ROW_NUMBER() OVER ( PARTITION BY t.site_id ORDER BY ROWNUM ) AS rn
  FROM   site_id_cd_test t
  -- WHERE t.site_id <> t.site_id_alt -- If you don't want the matching value
)
-- Alternates numbered above 3 have no target column and are dropped.
PIVOT ( MAX( site_id_alt ) FOR rn IN (
  1 AS site_id_alt_01,
  2 AS site_id_alt_02,
  3 AS site_id_alt_03
) );
结果:
SELECT * FROM site_id_cd_result;
SITE_ID | SITE_ID_ALT_01 | SITE_ID_ALT_02 | SITE_ID_ALT_03
:------ | :------------- | :------------- | :-------------
1A1-071 | 1A1-071 | 1A1-071O | 030256
1A1-072 | 04268383 | null | null
db<>fiddle here
终于弄明白了!在一位同事的帮助下,我们想到对 site_id 使用 GROUP BY,并用 MAX 函数来选取各列的值,这样每个 site_id 只保留一行数据,开头也不会出现空值。也许还有别的做法,但目前这样可行。这种做法确实需要建两张表,我没能把它们合并成一张。
-- Staging table for the pivoted rows: site_id plus up to five alternates.
-- NOTE: this DROP fails if the table does not exist yet.
DROP TABLE site_id_cd_result;

-- VARCHAR2 is the type Oracle recommends; VARCHAR is only a synonym for it.
CREATE TABLE site_id_cd_result
(
    site_id        VARCHAR2(30),
    site_id_alt_01 VARCHAR2(30),
    site_id_alt_02 VARCHAR2(30),
    site_id_alt_03 VARCHAR2(30),
    site_id_alt_04 VARCHAR2(30),
    site_id_alt_05 VARCHAR2(30)
);
-- Number each site's alternates 1..n, then PIVOT those numbers into the
-- site_id_alt_01..05 columns of site_id_cd_result.
INSERT INTO site_id_cd_result (
  site_id, site_id_alt_01, site_id_alt_02, site_id_alt_03, site_id_alt_04, site_id_alt_05)
SELECT site_id,
       site_id_alt_01,
       site_id_alt_02,
       site_id_alt_03,
       site_id_alt_04,
       site_id_alt_05
FROM   (
  -- Project only the columns PIVOT needs. Every column that is neither
  -- aggregated nor named in FOR becomes an implicit grouping key, so
  -- SELECT t.* yields several rows per site_id whenever the source table
  -- has any additional column whose value differs between rows.
  SELECT t.site_id,
         t.site_id_alt,
         -- ORDER BY ROWNUM follows the order in which rows happen to be
         -- read; it is not a guaranteed, repeatable ordering.
         ROW_NUMBER() OVER ( PARTITION BY t.site_id ORDER BY ROWNUM ) AS rn
  -- NOTE(review): site_id_cd is not created anywhere in this script (the
  -- sample table is site_id_cd_test) - presumably the author's real table; confirm.
  FROM   site_id_cd t
)
-- Only as many pivot columns as the target table has; the former
-- "6 AS site_id_alt_06" entry was never selected or stored.
PIVOT ( MAX( site_id_alt ) FOR rn IN (
  1 AS site_id_alt_01,
  2 AS site_id_alt_02,
  3 AS site_id_alt_03,
  4 AS site_id_alt_04,
  5 AS site_id_alt_05
) );
-- Rebuild site_id_cd_result_02 as one row per site_id taken from
-- site_id_cd_result. MAX skips NULLs, so each alt column keeps the greatest
-- non-NULL value found among that site's rows.
DROP TABLE site_id_cd_result_02;
CREATE TABLE site_id_cd_result_02 AS
SELECT
    site_id,
    MAX(site_id_alt_01) AS site_id_alt_01,
    MAX(site_id_alt_02) AS site_id_alt_02,
    MAX(site_id_alt_03) AS site_id_alt_03,
    MAX(site_id_alt_04) AS site_id_alt_04,
    MAX(site_id_alt_05) AS site_id_alt_05
FROM site_id_cd_result
GROUP BY site_id;