使用 SQL 在 Table 中拆分记录
Split Records in Table using SQL
我有一个 table 有以下记录
PREFIX
BEGIN
END
DIV
INDUSTRY
1AB
00001
99999
LPSD
RI
1AB
00527
00528
MHUG
RI
我想拆分如下
PREFIX
BEGIN
END
DIV
INDUSTRY
1AB
00001
99999
LPSD
RI
1AB
00527
00528
MHUG
RI
1AB
00529
99999
LPSD
RI
如果你看到因为我们有一条前缀相同但开始和结束不同的记录,我想将第一条记录分成2条记录,一条从00001到00526,一条从00529到99999。
是否可以使用 SQL 查询
执行相同的操作
我认为这可行(但需要更大的数据集才能正确测试):
SELECT prefix,
"BEGIN",
"END",
CASE type
WHEN 1
THEN div
ELSE LAG(CASE type WHEN 1 THEN div END) IGNORE NULLS
OVER (PARTITION BY prefix, industry, grp ORDER BY "BEGIN")
END AS div,
industry
FROM (
SELECT prefix,
div,
industry,
value AS "BEGIN",
type,
SUM(type) OVER (PARTITION BY prefix, industry ORDER BY value) AS grp,
LEAD(value) OVER (PARTITION BY prefix, industry ORDER BY value) AS "END"
FROM table_name
UNPIVOT (value FOR type IN ("BEGIN" AS 1, "END" AS -1))
)
WHERE grp > 0
ORDER BY prefix, industry, "BEGIN";
其中,对于示例数据:
CREATE TABLE table_name (prefix, "BEGIN", "END", div, industry) AS
SELECT '1AB', '00001', '99999', 'LPSD', 'RI' FROM DUAL UNION ALL
SELECT '1AB', '00527', '00528', 'MHUG', 'RI' FROM DUAL;
输出:
PREFIX
BEGIN
END
DIV
INDUSTRY
1AB
00001
00527
LPSD
RI
1AB
00527
00528
MHUG
RI
1AB
00528
99999
LPSD
RI
db<>fiddle here
首先,用保留字(如 Begin、End ...)命名列不是一个好习惯。这可以使用像这样的 MODEL 子句来解决...
WITH
tbl AS
( Select '1AB' "PRFX", '00001' "BGN", '99999' "ND", 'LPSD' "DV", 'RI' "IND" From Dual UNION ALL
Select '1AB' "PRFX", '00527' "BGN", '00528' "ND", 'MHUG' "DV", 'RI' "IND" From Dual
)
SELECT DISTINCT
m.*,
t.DV "DV",
t.IND "IND"
FROM
(
SELECT
INDX,
PRFX, BGN, ND
FROM
( SELECT
PRFX "PRFX",
LISTAGG(BGN, ',') WITHIN GROUP (ORDER BY PRFX, BGN) "BGN",
LISTAGG(ND, ',') WITHIN GROUP (ORDER BY PRFX, ND) "ND"
FROM
tbl
GROUP BY
PRFX
)
MODEL
DIMENSION BY(0 as INDX)
MEASURES (PRFX, BGN, ND)
RULES ITERATE(3)
( PRFX[ITERATION_NUMBER+1] = PRFX[0],
BGN[ITERATION_NUMBER+1] = Nvl(SubStr(REPLACE(BGN[0], ',', ''), (ITERATION_NUMBER*5) + 1, 5), LPAD(To_Number(SubStr(REPLACE(ND[0], ',', ''), ((ITERATION_NUMBER-2)*5) + 1, 5)) + 1, 5, '0')),
ND[ITERATION_NUMBER+1] = CASE
WHEN SubStr(REPLACE(BGN[0], ',', ''), ((ITERATION_NUMBER+1)*5) + 1, 5) < SubStr(REPLACE(ND[0], ',', ''), (ITERATION_NUMBER*5) + 1, 5) THEN
LPAD(To_Number(SubStr(REPLACE(BGN[0], ',', ''), ((ITERATION_NUMBER+1)*5) + 1, 5)) - 1, 5, '0')
ELSE
SubStr(REPLACE(ND[0], ',', ''), ((ITERATION_NUMBER-1)*5) + 1, 5)
END
)
) m
LEFT JOIN
( SELECT
PRFX, DV,
IND,
Max(ND) OVER(PARTITION BY PRFX, DV ORDER BY PRFX, DV) "MAX_ND",
Min(BGN) OVER(PARTITION BY PRFX, DV ORDER BY PRFX, DV) "MIN_BGN"
FROM
tbl
) t ON (t.PRFX = m.PRFX And (t.MAX_ND = m.ND OR t.MIN_BGN = m.BGN))
WHERE
m.INDX > 0 And ND Is Not Null
ORDER BY
m.INDX
--
-- Result
-- INDX PRFX BGN ND DV IND
-- 1 1AB 00001 00526 LPSD RI
-- 2 1AB 00527 00528 MHUG RI
-- 3 1AB 00529 99999 LPSD RI
希望对您有所帮助。问候...
我有一个 table 有以下记录
PREFIX | BEGIN | END | DIV | INDUSTRY |
---|---|---|---|---|
1AB | 00001 | 99999 | LPSD | RI |
1AB | 00527 | 00528 | MHUG | RI |
我想拆分如下
PREFIX | BEGIN | END | DIV | INDUSTRY |
---|---|---|---|---|
1AB | 00001 | 99999 | LPSD | RI |
1AB | 00527 | 00528 | MHUG | RI |
1AB | 00529 | 99999 | LPSD | RI |
如果你看到因为我们有一条前缀相同但开始和结束不同的记录,我想将第一条记录分成2条记录,一条从00001到00526,一条从00529到99999。
是否可以使用 SQL 查询
执行相同的操作我认为这可行(但需要更大的数据集才能正确测试):
SELECT prefix,
"BEGIN",
"END",
CASE type
WHEN 1
THEN div
ELSE LAG(CASE type WHEN 1 THEN div END) IGNORE NULLS
OVER (PARTITION BY prefix, industry, grp ORDER BY "BEGIN")
END AS div,
industry
FROM (
SELECT prefix,
div,
industry,
value AS "BEGIN",
type,
SUM(type) OVER (PARTITION BY prefix, industry ORDER BY value) AS grp,
LEAD(value) OVER (PARTITION BY prefix, industry ORDER BY value) AS "END"
FROM table_name
UNPIVOT (value FOR type IN ("BEGIN" AS 1, "END" AS -1))
)
WHERE grp > 0
ORDER BY prefix, industry, "BEGIN";
其中,对于示例数据:
CREATE TABLE table_name (prefix, "BEGIN", "END", div, industry) AS
SELECT '1AB', '00001', '99999', 'LPSD', 'RI' FROM DUAL UNION ALL
SELECT '1AB', '00527', '00528', 'MHUG', 'RI' FROM DUAL;
输出:
PREFIX BEGIN END DIV INDUSTRY 1AB 00001 00527 LPSD RI 1AB 00527 00528 MHUG RI 1AB 00528 99999 LPSD RI
db<>fiddle here
首先,用保留字(如 Begin、End ...)命名列不是一个好习惯。这可以使用像这样的 MODEL 子句来解决...
WITH
tbl AS
( Select '1AB' "PRFX", '00001' "BGN", '99999' "ND", 'LPSD' "DV", 'RI' "IND" From Dual UNION ALL
Select '1AB' "PRFX", '00527' "BGN", '00528' "ND", 'MHUG' "DV", 'RI' "IND" From Dual
)
SELECT DISTINCT
m.*,
t.DV "DV",
t.IND "IND"
FROM
(
SELECT
INDX,
PRFX, BGN, ND
FROM
( SELECT
PRFX "PRFX",
LISTAGG(BGN, ',') WITHIN GROUP (ORDER BY PRFX, BGN) "BGN",
LISTAGG(ND, ',') WITHIN GROUP (ORDER BY PRFX, ND) "ND"
FROM
tbl
GROUP BY
PRFX
)
MODEL
DIMENSION BY(0 as INDX)
MEASURES (PRFX, BGN, ND)
RULES ITERATE(3)
( PRFX[ITERATION_NUMBER+1] = PRFX[0],
BGN[ITERATION_NUMBER+1] = Nvl(SubStr(REPLACE(BGN[0], ',', ''), (ITERATION_NUMBER*5) + 1, 5), LPAD(To_Number(SubStr(REPLACE(ND[0], ',', ''), ((ITERATION_NUMBER-2)*5) + 1, 5)) + 1, 5, '0')),
ND[ITERATION_NUMBER+1] = CASE
WHEN SubStr(REPLACE(BGN[0], ',', ''), ((ITERATION_NUMBER+1)*5) + 1, 5) < SubStr(REPLACE(ND[0], ',', ''), (ITERATION_NUMBER*5) + 1, 5) THEN
LPAD(To_Number(SubStr(REPLACE(BGN[0], ',', ''), ((ITERATION_NUMBER+1)*5) + 1, 5)) - 1, 5, '0')
ELSE
SubStr(REPLACE(ND[0], ',', ''), ((ITERATION_NUMBER-1)*5) + 1, 5)
END
)
) m
LEFT JOIN
( SELECT
PRFX, DV,
IND,
Max(ND) OVER(PARTITION BY PRFX, DV ORDER BY PRFX, DV) "MAX_ND",
Min(BGN) OVER(PARTITION BY PRFX, DV ORDER BY PRFX, DV) "MIN_BGN"
FROM
tbl
) t ON (t.PRFX = m.PRFX And (t.MAX_ND = m.ND OR t.MIN_BGN = m.BGN))
WHERE
m.INDX > 0 And ND Is Not Null
ORDER BY
m.INDX
--
-- Result
-- INDX PRFX BGN ND DV IND
-- 1 1AB 00001 00526 LPSD RI
-- 2 1AB 00527 00528 MHUG RI
-- 3 1AB 00529 99999 LPSD RI
希望对您有所帮助。问候...