将一系列 0 和 1 转换为打印样式的页面列表
Convert a sequence of 0s and 1s to a print-style page list
我需要将一个由 0 和 1 组成的字符串转换为表示其中 1 所在位置的整数序列,类似于打印对话框中的页面选择序列。
例如'0011001110101' -> '3-4,7-9,11,13'
是否可以在单个 SQL select 中完成此操作(在 Oracle 11g 中)?
我可以通过以下方式获得页码的单独列表:
-- Sample bit strings, one row per key.
WITH data AS (
    SELECT 'K1' AS key, '0011001110101' AS val FROM dual
    UNION
    SELECT 'K2', '0101000110' FROM dual
    UNION
    SELECT 'K3', '011100011010' FROM dual
)
-- For each key, list the 1-based positions that hold a '1' as a flat
-- comma-separated string (adjacent positions are not merged into ranges).
SELECT
    key,
    LISTAGG(ords.column_value, ',')
        WITHIN GROUP (ORDER BY ords.column_value) AS pages
FROM data
CROSS JOIN (
    -- Collection of the numbers 1 .. LENGTH(val), built from the current
    -- data row's val.
    TABLE(
        CAST(
            MULTISET(
                SELECT LEVEL
                FROM dual
                CONNECT BY LEVEL <= LENGTH(val)
            ) AS sys.OdciNumberList
        )
    ) ords
)
-- Keep only the positions whose character is '1'.
WHERE SUBSTR(val, ords.column_value, 1) = '1'
GROUP BY key
但这并没有把连续的页码合并成范围(例如,第一行返回的值是“3,4,7,8,9,11,13”)。
如果我可以在每次值发生变化时分配一个新的组号,那么就可以使用分析函数获取每个组的最小值和最大值。也就是说,只要能生成如下结果,问题就解决了:
Key Page Val Group
K1 1 0 1
K1 2 0 1
K1 3 1 2
K1 4 1 2
K1 5 0 3
K1 6 0 3
K1 7 1 4
K1 8 1 4
K1 9 1 4
K1 10 0 5
K1 11 1 6
K1 12 0 7
K1 13 1 8
但我卡在了这一步。
有人有任何想法或其他方法吗?
首先,利用 LEVEL 把每一段连续的 1 逐行展开:
-- One row per run of consecutive 1s: istr is the run's 1-based start
-- position, strlen is the run text itself (its LENGTH is the run length).
WITH sample AS (
    SELECT '0011001110101' AS bits FROM dual
)
SELECT
    REGEXP_INSTR(bits, '1+', 1, LEVEL)  AS istr,
    REGEXP_SUBSTR(bits, '1+', 1, LEVEL) AS strlen
FROM sample
-- Keep descending one level per run until no further run is found.
CONNECT BY REGEXP_SUBSTR(bits, '1+', 1, LEVEL) IS NOT NULL
剩下的就很简单了,用 listagg 聚合即可:
-- Sample bit strings, one row per key. The keys are distinct, so UNION ALL
-- is sufficient and avoids the duplicate-elimination step of UNION.
WITH data AS (
    SELECT 'K1' AS key, '0011001110101' AS val FROM dual
    UNION ALL
    SELECT 'K2', '0101000110' FROM dual
    UNION ALL
    SELECT 'K3', '011100011010' FROM dual
)
-- For each key, turn every run of consecutive 1s into "start" (run of one)
-- or "start-end" (longer run) and join the pieces with commas, e.g.
-- '0011001110101' -> '3-4,7-9,11,13'.
-- NOTE(review): the hierarchical subquery always yields its LEVEL = 1 row, so
-- a val containing no '1' is presumably not rendered as NULL here -- confirm
-- whether such input can occur before relying on this query.
SELECT
    data.key,
    (
        SELECT
            LISTAGG(
                CASE
                    -- Run of exactly one 1: just the position.
                    WHEN LENGTH(REGEXP_SUBSTR(data.val, '1+', 1, LEVEL)) = 1
                        THEN TO_CHAR(REGEXP_INSTR(data.val, '1+', 1, LEVEL))
                    -- Longer run: start position, dash, end position.
                    ELSE
                        TO_CHAR(REGEXP_INSTR(data.val, '1+', 1, LEVEL))
                        || '-'
                        || TO_CHAR(
                            REGEXP_INSTR(data.val, '1+', 1, LEVEL)
                            + LENGTH(REGEXP_SUBSTR(data.val, '1+', 1, LEVEL))
                            - 1
                        )
                END,
                ','  -- plain comma, matching the requested '3-4,7-9,11,13' format
            ) WITHIN GROUP (ORDER BY REGEXP_INSTR(data.val, '1+', 1, LEVEL))
        FROM dual
        -- One level per run of 1s found in the current row's val.
        CONNECT BY REGEXP_SUBSTR(data.val, '1+', 1, LEVEL) IS NOT NULL
    ) AS val
FROM data
使用不带正则表达式的递归子查询分解子句:
Oracle 设置:
-- Test table: key identifies the row, val is the string of 0s and 1s.
-- K4 (no 1s at all) and K5 (a lone 1 in the last position) cover edge cases.
create table data (key, val) as
    select 'K1', '0011001110101' from dual
    union all
    select 'K2', '0101000110' from dual
    union all
    select 'K3', '011100011010' from dual
    union all
    select 'K4', '000000000000' from dual
    union all
    select 'K5', '000000000001' from dual;
查询:
-- Walks each val one run of 1s at a time, appending ",<page or range>" to rng
-- on every step; a row is finished once pos reaches 0 (no further 1 found).
with ranges (key, val, pos, rng) as (
    -- Anchor: start at the first 1 in val with an empty range list.
    select
        key,
        val,
        instr(val, '1', 1),
        null
    from data
    union all
    -- Step: emit the run that starts at pos, then jump to the next 1 that
    -- follows the first 0 after pos.
    select
        key,
        val,
        instr(val, '1', instr(val, '0', pos)),
        rng || ',' ||
        case
            -- lone 1: either the last character or directly followed by a 0
            when pos = length(val)
              or pos = instr(val, '0', pos) - 1
                then to_char(pos)
            -- no 0 after pos: the run extends to the end of the string
            when instr(val, '0', pos) = 0
                then pos || '-' || length(val)
            -- ordinary run: ends just before the next 0
            else pos || '-' || (instr(val, '0', pos) - 1)
        end
    from ranges
    where pos > 0
)
-- Keep only the finished row of each key (pos = 0), plus rows whose val is
-- NULL, which never enter the recursive step.
select
    key,
    val,
    substr(rng, 2) as rng  -- drop the comma that precedes the first entry
from ranges
where val is null
   or pos = 0
order by key;
输出
KEY VAL RNG
--- ------------- -------------
K1 0011001110101 3-4,7-9,11,13
K2 0101000110 2,4,8-9
K3 011100011010 2-4,8-9,11
K4 000000000000
K5 000000000001 12
这是 Isalamon 解决方案(使用分层查询)的稍微更有效的版本。它的效率稍微高一些,因为我使用单个分层查询而不是多个(在相关子查询中),并且我在内部查询中只计算每个 1 序列的长度一次。 (实际上反正只计算一次,但是函数调用本身有一定的开销。)
此版本还可以正确处理 '00000' 和 NULL 等输入。Isalamon 的方案不能正确处理这类输入,而 MT0 的方案在输入值为 NULL 时不会返回任何行。尚不清楚输入数据中是否可能出现 NULL,以及如果出现,期望的结果是什么;我假设此时应返回一行,且其 page_list 为 NULL。
此版本的优化器成本为 17,而 Isalamon 的解决方案为 18,MT0 的为 33。然而,优化器成本并没有考虑到与标准字符串函数相比,正则表达式的处理速度要慢得多;如果执行速度很重要,那么绝对应该尝试 MT0 的解决方案,因为它可能会更快。
WITH data (key, val) AS (
    SELECT 'K1', '0011001110101' FROM dual UNION ALL
    SELECT 'K2', '0101000110'    FROM dual UNION ALL
    SELECT 'K3', '011100011010'  FROM dual UNION ALL
    SELECT 'K4', '000000000000'  FROM dual UNION ALL
    SELECT 'K5', '000000000001'  FROM dual UNION ALL
    SELECT 'K6', NULL            FROM dual UNION ALL
    SELECT 'K7', '1111111'       FROM dual UNION ALL
    SELECT 'K8', '1'             FROM dual
)
-- End of test data (not part of the solution); SQL query begins below this line.
-- Builds the print-style page list per key: each run of 1s becomes "start"
-- or "start-end", and the pieces are joined with commas in run order.
SELECT
    key,
    val,
    LISTAGG(
        -- No ELSE branch: when no run exists (run_len is NULL) the CASE
        -- yields NULL, so page_list stays NULL for such rows.
        CASE
            WHEN run_len = 1 THEN TO_CHAR(start_pos)
            WHEN run_len > 1 THEN TO_CHAR(start_pos) || '-' || TO_CHAR(start_pos + run_len - 1)
        END,
        ','
    ) WITHIN GROUP (ORDER BY run_no) AS page_list
FROM (
    -- One row per (key, run of 1s); the start position and length of each run
    -- are computed once here and reused by the outer aggregation.
    SELECT
        key,
        val,
        LEVEL                                      AS run_no,
        REGEXP_INSTR(val, '1+', 1, LEVEL)          AS start_pos,
        LENGTH(REGEXP_SUBSTR(val, '1+', 1, LEVEL)) AS run_len
    FROM data
    CONNECT BY REGEXP_SUBSTR(val, '1+', 1, LEVEL) IS NOT NULL
           AND key = PRIOR key                 -- stay within the same input row
           -- NOTE(review): presumably present so Oracle does not report a
           -- CONNECT BY loop when a row connects to itself -- confirm.
           AND PRIOR SYS_GUID() IS NOT NULL
)
GROUP BY key, val
ORDER BY key
;
输出:
KEY VAL PAGE_LIST
--- ------------- -------------
K1 0011001110101 3-4,7-9,11,13
K2 0101000110 2,4,8-9
K3 011100011010 2-4,8-9,11
K4 000000000000
K5 000000000001 12
K6
K7 1111111 1-7
K8 1 1
我需要将一个由 0 和 1 组成的字符串转换为表示其中 1 所在位置的整数序列,类似于打印对话框中的页面选择序列。
例如'0011001110101' -> '3-4,7-9,11,13'
是否可以在单个 SQL select 中完成此操作(在 Oracle 11g 中)?
我可以通过以下方式获得页码的单独列表:
-- Sample bit strings, one row per key.
WITH data AS (
    SELECT 'K1' AS key, '0011001110101' AS val FROM dual
    UNION
    SELECT 'K2', '0101000110' FROM dual
    UNION
    SELECT 'K3', '011100011010' FROM dual
)
-- For each key, list the 1-based positions that hold a '1' as a flat
-- comma-separated string (adjacent positions are not merged into ranges).
SELECT
    key,
    LISTAGG(ords.column_value, ',')
        WITHIN GROUP (ORDER BY ords.column_value) AS pages
FROM data
CROSS JOIN (
    -- Collection of the numbers 1 .. LENGTH(val), built from the current
    -- data row's val.
    TABLE(
        CAST(
            MULTISET(
                SELECT LEVEL
                FROM dual
                CONNECT BY LEVEL <= LENGTH(val)
            ) AS sys.OdciNumberList
        )
    ) ords
)
-- Keep only the positions whose character is '1'.
WHERE SUBSTR(val, ords.column_value, 1) = '1'
GROUP BY key
但这并没有把连续的页码合并成范围(例如,第一行返回的值是“3,4,7,8,9,11,13”)。
如果我可以在每次值发生变化时分配一个新的组号,那么就可以使用分析函数获取每个组的最小值和最大值。也就是说,只要能生成如下结果,问题就解决了:
Key Page Val Group
K1 1 0 1
K1 2 0 1
K1 3 1 2
K1 4 1 2
K1 5 0 3
K1 6 0 3
K1 7 1 4
K1 8 1 4
K1 9 1 4
K1 10 0 5
K1 11 1 6
K1 12 0 7
K1 13 1 8
但我卡在了这一步。
有人有任何想法或其他方法吗?
首先,利用 LEVEL 把每一段连续的 1 逐行展开:
-- One row per run of consecutive 1s: istr is the run's 1-based start
-- position, strlen is the run text itself (its LENGTH is the run length).
WITH sample AS (
    SELECT '0011001110101' AS bits FROM dual
)
SELECT
    REGEXP_INSTR(bits, '1+', 1, LEVEL)  AS istr,
    REGEXP_SUBSTR(bits, '1+', 1, LEVEL) AS strlen
FROM sample
-- Keep descending one level per run until no further run is found.
CONNECT BY REGEXP_SUBSTR(bits, '1+', 1, LEVEL) IS NOT NULL
剩下的就很简单了,用 listagg 聚合即可:
-- Sample bit strings, one row per key. The keys are distinct, so UNION ALL
-- is sufficient and avoids the duplicate-elimination step of UNION.
WITH data AS (
    SELECT 'K1' AS key, '0011001110101' AS val FROM dual
    UNION ALL
    SELECT 'K2', '0101000110' FROM dual
    UNION ALL
    SELECT 'K3', '011100011010' FROM dual
)
-- For each key, turn every run of consecutive 1s into "start" (run of one)
-- or "start-end" (longer run) and join the pieces with commas, e.g.
-- '0011001110101' -> '3-4,7-9,11,13'.
-- NOTE(review): the hierarchical subquery always yields its LEVEL = 1 row, so
-- a val containing no '1' is presumably not rendered as NULL here -- confirm
-- whether such input can occur before relying on this query.
SELECT
    data.key,
    (
        SELECT
            LISTAGG(
                CASE
                    -- Run of exactly one 1: just the position.
                    WHEN LENGTH(REGEXP_SUBSTR(data.val, '1+', 1, LEVEL)) = 1
                        THEN TO_CHAR(REGEXP_INSTR(data.val, '1+', 1, LEVEL))
                    -- Longer run: start position, dash, end position.
                    ELSE
                        TO_CHAR(REGEXP_INSTR(data.val, '1+', 1, LEVEL))
                        || '-'
                        || TO_CHAR(
                            REGEXP_INSTR(data.val, '1+', 1, LEVEL)
                            + LENGTH(REGEXP_SUBSTR(data.val, '1+', 1, LEVEL))
                            - 1
                        )
                END,
                ','  -- plain comma, matching the requested '3-4,7-9,11,13' format
            ) WITHIN GROUP (ORDER BY REGEXP_INSTR(data.val, '1+', 1, LEVEL))
        FROM dual
        -- One level per run of 1s found in the current row's val.
        CONNECT BY REGEXP_SUBSTR(data.val, '1+', 1, LEVEL) IS NOT NULL
    ) AS val
FROM data
使用不带正则表达式的递归子查询分解子句:
Oracle 设置:
-- Test table: key identifies the row, val is the string of 0s and 1s.
-- K4 (no 1s at all) and K5 (a lone 1 in the last position) cover edge cases.
create table data (key, val) as
    select 'K1', '0011001110101' from dual
    union all
    select 'K2', '0101000110' from dual
    union all
    select 'K3', '011100011010' from dual
    union all
    select 'K4', '000000000000' from dual
    union all
    select 'K5', '000000000001' from dual;
查询:
-- Walks each val one run of 1s at a time, appending ",<page or range>" to rng
-- on every step; a row is finished once pos reaches 0 (no further 1 found).
with ranges (key, val, pos, rng) as (
    -- Anchor: start at the first 1 in val with an empty range list.
    select
        key,
        val,
        instr(val, '1', 1),
        null
    from data
    union all
    -- Step: emit the run that starts at pos, then jump to the next 1 that
    -- follows the first 0 after pos.
    select
        key,
        val,
        instr(val, '1', instr(val, '0', pos)),
        rng || ',' ||
        case
            -- lone 1: either the last character or directly followed by a 0
            when pos = length(val)
              or pos = instr(val, '0', pos) - 1
                then to_char(pos)
            -- no 0 after pos: the run extends to the end of the string
            when instr(val, '0', pos) = 0
                then pos || '-' || length(val)
            -- ordinary run: ends just before the next 0
            else pos || '-' || (instr(val, '0', pos) - 1)
        end
    from ranges
    where pos > 0
)
-- Keep only the finished row of each key (pos = 0), plus rows whose val is
-- NULL, which never enter the recursive step.
select
    key,
    val,
    substr(rng, 2) as rng  -- drop the comma that precedes the first entry
from ranges
where val is null
   or pos = 0
order by key;
输出
KEY VAL RNG
--- ------------- -------------
K1 0011001110101 3-4,7-9,11,13
K2 0101000110 2,4,8-9
K3 011100011010 2-4,8-9,11
K4 000000000000
K5 000000000001 12
这是 Isalamon 解决方案(使用分层查询)的稍微更有效的版本。它的效率稍微高一些,因为我使用单个分层查询而不是多个(在相关子查询中),并且我在内部查询中只计算每个 1 序列的长度一次。 (实际上反正只计算一次,但是函数调用本身有一定的开销。)
此版本还可以正确处理 '00000' 和 NULL 等输入。Isalamon 的方案不能正确处理这类输入,而 MT0 的方案在输入值为 NULL 时不会返回任何行。尚不清楚输入数据中是否可能出现 NULL,以及如果出现,期望的结果是什么;我假设此时应返回一行,且其 page_list 为 NULL。
此版本的优化器成本为 17,而 Isalamon 的解决方案为 18,MT0 的为 33。然而,优化器成本并没有考虑到与标准字符串函数相比,正则表达式的处理速度要慢得多;如果执行速度很重要,那么绝对应该尝试 MT0 的解决方案,因为它可能会更快。
WITH data (key, val) AS (
    SELECT 'K1', '0011001110101' FROM dual UNION ALL
    SELECT 'K2', '0101000110'    FROM dual UNION ALL
    SELECT 'K3', '011100011010'  FROM dual UNION ALL
    SELECT 'K4', '000000000000'  FROM dual UNION ALL
    SELECT 'K5', '000000000001'  FROM dual UNION ALL
    SELECT 'K6', NULL            FROM dual UNION ALL
    SELECT 'K7', '1111111'       FROM dual UNION ALL
    SELECT 'K8', '1'             FROM dual
)
-- End of test data (not part of the solution); SQL query begins below this line.
-- Builds the print-style page list per key: each run of 1s becomes "start"
-- or "start-end", and the pieces are joined with commas in run order.
SELECT
    key,
    val,
    LISTAGG(
        -- No ELSE branch: when no run exists (run_len is NULL) the CASE
        -- yields NULL, so page_list stays NULL for such rows.
        CASE
            WHEN run_len = 1 THEN TO_CHAR(start_pos)
            WHEN run_len > 1 THEN TO_CHAR(start_pos) || '-' || TO_CHAR(start_pos + run_len - 1)
        END,
        ','
    ) WITHIN GROUP (ORDER BY run_no) AS page_list
FROM (
    -- One row per (key, run of 1s); the start position and length of each run
    -- are computed once here and reused by the outer aggregation.
    SELECT
        key,
        val,
        LEVEL                                      AS run_no,
        REGEXP_INSTR(val, '1+', 1, LEVEL)          AS start_pos,
        LENGTH(REGEXP_SUBSTR(val, '1+', 1, LEVEL)) AS run_len
    FROM data
    CONNECT BY REGEXP_SUBSTR(val, '1+', 1, LEVEL) IS NOT NULL
           AND key = PRIOR key                 -- stay within the same input row
           -- NOTE(review): presumably present so Oracle does not report a
           -- CONNECT BY loop when a row connects to itself -- confirm.
           AND PRIOR SYS_GUID() IS NOT NULL
)
GROUP BY key, val
ORDER BY key
;
输出:
KEY VAL PAGE_LIST
--- ------------- -------------
K1 0011001110101 3-4,7-9,11,13
K2 0101000110 2,4,8-9
K3 011100011010 2-4,8-9,11
K4 000000000000
K5 000000000001 12
K6
K7 1111111 1-7
K8 1 1