如何在 Oracle 中将字符串分成百分比部分和普通文本?
How to segregate a string into percentage component and normal text in Oracle?
给定一个字符串,如
73 % polyester, 20 % modacrylic, 7 % cotton
应该有 3 行 2 列:
Percentage
Component
73
polyester
20
modacrylic
7
cotton
5,5% cotton, 20% modacrylic, 74,5 % polyester, min. 90 % recycled material
输出应该有 3 行:
Percentage
Component
5,5
cotton
20
modacrylic
74,5
polyester
如果字符串不以数字字符开头,则默认情况下百分比列的值应为 100,组件应获取逗号 (',') 之前的所有字符
例如
Polyester fibre , 150 g/sq.m.
Percentage
Component
100
Polyester fibre
我已经编写了以下逻辑,但它没有按预期工作:
-- First attempt from the question. Two inline views (a and b) are built from
-- the same hard-coded material string and joined on rowseq.
-- NOTE(review): a.str is derived from the fragment numbered 1 of the '%' split
-- and b.str from the fragment numbered 2; verify that the component/percentage
-- aliases below are not swapped.
SELECT
a.item_no,
a.item_type,
a.code_sup,
a.type_sup,
a.from_dtime,
a.id,
a.material_name,
a.str AS component,
b.str AS percentage
FROM
-- Inline view a: takes the kept fragment and splits it again on '/'.
( SELECT
item_no,
item_type,
code_sup,
type_sup,
from_dtime,
id,
material_name,
level rowseq,
regexp_substr(str,'[^/]+',1,ROWNUM) str
FROM
-- Innermost view: splits the literal on '%' and numbers the fragments.
( SELECT
'23456' item_no,
'PLASTIC' item_type,
'10121' code_sup,
'SUP' type_sup,
'27-Nov-2020' from_dtime,
'1.1' id,
'26,5 % polyester, min. 90% recycled, 67 % cotton' material_name,
level rowseq,
-- replace() rewrites '% /' to '%/'; this literal contains no '/', so the call changes nothing here.
regexp_substr(replace('26,5 % polyester, min. 90% recycled, 67 % cotton','% /','%/'),'[^%]+',1,ROWNUM) str
FROM
dual
CONNECT BY
level <= regexp_count('26,5 % polyester, min. 90% recycled, 67 % cotton','[^%]+')
)
-- rowseq in this WHERE is the innermost view's column (a select-list alias is
-- not visible in WHERE), so only the fragment numbered 1 is kept.
WHERE
rowseq = 1
CONNECT BY
level <= regexp_count(str,'[^/]+')
) a,
-- Inline view b: same construction, but keeps the fragment numbered 2, splits
-- on '/' or '%', trims the result and numbers its rows with ROWNUM instead of LEVEL.
(
SELECT
item_no,
item_type,
code_sup,
type_sup,
from_dtime,
id,
material_name,
ROWNUM rowseq,
TRIM(regexp_substr(str,'[^/%]+',1,ROWNUM) ) str
FROM
( SELECT
'23456' item_no,
'PLASTIC' item_type,
'10121' code_sup,
'SUP' type_sup,
'27-Nov-2020' from_dtime,
'1.1' id,
'26,5 % polyester, min. 90% recycled, 67 % cotton' material_name,
level rowseq,
regexp_substr(replace('26,5 % polyester, min. 90% recycled, 67 % cotton','% /','%/'),'[^%]+',1,ROWNUM) str
FROM
dual
CONNECT BY
level <= regexp_count('26,5 % polyester, min. 90% recycled, 67 % cotton','[^%]+')
)
WHERE
rowseq = 2
CONNECT BY
level <= regexp_count(str,'[^/%]+')
) b
-- Old-style comma join: a and b are paired on their generated row numbers.
-- NOTE(review): each view keeps a single '%' fragment, which presumably is why
-- only one result row comes back and the "67 ... cotton" pair never appears.
WHERE a.rowseq = b.rowseq
AND a.str IS NOT NULL
ORDER BY
a.item_no,
a.item_type,
a.code_sup,
a.type_sup,
a.from_dtime,
a.id ;
这里我只得到 1 行(百分比为 26,5,成分为 "polyester, min. 90"),而我应该得到 2 行:
Percentage
Component
26,5
polyester
67
cotton
请指导。
我试过这个逻辑,但它不适用于以字符开头的 material 名称,它应该设置默认百分比 100
-- Second attempt from the question. CTE "parsed" splits the hard-coded material
-- string on ',' into one split_value per row; CTE "in_pairs" keeps the elements
-- that contain '%' and cuts each into percentage and component.
WITH parsed as(
-- NOTE(review): the hint names alias t, but no alias t is visible in this query block.
SELECT /*+ parallel(t,8) materialize */
'80393904' item_no,
'ART' item_type,
'22025' bu_code_sup,
'SUP' bu_type_sup,
'27-FEB-2020' from_dtime,
1.4 id,
'PPCO, grade 4 acc. to spec. AA-168522' material_name,
-- NOTE(review): replace(...,'% ','% ') replaces a string with itself, and the
-- replacement '.' does not reuse the digits captured by (\d+),(\d+) - confirm
-- whether backreferences were intended here.
regexp_substr(REGEXP_REPLACE(replace(replace('PPCO, grade 4 acc. to spec. AA-168522','% ','% '),', ',','), '(\d+),(\d+)', '.'),'[^,]+',1,ROWNUM)
AS split_value
FROM dual
-- One generated row per comma-separated element counted in the literal.
CONNECT BY level <= regexp_count(REGEXP_REPLACE(replace('PPCO, grade 4 acc. to spec. AA-168522','% /','%/'), '(\d+),(\d+)', '.'),'[^,]+')
)
,in_pairs as(
select /*+ parallel(k,8) materialize */
item_no,item_type,bu_code_sup,bu_type_sup,from_dtime,id,material_name
-- percentage: leading run of digits with optional dots; component: text after the first '%'.
,regexp_substr(split_value, '[0-9]*[.]*[0-9]*') as percentage
,trim(substr(split_value, instr(split_value, '%') + 1)) as component
-- Only elements containing a literal '%' pass this filter. The literal above has
-- no '%', so no row survives and no default percentage is ever produced.
from parsed k where split_value LIKE '%\%%' ESCAPE '\'
)
select /*+ parallel(it,8) */
distinct item_no,item_type,bu_code_sup,bu_type_sup,from_dtime,id,material_name,percentage,component from in_pairs it
;
如果您将字符串视为 csv,那么有一种方法可以从 csv 字符串中拆分元素并提取百分比和分量。代码内评论:
-- Treats the input as a delimited list: a MODEL clause splits it into elements,
-- elements that do not start with a digit get '100 % ' prefixed, and each
-- element is then cut at its first '%' into PERCENTAGE and COMPONENT.
Select
TRIM(SubStr(COL1, 1, InStr(COL1, '%') - 1)) "PERCENTAGE",
TRIM(SubStr(COL1, InStr(COL1, '%') + 1)) "COMPONENT"
FROM
(
SELECT
COL1
FROM
(
SELECT
INDX,
MY_STR1,
COL1_ELEMENTS,
-- Default percentage: prefix '100 % ' when the element does not begin with a digit.
CASE WHEN SubStr(TRIM(COL1), 1, 1) IN('0','1','2','3','4','5','6','7','8','9') THEN COL1 ELSE '100 % ' || COL1 END "COL1"
FROM
(
SELECT
0 "INDX",
COL1 "MY_STR1",
COL1_ELEMENTS "COL1_ELEMENTS",
COL1 "COL1"
FROM
(
SELECT
-- COL1: input with "delimiter + blank" collapsed to the bare delimiter.
REPLACE(COL1, DELIMITER || ' ', DELIMITER) "COL1",
-- COL1_ELEMENTS: length difference after TRANSLATE removes the delimiters, plus one,
-- i.e. the number of delimited elements.
Trim(Length(Replace(COL1, DELIMITER || ' ', DELIMITER))) - Trim(Length(Translate(REPLACE(COL1, DELIMITER || ' ', DELIMITER), 'A' || DELIMITER, 'A'))) + 1 "COL1_ELEMENTS"
FROM (SELECT
'73 % polyester, 20 % modacrylic, something else, 67 % cotton' "COL1", ',' "DELIMITER" -- input string and delimiter; if the delimiter is not a comma (,), also replace ',' with it in the RULES clause
FROM DUAL)
)
)
MODEL
DIMENSION BY(0 as INDX)
MEASURES(COL1, COL1_ELEMENTS, CAST('a' as VarChar2(4000)) as MY_STR1)
RULES ITERATE (10) --UNTIL (ITERATION_NUMBER <= COL1_ELEMENTS[ITERATION_NUMBER + 1]) -- the iteration count must be at least the expected number of elements; otherwise only that many elements are split
(
-- Each iteration writes cell ITERATION_NUMBER + 1: it copies the element count,
-- stores the remainder after the first ',' in MY_STR1, and stores the text
-- before that ',' (or the whole remainder when no ',' is left) in COL1.
COL1_ELEMENTS[ITERATION_NUMBER + 1] = COL1_ELEMENTS[0],
MY_STR1[0] = COL1[CV()],
MY_STR1[ITERATION_NUMBER + 1] = SubStr(MY_STR1[ITERATION_NUMBER], InStr(MY_STR1[ITERATION_NUMBER], ',', 1) + Length(',')),
COL1[ITERATION_NUMBER + 1] = SubStr(MY_STR1[ITERATION_NUMBER], 1, CASE WHEN InStr(MY_STR1[ITERATION_NUMBER], ',') <> 0 THEN InStr(MY_STR1[ITERATION_NUMBER], ',')-1 ELSE Length(MY_STR1[ITERATION_NUMBER]) END)
)
)
-- Keep only the generated cells 1..COL1_ELEMENTS; cell 0 holds the unsplit input.
WHERE INDX > 0 And INDX <= COL1_ELEMENTS
)
--
-- Result:
--
-- PERCENTAGE COMPONENT
-- 73 polyester
-- 20 modacrylic
-- 100 something else
-- 67 cotton
您需要一步一步地、彻底地进行字符串转换。
1. 删除逗号和百分号后面的空格:
-- Strip the blank that follows '%' and the blank that follows ','.
replace(replace('26,5 % polyester, min. 90% recycled, 67 % cotton','% ','%'),', ',',')
2. 使用“,”代替“%”作为字符串分隔符(先把小数逗号换成小数点):
-- '\1.\2' keeps the digits around a decimal comma ("26,5" -> "26.5"), so ',' is left only as the element separator; LEVEL selects the n-th element.
regexp_substr(REGEXP_REPLACE(replace(replace('26,5 % polyester, min. 90% recycled, 67 % cotton','% ','%'),', ',','), '(\d+),(\d+)', '\1.\2'),'[^,]+',1,LEVEL) str
3. 筛选第 1 行和第 3 行,而不只是第 1 行:
WHERE rowseq IN (1, 3)
- 不确定为什么需要第 13 行的嵌套查询和从第 46 行开始的连接子查询“b”(多余)
- 做你的组件,使用“%”进行百分比分割
-- Groups: \1 = leading number, \2 = '%', \3 = text after '%'. Blanks around '%' are tolerated.
TRIM(REGEXP_REPLACE(a.str, '^(\d+\.?\d*)\s*(%)\s*([^,]*)$','\3')) AS component,
REGEXP_REPLACE(a.str, '^(\d+\.?\d*)\s*(%)\s*([^,]*)$','\1') AS percentage
你会得到..
-- Splits the hard-coded material string into comma-separated elements (inline
-- view a) and extracts component / percentage from elements 1 and 3.
-- Fixes over the previous version:
--   * regex replacements use backreferences again ('\1.\2', '\3', '\1');
--     an empty or '.' replacement discarded the captured text
--   * the no-op replace('% ','% ') now strips the blank after '%'
--   * LEVEL, not ROWNUM, picks the n-th element
--   * the element count uses the same normalised string as the split
-- The percentage is returned with '.' as decimal separator (e.g. 26.5).
SELECT
    a.item_no,
    a.item_type,
    a.code_sup,
    a.type_sup,
    a.from_dtime,
    a.id,
    a.material_name,
    a.rowseq,
    -- \3 = text after the '%' sign
    TRIM(REGEXP_REPLACE(a.str, '^(\d+\.?\d*)\s*(%)\s*([^,]*)$', '\3')) AS component,
    -- \1 = leading number
    REGEXP_REPLACE(a.str, '^(\d+\.?\d*)\s*(%)\s*([^,]*)$', '\1') AS percentage
FROM
    ( SELECT
          '23456' item_no,
          'PLASTIC' item_type,
          '10121' code_sup,
          'SUP' type_sup,
          '27-Nov-2020' from_dtime,
          '1.1' id,
          '26,5 % polyester, min. 90% recycled, 67 % cotton' material_name,
          LEVEL rowseq,
          -- decimal comma -> dot first, so ',' only separates elements
          REGEXP_SUBSTR(
              REGEXP_REPLACE(
                  REPLACE(REPLACE('26,5 % polyester, min. 90% recycled, 67 % cotton', '% ', '%'), ', ', ','),
                  '(\d+),(\d+)', '\1.\2'),
              '[^,]+', 1, LEVEL) str
      FROM
          dual
      CONNECT BY
          LEVEL <= REGEXP_COUNT(
                       REGEXP_REPLACE(
                           REPLACE(REPLACE('26,5 % polyester, min. 90% recycled, 67 % cotton', '% ', '%'), ', ', ','),
                           '(\d+),(\d+)', '\1.\2'),
                       '[^,]+')
    ) a
-- Elements 1 and 3 carry the "<number> % <component>" pairs of this literal;
-- element 2 ("min. 90% recycled") is a remark.
WHERE a.rowseq IN (1, 3)
ORDER BY
    a.item_no,
    a.item_type,
    a.code_sup,
    a.type_sup,
    a.from_dtime,
    a.id,
    a.rowseq ;
对于更复杂的场景和更健壮的代码,请执行以下操作。
当然,您需要在 WITH 子句中仔细指定您的转换规则(RULES);我并不太喜欢嵌套语句,但这个用例需要它。
-- SQL*Plus bind variable holding the raw specification text.
var the_specification varchar2(500)
exec :the_specification := '5,5% cotton, 20% modacrylic, 74,5 % polyester, min. 90 % recycled material, Polyester fibre , 150 g/sq.m.'

-- specstr normalises the specification into a clean comma-separated list; the
-- main query splits it and returns one PERCENTAGE / COMPONENT row per element
-- that contains '%'.
-- Fixes over the previous version: all regex replacements use backreferences
-- again (they were empty or partial, which discarded the captured text), the
-- comma clean-up no longer leaves a blank behind for " , ", the component is
-- trimmed, and LEVEL (not ROWNUM) picks the n-th element.
WITH specstr AS (
    SELECT CASE
               WHEN REGEXP_LIKE(rndOne.refined_str, '%') THEN
                   rndOne.refined_str
               ELSE
                   -- No '%' anywhere: give the first element made only of
                   -- letters, digits and blanks the default '100% '. Exactly one
                   -- of \1, \2, \3 is set, depending on which alternative matched.
                   -- NOTE: every alternative needs a ',', so a specification with
                   -- a single element is left unchanged.
                   REGEXP_REPLACE(rndOne.refined_str,
                                  '(^[[:alnum:][:space:]]+),|,([[:alnum:][:space:]]+),|,([[:alnum:][:space:]]+$)',
                                  ',100% \1\2\3,', 1, 1)
           END AS refined_str
    FROM (
        SELECT
            REGEXP_REPLACE(
             REGEXP_REPLACE(
              REGEXP_REPLACE(
               REGEXP_REPLACE(
                REGEXP_REPLACE(
                 REGEXP_REPLACE(:the_specification, '\s*%', '% '), -- drop white space in front of '%', keep one blank after it
                 '(\d+),(\d+)', '\1.\2'),                           -- decimal comma -> dot, so ',' only separates elements
                '(,*[^,]*recycl[^,]*,*)', ','),                     -- THE_RULE: an element mentioning 'recycl' is a remark, drop it
               '\s{2,}', ' '),                                      -- collapse runs of white space
              '(\s*,\s*)+', ','),                                   -- collapse duplicate commas and white space around commas
             '^,*|,*$', '') refined_str                             -- strip commas at the start and end
        FROM DUAL
    ) rndOne
)
SELECT REGEXP_REPLACE(mtrl_ratio.str, '^(\d+,*\d*)(%)([^,]*)$', '\1') Percentage,
       TRIM(REGEXP_REPLACE(mtrl_ratio.str, '^(\d+,*\d*)(%)([^,]*)$', '\3')) Component
FROM (
    SELECT
        LEVEL rowseq,
        REGEXP_REPLACE(
            REGEXP_SUBSTR(specstr.refined_str, '[^,]+', 1, LEVEL), -- n-th comma-separated element
            '(\d+)\.(\d+)', '\1,\2') str                            -- restore ',' as decimal separator
    FROM
        specstr
    CONNECT BY
        LEVEL <= REGEXP_COUNT(specstr.refined_str, '[^,]+')
) mtrl_ratio
WHERE REGEXP_LIKE(mtrl_ratio.str, '%')
ORDER BY mtrl_ratio.rowseq;
给定一个字符串,如
73 % polyester, 20 % modacrylic, 7 % cotton
应该有 3 行 2 列:
Percentage | Component |
---|---|
73 | polyester |
20 | modacrylic |
7 | cotton |
5,5% cotton, 20% modacrylic, 74,5 % polyester, min. 90 % recycled material
输出应该有 3 行:
Percentage | Component |
---|---|
5,5 | cotton |
20 | modacrylic |
74,5 | polyester |
如果字符串不以数字字符开头,则默认情况下百分比列的值应为 100,组件应获取逗号 (',') 之前的所有字符
例如
Polyester fibre , 150 g/sq.m.
Percentage | Component |
---|---|
100 | Polyester fibre |
我已经编写了以下逻辑,但它没有按预期工作:
-- First attempt from the question. Two inline views (a and b) are built from
-- the same hard-coded material string and joined on rowseq.
-- NOTE(review): a.str is derived from the fragment numbered 1 of the '%' split
-- and b.str from the fragment numbered 2; verify that the component/percentage
-- aliases below are not swapped.
SELECT
a.item_no,
a.item_type,
a.code_sup,
a.type_sup,
a.from_dtime,
a.id,
a.material_name,
a.str AS component,
b.str AS percentage
FROM
-- Inline view a: takes the kept fragment and splits it again on '/'.
( SELECT
item_no,
item_type,
code_sup,
type_sup,
from_dtime,
id,
material_name,
level rowseq,
regexp_substr(str,'[^/]+',1,ROWNUM) str
FROM
-- Innermost view: splits the literal on '%' and numbers the fragments.
( SELECT
'23456' item_no,
'PLASTIC' item_type,
'10121' code_sup,
'SUP' type_sup,
'27-Nov-2020' from_dtime,
'1.1' id,
'26,5 % polyester, min. 90% recycled, 67 % cotton' material_name,
level rowseq,
-- replace() rewrites '% /' to '%/'; this literal contains no '/', so the call changes nothing here.
regexp_substr(replace('26,5 % polyester, min. 90% recycled, 67 % cotton','% /','%/'),'[^%]+',1,ROWNUM) str
FROM
dual
CONNECT BY
level <= regexp_count('26,5 % polyester, min. 90% recycled, 67 % cotton','[^%]+')
)
-- rowseq in this WHERE is the innermost view's column (a select-list alias is
-- not visible in WHERE), so only the fragment numbered 1 is kept.
WHERE
rowseq = 1
CONNECT BY
level <= regexp_count(str,'[^/]+')
) a,
-- Inline view b: same construction, but keeps the fragment numbered 2, splits
-- on '/' or '%', trims the result and numbers its rows with ROWNUM instead of LEVEL.
(
SELECT
item_no,
item_type,
code_sup,
type_sup,
from_dtime,
id,
material_name,
ROWNUM rowseq,
TRIM(regexp_substr(str,'[^/%]+',1,ROWNUM) ) str
FROM
( SELECT
'23456' item_no,
'PLASTIC' item_type,
'10121' code_sup,
'SUP' type_sup,
'27-Nov-2020' from_dtime,
'1.1' id,
'26,5 % polyester, min. 90% recycled, 67 % cotton' material_name,
level rowseq,
regexp_substr(replace('26,5 % polyester, min. 90% recycled, 67 % cotton','% /','%/'),'[^%]+',1,ROWNUM) str
FROM
dual
CONNECT BY
level <= regexp_count('26,5 % polyester, min. 90% recycled, 67 % cotton','[^%]+')
)
WHERE
rowseq = 2
CONNECT BY
level <= regexp_count(str,'[^/%]+')
) b
-- Old-style comma join: a and b are paired on their generated row numbers.
-- NOTE(review): each view keeps a single '%' fragment, which presumably is why
-- only one result row comes back and the "67 ... cotton" pair never appears.
WHERE a.rowseq = b.rowseq
AND a.str IS NOT NULL
ORDER BY
a.item_no,
a.item_type,
a.code_sup,
a.type_sup,
a.from_dtime,
a.id ;
这里我只得到 1 行(百分比为 26,5,成分为 "polyester, min. 90"),而我应该得到 2 行:
Percentage | Component |
---|---|
26,5 | polyester |
67 | cotton |
请指导。
我试过这个逻辑,但它不适用于以字符开头的 material 名称,它应该设置默认百分比 100
-- Second attempt from the question. CTE "parsed" splits the hard-coded material
-- string on ',' into one split_value per row; CTE "in_pairs" keeps the elements
-- that contain '%' and cuts each into percentage and component.
WITH parsed as(
-- NOTE(review): the hint names alias t, but no alias t is visible in this query block.
SELECT /*+ parallel(t,8) materialize */
'80393904' item_no,
'ART' item_type,
'22025' bu_code_sup,
'SUP' bu_type_sup,
'27-FEB-2020' from_dtime,
1.4 id,
'PPCO, grade 4 acc. to spec. AA-168522' material_name,
-- NOTE(review): replace(...,'% ','% ') replaces a string with itself, and the
-- replacement '.' does not reuse the digits captured by (\d+),(\d+) - confirm
-- whether backreferences were intended here.
regexp_substr(REGEXP_REPLACE(replace(replace('PPCO, grade 4 acc. to spec. AA-168522','% ','% '),', ',','), '(\d+),(\d+)', '.'),'[^,]+',1,ROWNUM)
AS split_value
FROM dual
-- One generated row per comma-separated element counted in the literal.
CONNECT BY level <= regexp_count(REGEXP_REPLACE(replace('PPCO, grade 4 acc. to spec. AA-168522','% /','%/'), '(\d+),(\d+)', '.'),'[^,]+')
)
,in_pairs as(
select /*+ parallel(k,8) materialize */
item_no,item_type,bu_code_sup,bu_type_sup,from_dtime,id,material_name
-- percentage: leading run of digits with optional dots; component: text after the first '%'.
,regexp_substr(split_value, '[0-9]*[.]*[0-9]*') as percentage
,trim(substr(split_value, instr(split_value, '%') + 1)) as component
-- Only elements containing a literal '%' pass this filter. The literal above has
-- no '%', so no row survives and no default percentage is ever produced.
from parsed k where split_value LIKE '%\%%' ESCAPE '\'
)
select /*+ parallel(it,8) */
distinct item_no,item_type,bu_code_sup,bu_type_sup,from_dtime,id,material_name,percentage,component from in_pairs it
;
如果您将字符串视为 csv,那么有一种方法可以从 csv 字符串中拆分元素并提取百分比和分量。代码内评论:
-- Treats the input as a delimited list: a MODEL clause splits it into elements,
-- elements that do not start with a digit get '100 % ' prefixed, and each
-- element is then cut at its first '%' into PERCENTAGE and COMPONENT.
Select
TRIM(SubStr(COL1, 1, InStr(COL1, '%') - 1)) "PERCENTAGE",
TRIM(SubStr(COL1, InStr(COL1, '%') + 1)) "COMPONENT"
FROM
(
SELECT
COL1
FROM
(
SELECT
INDX,
MY_STR1,
COL1_ELEMENTS,
-- Default percentage: prefix '100 % ' when the element does not begin with a digit.
CASE WHEN SubStr(TRIM(COL1), 1, 1) IN('0','1','2','3','4','5','6','7','8','9') THEN COL1 ELSE '100 % ' || COL1 END "COL1"
FROM
(
SELECT
0 "INDX",
COL1 "MY_STR1",
COL1_ELEMENTS "COL1_ELEMENTS",
COL1 "COL1"
FROM
(
SELECT
-- COL1: input with "delimiter + blank" collapsed to the bare delimiter.
REPLACE(COL1, DELIMITER || ' ', DELIMITER) "COL1",
-- COL1_ELEMENTS: length difference after TRANSLATE removes the delimiters, plus one,
-- i.e. the number of delimited elements.
Trim(Length(Replace(COL1, DELIMITER || ' ', DELIMITER))) - Trim(Length(Translate(REPLACE(COL1, DELIMITER || ' ', DELIMITER), 'A' || DELIMITER, 'A'))) + 1 "COL1_ELEMENTS"
FROM (SELECT
'73 % polyester, 20 % modacrylic, something else, 67 % cotton' "COL1", ',' "DELIMITER" -- input string and delimiter; if the delimiter is not a comma (,), also replace ',' with it in the RULES clause
FROM DUAL)
)
)
MODEL
DIMENSION BY(0 as INDX)
MEASURES(COL1, COL1_ELEMENTS, CAST('a' as VarChar2(4000)) as MY_STR1)
RULES ITERATE (10) --UNTIL (ITERATION_NUMBER <= COL1_ELEMENTS[ITERATION_NUMBER + 1]) -- the iteration count must be at least the expected number of elements; otherwise only that many elements are split
(
-- Each iteration writes cell ITERATION_NUMBER + 1: it copies the element count,
-- stores the remainder after the first ',' in MY_STR1, and stores the text
-- before that ',' (or the whole remainder when no ',' is left) in COL1.
COL1_ELEMENTS[ITERATION_NUMBER + 1] = COL1_ELEMENTS[0],
MY_STR1[0] = COL1[CV()],
MY_STR1[ITERATION_NUMBER + 1] = SubStr(MY_STR1[ITERATION_NUMBER], InStr(MY_STR1[ITERATION_NUMBER], ',', 1) + Length(',')),
COL1[ITERATION_NUMBER + 1] = SubStr(MY_STR1[ITERATION_NUMBER], 1, CASE WHEN InStr(MY_STR1[ITERATION_NUMBER], ',') <> 0 THEN InStr(MY_STR1[ITERATION_NUMBER], ',')-1 ELSE Length(MY_STR1[ITERATION_NUMBER]) END)
)
)
-- Keep only the generated cells 1..COL1_ELEMENTS; cell 0 holds the unsplit input.
WHERE INDX > 0 And INDX <= COL1_ELEMENTS
)
--
-- Result:
--
-- PERCENTAGE COMPONENT
-- 73 polyester
-- 20 modacrylic
-- 100 something else
-- 67 cotton
您需要一步一步地、彻底地进行字符串转换。
1. 删除逗号和百分号后面的空格:
-- Strip the blank that follows '%' and the blank that follows ','.
replace(replace('26,5 % polyester, min. 90% recycled, 67 % cotton','% ','%'),', ',',')
2. 使用“,”代替“%”作为字符串分隔符(先把小数逗号换成小数点):
-- '\1.\2' keeps the digits around a decimal comma ("26,5" -> "26.5"), so ',' is left only as the element separator; LEVEL selects the n-th element.
regexp_substr(REGEXP_REPLACE(replace(replace('26,5 % polyester, min. 90% recycled, 67 % cotton','% ','%'),', ',','), '(\d+),(\d+)', '\1.\2'),'[^,]+',1,LEVEL) str
3. 筛选第 1 行和第 3 行,而不只是第 1 行:
WHERE rowseq IN (1, 3)
- 不确定为什么需要第 13 行的嵌套查询和从第 46 行开始的连接子查询“b”(多余)
- 做你的组件,使用“%”进行百分比分割
-- Groups: \1 = leading number, \2 = '%', \3 = text after '%'. Blanks around '%' are tolerated.
TRIM(REGEXP_REPLACE(a.str, '^(\d+\.?\d*)\s*(%)\s*([^,]*)$','\3')) AS component,
REGEXP_REPLACE(a.str, '^(\d+\.?\d*)\s*(%)\s*([^,]*)$','\1') AS percentage
你会得到..
-- Splits the hard-coded material string into comma-separated elements (inline
-- view a) and extracts component / percentage from elements 1 and 3.
-- Fixes over the previous version:
--   * regex replacements use backreferences again ('\1.\2', '\3', '\1');
--     an empty or '.' replacement discarded the captured text
--   * the no-op replace('% ','% ') now strips the blank after '%'
--   * LEVEL, not ROWNUM, picks the n-th element
--   * the element count uses the same normalised string as the split
-- The percentage is returned with '.' as decimal separator (e.g. 26.5).
SELECT
    a.item_no,
    a.item_type,
    a.code_sup,
    a.type_sup,
    a.from_dtime,
    a.id,
    a.material_name,
    a.rowseq,
    -- \3 = text after the '%' sign
    TRIM(REGEXP_REPLACE(a.str, '^(\d+\.?\d*)\s*(%)\s*([^,]*)$', '\3')) AS component,
    -- \1 = leading number
    REGEXP_REPLACE(a.str, '^(\d+\.?\d*)\s*(%)\s*([^,]*)$', '\1') AS percentage
FROM
    ( SELECT
          '23456' item_no,
          'PLASTIC' item_type,
          '10121' code_sup,
          'SUP' type_sup,
          '27-Nov-2020' from_dtime,
          '1.1' id,
          '26,5 % polyester, min. 90% recycled, 67 % cotton' material_name,
          LEVEL rowseq,
          -- decimal comma -> dot first, so ',' only separates elements
          REGEXP_SUBSTR(
              REGEXP_REPLACE(
                  REPLACE(REPLACE('26,5 % polyester, min. 90% recycled, 67 % cotton', '% ', '%'), ', ', ','),
                  '(\d+),(\d+)', '\1.\2'),
              '[^,]+', 1, LEVEL) str
      FROM
          dual
      CONNECT BY
          LEVEL <= REGEXP_COUNT(
                       REGEXP_REPLACE(
                           REPLACE(REPLACE('26,5 % polyester, min. 90% recycled, 67 % cotton', '% ', '%'), ', ', ','),
                           '(\d+),(\d+)', '\1.\2'),
                       '[^,]+')
    ) a
-- Elements 1 and 3 carry the "<number> % <component>" pairs of this literal;
-- element 2 ("min. 90% recycled") is a remark.
WHERE a.rowseq IN (1, 3)
ORDER BY
    a.item_no,
    a.item_type,
    a.code_sup,
    a.type_sup,
    a.from_dtime,
    a.id,
    a.rowseq ;
对于更复杂的场景和更健壮的代码,请执行以下操作。当然,您需要在 WITH 子句中仔细指定您的转换规则(RULES);我并不太喜欢嵌套语句,但这个用例需要它。
-- SQL*Plus bind variable holding the raw specification text.
var the_specification varchar2(500)
exec :the_specification := '5,5% cotton, 20% modacrylic, 74,5 % polyester, min. 90 % recycled material, Polyester fibre , 150 g/sq.m.'

-- specstr normalises the specification into a clean comma-separated list; the
-- main query splits it and returns one PERCENTAGE / COMPONENT row per element
-- that contains '%'.
-- Fixes over the previous version: all regex replacements use backreferences
-- again (they were empty or partial, which discarded the captured text), the
-- comma clean-up no longer leaves a blank behind for " , ", the component is
-- trimmed, and LEVEL (not ROWNUM) picks the n-th element.
WITH specstr AS (
    SELECT CASE
               WHEN REGEXP_LIKE(rndOne.refined_str, '%') THEN
                   rndOne.refined_str
               ELSE
                   -- No '%' anywhere: give the first element made only of
                   -- letters, digits and blanks the default '100% '. Exactly one
                   -- of \1, \2, \3 is set, depending on which alternative matched.
                   -- NOTE: every alternative needs a ',', so a specification with
                   -- a single element is left unchanged.
                   REGEXP_REPLACE(rndOne.refined_str,
                                  '(^[[:alnum:][:space:]]+),|,([[:alnum:][:space:]]+),|,([[:alnum:][:space:]]+$)',
                                  ',100% \1\2\3,', 1, 1)
           END AS refined_str
    FROM (
        SELECT
            REGEXP_REPLACE(
             REGEXP_REPLACE(
              REGEXP_REPLACE(
               REGEXP_REPLACE(
                REGEXP_REPLACE(
                 REGEXP_REPLACE(:the_specification, '\s*%', '% '), -- drop white space in front of '%', keep one blank after it
                 '(\d+),(\d+)', '\1.\2'),                           -- decimal comma -> dot, so ',' only separates elements
                '(,*[^,]*recycl[^,]*,*)', ','),                     -- THE_RULE: an element mentioning 'recycl' is a remark, drop it
               '\s{2,}', ' '),                                      -- collapse runs of white space
              '(\s*,\s*)+', ','),                                   -- collapse duplicate commas and white space around commas
             '^,*|,*$', '') refined_str                             -- strip commas at the start and end
        FROM DUAL
    ) rndOne
)
SELECT REGEXP_REPLACE(mtrl_ratio.str, '^(\d+,*\d*)(%)([^,]*)$', '\1') Percentage,
       TRIM(REGEXP_REPLACE(mtrl_ratio.str, '^(\d+,*\d*)(%)([^,]*)$', '\3')) Component
FROM (
    SELECT
        LEVEL rowseq,
        REGEXP_REPLACE(
            REGEXP_SUBSTR(specstr.refined_str, '[^,]+', 1, LEVEL), -- n-th comma-separated element
            '(\d+)\.(\d+)', '\1,\2') str                            -- restore ',' as decimal separator
    FROM
        specstr
    CONNECT BY
        LEVEL <= REGEXP_COUNT(specstr.refined_str, '[^,]+')
) mtrl_ratio
WHERE REGEXP_LIKE(mtrl_ratio.str, '%')
ORDER BY mtrl_ratio.rowseq;