如何在 Oracle 中将字符串分成百分比部分和普通文本?

How to segregate a string into percentage component and normal text in Oracle?

给定一个字符串,如

73 % polyester, 20 % modacrylic, 7 % cotton

应该有 3 行 2 列:

Percentage Component
73 polyester
20 modacrylic
7 cotton
5,5% cotton, 20% modacrylic, 74,5 % polyester, min. 90 % recycled material

输出应该有 3 行:

Percentage Component
5,5 cotton
20 modacrylic
74,5 polyester

如果字符串不以数字字符开头,则百分比 (Percentage) 列的值应默认为 100,成分 (Component) 列应取逗号 (',') 之前的所有字符。

例如

Polyester fibre , 150 g/sq.m.
Percentage Component
100 Polyester fibre

我已经编写了以下逻辑,但它没有按预期工作:

-- Asker's first attempt (the surrounding text reports that it returns a single row).
-- It splits a hard-coded material string on '%' and joins the 1st piece (inline
-- view "a") to the 2nd piece (inline view "b") on a generated row sequence.
SELECT
       a.item_no,
       a.item_type,
       a.code_sup,
       a.type_sup,
       a.from_dtime,
       a.id,
       a.material_name,
       -- a.str comes from the piece filtered with rowseq = 1, b.str from rowseq = 2.
       -- NOTE(review): for this literal piece 1 is '26,5 ' and piece 2 is
       -- ' polyester, min. 90', so the two aliases look swapped - confirm intent.
       a.str AS component,
       b.str AS percentage
FROM
       -- Inline view a: re-splits the 1st '%'-delimited piece on '/'.
       ( SELECT
                item_no,
                item_type,
                code_sup,
                type_sup,
                from_dtime,
                id,
                material_name,
                level rowseq,
                regexp_substr(str,'[^/]+',1,ROWNUM) str
         FROM
                -- Row generator: one row per run of non-'%' characters in the literal.
                -- The replace of '% /' by '%/' has nothing to match in this literal.
                ( SELECT
                         '23456' item_no,
                         'PLASTIC' item_type,
                         '10121' code_sup,
                         'SUP' type_sup,
                         '27-Nov-2020' from_dtime,
                         '1.1' id,
                         '26,5 % polyester, min. 90% recycled, 67 % cotton' material_name,
                         level rowseq,
                         regexp_substr(replace('26,5 % polyester, min. 90% recycled, 67 % cotton','% /','%/'),'[^%]+',1,ROWNUM) str
                   FROM
                        dual
                   CONNECT BY
                        level <= regexp_count('26,5 % polyester, min. 90% recycled, 67 % cotton','[^%]+')
                 )
         WHERE
                 -- Only the first '%'-delimited piece is kept here.
                 -- NOTE(review): this filter looks like the reason later percentages
                 -- (e.g. '67') never reach the output - confirm.
                 rowseq = 1
         CONNECT BY
                 -- Second split level uses '/' as separator; the literal contains no '/'.
                 level <= regexp_count(str,'[^/]+')     
       ) a,
       -- Inline view b: same row generator, but keeps the 2nd '%'-delimited piece and
       -- splits it on '/' or '%', trimming surrounding blanks.
       (
         SELECT
                item_no,
                item_type,
                code_sup,
                type_sup,
                from_dtime,
                id,
                material_name,
                ROWNUM rowseq,
                TRIM(regexp_substr(str,'[^/%]+',1,ROWNUM) ) str
         FROM
                ( SELECT
                         '23456' item_no,
                         'PLASTIC' item_type,
                         '10121' code_sup,
                         'SUP' type_sup,
                         '27-Nov-2020' from_dtime,
                         '1.1' id,
                         '26,5 % polyester, min. 90% recycled, 67 % cotton' material_name,
                         level rowseq,
                         regexp_substr(replace('26,5 % polyester, min. 90% recycled, 67 % cotton','% /','%/'),'[^%]+',1,ROWNUM) str
                  FROM
                         dual
                         CONNECT BY
                         level <= regexp_count('26,5 % polyester, min. 90% recycled, 67 % cotton','[^%]+')
                 )
          WHERE
                 -- Only the second '%'-delimited piece is kept here.
                 rowseq = 2
                 CONNECT BY
                 level <= regexp_count(str,'[^/%]+')
       ) b
-- Old-style comma join: pairs the rows of a and b that share the same sequence number.
WHERE  a.rowseq = b.rowseq
AND    a.str IS NOT NULL
ORDER BY
       a.item_no,
       a.item_type,
       a.code_sup,
       a.type_sup,
       a.from_dtime,
       a.id ;

这里我只得到 1 行:百分比为 26,5,成分为 "polyester, min. 90";而我应该得到 2 行:

Percentage Component
26,5 polyester
67 cotton

请指导。

我还试过下面这个逻辑,但它不适用于以字母开头的材料名称 (material_name);这种情况下应将百分比默认设置为 100:

-- Asker's second attempt: split one hard-coded material description on commas and
-- return (percentage, component) for every piece that contains a '%' sign.
--
-- Known limitation (this is what the surrounding question is about): pieces without
-- a '%' are filtered out in in_pairs, so a name such as 'PPCO, grade 4 ...' yields
-- no rows instead of a default percentage of 100.
WITH material AS (
    -- Single hard-coded sample row; the literal is stated once and reused below.
    SELECT
        '80393904'    AS item_no,
        'ART'         AS item_type,
        '22025'       AS bu_code_sup,
        'SUP'         AS bu_type_sup,
        '27-FEB-2020' AS from_dtime,
        1.4           AS id,
        'PPCO, grade 4 acc. to spec. AA-168522' AS material_name
    FROM dual
)
,normalised AS (
    -- Collapse '%  ' to '% ', drop the blank after each comma, then turn decimal
    -- commas into dots so that the comma can serve as the element separator.
    -- '\1.\2' keeps the digits on both sides; a bare '.' would discard them.
    SELECT
        m.item_no,
        m.item_type,
        m.bu_code_sup,
        m.bu_type_sup,
        m.from_dtime,
        m.id,
        m.material_name,
        REGEXP_REPLACE(
            REPLACE(REPLACE(m.material_name, '%  ', '% '), ', ', ','),
            '(\d+),(\d+)',
            '\1.\2'
        ) AS norm_name
    FROM material m
)
,parsed AS (
    -- One row per comma-separated element. The row count and the split both use
    -- norm_name, so they cannot disagree. LEVEL is the occurrence index.
    -- This row generator relies on "normalised" holding exactly one row.
    SELECT /*+ parallel(t,8) materialize */
        n.item_no,
        n.item_type,
        n.bu_code_sup,
        n.bu_type_sup,
        n.from_dtime,
        n.id,
        n.material_name,
        REGEXP_SUBSTR(n.norm_name, '[^,]+', 1, LEVEL) AS split_value
    FROM normalised n
    CONNECT BY LEVEL <= REGEXP_COUNT(n.norm_name, '[^,]+')
)
,in_pairs AS (
    -- Leading number becomes the percentage, the text after the first '%' the component.
    SELECT /*+ parallel(k,8) materialize */
        k.item_no,
        k.item_type,
        k.bu_code_sup,
        k.bu_type_sup,
        k.from_dtime,
        k.id,
        k.material_name,
        REGEXP_SUBSTR(k.split_value, '[0-9]*[.]*[0-9]*') AS percentage,
        TRIM(SUBSTR(k.split_value, INSTR(k.split_value, '%') + 1)) AS component
    FROM parsed k
    WHERE k.split_value LIKE '%\%%' ESCAPE '\'
)
SELECT /*+ parallel(it,8) */ DISTINCT
    it.item_no,
    it.item_type,
    it.bu_code_sup,
    it.bu_type_sup,
    it.from_dtime,
    it.id,
    it.material_name,
    it.percentage,
    it.component
FROM in_pairs it
;

如果您将字符串视为 CSV,那么有一种方法可以从 CSV 字符串中拆分出各个元素,并提取百分比和成分。说明见代码中的注释:

-- Splits a delimiter-separated string into one row per element with the MODEL clause,
-- then cuts each element at its first '%' into PERCENTAGE (left part) and COMPONENT
-- (right part). Elements that do not start with a digit get '100 % ' prepended first.
Select
    TRIM(SubStr(COL1, 1, InStr(COL1, '%') - 1)) "PERCENTAGE",
    TRIM(SubStr(COL1, InStr(COL1, '%') + 1)) "COMPONENT"
FROM
    (
        SELECT  
            COL1
        FROM    
            (   
                SELECT 
                    INDX, 
                    MY_STR1, 
                    COL1_ELEMENTS, 
                    -- Default rule: an element whose first non-blank character is not a digit is treated as 100 %.
                    CASE WHEN SubStr(TRIM(COL1), 1, 1) IN('0','1','2','3','4','5','6','7','8','9') THEN COL1 ELSE '100 % ' || COL1 END "COL1"
                FROM    
                    (   
                        -- Seed row for the MODEL clause: cell 0 carries the whole normalised string.
                        SELECT 
                            0 "INDX", 
                            COL1 "MY_STR1", 
                            COL1_ELEMENTS "COL1_ELEMENTS", 
                            COL1 "COL1"
                        FROM
                            (
                                SELECT
                                  -- Drop the blank that follows each delimiter.
                                  REPLACE(COL1, DELIMITER || ' ', DELIMITER) "COL1",    
                                  -- Element count = number of delimiters + 1; TRANSLATE strips the delimiter, so the length difference counts them.
                                  Trim(Length(Replace(COL1, DELIMITER || ' ', DELIMITER))) - Trim(Length(Translate(REPLACE(COL1, DELIMITER || ' ', DELIMITER), 'A' || DELIMITER, 'A'))) + 1 "COL1_ELEMENTS"
                                FROM (SELECT 
                                        '73 % polyester, 20 % modacrylic, something else, 67 % cotton' "COL1", ',' "DELIMITER" -- Input string and delimiter go here. If the delimiter is not a comma, also replace ',' with it in the RULES clause below.
                                      FROM DUAL)        
                            )
                    )
                MODEL       
                    DIMENSION BY(0 as INDX)
                    MEASURES(COL1, COL1_ELEMENTS, CAST('a' as VarChar2(4000)) as MY_STR1)
                    RULES ITERATE (10)      --UNTIL (ITERATION_NUMBER <= COL1_ELEMENTS[ITERATION_NUMBER + 1]) -- The iteration count must be at least the number of elements; with a smaller value only that many elements are split off.
                    (
                        -- Copy the element count into every generated cell so the outer filter can use it.
                        COL1_ELEMENTS[ITERATION_NUMBER + 1] = COL1_ELEMENTS[0],
                        -- (Re)seed cell 0 of the working string from COL1 at the same index.
                        MY_STR1[0] = COL1[CV()],
                        -- Remainder: everything after the first ',' of the previous working string.
                        MY_STR1[ITERATION_NUMBER + 1] = SubStr(MY_STR1[ITERATION_NUMBER], InStr(MY_STR1[ITERATION_NUMBER], ',', 1) + Length(',')),
                        -- Element: text before the first ',' of the previous working string, or all of it when no ',' is left.
                        COL1[ITERATION_NUMBER + 1] = SubStr(MY_STR1[ITERATION_NUMBER], 1, CASE WHEN InStr(MY_STR1[ITERATION_NUMBER], ',') <> 0 THEN InStr(MY_STR1[ITERATION_NUMBER], ',')-1 ELSE Length(MY_STR1[ITERATION_NUMBER]) END)
                    )
            )
        -- Skip the seed cell (INDX 0) and any cells generated beyond the real element count.
        WHERE INDX > 0 And INDX <= COL1_ELEMENTS
    )
--
-- Result:
--
-- PERCENTAGE   COMPONENT
-- 73           polyester
-- 20           modacrylic
-- 100          something else
-- 67           cotton

您需要一步一步地、完整地完成字符串转换。

  1. 删除逗号和百分号后面的空格
-- Step 1: collapse '%  ' (two blanks) to '% ' and drop the blank after each comma.
REPLACE(
    REPLACE('26,5 % polyester, min. 90% recycled, 67 % cotton', '%  ', '% '),
    ', ',
    ','
)

2. 使用“,”代替“%”作为字符串分隔符:

-- Step 2: turn decimal commas into dots ('\1.\2' keeps the digits on both sides of
-- the comma; a bare '.' would discard them), then take the LEVEL-th comma-separated piece.
regexp_substr(REGEXP_REPLACE(replace(replace('26,5 % polyester, min. 90% recycled, 67 % cotton','%  ','% '),', ',','), '(\d+),(\d+)', '\1.\2'),'[^,]+',1,LEVEL) str

3. 筛选第 1 行和第 3 行,而不只是第 1 行:

    -- Keep the 1st and 3rd comma-separated pieces; piece 2 of the sample string is the
    -- 'min. 90% recycled' remark, which is not wanted in the output.
    WHERE rowseq IN (1, 3)
  4. 不确定为什么需要第 13 行的嵌套查询,以及从第 46 行开始连接的子查询“b”(它们是多余的)
  5. 以“%”为界,拆分出成分 (component) 和百分比 (percentage):
    -- Capture groups: \1 = leading number, \2 = the '%' sign, \3 = text after it.
    -- An empty replacement string would blank both columns, so the groups are named
    -- explicitly; TRIM removes the blanks that surround the '%' sign.
    TRIM(REGEXP_REPLACE(a.str, '^(\d+.*\d*)(%)([^,]*)$','\3')) AS component,
    TRIM(REGEXP_REPLACE(a.str, '^(\d+.*\d*)(%)([^,]*)$','\1')) AS percentage

你会得到..

-- Splits the sample material string on commas and returns percentage / component for
-- the 1st and 3rd piece (piece 2, 'min. 90% recycled', is a remark and is skipped).
-- Percentages come back with a dot as decimal separator ('26.5').
WITH material AS (
    -- Single hard-coded sample row; the literal is stated once and reused below.
    SELECT
        '23456'       AS item_no,
        'PLASTIC'     AS item_type,
        '10121'       AS code_sup,
        'SUP'         AS type_sup,
        '27-Nov-2020' AS from_dtime,
        '1.1'         AS id,
        '26,5 % polyester, min. 90% recycled, 67 % cotton' AS material_name
    FROM dual
),
normalised AS (
    -- Collapse '%  ' to '% ', drop the blank after each comma, then turn decimal
    -- commas into dots so that the comma can serve as the element separator.
    -- '\1.\2' keeps the digits on both sides; a bare '.' would discard them.
    SELECT
        m.item_no,
        m.item_type,
        m.code_sup,
        m.type_sup,
        m.from_dtime,
        m.id,
        m.material_name,
        REGEXP_REPLACE(
            REPLACE(REPLACE(m.material_name, '%  ', '% '), ', ', ','),
            '(\d+),(\d+)',
            '\1.\2'
        ) AS norm_name
    FROM material m
),
pieces AS (
    -- One row per comma-separated piece. The row count and the split both use
    -- norm_name, so they cannot disagree. LEVEL is the occurrence index.
    -- This row generator relies on "normalised" holding exactly one row.
    SELECT
        n.item_no,
        n.item_type,
        n.code_sup,
        n.type_sup,
        n.from_dtime,
        n.id,
        n.material_name,
        LEVEL AS rowseq,
        REGEXP_SUBSTR(n.norm_name, '[^,]+', 1, LEVEL) AS str
    FROM normalised n
    CONNECT BY LEVEL <= REGEXP_COUNT(n.norm_name, '[^,]+')
)
SELECT
    a.item_no,
    a.item_type,
    a.code_sup,
    a.type_sup,
    a.from_dtime,
    a.id,
    a.material_name,
    a.rowseq,
    -- Capture groups: \1 = leading number, \2 = '%', \3 = text after the '%'.
    TRIM(REGEXP_REPLACE(a.str, '^(\d+.*\d*)(%)([^,]*)$', '\3')) AS component,
    TRIM(REGEXP_REPLACE(a.str, '^(\d+.*\d*)(%)([^,]*)$', '\1')) AS percentage
FROM pieces a
WHERE a.rowseq IN (1, 3)
ORDER BY
    a.item_no,
    a.item_type,
    a.code_sup,
    a.type_sup,
    a.from_dtime,
    a.id,
    a.rowseq ;  -- rowseq last: the other keys are constant, so it fixes the row order

对于更复杂的场景和更健壮的代码,请执行以下操作。当然,您需要在 WITH 子句中仔细指定您的转换规则 (RULES)。我并不是嵌套语句的忠实粉丝,但这个用例需要它。

-- SQL*Plus bind variable holding the material specification to be parsed.
var the_specification varchar2(500)
exec :the_specification := '5,5% cotton, 20% modacrylic, 74,5 % polyester, min. 90 % recycled material, Polyester fibre , 150 g/sq.m.'

-- Cleans the specification in several regex passes, splits it on commas and returns
-- one (Percentage, Component) row per piece that carries a '%' sign.
-- Every replacement string that reuses matched text names its capture groups (\1, \2,
-- \3); without them the matched digits / text would be discarded.
WITH specstr as (
      -- If the cleaned string has no '%' at all, inject a default '100% ' in front of
      -- the first plain alphanumeric element; otherwise use the cleaned string as is.
      SELECT CASE WHEN REGEXP_LIKE(rndOne.refined_str, '%') THEN
              rndOne.refined_str
            ELSE
              -- Only one of the three alternatives matches, so \1\2\3 re-emits the matched element.
              REGEXP_REPLACE(rndOne.refined_str, '(^[[:alnum:][:space:]]+),|,([[:alnum:][:space:]]+),|,([[:alnum:][:space:]]+$)', ',100% \1\2\3,',1,1) -- Add the default Percentage Column '100%'
            END AS refined_str
      FROM(
            SELECT
               REGEXP_REPLACE(
                  REGEXP_REPLACE(
                     REGEXP_REPLACE(
                        REGEXP_REPLACE(
                           REGEXP_REPLACE(
                              REGEXP_REPLACE(:the_specification,'\s*%','% '), -- remove white space in front of the '%' sign
                            '(\d+),(\d+)', '\1.\2'), -- use a dot '.' as decimal separator so that ',' only separates elements
                        '(,*[^,]*recycl[^,]*,*)', ','), -- remove any additional comment. THE_RULE: an element containing 'recycl' is a comment
                      '\s{2,}', ' '), -- collapse repeated white space
                  ',{2,}|\s,|,\s', ','), -- collapse duplicate commas and drop white space next to a comma
                '^,*|,*$', '') refined_str -- remove commas at the start and at the end
              FROM DUAL
            )rndOne
)
-- Capture groups: \1 = number (with decimal comma), \2 = '%', \3 = text after the '%'.
SELECT REGEXP_REPLACE(mtrl_ratio.str, '^(\d+,*\d*)(%)([^,]*)$','\1') Percentage,
       TRIM(REGEXP_REPLACE(mtrl_ratio.str, '^(\d+,*\d*)(%)([^,]*)$','\3')) Component
FROM (
  SELECT
        level rowseq,
        REGEXP_REPLACE(
          regexp_substr(
            specstr.refined_str
           ,'[^,]+',1,LEVEL), -- split the string using the comma as delimiter; LEVEL is the piece index
        '(\d+)\.(\d+)', '\1,\2') str -- put the comma ',' back as decimal separator
    FROM
        specstr
    CONNECT BY
        level <= regexp_count(
                  specstr.refined_str
                  ,'[^,]+')
)mtrl_ratio
-- Pieces without a '%' (e.g. '150 g/sq.m.') are not material ratios and are dropped.
WHERE REGEXP_LIKE(mtrl_ratio.str, '%')