解析描述字符串以填充 NULL 字段

Parse description string to populate NULL fields

Informix 12.10

-- Column sketch of the items table whose empty columns are to be filled by
-- parsing Description. This is a column list only (no CREATE TABLE keyword).
tblItems
(
Type        SMALLINT,       {Precious Metal = 1, Other = 2}
Description VARCHAR,        -- free-text source string that the other columns are parsed from
Quantity    SMALLINT,       -- leading number of Description; 1 when Description does not start with a number
Name        VARCHAR,        -- first element of Description (second when a quantity is present)
Weight      DECIMAL(5,1),   -- value preceding "PW" when Type = 1; 0 when Type = 2
Purity      SMALLINT,       -- value preceding "K" when Type = 1; NULL when Type = 2
Brand       VARCHAR,        -- second element of Description, if present
Model       VARCHAR,        -- text following "MODEL", with or without a space
SerialNum   VARCHAR         -- text following "S/N", with or without a space
);

编辑更新:下面的示例数据存储在 tblItems.Type 和 tblItems.Description 中。请注意Description栏中的内容均为大写字符,可能还包含标点符号。

2|1LAPTOP APPLE 15.5" MODEL MACKBOOK PRO,S/N W80461WCAGX, WITH CHARGER||||||||
1|1RING 2.3PW 14K||||||||
2|DRILL RIOBY, MODEL D5521 S/N77720||||||||
2|TRIMMER TORO, MODEL 0242 S/N 66759||||||||
2|CELL SAMSUNG NOTE3, MODEL SM-N900T S/N RV8F90YLZ9W||||||||

我需要使用评论中提到的规则将示例项目描述解析到下面的列中:

Quantity,      {if description string does not start with a number, then Quantity = 1}
Name,          {Always the first element if description has no quantity, second element if quantity present}
Weight,        {Always before "PW" if Type = 1, Default to zero if Type = 2}
Purity,        {Always before "K" if Type = 1, Default to NULL if Type = 2} 
Brand,         {Always the second element in description, if present} 
Model,         {Always after "MODEL", with or without a space}
Serial Number  {Always after "S/N", with or without a space}

我想使用 UPDATE 语句来执行此操作,但如果 Informix 具有导入实用工具,如 SQL-Server 的 SSIS,那么这可能是更好的选择。

更新,预期结果:

Quantity   1               1       1        1         1
Name       LAPTOP          RING    DRILL    TRIMMER   CELL
Weight     0.0             2.3     0.0      0.0       0.0
Purity                     14
Brand      APPLE                   RIOBY    TORO      SAMSUNG
Model      MACKBOOK PRO            D5521    0242      SM-N900T
SerialNum  W80461WCAGX             77720    66759     RV8F90YLZ9W

假设您使用的是 Informix 12.10.xC8 或更高版本,可以尝试用正则表达式来解析描述字符串(参见 Informix 在线文档中关于正则表达式函数的说明)。

以序列号为例,您可以这样做:

-- Fill serialnum from the free-text description using the Informix regex
-- functions (regex_match / regex_replace).
-- NOTE(review): there is no WHERE clause, so every row is rewritten; add
-- "WHERE serialnum IS NULL" if only NULL fields should be populated.
UPDATE tblitems
SET
serialnum = 
DECODE 
(
    -- Does the description contain an "S/N" marker at all?
    -- NOTE(review): the trailing 3 is the regex option argument; presumably
    -- "extended syntax, ignore case" - confirm against the Informix docs.
    regex_match(description, '(.*)(S\/N)(.*)', 3)
    -- The pattern spans the whole description, so replacing the match with
    -- back-reference \4 (the alphanumeric run after "S/N" and one optional
    -- blank) leaves only the serial number. An empty replacement string
    -- would erase the entire value instead.
    , 't'::BOOLEAN, regex_replace(description, '(.*)(S\/N)([[:blank:]]?)([[:alnum:]]*)(.*)', '\4', 0, 3)
    -- No marker: store an empty string.
    , 'f'::BOOLEAN, ''
);

在上面的示例中,我先测试描述中是否包含 S/N 字符串;如果包含,就用 regex_replace 返回它后面的值,即正则表达式中的第 4 个匹配组(我没有使用 regex_extract 来取值,因为它似乎会返回多个值,导致错误 -686)。

您可以将此方法扩展到其余列,看看正则表达式是否足以解析描述列。

如果您在寻找 SQL Server 方案,并且愿意使用一个能保持元素顺序的 Split/Parse 函数:

例子

-- SQL Server: split each Description into space-delimited tokens and derive
-- Quantity / Name / Weight / Purity / Brand / Model / SerialNum from them.
-- Depends on [dbo].[tvf-Str-Parse], used here as a source of (RetSeq, RetVal) rows.
Select A.Type
      ,A.Description
      ,C.*
 From  YourTable A
 -- Normalise the text so that each value is a single token: commas become
 -- spaces, double spaces are collapsed once, and the 'Model ' / 'S/N '
 -- markers are glued to the value that follows them.
 Cross Apply (values ( replace(
                       replace(
                       replace(
                       replace(A.Description,',',' ')
                       ,'  ',' ')
                       ,'Model ','Model')
                       ,'S/N ','S/N')
                     ) 
             )B(CleanString)

 Cross Apply (
                -- Quantity: leading digits of the first token, 1 when there are none.
                -- A blank is appended before patindex so that an all-digit token
                -- yields a valid length instead of LEFT(...,-1) raising an error.
                Select Quantity  = IsNull(left(max(case when RetSeq=1 then RetVal end),NullIf(patindex('%[^0-9]%',max(case when RetSeq=1 then RetVal end)+' ') -1,0)),1)
                      -- Name: remainder of the first token after any leading digits.
                      -- NOTE(review): a quantity written as its own token ("1 LAPTOP")
                      -- is not shifted; Name/Brand then come from the wrong tokens.
                      ,Name      = substring(max(case when RetSeq=1 then RetVal end),patindex('%[^0-9]%',max(case when RetSeq=1 then RetVal end)),charindex(' ',max(case when RetSeq=1 then RetVal end)+' ')-1)
                      -- Weight: number in front of 'PW'; the stated rule is 0 (not NULL) for Type 2.
                      ,Weight    = IIF(A.Type=2,cast(0 as decimal(5,1)),try_convert(decimal(5,1),replace(max(case when RetVal like '%PW' then RetVal end),'PW','')))
                      -- Purity: number in front of 'K'; always NULL for Type 2 so that
                      -- an unrelated token ending in K cannot leak a value.
                      ,Purity    = IIF(A.Type=2,null,try_convert(smallint    ,replace(max(case when RetVal like '%K'  then RetVal end),'K','')))
                      -- Brand: second token, only for non-precious-metal items.
                      ,Brand     = IIF(A.Type=1,null,max(case when RetSeq=2 then RetVal end))
                      -- Model / SerialNum: the token that starts with the glued marker.
                      -- NOTE(review): only one token is kept, so a model containing a
                      -- space (e.g. "MACKBOOK PRO") is truncated to its first word.
                      ,Model     = replace(max(case when RetVal Like 'Model[0-9,A-Z]%' then RetVal end),'Model','')
                      ,SerialNum = replace(max(case when RetVal Like 'S/N[0-9,A-Z]%' then RetVal end),'S/N','')
                 From [dbo].[tvf-Str-Parse](CleanString,' ') B1
             ) C

Returns

如果感兴趣,TVF 的定义如下:

-- Inline table-valued function: splits @String on @Delimiter and returns one
-- row per piece as (RetSeq, RetVal), with RetVal trimmed of surrounding blanks.
-- The string is turned into an XML fragment (<x>piece</x><x>piece</x>...) and
-- shredded with nodes(); FOR XML PATH('') entitizes the text first so that
-- characters such as & or < survive the cast to xml.
-- NOTE(review): RetSeq is numbered with ORDER BY (SELECT NULL), so the
-- numbering follows whatever order nodes() happens to emit the rows in.
CREATE FUNCTION [dbo].[tvf-Str-Parse] (@String varchar(max), @Delimiter varchar(10))
RETURNS TABLE
AS
RETURN (
    SELECT ROW_NUMBER() OVER (ORDER BY (SELECT NULL)) AS RetSeq
          ,LTRIM(RTRIM(piece.item.value('(./text())[1]', 'varchar(max)'))) AS RetVal
    FROM (
            SELECT CAST(
                       '<x>'
                       + REPLACE(
                             (SELECT REPLACE(@String, @Delimiter, '§§Split§§') AS [*] FOR XML PATH('')),
                             '§§Split§§',
                             '</x><x>'
                         )
                       + '</x>'
                   AS xml).query('.') AS fragments
         ) AS src
    CROSS APPLY src.fragments.nodes('x') AS piece(item)
);

EDIT - If you don't want or can't use a TVF

dbFiddle

-- SQL Server, no helper function: split each Description into space-delimited
-- tokens with an inline XML split and derive
-- Quantity / Name / Weight / Purity / Brand / Model / SerialNum from them.
Select A.Type
      ,A.Description
      ,C.*
 From  YourTable A
 -- Normalise the text so that each value is a single token: commas become
 -- spaces, double spaces are collapsed once, and the 'Model ' / 'S/N '
 -- markers are glued to the value that follows them.
 Cross Apply (values ( replace(
                       replace(
                       replace(
                       replace(A.Description,',',' ')
                       ,'  ',' ')
                       ,'Model ','Model')
                       ,'S/N ','S/N')
                     ) 
             )B(CleanString)

 Cross Apply (
                -- Quantity: leading digits of the first token, 1 when there are none.
                -- A blank is appended before patindex so that an all-digit token
                -- yields a valid length instead of LEFT(...,-1) raising an error.
                Select Quantity  = IsNull(left(max(case when RetSeq=1 then RetVal end),NullIf(patindex('%[^0-9]%',max(case when RetSeq=1 then RetVal end)+' ') -1,0)),1)
                      -- Name: remainder of the first token after any leading digits.
                      -- NOTE(review): a quantity written as its own token ("1 LAPTOP")
                      -- is not shifted; Name/Brand then come from the wrong tokens.
                      ,Name      = substring(max(case when RetSeq=1 then RetVal end),patindex('%[^0-9]%',max(case when RetSeq=1 then RetVal end)),charindex(' ',max(case when RetSeq=1 then RetVal end)+' ')-1)
                      -- Weight: number in front of 'PW'; the stated rule is 0 (not NULL) for Type 2.
                      ,Weight    = IIF(A.Type=2,cast(0 as decimal(5,1)),try_convert(decimal(5,1),replace(max(case when RetVal like '%PW' then RetVal end),'PW','')))
                      -- Purity: number in front of 'K'; always NULL for Type 2 so that
                      -- an unrelated token ending in K cannot leak a value.
                      ,Purity    = IIF(A.Type=2,null,try_convert(smallint    ,replace(max(case when RetVal like '%K'  then RetVal end),'K','')))
                      -- Brand: second token, only for non-precious-metal items.
                      ,Brand     = IIF(A.Type=1,null,max(case when RetSeq=2 then RetVal end))
                      -- Model / SerialNum: the token that starts with the glued marker.
                      -- NOTE(review): only one token is kept, so a model containing a
                      -- space (e.g. "MACKBOOK PRO") is truncated to its first word.
                      ,Model     = replace(max(case when RetVal Like 'Model[0-9,A-Z]%' then RetVal end),'Model','')
                      ,SerialNum = replace(max(case when RetVal Like 'S/N[0-9,A-Z]%' then RetVal end),'S/N','')
                 From  (
                        -- Inline split: wrap every space-separated piece of CleanString
                        -- in <x>...</x>, cast to xml and shred it into one row per piece.
                        -- Inner aliases are distinct from the outer A / B so that
                        -- B.CleanString clearly refers to the normalised string above.
                        -- NOTE(review): RetSeq is numbered with ORDER BY (SELECT NULL), so it
                        -- follows whatever order nodes() happens to emit the rows in.
                        Select RetSeq = row_number() over (Order By (Select null))
                              ,RetVal = ltrim(rtrim(Tok.i.value('(./text())[1]', 'varchar(max)')))
                        From  (Select x = Cast('<x>' + replace((Select replace(B.CleanString,' ','§§Split§§') as [*] For XML Path('')),'§§Split§§','</x><x>')+'</x>' as xml).query('.')) as Src 
                        Cross Apply x.nodes('x') AS Tok(i)
                       ) B1
             ) C