解析描述字符串以填充 NULL 字段
Parse description string to populate NULL fields
Informix 12.10
tblItems
(
Type SMALLINT, {Precious Metal = 1, Other = 2}
Description VARCHAR,
Quantity SMALLINT,
Name VARCHAR,
Weight DECIMAL(5,1),
Purity SMALLINT,
Brand VARCHAR,
Model VARCHAR,
SerialNum VARCHAR
);
编辑更新:下面的示例数据存储在 tblItems.Type 和 tblItems.Description 中。请注意Description栏中的内容均为大写字符,可能还包含标点符号。
2|1LAPTOP APPLE 15.5" MODEL MACKBOOK PRO,S/N W80461WCAGX, WITH CHARGER||||||||
1|1RING 2.3PW 14K||||||||
2|DRILL RIOBY, MODEL D5521 S/N77720||||||||
2|TRIMMER TORO, MODEL 0242 S/N 66759||||||||
2|CELL SAMSUNG NOTE3, MODEL SM-N900T S/N RV8F90YLZ9W||||||||
我需要按照下面花括号注释中给出的规则,将示例中的物品描述解析到以下各列中:
Quantity, {if description string does not start with a number, then Quantity = 1}
Name, {Always the first element if description has no quantity, second element if quantity present}
Weight, {Always before "PW" if Type = 1, Default to zero if Type = 2}
Purity, {Always before "K" if Type = 1, Default to NULL if Type = 2}
Brand, {Always the second element in description, if present}
Model, {Always after "MODEL", with or without a space}
Serial Number {Always after "S/N", with or without a space}
我想使用 UPDATE 语句来执行此操作,但如果 Informix 具有导入实用工具,如 SQL-Server 的 SSIS,那么这可能是更好的选择。
更新,预期结果:
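Column    | Row 1        | Row 2 | Row 3 | Row 4   | Row 5
Quantity  | 1            | 1     | 1     | 1       | 1
Name      | LAPTOP       | RING  | DRILL | TRIMMER | CELL
Weight    | 0.0          | 2.3   | 0.0   | 0.0     | 0.0
Purity    |              | 14    |       |         |
Brand     | APPLE        |       | RIOBY | TORO    | SAMSUNG
Model     | MACKBOOK PRO |       | D5521 | 0242    | SM-N900T
SerialNum | W80461WCAGX  |       | 77720 | 66759   | RV8F90YLZ9W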
假设您使用的是 Informix 12.10.XC8 或更高版本,可以尝试使用正则表达式来解析描述字符串(参见在线文档)。
以序列号为例,您可以这样做:
-- Populate serialnum with the value that follows "S/N" in description.
-- NOTE(review): there is no WHERE clause, so every row of tblitems is updated;
-- rows whose description has no "S/N" get an empty string.
-- NOTE(review): the trailing 3 passed to both regex functions is their
-- compile-options argument -- confirm its meaning against the Informix docs.
UPDATE tblitems
SET
    serialnum =
        DECODE
        (
            -- Does the description contain "S/N" at all?
            regex_match(description, '(.*)(S\/N)(.*)', 3)
            -- Capture group 4 is the alphanumeric run after "S/N" (and one
            -- optional blank). The replacement must be the back-reference \4;
            -- an empty replacement would wipe the whole matched description.
            , 't'::BOOLEAN, regex_replace(description, '(.*)(S\/N)([[:blank:]]?)([[:alnum:]]*)(.*)', '\4', 0, 3)
            , 'f'::BOOLEAN, ''
        )
在前面的示例中,我先测试描述中是否包含 S/N 字符串;如果包含,就使用 regex_replace 返回它后面的值,也就是正则表达式中的第 4 个捕获组(我没有使用 regex_extract 来获取该值,因为它似乎会返回多个值,导致错误 -686)。
您可以将此方法扩展到其余列,看看正则表达式是否足以解析描述列。
如果您在寻找 SQL Server 方案,并且愿意使用一个能保持元素顺序的 Split/Parse 函数
例子
-- Parse Description into Quantity, Name, Weight, Purity, Brand, Model and
-- SerialNum by normalising the text and splitting it on single spaces with
-- [dbo].[tvf-Str-Parse].
-- NOTE(review): the 'Model' / 'S/N' literals are mixed case while the sample
-- data is upper case, so this assumes a case-insensitive collation -- confirm.
Select A.Type
      ,A.Description
      ,C.*
 From  YourTable A
 Cross Apply (values ( replace(
                        replace(
                         replace(
                          replace(A.Description,',',' ')   -- commas act as separators
                         ,'  ',' ')                         -- collapse the double space a ", " leaves behind
                        ,'Model ','Model')                  -- glue the MODEL keyword to its value
                       ,'S/N ','S/N')                       -- glue the S/N keyword to its value
                     )
             )B(CleanString)
 Cross Apply (
    -- max(case when ... then RetVal end) pivots the token rows into columns.
    -- NOTE(review): a first token made only of digits makes patindex return 0
    -- and left() receive -1; such input is not handled here.
    Select Quantity  = IsNull(left(max(case when RetSeq=1 then RetVal end),NullIf(patindex('%[^0-9]%',max(case when RetSeq=1 then RetVal end)) -1,0)),1)
          ,Name      = substring(max(case when RetSeq=1 then RetVal end),patindex('%[^0-9]%',max(case when RetSeq=1 then RetVal end)),charindex(' ',max(case when RetSeq=1 then RetVal end)+' ')-1)
          -- Type 2 rows default to 0.0; otherwise the number in front of "PW".
          ,Weight    = IIF(A.Type=2,cast(0 as decimal(5,1)),try_convert(decimal(5,1),replace(max(case when RetVal like '%PW' then RetVal end),'PW','')))
          -- Only Type 1 rows carry a purity (the number in front of "K"); others stay NULL.
          ,Purity    = IIF(A.Type=1,try_convert(smallint ,replace(max(case when RetVal like '%K' then RetVal end),'K','')),null)
          ,Brand     = IIF(A.Type=1,null,max(case when RetSeq=2 then RetVal end))
          ,Model     = replace(max(case when RetVal Like 'Model[0-9,A-Z]%' then RetVal end),'Model','')
          ,SerialNum = replace(max(case when RetVal Like 'S/N[0-9,A-Z]%' then RetVal end),'S/N','')
     From  [dbo].[tvf-Str-Parse](CleanString,' ') B1
 ) C
Returns
如果感兴趣,下面是该 TVF(表值函数)的定义
-- Inline table-valued function: splits @String on @Delimiter by turning it
-- into an XML fragment of <x> elements and shredding that fragment.
--   RetSeq : row_number() over the shredded <x> nodes
--            (no explicit ordering column; presumably document order -- confirm)
--   RetVal : text of each <x> node with leading/trailing spaces trimmed
CREATE FUNCTION [dbo].[tvf-Str-Parse] (@String varchar(max),@Delimiter varchar(10))
Returns Table
As
Return (
    Select RetSeq = Row_Number() over (Order By (Select null))
          ,RetVal = LTrim(RTrim(Piece.Node.value('(./text())[1]', 'varchar(max)')))
     From (
            -- For XML Path('') escapes XML-special characters before the
            -- placeholder is swapped for element boundaries.
            Select Doc = Cast(
                              '<x>'
                            + replace(
                                  (Select replace(@String,@Delimiter,'§§Split§§') as [*] For XML Path(''))
                                 ,'§§Split§§'
                                 ,'</x><x>'
                              )
                            + '</x>'
                         as xml).query('.')
          ) as Src
    Cross Apply Doc.nodes('x') AS Piece(Node)
);
EDIT - If you don't want or can't use a TVF
-- Same parse as the TVF-based query, but with the XML string splitter inlined
-- so no user-defined function is required.
-- NOTE(review): the 'Model' / 'S/N' literals are mixed case while the sample
-- data is upper case, so this assumes a case-insensitive collation -- confirm.
Select A.Type
      ,A.Description
      ,C.*
 From  YourTable A
 Cross Apply (values ( replace(
                        replace(
                         replace(
                          replace(A.Description,',',' ')   -- commas act as separators
                         ,'  ',' ')                         -- collapse the double space a ", " leaves behind
                        ,'Model ','Model')                  -- glue the MODEL keyword to its value
                       ,'S/N ','S/N')                       -- glue the S/N keyword to its value
                     )
             )B(CleanString)
 Cross Apply (
    -- max(case when ... then RetVal end) pivots the token rows into columns.
    -- NOTE(review): a first token made only of digits makes patindex return 0
    -- and left() receive -1; such input is not handled here.
    Select Quantity  = IsNull(left(max(case when RetSeq=1 then RetVal end),NullIf(patindex('%[^0-9]%',max(case when RetSeq=1 then RetVal end)) -1,0)),1)
          ,Name      = substring(max(case when RetSeq=1 then RetVal end),patindex('%[^0-9]%',max(case when RetSeq=1 then RetVal end)),charindex(' ',max(case when RetSeq=1 then RetVal end)+' ')-1)
          -- Type 2 rows default to 0.0; otherwise the number in front of "PW".
          ,Weight    = IIF(A.Type=2,cast(0 as decimal(5,1)),try_convert(decimal(5,1),replace(max(case when RetVal like '%PW' then RetVal end),'PW','')))
          -- Only Type 1 rows carry a purity (the number in front of "K"); others stay NULL.
          ,Purity    = IIF(A.Type=1,try_convert(smallint ,replace(max(case when RetVal like '%K' then RetVal end),'K','')),null)
          ,Brand     = IIF(A.Type=1,null,max(case when RetSeq=2 then RetVal end))
          ,Model     = replace(max(case when RetVal Like 'Model[0-9,A-Z]%' then RetVal end),'Model','')
          ,SerialNum = replace(max(case when RetVal Like 'S/N[0-9,A-Z]%' then RetVal end),'S/N','')
     From  (
            -- Inline splitter: wrap every space-separated piece of CleanString
            -- in an <x> element, then shred the XML into one row per piece.
            Select RetSeq = row_number() over (Order By (Select null))
                  ,RetVal = ltrim(rtrim(B.i.value('(./text())[1]', 'varchar(max)')))
             From (Select x = Cast('<x>' + replace((Select replace(CleanString,' ','§§Split§§') as [*] For XML Path('')),'§§Split§§','</x><x>')+'</x>' as xml).query('.')) as A
            Cross Apply x.nodes('x') AS B(i)
           ) B1
 ) C
Informix 12.10
tblItems
(
Type SMALLINT, {Precious Metal = 1, Other = 2}
Description VARCHAR,
Quantity SMALLINT,
Name VARCHAR,
Weight DECIMAL(5,1),
Purity SMALLINT,
Brand VARCHAR,
Model VARCHAR,
SerialNum VARCHAR
);
编辑更新:下面的示例数据存储在 tblItems.Type 和 tblItems.Description 中。请注意Description栏中的内容均为大写字符,可能还包含标点符号。
2|1LAPTOP APPLE 15.5" MODEL MACKBOOK PRO,S/N W80461WCAGX, WITH CHARGER||||||||
1|1RING 2.3PW 14K||||||||
2|DRILL RIOBY, MODEL D5521 S/N77720||||||||
2|TRIMMER TORO, MODEL 0242 S/N 66759||||||||
2|CELL SAMSUNG NOTE3, MODEL SM-N900T S/N RV8F90YLZ9W||||||||
我需要按照下面花括号注释中给出的规则,将示例中的物品描述解析到以下各列中:
Quantity, {if description string does not start with a number, then Quantity = 1}
Name, {Always the first element if description has no quantity, second element if quantity present}
Weight, {Always before "PW" if Type = 1, Default to zero if Type = 2}
Purity, {Always before "K" if Type = 1, Default to NULL if Type = 2}
Brand, {Always the second element in description, if present}
Model, {Always after "MODEL", with or without a space}
Serial Number {Always after "S/N", with or without a space}
我想使用 UPDATE 语句来执行此操作,但如果 Informix 具有导入实用工具,如 SQL-Server 的 SSIS,那么这可能是更好的选择。
更新,预期结果:
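Column    | Row 1        | Row 2 | Row 3 | Row 4   | Row 5
Quantity  | 1            | 1     | 1     | 1       | 1
Name      | LAPTOP       | RING  | DRILL | TRIMMER | CELL
Weight    | 0.0          | 2.3   | 0.0   | 0.0     | 0.0
Purity    |              | 14    |       |         |
Brand     | APPLE        |       | RIOBY | TORO    | SAMSUNG
Model     | MACKBOOK PRO |       | D5521 | 0242    | SM-N900T
SerialNum | W80461WCAGX  |       | 77720 | 66759   | RV8F90YLZ9W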
假设您使用的是 Informix 12.10.XC8 或更高版本,可以尝试使用正则表达式来解析描述字符串(参见在线文档)。
以序列号为例,您可以这样做:
-- Populate serialnum with the value that follows "S/N" in description.
-- NOTE(review): there is no WHERE clause, so every row of tblitems is updated;
-- rows whose description has no "S/N" get an empty string.
-- NOTE(review): the trailing 3 passed to both regex functions is their
-- compile-options argument -- confirm its meaning against the Informix docs.
UPDATE tblitems
SET
    serialnum =
        DECODE
        (
            -- Does the description contain "S/N" at all?
            regex_match(description, '(.*)(S\/N)(.*)', 3)
            -- Capture group 4 is the alphanumeric run after "S/N" (and one
            -- optional blank). The replacement must be the back-reference \4;
            -- an empty replacement would wipe the whole matched description.
            , 't'::BOOLEAN, regex_replace(description, '(.*)(S\/N)([[:blank:]]?)([[:alnum:]]*)(.*)', '\4', 0, 3)
            , 'f'::BOOLEAN, ''
        )
在前面的示例中,我先测试描述中是否包含 S/N 字符串;如果包含,就使用 regex_replace 返回它后面的值,也就是正则表达式中的第 4 个捕获组(我没有使用 regex_extract 来获取该值,因为它似乎会返回多个值,导致错误 -686)。
您可以将此方法扩展到其余列,看看正则表达式是否足以解析描述列。
如果您在寻找 SQL Server 方案,并且愿意使用一个能保持元素顺序的 Split/Parse 函数
例子
-- Parse Description into Quantity, Name, Weight, Purity, Brand, Model and
-- SerialNum by normalising the text and splitting it on single spaces with
-- [dbo].[tvf-Str-Parse].
-- NOTE(review): the 'Model' / 'S/N' literals are mixed case while the sample
-- data is upper case, so this assumes a case-insensitive collation -- confirm.
Select A.Type
      ,A.Description
      ,C.*
 From  YourTable A
 Cross Apply (values ( replace(
                        replace(
                         replace(
                          replace(A.Description,',',' ')   -- commas act as separators
                         ,'  ',' ')                         -- collapse the double space a ", " leaves behind
                        ,'Model ','Model')                  -- glue the MODEL keyword to its value
                       ,'S/N ','S/N')                       -- glue the S/N keyword to its value
                     )
             )B(CleanString)
 Cross Apply (
    -- max(case when ... then RetVal end) pivots the token rows into columns.
    -- NOTE(review): a first token made only of digits makes patindex return 0
    -- and left() receive -1; such input is not handled here.
    Select Quantity  = IsNull(left(max(case when RetSeq=1 then RetVal end),NullIf(patindex('%[^0-9]%',max(case when RetSeq=1 then RetVal end)) -1,0)),1)
          ,Name      = substring(max(case when RetSeq=1 then RetVal end),patindex('%[^0-9]%',max(case when RetSeq=1 then RetVal end)),charindex(' ',max(case when RetSeq=1 then RetVal end)+' ')-1)
          -- Type 2 rows default to 0.0; otherwise the number in front of "PW".
          ,Weight    = IIF(A.Type=2,cast(0 as decimal(5,1)),try_convert(decimal(5,1),replace(max(case when RetVal like '%PW' then RetVal end),'PW','')))
          -- Only Type 1 rows carry a purity (the number in front of "K"); others stay NULL.
          ,Purity    = IIF(A.Type=1,try_convert(smallint ,replace(max(case when RetVal like '%K' then RetVal end),'K','')),null)
          ,Brand     = IIF(A.Type=1,null,max(case when RetSeq=2 then RetVal end))
          ,Model     = replace(max(case when RetVal Like 'Model[0-9,A-Z]%' then RetVal end),'Model','')
          ,SerialNum = replace(max(case when RetVal Like 'S/N[0-9,A-Z]%' then RetVal end),'S/N','')
     From  [dbo].[tvf-Str-Parse](CleanString,' ') B1
 ) C
Returns
如果感兴趣,下面是该 TVF(表值函数)的定义
-- Inline table-valued function: splits @String on @Delimiter by turning it
-- into an XML fragment of <x> elements and shredding that fragment.
--   RetSeq : row_number() over the shredded <x> nodes
--            (no explicit ordering column; presumably document order -- confirm)
--   RetVal : text of each <x> node with leading/trailing spaces trimmed
CREATE FUNCTION [dbo].[tvf-Str-Parse] (@String varchar(max),@Delimiter varchar(10))
Returns Table
As
Return (
    Select RetSeq = Row_Number() over (Order By (Select null))
          ,RetVal = LTrim(RTrim(Piece.Node.value('(./text())[1]', 'varchar(max)')))
     From (
            -- For XML Path('') escapes XML-special characters before the
            -- placeholder is swapped for element boundaries.
            Select Doc = Cast(
                              '<x>'
                            + replace(
                                  (Select replace(@String,@Delimiter,'§§Split§§') as [*] For XML Path(''))
                                 ,'§§Split§§'
                                 ,'</x><x>'
                              )
                            + '</x>'
                         as xml).query('.')
          ) as Src
    Cross Apply Doc.nodes('x') AS Piece(Node)
);
EDIT - If you don't want or can't use a TVF
-- Same parse as the TVF-based query, but with the XML string splitter inlined
-- so no user-defined function is required.
-- NOTE(review): the 'Model' / 'S/N' literals are mixed case while the sample
-- data is upper case, so this assumes a case-insensitive collation -- confirm.
Select A.Type
      ,A.Description
      ,C.*
 From  YourTable A
 Cross Apply (values ( replace(
                        replace(
                         replace(
                          replace(A.Description,',',' ')   -- commas act as separators
                         ,'  ',' ')                         -- collapse the double space a ", " leaves behind
                        ,'Model ','Model')                  -- glue the MODEL keyword to its value
                       ,'S/N ','S/N')                       -- glue the S/N keyword to its value
                     )
             )B(CleanString)
 Cross Apply (
    -- max(case when ... then RetVal end) pivots the token rows into columns.
    -- NOTE(review): a first token made only of digits makes patindex return 0
    -- and left() receive -1; such input is not handled here.
    Select Quantity  = IsNull(left(max(case when RetSeq=1 then RetVal end),NullIf(patindex('%[^0-9]%',max(case when RetSeq=1 then RetVal end)) -1,0)),1)
          ,Name      = substring(max(case when RetSeq=1 then RetVal end),patindex('%[^0-9]%',max(case when RetSeq=1 then RetVal end)),charindex(' ',max(case when RetSeq=1 then RetVal end)+' ')-1)
          -- Type 2 rows default to 0.0; otherwise the number in front of "PW".
          ,Weight    = IIF(A.Type=2,cast(0 as decimal(5,1)),try_convert(decimal(5,1),replace(max(case when RetVal like '%PW' then RetVal end),'PW','')))
          -- Only Type 1 rows carry a purity (the number in front of "K"); others stay NULL.
          ,Purity    = IIF(A.Type=1,try_convert(smallint ,replace(max(case when RetVal like '%K' then RetVal end),'K','')),null)
          ,Brand     = IIF(A.Type=1,null,max(case when RetSeq=2 then RetVal end))
          ,Model     = replace(max(case when RetVal Like 'Model[0-9,A-Z]%' then RetVal end),'Model','')
          ,SerialNum = replace(max(case when RetVal Like 'S/N[0-9,A-Z]%' then RetVal end),'S/N','')
     From  (
            -- Inline splitter: wrap every space-separated piece of CleanString
            -- in an <x> element, then shred the XML into one row per piece.
            Select RetSeq = row_number() over (Order By (Select null))
                  ,RetVal = ltrim(rtrim(B.i.value('(./text())[1]', 'varchar(max)')))
             From (Select x = Cast('<x>' + replace((Select replace(CleanString,' ','§§Split§§') as [*] For XML Path('')),'§§Split§§','</x><x>')+'</x>' as xml).query('.')) as A
            Cross Apply x.nodes('x') AS B(i)
           ) B1
 ) C