根据特定模式删除子字符串
Remove Substring according to specific pattern
我需要按照某种模式,从 SQL Server 数据库的字符串值中删除一个子字符串:
之前:Winter_QZ6P91712017_115BPM
之后:Winter_115BPM
或
之前:cpx_Note In My Calendar_QZ6P91707044
之后:cpx_Note In My Calendar
基本上,就是删除符合“_ + 12 个字符”这一模式的子字符串。
我已经尝试用 PatIndex('_\S{12}', myCol)
来获取该子字符串的位置,但它匹配不到任何内容。
SQL Server 不支持正则表达式。不过,既然您只想删除第一个 '_'
及其后面的 12 个字符,可以先用 CHARINDEX
找到该下划线的位置,再用 STUFF
从该位置起删除 13 个字符:
-- Remove the first underscore together with the 12 characters after it
-- (13 characters in total).
-- CHARINDEX returns 0 when the string contains no underscore, and STUFF
-- returns NULL for a start position of 0, so strings without an underscore
-- are passed through unchanged instead of being turned into NULL.
SELECT
    V.YourString,
    CASE
        WHEN P.UnderscorePos > 0
            THEN STUFF(V.YourString, P.UnderscorePos, 13, '')
        ELSE V.YourString
    END AS NewString
FROM (VALUES ('Winter_QZ6P91712017_115BPM')) AS V(YourString)
-- Compute the underscore position once so it can be reused above.
CROSS APPLY (SELECT CHARINDEX('_', V.YourString) AS UnderscorePos) AS P;
假设您的意思是下划线后跟 12 个不是下划线的字符,您可以使用此模式:
-- Remove the first "_" + exactly 12 non-underscore characters from str.
--
-- The pattern requires the 12 characters to be followed by another
-- underscore; a trailing '_' is appended to the searched value so that a
-- token at the very end of the string also qualifies. Without that right-hand
-- boundary, an underscore followed by a longer run (for example
-- "_Note In My Calendar") would match on its first 12 characters and the
-- string would be cut in the wrong place.
--
-- The position found in (str + '_') is also valid in str, because the extra
-- character is appended at the end.
SELECT
    tests.str,
    CASE
        WHEN hit.pos > 0 THEN STUFF(tests.str, hit.pos, 13, '')
        ELSE tests.str  -- no matching token: keep the value as is
    END AS cleaned_str
FROM (VALUES
    ('Winter_QZ6P91712017_115BPM'),
    ('Winter_115BPM_QZ6P91712017'),
    ('cpx_Note In My Calendar_QZ6P91707044')
) AS tests(str)
-- Evaluate PATINDEX once per row and reuse the result in the CASE above.
CROSS APPLY (
    SELECT PATINDEX('%[_]' + REPLICATE('[^_]', 12) + '[_]%', tests.str + '_') AS pos
) AS hit;
回答得有点晚,但您也可以使用较新的 STRING_SPLIT 函数(SQL Server 2016 起提供)按下划线拆分字符串,并计算下划线之间每个片段的长度。如果某个片段的长度 >= 12,就用 REPLACE 函数递归地把这些片段从原始字符串中去掉。
-- Demo script: strip every token of 12 or more characters from each input
-- string, together with the underscore in front of it.
-- A token is a piece obtained by splitting the input on '_' and then on ' ',
-- so a multi-word segment such as "Note In My Calendar" is judged word by
-- word rather than as one long segment.

drop table if exists Tbl;
drop table if exists #temptable;

create table Tbl (input nvarchar(max));

insert into Tbl (input)
values
    ('Winter_QZ6P91712017_115BPM'),
    ('cpx_Note In My Calendar_QZ6P91707044'),
    ('stuff_asdasd_QZ6P91712017'),
    ('stuff_asdasd_QZ6P91712017_stuff_asdasd_QZ6P91712017'),
    ('stuff_asdasd_QZ6P917120117_stuff_asdasd_QZ6P91712017');

-- One row per (input, long token). rn numbers the long tokens of each input;
-- the numbering order is arbitrary (order by (select 1)) and only serves to
-- walk through the tokens one at a time in the recursive CTE below.
select
    parts.input,
    words.value as replacethisstring,
    rn = row_number() over (partition by parts.input order by (select 1))
into #temptable
from (
    select
        Tbl.input,
        seg.value as hyphensplit
    from Tbl
    cross apply string_split(Tbl.input, '_') as seg
) as parts
cross apply string_split(parts.hyphensplit, ' ') as words
where len(words.value) >= 12;

with cte as (
    -- Anchor: every distinct input starts unchanged at level 0, so inputs
    -- without any long token still appear in the final result.
    -- Both members cast inputtrans explicitly because a recursive CTE
    -- requires identical column types in the anchor and recursive parts.
    select
        src.input,
        inputtrans = cast(src.input as nvarchar(max)),
        level = 0
    from (select distinct input from Tbl) as src
    union all
    -- Recursive step: apply the (level + 1)-th long token of this input.
    -- The inner replace removes the token with its leading underscore, which
    -- avoids leaving "__" or a trailing "_" behind; the outer replace removes
    -- any occurrence that is not preceded by an underscore.
    select
        cte.input,
        inputtrans = cast(
            replace(
                replace(cte.inputtrans, N'_' + T.replacethisstring, N''),
                T.replacethisstring,
                N''
            ) as nvarchar(max)
        ),
        level = cte.level + 1
    from cte
    inner join #temptable as T
        on T.input = cte.input
       and T.rn = cte.level + 1
)
-- Keep only the deepest level per input, i.e. the fully cleaned string.
select
    ranked.input,
    ranked.inputtrans
from (
    select
        cte.input,
        cte.inputtrans,
        rn = row_number() over (partition by cte.input order by cte.level desc)
    from cte
) as ranked
where ranked.rn = 1;
示例输出
我需要按照某种模式,从 SQL Server 数据库的字符串值中删除一个子字符串:
之前:Winter_QZ6P91712017_115BPM
之后:Winter_115BPM
或
之前:cpx_Note In My Calendar_QZ6P91707044
之后:cpx_Note In My Calendar
基本上,就是删除符合“_ + 12 个字符”这一模式的子字符串。
我已经尝试用 PatIndex('_\S{12}', myCol)
来获取该子字符串的位置,但它匹配不到任何内容。
SQL Server 不支持正则表达式。不过,既然您只想删除第一个 '_'
及其后面的 12 个字符,可以先用 CHARINDEX
找到该下划线的位置,再用 STUFF
从该位置起删除 13 个字符:
-- Remove the first underscore together with the 12 characters after it
-- (13 characters in total).
-- CHARINDEX returns 0 when the string contains no underscore, and STUFF
-- returns NULL for a start position of 0, so strings without an underscore
-- are passed through unchanged instead of being turned into NULL.
SELECT
    V.YourString,
    CASE
        WHEN P.UnderscorePos > 0
            THEN STUFF(V.YourString, P.UnderscorePos, 13, '')
        ELSE V.YourString
    END AS NewString
FROM (VALUES ('Winter_QZ6P91712017_115BPM')) AS V(YourString)
-- Compute the underscore position once so it can be reused above.
CROSS APPLY (SELECT CHARINDEX('_', V.YourString) AS UnderscorePos) AS P;
假设您的意思是下划线后跟 12 个不是下划线的字符,您可以使用此模式:
-- Remove the first "_" + exactly 12 non-underscore characters from str.
--
-- The pattern requires the 12 characters to be followed by another
-- underscore; a trailing '_' is appended to the searched value so that a
-- token at the very end of the string also qualifies. Without that right-hand
-- boundary, an underscore followed by a longer run (for example
-- "_Note In My Calendar") would match on its first 12 characters and the
-- string would be cut in the wrong place.
--
-- The position found in (str + '_') is also valid in str, because the extra
-- character is appended at the end.
SELECT
    tests.str,
    CASE
        WHEN hit.pos > 0 THEN STUFF(tests.str, hit.pos, 13, '')
        ELSE tests.str  -- no matching token: keep the value as is
    END AS cleaned_str
FROM (VALUES
    ('Winter_QZ6P91712017_115BPM'),
    ('Winter_115BPM_QZ6P91712017'),
    ('cpx_Note In My Calendar_QZ6P91707044')
) AS tests(str)
-- Evaluate PATINDEX once per row and reuse the result in the CASE above.
CROSS APPLY (
    SELECT PATINDEX('%[_]' + REPLICATE('[^_]', 12) + '[_]%', tests.str + '_') AS pos
) AS hit;
回答得有点晚,但您也可以使用较新的 STRING_SPLIT 函数(SQL Server 2016 起提供)按下划线拆分字符串,并计算下划线之间每个片段的长度。如果某个片段的长度 >= 12,就用 REPLACE 函数递归地把这些片段从原始字符串中去掉。
-- Demo script: strip every token of 12 or more characters from each input
-- string, together with the underscore in front of it.
-- A token is a piece obtained by splitting the input on '_' and then on ' ',
-- so a multi-word segment such as "Note In My Calendar" is judged word by
-- word rather than as one long segment.

drop table if exists Tbl;
drop table if exists #temptable;

create table Tbl (input nvarchar(max));

insert into Tbl (input)
values
    ('Winter_QZ6P91712017_115BPM'),
    ('cpx_Note In My Calendar_QZ6P91707044'),
    ('stuff_asdasd_QZ6P91712017'),
    ('stuff_asdasd_QZ6P91712017_stuff_asdasd_QZ6P91712017'),
    ('stuff_asdasd_QZ6P917120117_stuff_asdasd_QZ6P91712017');

-- One row per (input, long token). rn numbers the long tokens of each input;
-- the numbering order is arbitrary (order by (select 1)) and only serves to
-- walk through the tokens one at a time in the recursive CTE below.
select
    parts.input,
    words.value as replacethisstring,
    rn = row_number() over (partition by parts.input order by (select 1))
into #temptable
from (
    select
        Tbl.input,
        seg.value as hyphensplit
    from Tbl
    cross apply string_split(Tbl.input, '_') as seg
) as parts
cross apply string_split(parts.hyphensplit, ' ') as words
where len(words.value) >= 12;

with cte as (
    -- Anchor: every distinct input starts unchanged at level 0, so inputs
    -- without any long token still appear in the final result.
    -- Both members cast inputtrans explicitly because a recursive CTE
    -- requires identical column types in the anchor and recursive parts.
    select
        src.input,
        inputtrans = cast(src.input as nvarchar(max)),
        level = 0
    from (select distinct input from Tbl) as src
    union all
    -- Recursive step: apply the (level + 1)-th long token of this input.
    -- The inner replace removes the token with its leading underscore, which
    -- avoids leaving "__" or a trailing "_" behind; the outer replace removes
    -- any occurrence that is not preceded by an underscore.
    select
        cte.input,
        inputtrans = cast(
            replace(
                replace(cte.inputtrans, N'_' + T.replacethisstring, N''),
                T.replacethisstring,
                N''
            ) as nvarchar(max)
        ),
        level = cte.level + 1
    from cte
    inner join #temptable as T
        on T.input = cte.input
       and T.rn = cte.level + 1
)
-- Keep only the deepest level per input, i.e. the fully cleaned string.
select
    ranked.input,
    ranked.inputtrans
from (
    select
        cte.input,
        cte.inputtrans,
        rn = row_number() over (partition by cte.input order by cte.level desc)
    from cte
) as ranked
where ranked.rn = 1;
示例输出