在字符串中搜索某个字符的所有位置,并以逗号分隔的字符串形式返回
Search all positions of char in string and return as comma separated string
我有一些只包含 0 或 1 的字符串(VARCHAR(255))。
我需要搜索所有出现的位置,并将它们以逗号分隔的字符串形式返回。
我参考 https://dba.stackexchange.com/questions/41961/how-to-find-all-positions-of-a-string-within-another-string 中的解决方案构建了两个查询。
到目前为止,这是我的代码:
-- Approach 1 (set-based): test every character position of @STRING against @TERM
-- and join the matching positions into one comma-separated list.
DECLARE @TERM VARCHAR(5);
SET @TERM = '1';
DECLARE @STRING VARCHAR(255);
SET @STRING = '101011011000000000000000000000000000000000000000';
-- Wide enough for the worst case: 255 matches produce "1,2,...,255" = 911 characters.
DECLARE @RESULT VARCHAR(1000);
SELECT
    @RESULT = STUFF(
        ( SELECT
              ',' + CAST(n.Number AS VARCHAR(10))
          FROM
              -- sys.all_objects is only used as a row source to number 1..N.
              -- NOTE(review): assumes it holds at least LEN(@STRING) rows - confirm.
              ( SELECT ROW_NUMBER () OVER (ORDER BY [object_id]) FROM sys.all_objects
              ) AS n ( Number )
          WHERE
              -- probe @STRING directly from position 1 up to and including its last character
              n.Number <= LEN(@STRING)
              AND SUBSTRING(@STRING, n.Number, LEN(@TERM)) = @TERM
          ORDER BY
              -- explicit order: the list must be ascending by position
              n.Number
          FOR XML PATH('')
        ), 1, 1, '');   -- STUFF removes the leading comma; it yields NULL when nothing matched
SELECT
    COALESCE(@RESULT, 'NONE');   -- required output when @TERM does not occur
-- Approach 2 (loop): hop from one '1' to the next with CHARINDEX and collect the positions.
-- Reads @STRING, which is declared earlier in the same batch.
DECLARE @POS INT;
-- Wide enough for the worst case: 255 matches produce ",1,2,...,255" = 912 characters.
DECLARE @POSITIONS VARCHAR(1000);
SET @POSITIONS = '';
SET @POS = CHARINDEX('1', @STRING);
WHILE @POS > 0
BEGIN
    -- VARCHAR(10) holds any INT position; a too-narrow target would turn 100 and above into '*'
    SET @POSITIONS = @POSITIONS + ',' + CAST(@POS AS VARCHAR(10));
    -- continue right after the last hit; CHARINDEX returns 0 once nothing is left
    SET @POS = CHARINDEX('1', @STRING, @POS + 1);
END;
SELECT
    -- STUFF strips the leading comma and yields NULL for an empty list, which maps to 'NONE'
    COALESCE(STUFF(@POSITIONS, 1, 1, ''), 'NONE');
我想知道这是否可以做得更快、更好?我只搜索单个字符的位置,并且我的字符串中只会出现两种字符(0 和 1)。
我已经用这些代码构建了两个函数,对 1000 条记录运行后,两者耗时相同、结果也相同,所以我无法判断哪个更好。
对于单条记录,第二段代码在 Profiler 中的 CPU 和读取(Reads)均为 0,而第一段代码的 CPU=16、读取=17。
我需要得到如下所示的结果:多次出现时为 1,3,5,6,8,9;只出现一次时为 3;没有出现时为 NONE。
一种使用 tally table(数字表)和 XML 的解决方案:
-- Tally-table approach: number rows 1..1000, keep the numbers whose character in
-- @STRING is '1', and join them into a comma-separated list with FOR XML PATH.
-- 255 matches the VARCHAR(255) inputs described in the question.
DECLARE @STRING NVARCHAR(255) = '101011011000000000000000000000000000000000000000';
-- three cross-joined 10-row VALUES lists give 10 x 10 x 10 = 1000 numbered rows
;with cte as(select ROW_NUMBER() OVER(ORDER BY (SELECT NULL)) p
from (values(1),(1),(1),(1),(1),(1),(1),(1),(1),(1)) t1(n) cross join
(values(1),(1),(1),(1),(1),(1),(1),(1),(1),(1)) t2(n) cross join
(values(1),(1),(1),(1),(1),(1),(1),(1),(1),(1)) t3(n))
SELECT STUFF((SELECT ',' + CAST(p AS VARCHAR(100))
FROM cte
WHERE p <= LEN(@STRING) AND SUBSTRING(@STRING, p, 1) = '1'
ORDER BY p -- row order is not guaranteed without this, so the list could come out shuffled
FOR XML PATH('')), 1, 1, ''); -- STUFF removes the leading comma
您只需生成从 1 到 1000 的数字(如果字符串更长,则添加更多的交叉连接),并使用 SUBSTRING 函数筛选出所需的值。然后使用把多行连接成逗号分隔值的标准技巧。
对于旧版本:
-- Variant without the VALUES row constructor: the numbers come from cross-joining
-- sys.all_columns with itself. Reads @STRING, which must already be declared in the batch.
;with cte as(SELECT ROW_NUMBER() OVER (ORDER BY (SELECT NULL)) p
FROM sys.all_columns a CROSS JOIN sys.all_columns b)
SELECT STUFF((SELECT ',' + CAST(p AS VARCHAR(100))
FROM cte
WHERE p <= LEN(@STRING) AND SUBSTRING(@STRING, p, 1) = '1'
ORDER BY p -- row order is not guaranteed without this, so the list could come out shuffled
FOR XML PATH('')), 1, 1, ''); -- STUFF removes the leading comma
这里有一篇关于生成数字范围的好文章:http://dwaincsql.com/2014/03/27/tally-tables-in-t-sql/
编辑:
-- Alternative number generator: six derived tables of five rows each (built with
-- UNION ALL) are cross-joined, giving 5^6 = 15,625 rows numbered by ROW_NUMBER().
-- This snippet is the CTE definition only; it has to be followed by a query that reads cte.
;with cte as(SELECT ROW_NUMBER() OVER (ORDER BY (SELECT NULL)) p
FROM (SELECT 1 AS rn UNION ALL SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1 ) t1 CROSS JOIN
(SELECT 1 AS rn UNION ALL SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1 ) t2 CROSS JOIN
(SELECT 1 AS rn UNION ALL SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1 ) t3 CROSS JOIN
(SELECT 1 AS rn UNION ALL SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1 ) t4 CROSS JOIN
(SELECT 1 AS rn UNION ALL SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1 ) t5 CROSS JOIN
(SELECT 1 AS rn UNION ALL SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1 ) t6)
Giorgi 的回答非常巧妙,但我更喜欢可读性更强、更传统的写法。下面是我的建议(含测试用例):
-- Drop any previous version so the CREATE FUNCTION batch that follows can be re-run.
-- The name is schema-qualified and spelled exactly as it is created, so the lookup and
-- the drop hit the same object regardless of default schema or case-sensitive collation.
if object_id(N'dbo.UFN_CSVPOSITIONS') is not null
begin
    drop function dbo.UFN_CSVPOSITIONS;
end
go
create function dbo.UFN_CSVPOSITIONS
(
    @string nvarchar(255)
   ,@delimiter nvarchar(1) = ','
)
returns nvarchar(1000)
as
begin
    --given a string that contains ones,
    --return a delimited list of the 1-based positions of those ones
    --example: '1001' returns '1,4'
    --
    --@string    : text to scan; NULL or a string without ones returns ''
    --@delimiter : separator placed between positions; NULL falls back to ','
    --returns    : nvarchar(1000) because 255 ones produce a 911-character list,
    --             which a 255-character result would silently truncate
    declare @result nvarchar(1000) = N'';
    declare @idx int = charindex(N'1', @string);

    --a NULL delimiter would turn the whole result into NULL through concatenation
    set @delimiter = coalesce(@delimiter, N',');

    --charindex returns 0 when nothing is left and NULL for a NULL input;
    --either value ends the loop
    while @idx > 0
    begin
        --separate from the previous entry; nothing is added before the first one,
        --so no leading delimiter has to be stripped afterwards
        if @result <> N''
        begin
            set @result = @result + @delimiter;
        end;
        set @result = @result + convert(nvarchar(3), @idx);
        --resume right after this hit, so a one in the last position is found too
        set @idx = charindex(N'1', @string, @idx + 1);
    end --while

    return @result;
end
go
--test cases
--each print shows the function result for one representative input.
--the variable is declared and used with the same casing so the script also runs
--on instances with a case-sensitive collation.
declare @string nvarchar(255) = '';
--mixed zeros and ones
set @string = '101011011000000000000000000000000000000000000000';
print dbo.UFN_CSVPOSITIONS(@string,',');
--null input
set @string = null;
print dbo.UFN_CSVPOSITIONS(@string,',');
--empty input
set @string = '';
print dbo.UFN_CSVPOSITIONS(@string,',');
--ones only
set @string = '1111111111111111111111111111111111111111111111111';
print dbo.UFN_CSVPOSITIONS(@string,',');
--zeros only
set @string = '0000000000000000000000000000000000000000000000000';
print dbo.UFN_CSVPOSITIONS(@string,',');
--let's try a very large # of test cases, see how fast it comes out
--255 "ones" should be the worst case scenario for performance, so let's run through 50k of those.
--on my laptop, here are test case results:
--all 1s : 13 seconds
--all 0s : 7 seconds
--all nulls: 1 second
--NOTE(review): every iteration also prints its result, so the timings presumably
--include the cost of sending PRINT output to the client - confirm before comparing.
declare @testinput nvarchar(255) = replicate('1',255);
declare @iterations int = 50000;
declare @i int = 0;
while @i < @iterations
begin
    --call the function with the exact casing it was created with, so the
    --script also works in databases with a case-sensitive collation
    print dbo.UFN_CSVPOSITIONS(@testinput,',');
    set @i = @i + 1;
end;
--repeat the test using the CTE method.
--the same test cases are as follows on my local:
--all 1s : 18 seconds
--all 0s : 15 seconds
--all NULLs: 1 second
--reuses @i, @iterations and @testinput declared earlier in the same batch.
set nocount on;
set @i = 0;
set @iterations = 50000;
--255 ones produce a 911-character list; a 255-character variable would silently truncate it
declare @result nvarchar(1000) = '';
set @testinput = replicate('1',255);
while @i < @iterations
begin
    ;with cte as(SELECT ROW_NUMBER() OVER (ORDER BY (SELECT NULL)) p
    FROM sys.all_columns a CROSS JOIN sys.all_columns b)
    SELECT @result = STUFF((SELECT ',' + CAST(p AS VARCHAR(100))
    FROM cte
    WHERE p <= LEN(@testinput) AND SUBSTRING(@testinput, p, 1) = '1'
    ORDER BY p --row order is not guaranteed without this
    FOR XML PATH('')), 1, 1, '');
    print @result;
    set @i = @i + 1;
end;
我有一些只包含 0 或 1 的字符串(VARCHAR(255))。
我需要搜索所有出现的位置,并将它们以逗号分隔的字符串形式返回。
我参考 https://dba.stackexchange.com/questions/41961/how-to-find-all-positions-of-a-string-within-another-string 中的解决方案构建了两个查询。
到目前为止,这是我的代码:
-- Approach 1 (set-based): test every character position of @STRING against @TERM
-- and join the matching positions into one comma-separated list.
DECLARE @TERM VARCHAR(5);
SET @TERM = '1';
DECLARE @STRING VARCHAR(255);
SET @STRING = '101011011000000000000000000000000000000000000000';
-- Wide enough for the worst case: 255 matches produce "1,2,...,255" = 911 characters.
DECLARE @RESULT VARCHAR(1000);
SELECT
    @RESULT = STUFF(
        ( SELECT
              ',' + CAST(n.Number AS VARCHAR(10))
          FROM
              -- sys.all_objects is only used as a row source to number 1..N.
              -- NOTE(review): assumes it holds at least LEN(@STRING) rows - confirm.
              ( SELECT ROW_NUMBER () OVER (ORDER BY [object_id]) FROM sys.all_objects
              ) AS n ( Number )
          WHERE
              -- probe @STRING directly from position 1 up to and including its last character
              n.Number <= LEN(@STRING)
              AND SUBSTRING(@STRING, n.Number, LEN(@TERM)) = @TERM
          ORDER BY
              -- explicit order: the list must be ascending by position
              n.Number
          FOR XML PATH('')
        ), 1, 1, '');   -- STUFF removes the leading comma; it yields NULL when nothing matched
SELECT
    COALESCE(@RESULT, 'NONE');   -- required output when @TERM does not occur
-- Approach 2 (loop): hop from one '1' to the next with CHARINDEX and collect the positions.
-- Reads @STRING, which is declared earlier in the same batch.
DECLARE @POS INT;
-- Wide enough for the worst case: 255 matches produce ",1,2,...,255" = 912 characters.
DECLARE @POSITIONS VARCHAR(1000);
SET @POSITIONS = '';
SET @POS = CHARINDEX('1', @STRING);
WHILE @POS > 0
BEGIN
    -- VARCHAR(10) holds any INT position; a too-narrow target would turn 100 and above into '*'
    SET @POSITIONS = @POSITIONS + ',' + CAST(@POS AS VARCHAR(10));
    -- continue right after the last hit; CHARINDEX returns 0 once nothing is left
    SET @POS = CHARINDEX('1', @STRING, @POS + 1);
END;
SELECT
    -- STUFF strips the leading comma and yields NULL for an empty list, which maps to 'NONE'
    COALESCE(STUFF(@POSITIONS, 1, 1, ''), 'NONE');
我想知道这是否可以做得更快、更好?我只搜索单个字符的位置,并且我的字符串中只会出现两种字符(0 和 1)。
我已经用这些代码构建了两个函数,对 1000 条记录运行后,两者耗时相同、结果也相同,所以我无法判断哪个更好。
对于单条记录,第二段代码在 Profiler 中的 CPU 和读取(Reads)均为 0,而第一段代码的 CPU=16、读取=17。
我需要得到如下所示的结果:多次出现时为 1,3,5,6,8,9;只出现一次时为 3;没有出现时为 NONE。
一种使用 tally table(数字表)和 XML 的解决方案:
-- Tally-table approach: number rows 1..1000, keep the numbers whose character in
-- @STRING is '1', and join them into a comma-separated list with FOR XML PATH.
-- 255 matches the VARCHAR(255) inputs described in the question.
DECLARE @STRING NVARCHAR(255) = '101011011000000000000000000000000000000000000000';
-- three cross-joined 10-row VALUES lists give 10 x 10 x 10 = 1000 numbered rows
;with cte as(select ROW_NUMBER() OVER(ORDER BY (SELECT NULL)) p
from (values(1),(1),(1),(1),(1),(1),(1),(1),(1),(1)) t1(n) cross join
(values(1),(1),(1),(1),(1),(1),(1),(1),(1),(1)) t2(n) cross join
(values(1),(1),(1),(1),(1),(1),(1),(1),(1),(1)) t3(n))
SELECT STUFF((SELECT ',' + CAST(p AS VARCHAR(100))
FROM cte
WHERE p <= LEN(@STRING) AND SUBSTRING(@STRING, p, 1) = '1'
ORDER BY p -- row order is not guaranteed without this, so the list could come out shuffled
FOR XML PATH('')), 1, 1, ''); -- STUFF removes the leading comma
您只需生成从 1 到 1000 的数字(如果字符串更长,则添加更多的交叉连接),并使用 SUBSTRING 函数筛选出所需的值。然后使用把多行连接成逗号分隔值的标准技巧。
对于旧版本:
-- Variant without the VALUES row constructor: the numbers come from cross-joining
-- sys.all_columns with itself. Reads @STRING, which must already be declared in the batch.
;with cte as(SELECT ROW_NUMBER() OVER (ORDER BY (SELECT NULL)) p
FROM sys.all_columns a CROSS JOIN sys.all_columns b)
SELECT STUFF((SELECT ',' + CAST(p AS VARCHAR(100))
FROM cte
WHERE p <= LEN(@STRING) AND SUBSTRING(@STRING, p, 1) = '1'
ORDER BY p -- row order is not guaranteed without this, so the list could come out shuffled
FOR XML PATH('')), 1, 1, ''); -- STUFF removes the leading comma
这里有一篇关于生成数字范围的好文章:http://dwaincsql.com/2014/03/27/tally-tables-in-t-sql/
编辑:
-- Alternative number generator: six derived tables of five rows each (built with
-- UNION ALL) are cross-joined, giving 5^6 = 15,625 rows numbered by ROW_NUMBER().
-- This snippet is the CTE definition only; it has to be followed by a query that reads cte.
;with cte as(SELECT ROW_NUMBER() OVER (ORDER BY (SELECT NULL)) p
FROM (SELECT 1 AS rn UNION ALL SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1 ) t1 CROSS JOIN
(SELECT 1 AS rn UNION ALL SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1 ) t2 CROSS JOIN
(SELECT 1 AS rn UNION ALL SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1 ) t3 CROSS JOIN
(SELECT 1 AS rn UNION ALL SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1 ) t4 CROSS JOIN
(SELECT 1 AS rn UNION ALL SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1 ) t5 CROSS JOIN
(SELECT 1 AS rn UNION ALL SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1 ) t6)
Giorgi 的回答非常巧妙,但我更喜欢可读性更强、更传统的写法。下面是我的建议(含测试用例):
-- Drop any previous version so the CREATE FUNCTION batch that follows can be re-run.
-- The name is schema-qualified and spelled exactly as it is created, so the lookup and
-- the drop hit the same object regardless of default schema or case-sensitive collation.
if object_id(N'dbo.UFN_CSVPOSITIONS') is not null
begin
    drop function dbo.UFN_CSVPOSITIONS;
end
go
create function dbo.UFN_CSVPOSITIONS
(
    @string nvarchar(255)
   ,@delimiter nvarchar(1) = ','
)
returns nvarchar(1000)
as
begin
    --given a string that contains ones,
    --return a delimited list of the 1-based positions of those ones
    --example: '1001' returns '1,4'
    --
    --@string    : text to scan; NULL or a string without ones returns ''
    --@delimiter : separator placed between positions; NULL falls back to ','
    --returns    : nvarchar(1000) because 255 ones produce a 911-character list,
    --             which a 255-character result would silently truncate
    declare @result nvarchar(1000) = N'';
    declare @idx int = charindex(N'1', @string);

    --a NULL delimiter would turn the whole result into NULL through concatenation
    set @delimiter = coalesce(@delimiter, N',');

    --charindex returns 0 when nothing is left and NULL for a NULL input;
    --either value ends the loop
    while @idx > 0
    begin
        --separate from the previous entry; nothing is added before the first one,
        --so no leading delimiter has to be stripped afterwards
        if @result <> N''
        begin
            set @result = @result + @delimiter;
        end;
        set @result = @result + convert(nvarchar(3), @idx);
        --resume right after this hit, so a one in the last position is found too
        set @idx = charindex(N'1', @string, @idx + 1);
    end --while

    return @result;
end
go
--test cases
--each print shows the function result for one representative input.
--the variable is declared and used with the same casing so the script also runs
--on instances with a case-sensitive collation.
declare @string nvarchar(255) = '';
--mixed zeros and ones
set @string = '101011011000000000000000000000000000000000000000';
print dbo.UFN_CSVPOSITIONS(@string,',');
--null input
set @string = null;
print dbo.UFN_CSVPOSITIONS(@string,',');
--empty input
set @string = '';
print dbo.UFN_CSVPOSITIONS(@string,',');
--ones only
set @string = '1111111111111111111111111111111111111111111111111';
print dbo.UFN_CSVPOSITIONS(@string,',');
--zeros only
set @string = '0000000000000000000000000000000000000000000000000';
print dbo.UFN_CSVPOSITIONS(@string,',');
--let's try a very large # of test cases, see how fast it comes out
--255 "ones" should be the worst case scenario for performance, so let's run through 50k of those.
--on my laptop, here are test case results:
--all 1s : 13 seconds
--all 0s : 7 seconds
--all nulls: 1 second
--NOTE(review): every iteration also prints its result, so the timings presumably
--include the cost of sending PRINT output to the client - confirm before comparing.
declare @testinput nvarchar(255) = replicate('1',255);
declare @iterations int = 50000;
declare @i int = 0;
while @i < @iterations
begin
    --call the function with the exact casing it was created with, so the
    --script also works in databases with a case-sensitive collation
    print dbo.UFN_CSVPOSITIONS(@testinput,',');
    set @i = @i + 1;
end;
--repeat the test using the CTE method.
--the same test cases are as follows on my local:
--all 1s : 18 seconds
--all 0s : 15 seconds
--all NULLs: 1 second
--reuses @i, @iterations and @testinput declared earlier in the same batch.
set nocount on;
set @i = 0;
set @iterations = 50000;
--255 ones produce a 911-character list; a 255-character variable would silently truncate it
declare @result nvarchar(1000) = '';
set @testinput = replicate('1',255);
while @i < @iterations
begin
    ;with cte as(SELECT ROW_NUMBER() OVER (ORDER BY (SELECT NULL)) p
    FROM sys.all_columns a CROSS JOIN sys.all_columns b)
    SELECT @result = STUFF((SELECT ',' + CAST(p AS VARCHAR(100))
    FROM cte
    WHERE p <= LEN(@testinput) AND SUBSTRING(@testinput, p, 1) = '1'
    ORDER BY p --row order is not guaranteed without this
    FOR XML PATH('')), 1, 1, '');
    print @result;
    set @i = @i + 1;
end;