从 T-SQL 中的字符串中提取最大数
Extract largest number from a string in T-SQL
我正在导入来自 Excel 文件的数据。有一列的字符串可能包含多个数字。我想提取字符串中的最大数字,如果没有数字(或没有字符串)则为 0。
字符串的格式类似于:
“100% 消费后回收纸,50%消费后回收封面,90%消费后回收线。”
"Paper contains 30% post-consumer content."
或者有时是一个空字符串或 null。
鉴于字符串格式不规则,我遇到了问题,如有任何帮助,我们将不胜感激。
- 按原样将数据拉入 SQL
- 编写查询以获取该列中不同的选项列表
- 添加一个新列来存储所需的值
- 编写更新语句来填充新列
至于计数器应当从多大的值开始,我认为您需要先查看您的数据集来确定,但更新语句可以像下面这样简单:
-- Template: replace <Table> and <SearchColumn> with the real object names.
-- Fills NewColumn with the largest value between 0 and the starting counter
-- that occurs as a substring of <SearchColumn>. Rows must start with
-- NewColumn = NULL; each row is written once, by the first (highest) match.
-- Limitation: a number above the starting counter is never matched as a
-- whole; only its largest in-range substring is.
DECLARE @COUNTER INT = 1000;  -- highest value searched for; size it to the data

WHILE @COUNTER >= 0
      AND EXISTS (SELECT * FROM <Table> WHERE NewColumn IS NULL)
BEGIN
    UPDATE <Table>
    SET NewColumn = @COUNTER
    WHERE NewColumn IS NULL
      AND <SearchColumn> LIKE '%' + CONVERT(VARCHAR(10), @COUNTER) + '%';

    SET @COUNTER = @COUNTER - 1;
END;

-- Rows still NULL here matched none of the values down to 0, i.e. they hold
-- no digit at all (or <SearchColumn> is NULL). The requirement is 0 for those.
UPDATE <Table>
SET NewColumn = 0
WHERE NewColumn IS NULL;
这是一个标量函数,它接受一个字符串作为输入,并返回在其中找到的最大整数(最多 3 位数字;根据你的问题,我假设你处理的是百分比)。如果需要支持更多位数,请按同样的方式继续添加 IF 语句。
将其粘贴到 SSMS 中并运行以创建函数。要调用它,请执行以下操作:
-- Example call: one output row per source row, holding the value the scalar
-- function computes for that row's string.
SELECT
    dbo.GetLargestNumberFromString(src.MyStringField) AS [Largest Number in String]
FROM MyMessedUpData AS src
函数:
-- Returns the largest non-negative integer that appears in @s as a run of
-- consecutive digits, or 0 when @s is NULL, empty, or contains no digits.
--
-- Digit runs of any length are read as a whole (no fixed 3-character window),
-- so 'batch 1250' yields 1250. A run too large for int is skipped rather than
-- raising an overflow error.
CREATE FUNCTION GetLargestNumberFromString
(
    @s varchar(max)
)
RETURNS int
AS
BEGIN
    DECLARE @LargestNumber int = 0;
    DECLARE @i bigint = 1;
    -- LEN ignores trailing spaces, which is harmless: spaces are never digits.
    DECLARE @length bigint = COALESCE(LEN(@s), 0);
    DECLARE @run_start bigint;
    DECLARE @candidate int;

    WHILE @i <= @length
    BEGIN
        IF SUBSTRING(@s, @i, 1) LIKE '[0-9]'
        BEGIN
            -- Advance to the end of this digit run.
            SET @run_start = @i;
            WHILE @i <= @length AND SUBSTRING(@s, @i, 1) LIKE '[0-9]'
                SET @i = @i + 1;

            -- TRY_CONVERT yields NULL for values that do not fit in int; the
            -- comparison below is then UNKNOWN and the run is ignored.
            SET @candidate = TRY_CONVERT(int, SUBSTRING(@s, @run_start, @i - @run_start));
            IF @candidate > @LargestNumber
                SET @LargestNumber = @candidate;
        END
        ELSE
            SET @i = @i + 1;
    END

    RETURN @LargestNumber;
END
生成 txt 的 LEN(txt) 个可能的 RIGHT() 片段。在第一个非数字字符处截断每个片段。测试剩余部分是否可以转换为 int。返回 MAX()。
-- For every distinct txt, returns the largest integer found in it, or 0 when
-- txt is NULL, empty, or has no digits.
--
-- Approach: build every right-hand fragment RIGHT(txt, i), cut each fragment
-- at its first non-digit character, convert what is left to int, take MAX.
-- A fragment that starts with a non-digit is cut to '', which converts to 0.
SELECT
    t.txt,
    COALESCE(
        MAX(TRY_CONVERT(int,
                LEFT(RIGHT(t.txt, n.i),
                     PATINDEX('%[^0-9]%', RIGHT(t.txt, n.i) + ' ') - 1))),
        0
    ) AS largest_number
FROM MyTable AS t
-- OUTER APPLY keeps rows that generate no fragments (empty or NULL txt);
-- CROSS APPLY would remove them from the result.
OUTER APPLY (
    -- DATALENGTH counts trailing spaces, unlike LEN, so the longest fragment
    -- always reaches the first character. If it exceeds the character count
    -- (multi-byte data), RIGHT simply returns the whole string again.
    SELECT TOP (COALESCE(DATALENGTH(t.txt), 0))
        ROW_NUMBER() OVER (ORDER BY (SELECT NULL)) AS i
    -- NOTE(review): spt_values is used only as a row source for numbering;
    -- consider a dedicated numbers table if one exists.
    FROM master.dbo.spt_values AS a
    CROSS JOIN master.dbo.spt_values AS b
) AS n
GROUP BY t.txt
我最终创建了一个处理它的函数。这是代码:
-- Returns the largest number written immediately before a '%' sign in
-- @string, or 0 when @string is NULL, empty, or has no such number.
-- Only an unbroken run of digits directly in front of the '%' counts:
-- '50 %' contributes nothing and '12.5%' contributes 5.
CREATE FUNCTION [dbo].[cal_GetMaxPercentFromString]
(
    -- NOTE(review): the parameter list was missing from the source; the type
    -- is assumed from the varchar(2000) working copy it was assigned to - confirm.
    @string varchar(2000)
)
RETURNS float
AS
BEGIN
    DECLARE @max_value float = 0;
    DECLARE @candidate float;
    DECLARE @digit_start int;
    DECLARE @percent_pos int = CHARINDEX('%', @string);

    -- A NULL @string makes CHARINDEX return NULL, so the loop is skipped.
    WHILE @percent_pos > 0
    BEGIN
        -- Walk left from the '%' while the preceding character is a digit.
        -- Stopping at position 1 keeps a number that starts the string
        -- (e.g. '100% recycled'), which must not be lost.
        SET @digit_start = @percent_pos;
        WHILE @digit_start > 1
              AND SUBSTRING(@string, @digit_start - 1, 1) LIKE '[0-9]'
            SET @digit_start = @digit_start - 1;

        IF @digit_start < @percent_pos
        BEGIN
            SET @candidate = CAST(SUBSTRING(@string, @digit_start, @percent_pos - @digit_start) AS float);
            IF @candidate > @max_value
                SET @max_value = @candidate;
        END

        -- Continue after this '%' instead of rebuilding the string without it.
        SET @percent_pos = CHARINDEX('%', @string, @percent_pos + 1);
    END

    RETURN @max_value;
END
我正在导入来自 Excel 文件的数据。有一列的字符串可能包含多个数字。我想提取字符串中的最大数字,如果没有数字(或没有字符串)则为 0。字符串的格式类似于:“100% 消费后回收纸,50%消费后回收封面,90%消费后回收线。”或 "Paper contains 30% post-consumer content.",有时也可能是一个空字符串或 null。
鉴于字符串格式不规则,我遇到了问题,如有任何帮助,我们将不胜感激。
- 按原样将数据拉入 SQL
- 编写查询以获取该列中不同的选项列表
- 添加一个新列来存储所需的值
- 编写更新语句来填充新列
至于计数器应当从多大的值开始,我认为您需要先查看您的数据集来确定,但更新语句可以像下面这样简单:
-- Template: replace <Table> and <SearchColumn> with the real object names.
-- Fills NewColumn with the largest value between 0 and the starting counter
-- that occurs as a substring of <SearchColumn>. Rows must start with
-- NewColumn = NULL; each row is written once, by the first (highest) match.
-- Limitation: a number above the starting counter is never matched as a
-- whole; only its largest in-range substring is.
DECLARE @COUNTER INT = 1000;  -- highest value searched for; size it to the data

WHILE @COUNTER >= 0
      AND EXISTS (SELECT * FROM <Table> WHERE NewColumn IS NULL)
BEGIN
    UPDATE <Table>
    SET NewColumn = @COUNTER
    WHERE NewColumn IS NULL
      AND <SearchColumn> LIKE '%' + CONVERT(VARCHAR(10), @COUNTER) + '%';

    SET @COUNTER = @COUNTER - 1;
END;

-- Rows still NULL here matched none of the values down to 0, i.e. they hold
-- no digit at all (or <SearchColumn> is NULL). The requirement is 0 for those.
UPDATE <Table>
SET NewColumn = 0
WHERE NewColumn IS NULL;
这是一个标量函数,它接受一个字符串作为输入,并返回在其中找到的最大整数(最多 3 位数字;根据你的问题,我假设你处理的是百分比)。如果需要支持更多位数,请按同样的方式继续添加 IF 语句。
将其粘贴到 SSMS 中并运行以创建函数。要调用它,请执行以下操作:
-- Example call: one output row per source row, holding the value the scalar
-- function computes for that row's string.
SELECT
    dbo.GetLargestNumberFromString(src.MyStringField) AS [Largest Number in String]
FROM MyMessedUpData AS src
函数:
-- Returns the largest non-negative integer that appears in @s as a run of
-- consecutive digits, or 0 when @s is NULL, empty, or contains no digits.
--
-- Digit runs of any length are read as a whole (no fixed 3-character window),
-- so 'batch 1250' yields 1250. A run too large for int is skipped rather than
-- raising an overflow error.
CREATE FUNCTION GetLargestNumberFromString
(
    @s varchar(max)
)
RETURNS int
AS
BEGIN
    DECLARE @LargestNumber int = 0;
    DECLARE @i bigint = 1;
    -- LEN ignores trailing spaces, which is harmless: spaces are never digits.
    DECLARE @length bigint = COALESCE(LEN(@s), 0);
    DECLARE @run_start bigint;
    DECLARE @candidate int;

    WHILE @i <= @length
    BEGIN
        IF SUBSTRING(@s, @i, 1) LIKE '[0-9]'
        BEGIN
            -- Advance to the end of this digit run.
            SET @run_start = @i;
            WHILE @i <= @length AND SUBSTRING(@s, @i, 1) LIKE '[0-9]'
                SET @i = @i + 1;

            -- TRY_CONVERT yields NULL for values that do not fit in int; the
            -- comparison below is then UNKNOWN and the run is ignored.
            SET @candidate = TRY_CONVERT(int, SUBSTRING(@s, @run_start, @i - @run_start));
            IF @candidate > @LargestNumber
                SET @LargestNumber = @candidate;
        END
        ELSE
            SET @i = @i + 1;
    END

    RETURN @LargestNumber;
END
生成 txt 的 LEN(txt) 个可能的 RIGHT() 片段。在第一个非数字字符处截断每个片段。测试剩余部分是否可以转换为 int。返回 MAX()。
-- For every distinct txt, returns the largest integer found in it, or 0 when
-- txt is NULL, empty, or has no digits.
--
-- Approach: build every right-hand fragment RIGHT(txt, i), cut each fragment
-- at its first non-digit character, convert what is left to int, take MAX.
-- A fragment that starts with a non-digit is cut to '', which converts to 0.
SELECT
    t.txt,
    COALESCE(
        MAX(TRY_CONVERT(int,
                LEFT(RIGHT(t.txt, n.i),
                     PATINDEX('%[^0-9]%', RIGHT(t.txt, n.i) + ' ') - 1))),
        0
    ) AS largest_number
FROM MyTable AS t
-- OUTER APPLY keeps rows that generate no fragments (empty or NULL txt);
-- CROSS APPLY would remove them from the result.
OUTER APPLY (
    -- DATALENGTH counts trailing spaces, unlike LEN, so the longest fragment
    -- always reaches the first character. If it exceeds the character count
    -- (multi-byte data), RIGHT simply returns the whole string again.
    SELECT TOP (COALESCE(DATALENGTH(t.txt), 0))
        ROW_NUMBER() OVER (ORDER BY (SELECT NULL)) AS i
    -- NOTE(review): spt_values is used only as a row source for numbering;
    -- consider a dedicated numbers table if one exists.
    FROM master.dbo.spt_values AS a
    CROSS JOIN master.dbo.spt_values AS b
) AS n
GROUP BY t.txt
我最终创建了一个处理它的函数。这是代码:
-- Returns the largest number written immediately before a '%' sign in
-- @string, or 0 when @string is NULL, empty, or has no such number.
-- Only an unbroken run of digits directly in front of the '%' counts:
-- '50 %' contributes nothing and '12.5%' contributes 5.
CREATE FUNCTION [dbo].[cal_GetMaxPercentFromString]
(
    -- NOTE(review): the parameter list was missing from the source; the type
    -- is assumed from the varchar(2000) working copy it was assigned to - confirm.
    @string varchar(2000)
)
RETURNS float
AS
BEGIN
    DECLARE @max_value float = 0;
    DECLARE @candidate float;
    DECLARE @digit_start int;
    DECLARE @percent_pos int = CHARINDEX('%', @string);

    -- A NULL @string makes CHARINDEX return NULL, so the loop is skipped.
    WHILE @percent_pos > 0
    BEGIN
        -- Walk left from the '%' while the preceding character is a digit.
        -- Stopping at position 1 keeps a number that starts the string
        -- (e.g. '100% recycled'), which must not be lost.
        SET @digit_start = @percent_pos;
        WHILE @digit_start > 1
              AND SUBSTRING(@string, @digit_start - 1, 1) LIKE '[0-9]'
            SET @digit_start = @digit_start - 1;

        IF @digit_start < @percent_pos
        BEGIN
            SET @candidate = CAST(SUBSTRING(@string, @digit_start, @percent_pos - @digit_start) AS float);
            IF @candidate > @max_value
                SET @max_value = @candidate;
        END

        -- Continue after this '%' instead of rebuilding the string without it.
        SET @percent_pos = CHARINDEX('%', @string, @percent_pos + 1);
    END

    RETURN @max_value;
END