构建更高效的 MS SQL 函数以 return 均匀分割范围
Build more efficient MS SQL function to return evenly split ranges
假设您有以下 SQL table:
-- create temp table
CREATE TABLE [tempNums]
(
id INT NOT NULL,
somedate datetime NULL
)
GO
有一些数据(见下面的 tempSplitStringToInts 定义):
-- with date
INSERT INTO [tempNums]
SELECT id, GETUTCDATE()
FROM [tempSplitStringToInts] ('1,2,3,5,10,100,101,102,103,233,1001,5002,5003,5005,5007,5010',',')
GO
-- without date
INSERT INTO [tempNums]
SELECT id, NULL
FROM [tempSplitStringToInts] ('6,7,8,150,151,152,153,433,2001,2002,2003,2005,3007,10010',',')
GO
How do you build BETTER/FASTER function that will take number of ranges, and a
flag bit as input and return a table of range values?
例如,这样的方法可行,但对于非常大的 tables 来说速度很慢:
-- create range function
CREATE FUNCTION [tempFnGetIdRanges]
(
@apps INT,
@has_date BIT
)
RETURNS @ret TABLE
(
RangeNum INT,
MinNum INT,
MaxNum INT
)
AS
BEGIN
DECLARE @i INT = 0;
DECLARE @count INT;
DECLARE @min INT;
DECLARE @max INT = 0;
IF @has_date = 1
BEGIN
SELECT @count = COUNT(id)
FROM [tempNums]
WHERE somedate IS NOT NULL
END
ELSE
BEGIN
SELECT @count = COUNT(id)
FROM [tempNums]
WHERE somedate IS NULL
END
DECLARE @top INT = @count/@apps;
WHILE @i<@apps
BEGIN
IF @i+1=@apps
BEGIN
-- on last get reminder
SET @top = @top + @apps
END
IF @has_date = 1
BEGIN
SELECT @min = MIN(id), @max = MAX(id)
FROM
(
SELECT TOP (@top) id
FROM [tempNums]
WHERE somedate IS NOT NULL
AND id > @max
ORDER BY id
) XX
END
ELSE
BEGIN
SELECT @min = MIN(id), @max = MAX(id)
FROM
(
SELECT TOP (@top) id
FROM [tempNums]
WHERE somedate IS NULL
AND id > @max
ORDER BY id
) XX
END
INSERT INTO @ret VALUES(@i, @min, @max)
SET @i = @i + 1;
CONTINUE
END
RETURN
END
GO
因此,当您 运行 以下内容时:
SELECT * FROM [tempFnGetIdRanges](4, 0)
SELECT * FROM [tempFnGetIdRanges](4, 1)
第一个语句的结果:
RangeNum MinNum MaxNum
0 6 8
1 150 152
2 153 2001
3 2002 10010
第二条语句的结果:
RangeNum MinNum MaxNum
0 1 5
1 10 102
2 103 5002
3 5003 5010
拆分功能(仅供参考,但不是本题重点):
-- create split string function
CREATE FUNCTION [tempSplitStringToInts] ( @SourceString VARCHAR(MAX) , @delimeter VARCHAR(10))
RETURNS @IntList TABLE
(
id INT
)
AS
BEGIN
IF RIGHT(@SourceString, LEN(@delimeter))<> @delimeter
BEGIN
SELECT @SourceString = @SourceString + @delimeter
END
DECLARE @LocalStr VARCHAR(MAX)
DECLARE @start INT
DECLARE @end INT
SELECT @start = 1
SELECT @end = CHARINDEX ( @delimeter , @SourceString , @start )
WHILE @end > 0
BEGIN
SELECT @LocalStr = SUBSTRING ( @SourceString , @start , @end - @start )
IF LTRIM(RTRIM(@LocalStr)) <> ''
BEGIN
INSERT @IntList (id) VALUES (CAST(@LocalStr AS INT))
END
SELECT @start = @end + LEN(@delimeter)
SELECT @end = CHARINDEX ( @delimeter , @SourceString , @start )
END
RETURN
END
GO
As I said this works, but it is slow for very large tables. Is there a
better way to write tempFnGetIdRanges
function? Something native to
SQL? I am using MS SQL 2012
if that is relevant.
不太确定您的 GetRanges 函数试图做什么,但您绝对不需要循环。当您将 HasDate 作为 1 传递时,此函数 returns 与您的值相同。
create function GetRanges
(
@NumGroups int
) returns table as return
with MyGroups as
(
select NTILE(@NumGroups) over(order by t.id) as GroupNum
, t.id
from tempnums t
)
select GroupNum
, MIN(id) as MinNum
, MAX(id) as MaxNum
from MyGroups
group by GroupNum
--编辑--
现在我看到你发布了两组示例数据,我明白了这个问题。
您可以通过以下方式调整它以在某个日期适应 NULL 或 NOT NULL。
alter function GetRanges
(
@NumGroups int
, @HasDate bit
) returns table as return
with MyGroups as
(
select NTILE(@NumGroups) over(order by t.id) as GroupNum
, t.id
from tempnums t
where
(
@HasDate = 1
AND
t.somedate is not null
)
OR
(
@HasDate = 0
AND
t.somedate is null
)
)
select GroupNum
, MIN(id) as MinNum
, MAX(id) as MaxNum
from MyGroups
group by GroupNum
我看到的问题是您只有 14 行有 NULL,所以不确定为什么您想要的输出是这样的。由于 NTILE 将不均匀的行分组的方式不同,使用 NTILE 会对样本数据产生略微不同的结果。
假设您有以下 SQL table:
-- create temp table
CREATE TABLE [tempNums]
(
id INT NOT NULL,
somedate datetime NULL
)
GO
有一些数据(见下面的 tempSplitStringToInts 定义):
-- with date
INSERT INTO [tempNums]
SELECT id, GETUTCDATE()
FROM [tempSplitStringToInts] ('1,2,3,5,10,100,101,102,103,233,1001,5002,5003,5005,5007,5010',',')
GO
-- without date
INSERT INTO [tempNums]
SELECT id, NULL
FROM [tempSplitStringToInts] ('6,7,8,150,151,152,153,433,2001,2002,2003,2005,3007,10010',',')
GO
How do you build BETTER/FASTER function that will take number of ranges, and a flag bit as input and return a table of range values?
例如,这样的方法可行,但对于非常大的 tables 来说速度很慢:
-- create range function
CREATE FUNCTION [tempFnGetIdRanges]
(
@apps INT,
@has_date BIT
)
RETURNS @ret TABLE
(
RangeNum INT,
MinNum INT,
MaxNum INT
)
AS
BEGIN
DECLARE @i INT = 0;
DECLARE @count INT;
DECLARE @min INT;
DECLARE @max INT = 0;
IF @has_date = 1
BEGIN
SELECT @count = COUNT(id)
FROM [tempNums]
WHERE somedate IS NOT NULL
END
ELSE
BEGIN
SELECT @count = COUNT(id)
FROM [tempNums]
WHERE somedate IS NULL
END
DECLARE @top INT = @count/@apps;
WHILE @i<@apps
BEGIN
IF @i+1=@apps
BEGIN
-- on last get reminder
SET @top = @top + @apps
END
IF @has_date = 1
BEGIN
SELECT @min = MIN(id), @max = MAX(id)
FROM
(
SELECT TOP (@top) id
FROM [tempNums]
WHERE somedate IS NOT NULL
AND id > @max
ORDER BY id
) XX
END
ELSE
BEGIN
SELECT @min = MIN(id), @max = MAX(id)
FROM
(
SELECT TOP (@top) id
FROM [tempNums]
WHERE somedate IS NULL
AND id > @max
ORDER BY id
) XX
END
INSERT INTO @ret VALUES(@i, @min, @max)
SET @i = @i + 1;
CONTINUE
END
RETURN
END
GO
因此,当您 运行 以下内容时:
SELECT * FROM [tempFnGetIdRanges](4, 0)
SELECT * FROM [tempFnGetIdRanges](4, 1)
第一个语句的结果:
RangeNum MinNum MaxNum
0 6 8
1 150 152
2 153 2001
3 2002 10010
第二条语句的结果:
RangeNum MinNum MaxNum
0 1 5
1 10 102
2 103 5002
3 5003 5010
拆分功能(仅供参考,但不是本题重点):
-- create split string function
CREATE FUNCTION [tempSplitStringToInts] ( @SourceString VARCHAR(MAX) , @delimeter VARCHAR(10))
RETURNS @IntList TABLE
(
id INT
)
AS
BEGIN
IF RIGHT(@SourceString, LEN(@delimeter))<> @delimeter
BEGIN
SELECT @SourceString = @SourceString + @delimeter
END
DECLARE @LocalStr VARCHAR(MAX)
DECLARE @start INT
DECLARE @end INT
SELECT @start = 1
SELECT @end = CHARINDEX ( @delimeter , @SourceString , @start )
WHILE @end > 0
BEGIN
SELECT @LocalStr = SUBSTRING ( @SourceString , @start , @end - @start )
IF LTRIM(RTRIM(@LocalStr)) <> ''
BEGIN
INSERT @IntList (id) VALUES (CAST(@LocalStr AS INT))
END
SELECT @start = @end + LEN(@delimeter)
SELECT @end = CHARINDEX ( @delimeter , @SourceString , @start )
END
RETURN
END
GO
As I said this works, but it is slow for very large tables. Is there a better way to write
tempFnGetIdRanges
function? Something native to SQL? I am usingMS SQL 2012
if that is relevant.
不太确定您的 GetRanges 函数试图做什么,但您绝对不需要循环。当您将 HasDate 作为 1 传递时,此函数 returns 与您的值相同。
create function GetRanges
(
@NumGroups int
) returns table as return
with MyGroups as
(
select NTILE(@NumGroups) over(order by t.id) as GroupNum
, t.id
from tempnums t
)
select GroupNum
, MIN(id) as MinNum
, MAX(id) as MaxNum
from MyGroups
group by GroupNum
--编辑--
现在我看到你发布了两组示例数据,我明白了这个问题。
您可以通过以下方式调整它以在某个日期适应 NULL 或 NOT NULL。
alter function GetRanges
(
@NumGroups int
, @HasDate bit
) returns table as return
with MyGroups as
(
select NTILE(@NumGroups) over(order by t.id) as GroupNum
, t.id
from tempnums t
where
(
@HasDate = 1
AND
t.somedate is not null
)
OR
(
@HasDate = 0
AND
t.somedate is null
)
)
select GroupNum
, MIN(id) as MinNum
, MAX(id) as MaxNum
from MyGroups
group by GroupNum
我看到的问题是您只有 14 行有 NULL,所以不确定为什么您想要的输出是这样的。由于 NTILE 将不均匀的行分组的方式不同,使用 NTILE 会对样本数据产生略微不同的结果。