当查询包含简单函数时,Azure SQL 表分区被忽略
Azure SQL table partitions ignored when queries contain a simple function
我使用的是 Azure SQL 数据库。我按 90 天的日期边界对几个表做了分区,并用一个存储过程移动数据,以维持正确的分区断点/范围(breakpoint/range)。我用一个小函数为查询提供正确的日期断点,这样就不必不断更新所有视图。
但只要在查询中使用该函数,分区就会被忽略(不再发生分区消除)。难道我别无选择,只能在查询中到处写入硬编码值并不断修改它们吗?
这是一个重现问题的示例。
更新:根据被采纳的答案修改下面的 PartitionDate 函数后,短时间内没有问题(出现了分区消除)。之后,查询又开始变差:当我运行一个通过日期函数进行过滤的简单查询时,分区不再被消除。
------------------------------- setup
-- Create functions PartitionDate and PartitionQueryDate
-- Returns the partition boundary date: the current date minus 91 days
-- (1/4/2019 on the day this script was written).
-- Created explicitly in dbo because every caller in this script refers to
-- it by the two-part name dbo.PartitionDate().
CREATE FUNCTION dbo.PartitionDate()
RETURNS date
AS
BEGIN
    -- DATEADD names the unit explicitly; the datetime result is implicitly
    -- converted to the declared date return type.
    RETURN DATEADD(DAY, -91, GETDATE());
END
go
-- Returns the date used to filter queries: the current date minus 90 days
-- (1/5/2019 on the day this script was written).
-- Created explicitly in dbo because the queries in this script call it as
-- dbo.PartitionQueryDate().
CREATE FUNCTION dbo.PartitionQueryDate()
RETURNS date
AS
BEGIN
    -- DATEADD names the unit explicitly; the datetime result is implicitly
    -- converted to the declared date return type.
    RETURN DATEADD(DAY, -90, GETDATE());
END
go
-- Partition function with a single RANGE RIGHT boundary supplied by
-- dbo.PartitionDate(), plus a scheme that maps every partition to PRIMARY.
CREATE PARTITION FUNCTION order_pf (smalldatetime)
    AS RANGE RIGHT FOR VALUES (dbo.PartitionDate());
CREATE PARTITION SCHEME order_ps
    AS PARTITION order_pf ALL TO ([PRIMARY]);
-- Customer: identity primary key plus a required first name.
CREATE TABLE Customer
(
    id        int IDENTITY(1,1) PRIMARY KEY,
    FirstName varchar(255) NOT NULL
);
-- [Order]: identity id (no key is declared on it), a required OrderDate and
-- a foreign key to Customer. Stored on order_ps, partitioned by OrderDate.
CREATE TABLE [Order]
(
    id         int IDENTITY(1,1),
    OrderDate  smalldatetime NOT NULL,
    CustomerId int NOT NULL,
    CONSTRAINT [FK_Orders_Customer]
        FOREIGN KEY ([CustomerId]) REFERENCES Customer ([id])
) ON order_ps (OrderDate);
-- Indexes on [Order]; both are placed on order_ps and partitioned by OrderDate.
CREATE CLUSTERED INDEX [Order_OrderDate]
    ON [Order] ([OrderDate] ASC)
    ON [order_ps] ([OrderDate]);
-- Author's note: this index seems to work the same with or without the
-- explicit partition-scheme clause.
CREATE NONCLUSTERED INDEX [FK_Order_Customer]
    ON [Order] (CustomerId, OrderDate)
    ON [order_ps] ([OrderDate]);
go
-- Seed one order on each side of the partition boundary.
-- Column lists are explicit, and dates use the yyyymmdd form, which is read
-- the same way regardless of the session's language / DATEFORMAT settings.
INSERT INTO Customer (FirstName) VALUES ('bob');
-- SCOPE_IDENTITY() is the id of the Customer row inserted just above.
INSERT INTO [Order] (OrderDate, CustomerId) VALUES ('20181231', SCOPE_IDENTITY());
INSERT INTO Customer (FirstName) VALUES ('hank');
INSERT INTO [Order] (OrderDate, CustomerId) VALUES ('20190106', SCOPE_IDENTITY());
---------------------------- test
-- Row count per partition; the author expects one row in each partition.
SELECT
    $PARTITION.order_pf(OrderDate) AS Partition_Number,
    COUNT(*) AS Row_Count
FROM [Order]
GROUP BY $PARTITION.order_pf(OrderDate);
-- Compare the two filters with the actual execution plan turned on. They are
-- logically equivalent only while dbo.PartitionQueryDate() evaluates to 2019-01-05.
-- The literal is written as yyyymmdd so it cannot be read as 1 May under a
-- day-first DATEFORMAT.
SELECT COUNT(1) FROM [Order] WHERE OrderDate > '20190105'; -- Index seek Order_OrderDate; actual partition count 1
SELECT COUNT(1) FROM [Order] WHERE OrderDate > dbo.PartitionQueryDate(); -- Index seek Order_OrderDate; actual partition count 2
-- Cleanup: tables first, then the partition scheme, then the partition
-- function, and finally the two helper functions.
DROP TABLE IF EXISTS [Order];
DROP TABLE IF EXISTS Customer;
DROP PARTITION SCHEME order_ps;
DROP PARTITION FUNCTION order_pf;
DROP FUNCTION IF EXISTS PartitionDate;
DROP FUNCTION IF EXISTS PartitionQueryDate;
一种解决方法是先将函数结果分配给一个变量。
-- Evaluate the function once into a variable, then filter on the variable.
DECLARE @cutoff smalldatetime = dbo.PartitionQueryDate();
SELECT COUNT(1)
FROM [Order]
WHERE OrderDate > @cutoff;
另一种选择是使用内联 TVF
-- Inline table-valued function returning one row with a single column, Date:
-- the current date/time minus 90 days, cast to date and then to smalldatetime.
CREATE FUNCTION dbo.PartitionQueryDateTVF ()
RETURNS TABLE
AS
RETURN
(
    SELECT CAST(CAST(DATEADD(DAY, -90, GETDATE()) AS date) AS smalldatetime) AS [Date]
)
GO
-- Filter [Order] by the date produced by the inline TVF, read via a scalar subquery.
SELECT COUNT(1)
FROM [Order]
WHERE OrderDate > (SELECT d.[Date] FROM dbo.PartitionQueryDateTVF() AS d);
启用标量 UDF 内联(scalar UDF inlining)后这种情况可能会有所改善,但我目前无法对此进行测试。
我使用的是 Azure SQL 数据库。我按 90 天的日期边界对几个表做了分区,并用一个存储过程移动数据,以维持正确的分区断点/范围(breakpoint/range)。我用一个小函数为查询提供正确的日期断点,这样就不必不断更新所有视图。
但只要在查询中使用该函数,分区就会被忽略(不再发生分区消除)。难道我别无选择,只能在查询中到处写入硬编码值并不断修改它们吗?
这是一个重现问题的示例。
更新:根据被采纳的答案修改下面的 PartitionDate 函数后,短时间内没有问题(出现了分区消除)。之后,查询又开始变差:当我运行一个通过日期函数进行过滤的简单查询时,分区不再被消除。
------------------------------- setup
-- Create functions PartitionDate and PartitionQueryDate
-- Returns the partition boundary date: the current date minus 91 days
-- (1/4/2019 on the day this script was written).
-- Created explicitly in dbo because every caller in this script refers to
-- it by the two-part name dbo.PartitionDate().
CREATE FUNCTION dbo.PartitionDate()
RETURNS date
AS
BEGIN
    -- DATEADD names the unit explicitly; the datetime result is implicitly
    -- converted to the declared date return type.
    RETURN DATEADD(DAY, -91, GETDATE());
END
go
-- Returns the date used to filter queries: the current date minus 90 days
-- (1/5/2019 on the day this script was written).
-- Created explicitly in dbo because the queries in this script call it as
-- dbo.PartitionQueryDate().
CREATE FUNCTION dbo.PartitionQueryDate()
RETURNS date
AS
BEGIN
    -- DATEADD names the unit explicitly; the datetime result is implicitly
    -- converted to the declared date return type.
    RETURN DATEADD(DAY, -90, GETDATE());
END
go
-- Partition function with a single RANGE RIGHT boundary supplied by
-- dbo.PartitionDate(), plus a scheme that maps every partition to PRIMARY.
CREATE PARTITION FUNCTION order_pf (smalldatetime)
    AS RANGE RIGHT FOR VALUES (dbo.PartitionDate());
CREATE PARTITION SCHEME order_ps
    AS PARTITION order_pf ALL TO ([PRIMARY]);
-- Customer: identity primary key plus a required first name.
CREATE TABLE Customer
(
    id        int IDENTITY(1,1) PRIMARY KEY,
    FirstName varchar(255) NOT NULL
);
-- [Order]: identity id (no key is declared on it), a required OrderDate and
-- a foreign key to Customer. Stored on order_ps, partitioned by OrderDate.
CREATE TABLE [Order]
(
    id         int IDENTITY(1,1),
    OrderDate  smalldatetime NOT NULL,
    CustomerId int NOT NULL,
    CONSTRAINT [FK_Orders_Customer]
        FOREIGN KEY ([CustomerId]) REFERENCES Customer ([id])
) ON order_ps (OrderDate);
-- Indexes on [Order]; both are placed on order_ps and partitioned by OrderDate.
CREATE CLUSTERED INDEX [Order_OrderDate]
    ON [Order] ([OrderDate] ASC)
    ON [order_ps] ([OrderDate]);
-- Author's note: this index seems to work the same with or without the
-- explicit partition-scheme clause.
CREATE NONCLUSTERED INDEX [FK_Order_Customer]
    ON [Order] (CustomerId, OrderDate)
    ON [order_ps] ([OrderDate]);
go
-- Seed one order on each side of the partition boundary.
-- Column lists are explicit, and dates use the yyyymmdd form, which is read
-- the same way regardless of the session's language / DATEFORMAT settings.
INSERT INTO Customer (FirstName) VALUES ('bob');
-- SCOPE_IDENTITY() is the id of the Customer row inserted just above.
INSERT INTO [Order] (OrderDate, CustomerId) VALUES ('20181231', SCOPE_IDENTITY());
INSERT INTO Customer (FirstName) VALUES ('hank');
INSERT INTO [Order] (OrderDate, CustomerId) VALUES ('20190106', SCOPE_IDENTITY());
---------------------------- test
-- Row count per partition; the author expects one row in each partition.
SELECT
    $PARTITION.order_pf(OrderDate) AS Partition_Number,
    COUNT(*) AS Row_Count
FROM [Order]
GROUP BY $PARTITION.order_pf(OrderDate);
-- Compare the two filters with the actual execution plan turned on. They are
-- logically equivalent only while dbo.PartitionQueryDate() evaluates to 2019-01-05.
-- The literal is written as yyyymmdd so it cannot be read as 1 May under a
-- day-first DATEFORMAT.
SELECT COUNT(1) FROM [Order] WHERE OrderDate > '20190105'; -- Index seek Order_OrderDate; actual partition count 1
SELECT COUNT(1) FROM [Order] WHERE OrderDate > dbo.PartitionQueryDate(); -- Index seek Order_OrderDate; actual partition count 2
-- Cleanup: tables first, then the partition scheme, then the partition
-- function, and finally the two helper functions.
DROP TABLE IF EXISTS [Order];
DROP TABLE IF EXISTS Customer;
DROP PARTITION SCHEME order_ps;
DROP PARTITION FUNCTION order_pf;
DROP FUNCTION IF EXISTS PartitionDate;
DROP FUNCTION IF EXISTS PartitionQueryDate;
一种解决方法是先将函数结果分配给一个变量。
-- Evaluate the function once into a variable, then filter on the variable.
DECLARE @cutoff smalldatetime = dbo.PartitionQueryDate();
SELECT COUNT(1)
FROM [Order]
WHERE OrderDate > @cutoff;
另一种选择是使用内联 TVF
-- Inline table-valued function returning one row with a single column, Date:
-- the current date/time minus 90 days, cast to date and then to smalldatetime.
CREATE FUNCTION dbo.PartitionQueryDateTVF ()
RETURNS TABLE
AS
RETURN
(
    SELECT CAST(CAST(DATEADD(DAY, -90, GETDATE()) AS date) AS smalldatetime) AS [Date]
)
GO
-- Filter [Order] by the date produced by the inline TVF, read via a scalar subquery.
SELECT COUNT(1)
FROM [Order]
WHERE OrderDate > (SELECT d.[Date] FROM dbo.PartitionQueryDateTVF() AS d);
启用标量 UDF 内联(scalar UDF inlining)后这种情况可能会有所改善,但我目前无法对此进行测试。