在 Azure SQL 数据仓库上自动更新列级统计信息
Automation of UPDATE of column-level statistics on Azure SQL Data Warehouse
我计划在我的 Azure SQL 数据仓库数据库上自动刷新(即更新)列级统计信息。我计划在一张站点专用的表中记录这些操作,然后按以下规则动态生成用于刷新统计信息的 SQL:
- DATE 列每天刷新,
- ID/代码类的主键/外键列每两周刷新一次,
- Indicator/boolean 列每月刷新一次,
- QTY/AMT(事实)列每季度刷新一次。
我查看了 https://msdn.microsoft.com/library/ms190330.aspx 中的 STATS_DATE 函数,但该函数似乎无法提供列级统计信息所需的详细信息。例如,我的其中一张表有三列已收集统计信息,而查询输出中 STATS_DATE 对这三列均显示 NULL:
-- List every statistics object on one table (object_id 107141) together
-- with the value STATS_DATE returns for it.
SELECT
    st.object_id,
    st.name,
    st.stats_id,
    st.user_created,
    STATS_DATE(st.object_id, st.stats_id) AS statistics_date
FROM sys.stats AS st
WHERE st.object_id = 107141;
Returns
object_id name stats_id user_created statistics_date
107,141 MySchema_MyTable_Col1 2 1 [NULL]
107,141 MySchema_MyTable_Col2 3 1 [NULL]
107,141 MySchema_MyTable_Col3 4 1 [NULL]
我是否忽略或误解了此功能,我是否应该能够使用 STATS_DATE 来管理我的列的统计信息?
下面是更完整的演示:
-- Demonstration table created without a WITH clause, so it takes the
-- platform's default table structure (the author labels it "columnar").
CREATE TABLE My_Schema.steve_test_table_columnar
(
    c1_c integer,
    c2_c smallint,
    c3_c date,
    c4_c decimal(18,2)
);
-- Demonstration table stored explicitly as a heap.
CREATE TABLE My_Schema.steve_test_table_heap
(
    c1_h integer,
    c2_h smallint,
    c3_h date,
    c4_h decimal(18,2)
)
WITH (HEAP);
-- CREATE STATISTICS statements: one single-column statistics object for
-- each column of each demonstration table.
CREATE STATISTICS My_Schema_STEVE_TEST_TABLE_COLUMNAR_C1_C ON My_Schema.STEVE_TEST_TABLE_COLUMNAR ( C1_C ) ;
CREATE STATISTICS My_Schema_STEVE_TEST_TABLE_COLUMNAR_C2_C ON My_Schema.STEVE_TEST_TABLE_COLUMNAR ( C2_C ) ;
CREATE STATISTICS My_Schema_STEVE_TEST_TABLE_COLUMNAR_C3_C ON My_Schema.STEVE_TEST_TABLE_COLUMNAR ( C3_C ) ;
CREATE STATISTICS My_Schema_STEVE_TEST_TABLE_COLUMNAR_C4_C ON My_Schema.STEVE_TEST_TABLE_COLUMNAR ( C4_C ) ;
CREATE STATISTICS My_Schema_STEVE_TEST_TABLE_HEAP_C1_H ON My_Schema.STEVE_TEST_TABLE_HEAP ( C1_H ) ;
CREATE STATISTICS My_Schema_STEVE_TEST_TABLE_HEAP_C2_H ON My_Schema.STEVE_TEST_TABLE_HEAP ( C2_H ) ;
CREATE STATISTICS My_Schema_STEVE_TEST_TABLE_HEAP_C3_H ON My_Schema.STEVE_TEST_TABLE_HEAP ( C3_H ) ;
CREATE STATISTICS My_Schema_STEVE_TEST_TABLE_HEAP_C4_H ON My_Schema.STEVE_TEST_TABLE_HEAP ( C4_H ) ;
-- UPDATE (aka "REFRESH") STATISTICS statements.
-- Each statistics object created for the demonstration tables is
-- refreshed exactly once.
UPDATE STATISTICS My_Schema.STEVE_TEST_TABLE_COLUMNAR ( My_Schema_STEVE_TEST_TABLE_COLUMNAR_C3_C ) ;
UPDATE STATISTICS My_Schema.STEVE_TEST_TABLE_HEAP ( My_Schema_STEVE_TEST_TABLE_HEAP_C3_H ) ;
UPDATE STATISTICS My_Schema.STEVE_TEST_TABLE_COLUMNAR ( My_Schema_STEVE_TEST_TABLE_COLUMNAR_C2_C ) ;
UPDATE STATISTICS My_Schema.STEVE_TEST_TABLE_HEAP ( My_Schema_STEVE_TEST_TABLE_HEAP_C2_H ) ;
UPDATE STATISTICS My_Schema.STEVE_TEST_TABLE_COLUMNAR ( My_Schema_STEVE_TEST_TABLE_COLUMNAR_C1_C ) ;
UPDATE STATISTICS My_Schema.STEVE_TEST_TABLE_HEAP ( My_Schema_STEVE_TEST_TABLE_HEAP_C1_H ) ;
UPDATE STATISTICS My_Schema.STEVE_TEST_TABLE_COLUMNAR ( My_Schema_STEVE_TEST_TABLE_COLUMNAR_C4_C ) ;
UPDATE STATISTICS My_Schema.STEVE_TEST_TABLE_HEAP ( My_Schema_STEVE_TEST_TABLE_HEAP_C4_H ) ;
-- Validation step: look up the schema_id of the demonstration schema.
SELECT
    sch.[schema_id],
    sch.[name]
FROM sys.[schemas] AS sch
WHERE sch.[name] = 'My_Schema';
--Results:
schema_id name
24 My_Schema
-- Look up the object_id of each demonstration table in My_Schema.
SELECT
    tbl.[object_id],
    tbl.[name]
FROM sys.[tables] AS tbl
INNER JOIN sys.[schemas] AS sch
    ON sch.[schema_id] = tbl.[schema_id]
WHERE sch.[name] = 'My_Schema'
  AND tbl.[name] IN ('STEVE_TEST_TABLE_HEAP', 'STEVE_TEST_TABLE_COLUMNAR');
--Results:
object_id name
516,196,889 steve_test_table_columnar
532,196,946 steve_test_table_heap
-- Get the column IDs of every column in the demonstration tables.
SELECT
    tbl.[object_id],
    col.[column_id],
    tbl.[name],
    col.[name] AS Column_Name
FROM sys.[tables] AS tbl
INNER JOIN sys.[schemas] AS sch
    ON sch.[schema_id] = tbl.[schema_id]
INNER JOIN sys.[columns] AS col
    ON col.[object_id] = tbl.[object_id]
WHERE sch.[name] = 'My_Schema'
  AND tbl.[name] IN ('STEVE_TEST_TABLE_HEAP', 'STEVE_TEST_TABLE_COLUMNAR');
--Results:
object_id column_id name Column_Name
516,196,889 1 steve_test_table_columnar c1_c
516,196,889 2 steve_test_table_columnar c2_c
516,196,889 3 steve_test_table_columnar c3_c
516,196,889 4 steve_test_table_columnar c4_c
532,196,946 1 steve_test_table_heap c1_h
532,196,946 2 steve_test_table_heap c2_h
532,196,946 3 steve_test_table_heap c3_h
532,196,946 4 steve_test_table_heap c4_h
-- Final review of statistics metadata: one row per column of each
-- user-created statistics object on the demonstration tables, with the
-- value STATS_DATE returns for that statistics object.
SELECT
    tbl.[object_id],
    col.[column_id],
    tbl.[name] AS table_name,
    col.[name] AS Column_Name,
    st.stats_id,
    st.name AS Stats_Name,
    stc.stats_column_id,
    STATS_DATE(st.object_id, st.stats_id) AS statistics_date
FROM sys.[tables] AS tbl
INNER JOIN sys.[schemas] AS sch
    ON sch.[schema_id] = tbl.[schema_id]
INNER JOIN sys.[columns] AS col
    ON col.[object_id] = tbl.[object_id]
INNER JOIN sys.stats AS st
    ON st.[object_id] = tbl.[object_id]
   AND st.user_created = 1
INNER JOIN sys.[stats_columns] AS stc
    ON stc.stats_id = st.stats_id
   AND stc.[object_id] = st.[object_id]
   AND stc.[column_id] = col.[column_id]
WHERE sch.[name] = 'My_Schema'
  AND tbl.[name] IN ('STEVE_TEST_TABLE_HEAP', 'STEVE_TEST_TABLE_COLUMNAR');
object_id column_id table_name Column_Name stats_id Stats_Name stats_column_id statistics_date
516,196,889 1 steve_test_table_columnar c1_c 2 My_Schema_STEVE_TEST_TABLE_COLUMNAR_C1_C 1 [NULL]
516,196,889 2 steve_test_table_columnar c2_c 3 My_Schema_STEVE_TEST_TABLE_COLUMNAR_C2_C 1 [NULL]
516,196,889 3 steve_test_table_columnar c3_c 4 My_Schema_STEVE_TEST_TABLE_COLUMNAR_C3_C 1 [NULL]
516,196,889 4 steve_test_table_columnar c4_c 5 My_Schema_STEVE_TEST_TABLE_COLUMNAR_C4_C 1 [NULL]
532,196,946 1 steve_test_table_heap c1_h 2 My_Schema_STEVE_TEST_TABLE_HEAP_C1_H 1 [NULL]
532,196,946 2 steve_test_table_heap c2_h 3 My_Schema_STEVE_TEST_TABLE_HEAP_C2_H 1 [NULL]
532,196,946 3 steve_test_table_heap c3_h 4 My_Schema_STEVE_TEST_TABLE_HEAP_C3_H 1 [NULL]
532,196,946 4 steve_test_table_heap c4_h 5 My_Schema_STEVE_TEST_TABLE_HEAP_C4_H 1 [NULL]
我确认,如果表中已加载数据,STATS_DATE(id,id) 的返回值不为 NULL。我之前的实验只创建了表,并未加载数据。
我计划在我的 Azure SQL 数据仓库数据库上自动刷新(即更新)列级统计信息。我计划在一张站点专用的表中记录这些操作,然后按以下规则动态生成用于刷新统计信息的 SQL:
- DATE 列每天刷新,
- ID/代码类的主键/外键列每两周刷新一次,
- Indicator/boolean 列每月刷新一次,
- QTY/AMT(事实)列每季度刷新一次。
我查看了 https://msdn.microsoft.com/library/ms190330.aspx 中的 STATS_DATE 函数,但该函数似乎无法提供列级统计信息所需的详细信息。例如,我的其中一张表有三列已收集统计信息,其查询输出中 STATS_DATE 一列均
显示 NULL
-- List every statistics object on one table (object_id 107141) together
-- with the value STATS_DATE returns for it.
SELECT
    st.object_id,
    st.name,
    st.stats_id,
    st.user_created,
    STATS_DATE(st.object_id, st.stats_id) AS statistics_date
FROM sys.stats AS st
WHERE st.object_id = 107141;
Returns
object_id name stats_id user_created statistics_date
107,141 MySchema_MyTable_Col1 2 1 [NULL]
107,141 MySchema_MyTable_Col2 3 1 [NULL]
107,141 MySchema_MyTable_Col3 4 1 [NULL]
我是否忽略或误解了此功能,我是否应该能够使用 STATS_DATE 来管理我的列的统计信息?
下面是更完整的演示:
-- Demonstration table created without a WITH clause, so it takes the
-- platform's default table structure (the author labels it "columnar").
CREATE TABLE My_Schema.steve_test_table_columnar
(
    c1_c integer,
    c2_c smallint,
    c3_c date,
    c4_c decimal(18,2)
);
-- Demonstration table stored explicitly as a heap.
CREATE TABLE My_Schema.steve_test_table_heap
(
    c1_h integer,
    c2_h smallint,
    c3_h date,
    c4_h decimal(18,2)
)
WITH (HEAP);
-- CREATE STATISTICS statements: one single-column statistics object for
-- each column of each demonstration table.
CREATE STATISTICS My_Schema_STEVE_TEST_TABLE_COLUMNAR_C1_C ON My_Schema.STEVE_TEST_TABLE_COLUMNAR ( C1_C ) ;
CREATE STATISTICS My_Schema_STEVE_TEST_TABLE_COLUMNAR_C2_C ON My_Schema.STEVE_TEST_TABLE_COLUMNAR ( C2_C ) ;
CREATE STATISTICS My_Schema_STEVE_TEST_TABLE_COLUMNAR_C3_C ON My_Schema.STEVE_TEST_TABLE_COLUMNAR ( C3_C ) ;
CREATE STATISTICS My_Schema_STEVE_TEST_TABLE_COLUMNAR_C4_C ON My_Schema.STEVE_TEST_TABLE_COLUMNAR ( C4_C ) ;
CREATE STATISTICS My_Schema_STEVE_TEST_TABLE_HEAP_C1_H ON My_Schema.STEVE_TEST_TABLE_HEAP ( C1_H ) ;
CREATE STATISTICS My_Schema_STEVE_TEST_TABLE_HEAP_C2_H ON My_Schema.STEVE_TEST_TABLE_HEAP ( C2_H ) ;
CREATE STATISTICS My_Schema_STEVE_TEST_TABLE_HEAP_C3_H ON My_Schema.STEVE_TEST_TABLE_HEAP ( C3_H ) ;
CREATE STATISTICS My_Schema_STEVE_TEST_TABLE_HEAP_C4_H ON My_Schema.STEVE_TEST_TABLE_HEAP ( C4_H ) ;
-- UPDATE (aka "REFRESH") STATISTICS statements.
-- Each statistics object created for the demonstration tables is
-- refreshed exactly once.
UPDATE STATISTICS My_Schema.STEVE_TEST_TABLE_COLUMNAR ( My_Schema_STEVE_TEST_TABLE_COLUMNAR_C3_C ) ;
UPDATE STATISTICS My_Schema.STEVE_TEST_TABLE_HEAP ( My_Schema_STEVE_TEST_TABLE_HEAP_C3_H ) ;
UPDATE STATISTICS My_Schema.STEVE_TEST_TABLE_COLUMNAR ( My_Schema_STEVE_TEST_TABLE_COLUMNAR_C2_C ) ;
UPDATE STATISTICS My_Schema.STEVE_TEST_TABLE_HEAP ( My_Schema_STEVE_TEST_TABLE_HEAP_C2_H ) ;
UPDATE STATISTICS My_Schema.STEVE_TEST_TABLE_COLUMNAR ( My_Schema_STEVE_TEST_TABLE_COLUMNAR_C1_C ) ;
UPDATE STATISTICS My_Schema.STEVE_TEST_TABLE_HEAP ( My_Schema_STEVE_TEST_TABLE_HEAP_C1_H ) ;
UPDATE STATISTICS My_Schema.STEVE_TEST_TABLE_COLUMNAR ( My_Schema_STEVE_TEST_TABLE_COLUMNAR_C4_C ) ;
UPDATE STATISTICS My_Schema.STEVE_TEST_TABLE_HEAP ( My_Schema_STEVE_TEST_TABLE_HEAP_C4_H ) ;
-- Validation step: look up the schema_id of the demonstration schema.
SELECT
    sch.[schema_id],
    sch.[name]
FROM sys.[schemas] AS sch
WHERE sch.[name] = 'My_Schema';
--Results:
schema_id name
24 My_Schema
-- Look up the object_id of each demonstration table in My_Schema.
SELECT
    tbl.[object_id],
    tbl.[name]
FROM sys.[tables] AS tbl
INNER JOIN sys.[schemas] AS sch
    ON sch.[schema_id] = tbl.[schema_id]
WHERE sch.[name] = 'My_Schema'
  AND tbl.[name] IN ('STEVE_TEST_TABLE_HEAP', 'STEVE_TEST_TABLE_COLUMNAR');
--Results:
object_id name
516,196,889 steve_test_table_columnar
532,196,946 steve_test_table_heap
-- Get the column IDs of every column in the demonstration tables.
SELECT
    tbl.[object_id],
    col.[column_id],
    tbl.[name],
    col.[name] AS Column_Name
FROM sys.[tables] AS tbl
INNER JOIN sys.[schemas] AS sch
    ON sch.[schema_id] = tbl.[schema_id]
INNER JOIN sys.[columns] AS col
    ON col.[object_id] = tbl.[object_id]
WHERE sch.[name] = 'My_Schema'
  AND tbl.[name] IN ('STEVE_TEST_TABLE_HEAP', 'STEVE_TEST_TABLE_COLUMNAR');
--Results:
object_id column_id name Column_Name
516,196,889 1 steve_test_table_columnar c1_c
516,196,889 2 steve_test_table_columnar c2_c
516,196,889 3 steve_test_table_columnar c3_c
516,196,889 4 steve_test_table_columnar c4_c
532,196,946 1 steve_test_table_heap c1_h
532,196,946 2 steve_test_table_heap c2_h
532,196,946 3 steve_test_table_heap c3_h
532,196,946 4 steve_test_table_heap c4_h
-- Final review of statistics metadata: one row per column of each
-- user-created statistics object on the demonstration tables, with the
-- value STATS_DATE returns for that statistics object.
SELECT
    tbl.[object_id],
    col.[column_id],
    tbl.[name] AS table_name,
    col.[name] AS Column_Name,
    st.stats_id,
    st.name AS Stats_Name,
    stc.stats_column_id,
    STATS_DATE(st.object_id, st.stats_id) AS statistics_date
FROM sys.[tables] AS tbl
INNER JOIN sys.[schemas] AS sch
    ON sch.[schema_id] = tbl.[schema_id]
INNER JOIN sys.[columns] AS col
    ON col.[object_id] = tbl.[object_id]
INNER JOIN sys.stats AS st
    ON st.[object_id] = tbl.[object_id]
   AND st.user_created = 1
INNER JOIN sys.[stats_columns] AS stc
    ON stc.stats_id = st.stats_id
   AND stc.[object_id] = st.[object_id]
   AND stc.[column_id] = col.[column_id]
WHERE sch.[name] = 'My_Schema'
  AND tbl.[name] IN ('STEVE_TEST_TABLE_HEAP', 'STEVE_TEST_TABLE_COLUMNAR');
object_id column_id table_name Column_Name stats_id Stats_Name stats_column_id statistics_date
516,196,889 1 steve_test_table_columnar c1_c 2 My_Schema_STEVE_TEST_TABLE_COLUMNAR_C1_C 1 [NULL]
516,196,889 2 steve_test_table_columnar c2_c 3 My_Schema_STEVE_TEST_TABLE_COLUMNAR_C2_C 1 [NULL]
516,196,889 3 steve_test_table_columnar c3_c 4 My_Schema_STEVE_TEST_TABLE_COLUMNAR_C3_C 1 [NULL]
516,196,889 4 steve_test_table_columnar c4_c 5 My_Schema_STEVE_TEST_TABLE_COLUMNAR_C4_C 1 [NULL]
532,196,946 1 steve_test_table_heap c1_h 2 My_Schema_STEVE_TEST_TABLE_HEAP_C1_H 1 [NULL]
532,196,946 2 steve_test_table_heap c2_h 3 My_Schema_STEVE_TEST_TABLE_HEAP_C2_H 1 [NULL]
532,196,946 3 steve_test_table_heap c3_h 4 My_Schema_STEVE_TEST_TABLE_HEAP_C3_H 1 [NULL]
532,196,946 4 steve_test_table_heap c4_h 5 My_Schema_STEVE_TEST_TABLE_HEAP_C4_H 1 [NULL]
我确认,如果表中已加载数据,STATS_DATE(id,id) 的返回值不为 NULL。我之前的实验只创建了表,并未加载数据。