在 100 万行 table 上为 LIKE 查询创建聚集或非聚集索引?
Create clustered or nonclustered index on 1 million rows table for LIKE query?
我有一个包含邮政编码、城市、经度、纬度、省份的数据库table。
我对此 table 有一个用途,即 return 自动完成小部件的建议代码(加上城市和省份)。
city
和 postalcode
组成了唯一的记录。我从 3 个字符开始查询。
这个查询很慢,导致自动完成体验很差。我想知道根据以上信息哪种索引最有效?我正在使用 Azure SQL 数据存储,因此我无法 运行 查询 Analyzer/Tuning Advisor。
我在 postalCode
上尝试过非聚集索引,在 postalCode
和 city
上尝试过 2 列聚集索引。两者都在查询中产生了相同的结果:
Select *
From PostalCode
Where code LIKE '%L6J 0%'
我不会更新或插入此 table。
由于邮政编码的长度相当短且已知 (8),因此这是一个很好的分块候选者。将邮政编码分解成所有组成块,并存储它们的起始位置和长度以启用索引查找。
例如,对于 'OX1 1JZ' 这样的邮政编码,存储以下所有字符串:
start len postcodePart
1 2 OX
1 3 OX1
1 4 OX11
1 5 OX11J
1 6 OX11JZ
2 2 X1
2 3 X11
2 4 X11J
2 5 X11JZ
3 2 11
3 3 11J
3 4 11JZ
4 2 1J
4 3 1JZ
5 2 JZ
这是一个示例脚本,演示了该技术以及如何使用 100 个示例邮政编码和一个触发器来分解邮政编码。
注意!!这不是生产就绪代码,只是展示该技术的示例。
USE tempdb
GO
-- https://www.postcodelist.co.uk/
--uk-postcodes.csv
IF OBJECT_ID('dbo.postCodeParts') IS NOT NULL DROP TABLE dbo.postCodeParts
IF OBJECT_ID('dbo.postCodes') IS NOT NULL DROP TABLE dbo.postCodes
GO
CREATE TABLE dbo.postCodes (
postcodeId INT IDENTITY CONSTRAINT PK_postCodes PRIMARY KEY,
postcode VARCHAR(8) NOT NULL
--... the rest of your columns
)
GO
CREATE TABLE dbo.postCodeParts (
postcodePartId INT IDENTITY CONSTRAINT PK_postCodeParts PRIMARY KEY NONCLUSTERED,
postcodeId INT NOT NULL FOREIGN KEY REFERENCES dbo.postCodes ( postcodeId ),
totalLen TINYINT NOT NULL,
xStart TINYINT NOT NULL,
xLen TINYINT NOT NULL,
postcodePart VARCHAR(8) NOT NULL INDEX cdx_postCodeParts CLUSTERED
)
GO
-- Add a trimmed copy of the postcode to the parts table, chunked up.
CREATE TRIGGER dbo.trg_postCodes
ON dbo.postcodes
FOR INSERT
AS
BEGIN
;WITH cte AS
(
SELECT *
FROM (
VALUES ( 1 ), ( 2 ), ( 3 ), ( 4 ), ( 5 ), ( 6 ), ( 7 ), ( 8 )
) x(y)
)
INSERT INTO dbo.postCodeParts ( postcodeId, totalLen, xStart, xLen, postcodePart )
SELECT
p.postcodeId,
p.xTotalLen,
c1.y AS xstart,
c2.y AS xlen,
SUBSTRING( p.postCode, c1.y, c2.y ) AS xstring
FROM (
SELECT
postcodeId,
REPLACE( postcode, ' ', '' ) postCode,
LEN( REPLACE( postcode, ' ', '' ) ) AS xTotalLen
FROM inserted
) p
CROSS JOIN cte c1
CROSS JOIN cte c2
WHERE c2.y Between 2 And p.xTotalLen
AND ( ( c2.y ) + ( c1.y - 1 ) ) <= p.xTotalLen
END
GO
INSERT INTO dbo.postcodes ( postcode )
VALUES
( 'OX1 1AA' ),( 'OX1 1AB' ),( 'OX1 1AD' ),( 'OX1 1AE' ),( 'OX1 1AF' ),( 'OX1 1AG' ),( 'OX1 1AN' ),( 'OX1 1AS' ),( 'OX1 1AW' ),( 'OX1 1AY' ),
( 'OX1 1AZ' ),( 'OX1 1BD' ),( 'OX1 1BE' ),( 'OX1 1BN' ),( 'OX1 1BP' ),( 'OX1 1BS' ),( 'OX1 1BT' ),( 'OX1 1BU' ),( 'OX1 1BX' ),( 'OX1 1BY' ),
( 'OX1 1BZ' ),( 'OX1 1DA' ),( 'OX1 1DB' ),( 'OX1 1DE' ),( 'OX1 1DF' ),( 'OX1 1DG' ),( 'OX1 1DJ' ),( 'OX1 1DL' ),( 'OX1 1DP' ),( 'OX1 1DQ' ),
( 'OX1 1DS' ),( 'OX1 1DW' ),( 'OX1 1DZ' ),( 'OX1 1EA' ),( 'OX1 1EF' ),( 'OX1 1EJ' ),( 'OX1 1EN' ),( 'OX1 1EP' ),( 'OX1 1EQ' ),( 'OX1 1ER' ),
( 'OX1 1ES' ),( 'OX1 1ET' ),( 'OX1 1EU' ),( 'OX1 1EW' ),( 'OX1 1EX' ),( 'OX1 1GA' ),( 'OX1 1GB' ),( 'OX1 1GD' ),( 'OX1 1GE' ),( 'OX1 1GF' ),
( 'OX1 1GH' ),( 'OX1 1GJ' ),( 'OX1 1GL' ),( 'OX1 1HB' ),( 'OX1 1HD' ),( 'OX1 1HF' ),( 'OX1 1HG' ),( 'OX1 1HH' ),( 'OX1 1HN' ),( 'OX1 1HP' ),
( 'OX1 1HQ' ),( 'OX1 1HR' ),( 'OX1 1HS' ),( 'OX1 1HT' ),( 'OX1 1HU' ),( 'OX1 1HW' ),( 'OX1 1HX' ),( 'OX1 1HY' ),( 'OX1 1HZ' ),( 'OX1 1JA' ),
( 'OX1 1JB' ),( 'OX1 1JD' ),( 'OX1 1JE' ),( 'OX1 1JF' ),( 'OX1 1JG' ),( 'OX1 1JH' ),( 'OX1 1JJ' ),( 'OX1 1JL' ),( 'OX1 1JP' ),( 'OX1 1JQ' ),
( 'OX1 1JR' ),( 'OX1 1JS' ),( 'OX1 1JT' ),( 'OX1 1JU' ),( 'OX1 1JW' ),( 'OX1 1JX' ),( 'OX1 1JY' ),( 'OX1 1JZ' ),( 'OX1 1LB' ),( 'OX1 1LD' ),
( 'OX1 1LE' ),( 'OX1 1LF' ),( 'OX1 1LG' ),( 'OX1 1LJ' ),( 'OX1 1LL' ),( 'OX1 1LQ' ),( 'OX1 1LT' ),( 'OX1 1LU' ),( 'OX1 1LY' ),( 'OX1 1ND' )
GO
SELECT * FROM dbo.postCodes
SELECT * FROM dbo.postCodeParts ORDER BY xStart, xLen
SELECT *
FROM dbo.postCodes pc
INNER JOIN dbo.postCodeParts pcp ON pc.postcodeId = pcp.postcodeId
WHERE postcodePart = '1J'
ORDER BY xStart, xLen
GO
IF OBJECT_ID('dbo.usp_searchPostCodes') IS NOT NULL DROP PROC dbo.usp_searchPostCodes
GO
CREATE PROC dbo.usp_searchPostCodes
@searchString VARCHAR(8)
AS
SET NOCOUNT ON
--!!TODO add error handling
--!!TODO does not deal with middle wildcards or _ wildcard
DECLARE @leadingWildCard BIT
DECLARE @cleanSearchString VARCHAR(8)
SELECT @leadingWildCard = CASE WHEN LEFT( @searchString, 1 ) = '%' THEN 1 ELSE 0 END
SELECT @cleanSearchString = REPLACE( REPLACE( @searchString, ' ', '' ), '%', '' )
-- Debugging
--PRINT @leadingWildCard
--PRINT @cleanSearchString
IF @leadingWildCard = 0
-- No leading wildcard, start at position 1
SELECT pc.postcode
FROM dbo.postCodes pc
INNER JOIN dbo.postCodeParts pcp ON pc.postcodeId = pcp.postcodeId
WHERE pcp.postcodePart = @cleanSearchString
AND pcp.xstart = 1
ORDER BY xStart, xLen
ELSE
-- Leading wildcard, return all positions
SELECT pc.postcode
FROM dbo.postCodes pc
INNER JOIN dbo.postCodeParts pcp ON pc.postcodeId = pcp.postcodeId
WHERE pcp.postcodePart = @cleanSearchString
ORDER BY xStart, xLen
RETURN
GO
EXEC dbo.usp_searchPostCodes 'OX1 1J%'
EXEC dbo.usp_searchPostCodes '%X1 1J%'
GO
SELECT xStart, xLen, postcodePart
FROM dbo.postCodes pc
INNER JOIN dbo.postCodeParts pcp ON pc.postcodeId = pcp.postcodeId
WHERE pc.postcode= 'OX1 1JZ'
ORDER BY xStart, xLen
我有一个包含邮政编码、城市、经度、纬度、省份的数据库table。
我对此 table 有一个用途,即 return 自动完成小部件的建议代码(加上城市和省份)。
city
和 postalcode
组成了唯一的记录。我从 3 个字符开始查询。
这个查询很慢,导致自动完成体验很差。我想知道根据以上信息哪种索引最有效?我正在使用 Azure SQL 数据存储,因此我无法 运行 查询 Analyzer/Tuning Advisor。
我在 postalCode
上尝试过非聚集索引,在 postalCode
和 city
上尝试过 2 列聚集索引。两者都在查询中产生了相同的结果:
Select *
From PostalCode
Where code LIKE '%L6J 0%'
我不会更新或插入此 table。
由于邮政编码的长度相当短且已知 (8),因此这是一个很好的分块候选者。将邮政编码分解成所有组成块,并存储它们的起始位置和长度以启用索引查找。
例如,对于 'OX1 1JZ' 这样的邮政编码,存储以下所有字符串:
start len postcodePart
1 2 OX
1 3 OX1
1 4 OX11
1 5 OX11J
1 6 OX11JZ
2 2 X1
2 3 X11
2 4 X11J
2 5 X11JZ
3 2 11
3 3 11J
3 4 11JZ
4 2 1J
4 3 1JZ
5 2 JZ
这是一个示例脚本,演示了该技术以及如何使用 100 个示例邮政编码和一个触发器来分解邮政编码。
注意!!这不是生产就绪代码,只是展示该技术的示例。
USE tempdb
GO
-- https://www.postcodelist.co.uk/
--uk-postcodes.csv
IF OBJECT_ID('dbo.postCodeParts') IS NOT NULL DROP TABLE dbo.postCodeParts
IF OBJECT_ID('dbo.postCodes') IS NOT NULL DROP TABLE dbo.postCodes
GO
CREATE TABLE dbo.postCodes (
postcodeId INT IDENTITY CONSTRAINT PK_postCodes PRIMARY KEY,
postcode VARCHAR(8) NOT NULL
--... the rest of your columns
)
GO
CREATE TABLE dbo.postCodeParts (
postcodePartId INT IDENTITY CONSTRAINT PK_postCodeParts PRIMARY KEY NONCLUSTERED,
postcodeId INT NOT NULL FOREIGN KEY REFERENCES dbo.postCodes ( postcodeId ),
totalLen TINYINT NOT NULL,
xStart TINYINT NOT NULL,
xLen TINYINT NOT NULL,
postcodePart VARCHAR(8) NOT NULL INDEX cdx_postCodeParts CLUSTERED
)
GO
-- Add a trimmed copy of the postcode to the parts table, chunked up.
CREATE TRIGGER dbo.trg_postCodes
ON dbo.postcodes
FOR INSERT
AS
BEGIN
;WITH cte AS
(
SELECT *
FROM (
VALUES ( 1 ), ( 2 ), ( 3 ), ( 4 ), ( 5 ), ( 6 ), ( 7 ), ( 8 )
) x(y)
)
INSERT INTO dbo.postCodeParts ( postcodeId, totalLen, xStart, xLen, postcodePart )
SELECT
p.postcodeId,
p.xTotalLen,
c1.y AS xstart,
c2.y AS xlen,
SUBSTRING( p.postCode, c1.y, c2.y ) AS xstring
FROM (
SELECT
postcodeId,
REPLACE( postcode, ' ', '' ) postCode,
LEN( REPLACE( postcode, ' ', '' ) ) AS xTotalLen
FROM inserted
) p
CROSS JOIN cte c1
CROSS JOIN cte c2
WHERE c2.y Between 2 And p.xTotalLen
AND ( ( c2.y ) + ( c1.y - 1 ) ) <= p.xTotalLen
END
GO
INSERT INTO dbo.postcodes ( postcode )
VALUES
( 'OX1 1AA' ),( 'OX1 1AB' ),( 'OX1 1AD' ),( 'OX1 1AE' ),( 'OX1 1AF' ),( 'OX1 1AG' ),( 'OX1 1AN' ),( 'OX1 1AS' ),( 'OX1 1AW' ),( 'OX1 1AY' ),
( 'OX1 1AZ' ),( 'OX1 1BD' ),( 'OX1 1BE' ),( 'OX1 1BN' ),( 'OX1 1BP' ),( 'OX1 1BS' ),( 'OX1 1BT' ),( 'OX1 1BU' ),( 'OX1 1BX' ),( 'OX1 1BY' ),
( 'OX1 1BZ' ),( 'OX1 1DA' ),( 'OX1 1DB' ),( 'OX1 1DE' ),( 'OX1 1DF' ),( 'OX1 1DG' ),( 'OX1 1DJ' ),( 'OX1 1DL' ),( 'OX1 1DP' ),( 'OX1 1DQ' ),
( 'OX1 1DS' ),( 'OX1 1DW' ),( 'OX1 1DZ' ),( 'OX1 1EA' ),( 'OX1 1EF' ),( 'OX1 1EJ' ),( 'OX1 1EN' ),( 'OX1 1EP' ),( 'OX1 1EQ' ),( 'OX1 1ER' ),
( 'OX1 1ES' ),( 'OX1 1ET' ),( 'OX1 1EU' ),( 'OX1 1EW' ),( 'OX1 1EX' ),( 'OX1 1GA' ),( 'OX1 1GB' ),( 'OX1 1GD' ),( 'OX1 1GE' ),( 'OX1 1GF' ),
( 'OX1 1GH' ),( 'OX1 1GJ' ),( 'OX1 1GL' ),( 'OX1 1HB' ),( 'OX1 1HD' ),( 'OX1 1HF' ),( 'OX1 1HG' ),( 'OX1 1HH' ),( 'OX1 1HN' ),( 'OX1 1HP' ),
( 'OX1 1HQ' ),( 'OX1 1HR' ),( 'OX1 1HS' ),( 'OX1 1HT' ),( 'OX1 1HU' ),( 'OX1 1HW' ),( 'OX1 1HX' ),( 'OX1 1HY' ),( 'OX1 1HZ' ),( 'OX1 1JA' ),
( 'OX1 1JB' ),( 'OX1 1JD' ),( 'OX1 1JE' ),( 'OX1 1JF' ),( 'OX1 1JG' ),( 'OX1 1JH' ),( 'OX1 1JJ' ),( 'OX1 1JL' ),( 'OX1 1JP' ),( 'OX1 1JQ' ),
( 'OX1 1JR' ),( 'OX1 1JS' ),( 'OX1 1JT' ),( 'OX1 1JU' ),( 'OX1 1JW' ),( 'OX1 1JX' ),( 'OX1 1JY' ),( 'OX1 1JZ' ),( 'OX1 1LB' ),( 'OX1 1LD' ),
( 'OX1 1LE' ),( 'OX1 1LF' ),( 'OX1 1LG' ),( 'OX1 1LJ' ),( 'OX1 1LL' ),( 'OX1 1LQ' ),( 'OX1 1LT' ),( 'OX1 1LU' ),( 'OX1 1LY' ),( 'OX1 1ND' )
GO
SELECT * FROM dbo.postCodes
SELECT * FROM dbo.postCodeParts ORDER BY xStart, xLen
SELECT *
FROM dbo.postCodes pc
INNER JOIN dbo.postCodeParts pcp ON pc.postcodeId = pcp.postcodeId
WHERE postcodePart = '1J'
ORDER BY xStart, xLen
GO
IF OBJECT_ID('dbo.usp_searchPostCodes') IS NOT NULL DROP PROC dbo.usp_searchPostCodes
GO
CREATE PROC dbo.usp_searchPostCodes
@searchString VARCHAR(8)
AS
SET NOCOUNT ON
--!!TODO add error handling
--!!TODO does not deal with middle wildcards or _ wildcard
DECLARE @leadingWildCard BIT
DECLARE @cleanSearchString VARCHAR(8)
SELECT @leadingWildCard = CASE WHEN LEFT( @searchString, 1 ) = '%' THEN 1 ELSE 0 END
SELECT @cleanSearchString = REPLACE( REPLACE( @searchString, ' ', '' ), '%', '' )
-- Debugging
--PRINT @leadingWildCard
--PRINT @cleanSearchString
IF @leadingWildCard = 0
-- No leading wildcard, start at position 1
SELECT pc.postcode
FROM dbo.postCodes pc
INNER JOIN dbo.postCodeParts pcp ON pc.postcodeId = pcp.postcodeId
WHERE pcp.postcodePart = @cleanSearchString
AND pcp.xstart = 1
ORDER BY xStart, xLen
ELSE
-- Leading wildcard, return all positions
SELECT pc.postcode
FROM dbo.postCodes pc
INNER JOIN dbo.postCodeParts pcp ON pc.postcodeId = pcp.postcodeId
WHERE pcp.postcodePart = @cleanSearchString
ORDER BY xStart, xLen
RETURN
GO
EXEC dbo.usp_searchPostCodes 'OX1 1J%'
EXEC dbo.usp_searchPostCodes '%X1 1J%'
GO
SELECT xStart, xLen, postcodePart
FROM dbo.postCodes pc
INNER JOIN dbo.postCodeParts pcp ON pc.postcodeId = pcp.postcodeId
WHERE pc.postcode= 'OX1 1JZ'
ORDER BY xStart, xLen