SQL 全文搜索结果优先级
SQL Full Text Search result priority
我习惯使用 Lucene 进行全文搜索,效果很好,但我的主数据库是 SQL。我不喜欢数据库有两种机制的想法,因此决定使用 SQL 全文搜索。事情进展顺利,但仍有一些事情我还没有弄清楚。
假设我有下面的 table:
所有字段都已编入索引以进行全文搜索。
现在我想用文本 "Isaac" 对这个 table 进行全文搜索。我希望第 5 行位于顶部,其余结果位于该行下方。
我可以为某个字段设置优先级,使在该字段中找到的匹配项始终排在结果的顶部吗?基本上,我想按字段设置优先级。在我的真实 table 中,我有 6 个字段。
据我所知,MSSQL 不支持字符串相似度比较,您必须自己编写一套函数:
-- Returns how closely two strings match as an integer percentage
-- (100 = identical): 100 minus the distance reported by [dbo].[LEVENSHTEIN],
-- expressed as a share of the longer string's length.
--   @string1, @string2: the strings to compare (NVARCHAR(100) each).
-- Returns NULL when either argument is NULL, and 100 when both strings are
-- empty. Lengths are measured with LEN, which ignores trailing spaces.
CREATE FUNCTION [dbo].[GetPercentageOfTwoStringMatching]
(
    @string1 NVARCHAR(100)
   ,@string2 NVARCHAR(100)
)
RETURNS INT
AS
BEGIN
    -- A NULL input means the similarity is unknown; do not report a match.
    IF @string1 IS NULL OR @string2 IS NULL
        RETURN NULL

    DECLARE @string1Length INT = LEN(@string1)
          , @string2Length INT = LEN(@string2)
    DECLARE @maxLengthNumber INT = CASE WHEN @string1Length > @string2Length THEN @string1Length ELSE @string2Length END

    -- Two empty strings are identical; returning here also prevents the
    -- division by zero that the percentage calculation would otherwise hit.
    IF @maxLengthNumber = 0
        RETURN 100

    DECLARE @levenShteinNumber INT = [dbo].[LEVENSHTEIN](@string1, @string2)

    -- Integer division: the share of differing characters is truncated.
    DECLARE @percentageOfBadCharacters INT = @levenShteinNumber * 100 / @maxLengthNumber

    RETURN 100 - @percentageOfBadCharacters
END
-- Computes the Levenshtein edit distance between @left and @right: the
-- minimum number of single-character insertions, deletions and substitutions
-- needed to turn one string into the other.
--   @left, @right: the strings to compare (NVARCHAR(100) each, so Unicode
--                  text is compared without conversion to a code page).
-- Returns NULL when either argument is NULL. Lengths are measured with LEN,
-- which ignores trailing spaces. Characters are compared with "=", so the
-- result follows the collation in effect (it may be case-insensitive).
CREATE FUNCTION [dbo].[LEVENSHTEIN](@left NVARCHAR(100),
                                    @right NVARCHAR(100))
RETURNS INT
AS
BEGIN
    IF @left IS NULL OR @right IS NULL
        RETURN NULL

    DECLARE @lenLeft INT = LEN(@left),
            @lenRight INT = LEN(@right)

    -- Against an empty string the distance is the other string's length.
    IF @lenLeft = 0
        RETURN @lenRight
    IF @lenRight = 0
        RETURN @lenLeft

    -- Two-row dynamic programming. Each row holds @lenRight + 1 costs, stored
    -- as consecutive 2-byte integers (at most 101 entries = 202 bytes); the
    -- cost for column k starts at byte 2 * k + 1.
    DECLARE @leftIndex INT = 1,
            @rightIndex INT = 1,
            @left_char NCHAR(1),
            @cost INT,
            @candidate INT,
            @previousRow VARBINARY(202) = 0x0000,
            @currentRow VARBINARY(202)

    -- Row 0: turning an empty prefix of @left into the first k characters of
    -- @right costs k insertions.
    WHILE @rightIndex <= @lenRight
    BEGIN
        SET @previousRow = @previousRow + CAST(@rightIndex AS BINARY(2))
        SET @rightIndex = @rightIndex + 1
    END

    WHILE @leftIndex <= @lenLeft
    BEGIN
        SET @left_char = SUBSTRING(@left, @leftIndex, 1)
        -- Column 0: deleting the first @leftIndex characters of @left.
        SET @cost = @leftIndex
        SET @currentRow = CAST(@leftIndex AS BINARY(2))
        SET @rightIndex = 1

        WHILE @rightIndex <= @lenRight
        BEGIN
            -- Insertion: cell to the left (still held in @cost) plus one.
            SET @cost = @cost + 1

            -- Match or substitution: diagonal cell of the previous row.
            SET @candidate = CAST(SUBSTRING(@previousRow, 2 * @rightIndex - 1, 2) AS INT)
                           + CASE WHEN @left_char = SUBSTRING(@right, @rightIndex, 1) THEN 0 ELSE 1 END
            IF @candidate < @cost
                SET @cost = @candidate

            -- Deletion: cell directly above plus one.
            SET @candidate = CAST(SUBSTRING(@previousRow, 2 * @rightIndex + 1, 2) AS INT) + 1
            IF @candidate < @cost
                SET @cost = @candidate

            SET @currentRow = @currentRow + CAST(@cost AS BINARY(2))
            SET @rightIndex = @rightIndex + 1
        END

        SET @previousRow = @currentRow
        SET @leftIndex = @leftIndex + 1
    END

    -- @cost now holds the last cell of the last row: the full distance.
    RETURN @cost
END
然后将该函数添加到您的 ORDER BY 子句中:
-- List every row of the example table, highest match percentage first.
-- The table name and col1/col2 are placeholders from the example.
SELECT t.*
FROM [dbo].[some_table] AS t
ORDER BY [dbo].[GetPercentageOfTwoStringMatching](t.col1, t.col2) DESC
请根据您的数据库进行调整,这应该适合您。
或者您可以使用简单的 CASE WHEN 条件来创建排序列:
-- Rank rows by how str matches the search text: exact matches first (1),
-- rows that merely contain it second (2), everything else last (3).
SELECT t.*
FROM [dbo].[table] AS t
ORDER BY
    CASE
        WHEN t.str = 'search_string' THEN 1
        WHEN t.str LIKE '%search_string%' THEN 2
        ELSE 3
    END ASC
我习惯使用 Lucene 进行全文搜索,效果很好,但我的主数据库是 SQL。我不喜欢数据库有两种机制的想法,因此决定使用 SQL 全文搜索。事情进展顺利,但仍有一些事情我还没有弄清楚。 假设我有下面的 table:
所有字段都已编入索引以进行全文搜索。
现在我想用文本 "Isaac" 对这个 table 进行全文搜索。我希望第 5 行位于顶部,其余结果位于该行下方。 我可以为某个字段设置优先级,使在该字段中找到的匹配项始终排在结果的顶部吗?基本上,我想按字段设置优先级。在我的真实 table 中,我有 6 个字段。
据我所知,MSSQL 不支持字符串相似度比较,您必须自己编写一套函数:
-- Returns how closely two strings match as an integer percentage
-- (100 = identical): 100 minus the distance reported by [dbo].[LEVENSHTEIN],
-- expressed as a share of the longer string's length.
--   @string1, @string2: the strings to compare (NVARCHAR(100) each).
-- Returns NULL when either argument is NULL, and 100 when both strings are
-- empty. Lengths are measured with LEN, which ignores trailing spaces.
CREATE FUNCTION [dbo].[GetPercentageOfTwoStringMatching]
(
    @string1 NVARCHAR(100)
   ,@string2 NVARCHAR(100)
)
RETURNS INT
AS
BEGIN
    -- A NULL input means the similarity is unknown; do not report a match.
    IF @string1 IS NULL OR @string2 IS NULL
        RETURN NULL

    DECLARE @string1Length INT = LEN(@string1)
          , @string2Length INT = LEN(@string2)
    DECLARE @maxLengthNumber INT = CASE WHEN @string1Length > @string2Length THEN @string1Length ELSE @string2Length END

    -- Two empty strings are identical; returning here also prevents the
    -- division by zero that the percentage calculation would otherwise hit.
    IF @maxLengthNumber = 0
        RETURN 100

    DECLARE @levenShteinNumber INT = [dbo].[LEVENSHTEIN](@string1, @string2)

    -- Integer division: the share of differing characters is truncated.
    DECLARE @percentageOfBadCharacters INT = @levenShteinNumber * 100 / @maxLengthNumber

    RETURN 100 - @percentageOfBadCharacters
END
-- Computes the Levenshtein edit distance between @left and @right: the
-- minimum number of single-character insertions, deletions and substitutions
-- needed to turn one string into the other.
--   @left, @right: the strings to compare (NVARCHAR(100) each, so Unicode
--                  text is compared without conversion to a code page).
-- Returns NULL when either argument is NULL. Lengths are measured with LEN,
-- which ignores trailing spaces. Characters are compared with "=", so the
-- result follows the collation in effect (it may be case-insensitive).
CREATE FUNCTION [dbo].[LEVENSHTEIN](@left NVARCHAR(100),
                                    @right NVARCHAR(100))
RETURNS INT
AS
BEGIN
    IF @left IS NULL OR @right IS NULL
        RETURN NULL

    DECLARE @lenLeft INT = LEN(@left),
            @lenRight INT = LEN(@right)

    -- Against an empty string the distance is the other string's length.
    IF @lenLeft = 0
        RETURN @lenRight
    IF @lenRight = 0
        RETURN @lenLeft

    -- Two-row dynamic programming. Each row holds @lenRight + 1 costs, stored
    -- as consecutive 2-byte integers (at most 101 entries = 202 bytes); the
    -- cost for column k starts at byte 2 * k + 1.
    DECLARE @leftIndex INT = 1,
            @rightIndex INT = 1,
            @left_char NCHAR(1),
            @cost INT,
            @candidate INT,
            @previousRow VARBINARY(202) = 0x0000,
            @currentRow VARBINARY(202)

    -- Row 0: turning an empty prefix of @left into the first k characters of
    -- @right costs k insertions.
    WHILE @rightIndex <= @lenRight
    BEGIN
        SET @previousRow = @previousRow + CAST(@rightIndex AS BINARY(2))
        SET @rightIndex = @rightIndex + 1
    END

    WHILE @leftIndex <= @lenLeft
    BEGIN
        SET @left_char = SUBSTRING(@left, @leftIndex, 1)
        -- Column 0: deleting the first @leftIndex characters of @left.
        SET @cost = @leftIndex
        SET @currentRow = CAST(@leftIndex AS BINARY(2))
        SET @rightIndex = 1

        WHILE @rightIndex <= @lenRight
        BEGIN
            -- Insertion: cell to the left (still held in @cost) plus one.
            SET @cost = @cost + 1

            -- Match or substitution: diagonal cell of the previous row.
            SET @candidate = CAST(SUBSTRING(@previousRow, 2 * @rightIndex - 1, 2) AS INT)
                           + CASE WHEN @left_char = SUBSTRING(@right, @rightIndex, 1) THEN 0 ELSE 1 END
            IF @candidate < @cost
                SET @cost = @candidate

            -- Deletion: cell directly above plus one.
            SET @candidate = CAST(SUBSTRING(@previousRow, 2 * @rightIndex + 1, 2) AS INT) + 1
            IF @candidate < @cost
                SET @cost = @candidate

            SET @currentRow = @currentRow + CAST(@cost AS BINARY(2))
            SET @rightIndex = @rightIndex + 1
        END

        SET @previousRow = @currentRow
        SET @leftIndex = @leftIndex + 1
    END

    -- @cost now holds the last cell of the last row: the full distance.
    RETURN @cost
END
然后将该函数添加到您的 ORDER BY 子句中:
SELECT *
FROM [dbo].[some_table]
ORDER BY [dbo].[GetPercentageOfTwoStringMatching](col1 ,col2) DESC
请根据您的数据库进行调整,这应该适合您。
或者您可以使用简单的 CASE WHEN 条件来创建排序列:
-- Rank rows by how str matches the search text: exact matches first (1),
-- rows that merely contain it second (2), everything else last (3).
SELECT t.*
FROM [dbo].[table] AS t
ORDER BY
    CASE
        WHEN t.str = 'search_string' THEN 1
        WHEN t.str LIKE '%search_string%' THEN 2
        ELSE 3
    END ASC