任意序列的字符串搜索
String search with any sequence
我有以下两个table:
Table 1:
-- Source table: enumber is the number passed to the lookup queries,
-- ename is the name that is then searched for in tbl_str_match_4.
CREATE TABLE tbl_str_match_1
(
    enumber  int,
    ename    varchar(100),
    eaddress varchar(500)
);

-- Sample rows for the four lookup scenarios (enumber 1 to 4).
INSERT INTO tbl_str_match_1 (enumber, ename, eaddress)
VALUES
    (1, 'John Mak',     'Hno 12 Street Road, USA'),
    (2, 'Shai Lee',     'UK'),
    (3, 'Smith Watson', 'Street X01 UAE'),
    (4, 'Ray Gibbs',    'SA 124');
Table 2:
-- Lookup table that is searched by name; its names may contain the same
-- words as tbl_str_match_1.ename in a different order or with extra words.
CREATE TABLE tbl_str_match_4
(
    name      varchar(100),
    [address] varchar(500)
);

-- Explicit column lists keep the inserts valid if columns are added later.
INSERT INTO tbl_str_match_4 (name, [address]) VALUES ('Mak John', 'Street Road, Hno 12, USA');
INSERT INTO tbl_str_match_4 (name, [address]) VALUES ('Shai A Lee', 'UK');
-- The trailing space in this name is part of the original sample data.
INSERT INTO tbl_str_match_4 (name, [address]) VALUES ('A watson Smeeth ', 'UAE Street X01');
-- Fixed: this row used to target tbl_str_match_1 (three columns) with only
-- two values, which fails with a column-count error; it belongs in this table.
INSERT INTO tbl_str_match_4 (name, [address]) VALUES ('Henry Jay', 'RUS OP124');
我想用传入的编号(enumber)先在表 tbl_str_match_1 中查出姓名,
然后把该姓名作为输入进行第二次搜索,在另一张表 tbl_str_match_4 中查找匹配的姓名和地址。
注意:
姓名的各部分可以按任意顺序排列,例如 first mid last name、mid last first name 或 last first mid name,任何排列都有可能。
我想从第二个 table 中找到名称和地址,并增加一列,即字符串的百分比匹配。
将进行两次搜索,第一次在 table tbl_str_match_1
获取姓名,第二次在 table tbl_str_match_4
获取姓名和地址。
对于第一条记录 John Mak,它应该显示与 Mak John 100% 匹配。
对于第二条记录 Shai Lee,应显示与 Shai A Lee 90% 匹配,因为后者多了中间名 A。
最后一条记录 Ray Gibbs
不会显示在结果集中,因为它与其他 table 值不匹配。
-- Query: fetch the name stored for enumber = 1, then return the rows of
-- tbl_str_match_4 whose name matches the pattern '%<that name>%'.
WITH wanted_name AS
(
    SELECT src.ename
    FROM tbl_str_match_1 AS src
    WHERE src.enumber = 1
)
SELECT
    tgt.name,
    tgt.[address]
FROM tbl_str_match_4 AS tgt
WHERE tgt.name LIKE '%' + (SELECT w.ename FROM wanted_name AS w) + '%'
预期结果:
场景 1:如果我通过 enumber = 1
那么结果应该是:
Name Address Matching Percentage
------------------------------------------------------------
Mak John Street Road, Hno 12, USA 100
场景 2:如果我通过 enumber = 2
那么结果应该是:
Name Address Matching Percentage
------------------------------------------------------------
Shai A Lee UK 90
场景 3:如果我通过 enumber = 3
那么结果应该是:
Name Address Matching Percentage
------------------------------------------------------------
A watson Smeeth UAE Street X01 70
场景 4:如果我通过 enumber = 4
那么结果应该是:
没有结果,因为我们没有任何相关匹配。
Name Address Matching Percentage
------------------------------------------------------------
希望对您有所帮助。
-- Splits every name into first / middle / last parts using the positions of
-- its first two spaces, then returns the tbl_str_match_4 rows whose first and
-- last parts equal those of the tbl_str_match_1 row with enumber = 1.
-- sp1 = position of the first space (0 when none),
-- sp2 = position of the second space (0 when none).
WITH CTE1 AS
(
    SELECT
        s.enumber,
        -- Text up to and including the first space; first 1000 characters when there is no space.
        LTRIM(SUBSTRING(s.ename, 1, ISNULL(NULLIF(p.sp1, 0), 1000))) AS Firstename,
        -- Text between the first and the second space; empty when there is no second space.
        LTRIM(SUBSTRING(s.ename, p.sp1,
                        CASE WHEN (p.sp2 - p.sp1) <= 0 THEN 0 ELSE p.sp2 - p.sp1 END)) AS Middleename,
        -- Text from the second space (or the first, when there is only one) to the end;
        -- empty when the name contains no space.
        LTRIM(SUBSTRING(s.ename, ISNULL(NULLIF(p.sp2, 0), p.sp1),
                        CASE WHEN p.sp1 = 0 THEN 0 ELSE LEN(s.ename) END)) AS Lastename
    FROM tbl_str_match_1 AS s
    CROSS APPLY
    (
        SELECT
            CHARINDEX(' ', s.ename) AS sp1,
            CHARINDEX(' ', s.ename, CHARINDEX(' ', s.ename) + 1) AS sp2
    ) AS p
),
CTE2 AS
(
    SELECT
        t.name,
        t.[address],
        LTRIM(SUBSTRING(t.name, 1, ISNULL(NULLIF(p.sp1, 0), 1000))) AS FirstName,
        LTRIM(SUBSTRING(t.name, p.sp1,
                        CASE WHEN (p.sp2 - p.sp1) <= 0 THEN 0 ELSE p.sp2 - p.sp1 END)) AS MiddleName,
        LTRIM(SUBSTRING(t.name, ISNULL(NULLIF(p.sp2, 0), p.sp1),
                        CASE WHEN p.sp1 = 0 THEN 0 ELSE LEN(t.name) END)) AS LastName
    FROM tbl_str_match_4 AS t
    CROSS APPLY
    (
        SELECT
            CHARINDEX(' ', t.name) AS sp1,
            CHARINDEX(' ', t.name, CHARINDEX(' ', t.name) + 1) AS sp2
    ) AS p
)
SELECT
    CTE2.name,
    CTE2.[address]
FROM CTE1
INNER JOIN CTE2
    -- Same order: first = first and last = last.
    ON (CTE1.Firstename = CTE2.FirstName AND CTE1.Lastename = CTE2.LastName)
    -- Fixed: also accept the swapped order, so 'John Mak' matches 'Mak John'.
    -- The first part keeps its trailing space, but T-SQL "=" ignores trailing
    -- spaces on varchar values, so 'John ' still equals 'John'.
    OR (CTE1.Firstename = CTE2.LastName AND CTE1.Lastename = CTE2.FirstName)
WHERE CTE1.enumber = 1
希望以下内容对您有所帮助。
我首先将 tbl_str_match_1 和 tbl_str_match_4 中的姓名拆分为单个标记(token),
之后将 tbl_str_match_1 的标记与 tbl_str_match_4 的标记进行比较。
另外,关于匹配率有一个疑问:
在 "Shai A Lee" 的示例中,有 2 个匹配项("Shai"、"Lee"),总共 3 个标记("Shai"、"A"、"Lee"),那么匹配百分比不应该是 66.67 吗?
-- Tokenises the names of both tables on single spaces (by casting them to
-- XML, one <M> element per word) and reports, per tbl_str_match_1 name, how
-- many of its tokens also occur as tokens in tbl_str_match_4.
-- Fixed: '&', '<' and '>' are escaped before the cast; previously a name
-- containing any of them made CAST(... AS XML) fail. value('.') returns the
-- unescaped text, so the tokens themselves are unchanged.
WITH split_ename_1 AS
(
    SELECT
        src.enumber,
        src.ename,
        src.eaddress,
        parts.tok.value('.', 'VARCHAR(100)') AS data
    FROM
    (
        SELECT
            enumber,
            ename,
            eaddress,
            CAST('<M>'
                 + REPLACE(
                       REPLACE(REPLACE(REPLACE(RTRIM(ename), '&', '&amp;'), '<', '&lt;'), '>', '&gt;'),
                       ' ', '</M><M>')
                 + '</M>' AS XML) AS name_xml
        FROM tbl_str_match_1
    ) AS src
    CROSS APPLY src.name_xml.nodes('/M') AS parts(tok)
),
split_ename_4 AS
(
    SELECT
        tgt.name,
        tgt.address,
        parts.tok.value('.', 'VARCHAR(100)') AS data,
        -- Number of tokens in this tbl_str_match_4 name.
        COUNT(*) OVER (PARTITION BY tgt.name) AS tot_cnt
    FROM
    (
        SELECT
            name,
            address,
            CAST('<M>'
                 + REPLACE(
                       REPLACE(REPLACE(REPLACE(RTRIM(name), '&', '&amp;'), '<', '&lt;'), '>', '&gt;'),
                       ' ', '</M><M>')
                 + '</M>' AS XML) AS name_xml
        FROM tbl_str_match_4
    ) AS tgt
    CROSS APPLY tgt.name_xml.nodes('/M') AS parts(tok)
)
SELECT
    s1.ename,
    COUNT(s1.data)  AS tokens_1,
    COUNT(s4.data)  AS tokens_4,
    MAX(s4.tot_cnt) AS tot_tokens_4,
    -- A fraction between 0 and 1 (not multiplied by 100).
    CASE
        WHEN COUNT(s4.data) = 0 THEN 0
        ELSE COUNT(s4.data) * 1.00 / MAX(s4.tot_cnt) * 1.00
    END             AS matching_percentage
FROM split_ename_1 AS s1
LEFT JOIN split_ename_4 AS s4
    ON s1.data = s4.data
-- NOTE(review): grouping is by ename only, so tokens matched in different
-- tbl_str_match_4 names are counted together - confirm this is intended.
GROUP BY s1.ename
您可以结合使用 CTE 和 STRING_SPLIT 来完成这项工作。
我在 tbl_str_match_4 中添加了一个标识列(identity column,即下面查询中的 id)以简化处理。
-- Computes, for the name selected by @enumber, the percentage of words in each
-- candidate tbl_str_match_4 name that also appear in the searched name.
-- Requires STRING_SPLIT (SQL Server 2016+, compatibility level 130) and an
-- id column on tbl_str_match_4, which the CREATE TABLE in this file does not
-- define - NOTE(review): add it before running.
DECLARE @enumber INT = 2;
WITH c1 AS
(
    -- Words of the ename selected by @enumber, one row per word.
    -- Fixed: empty tokens (from trailing or doubled spaces) are dropped;
    -- an empty token would make the LIKE below match every row.
    SELECT s.value AS name
    FROM tbl_str_match_1 AS t
    CROSS APPLY STRING_SPLIT(t.ename, ' ') AS s
    WHERE t.enumber = @enumber
      AND s.value <> ''
)
,c2 AS
(
    -- Words of every tbl_str_match_4 name that contains at least one searched word.
    -- Fixed: empty tokens are dropped so they do not inflate the word count
    -- (e.g. 'A watson Smeeth ' now counts 3 words instead of 4).
    SELECT t.id, s.value AS name
    FROM tbl_str_match_4 AS t
    CROSS APPLY STRING_SPLIT(t.name, ' ') AS s
    WHERE s.value <> ''
      AND EXISTS (SELECT 1 FROM c1 AS c WHERE t.name LIKE '%' + c.name + '%')
)
,c3 AS
(
    -- Percentage = matched words / total words of the candidate name * 100.
    SELECT
        c2.id,
        CAST(COUNT(c1.name) AS FLOAT) / CAST(COUNT(c2.name) AS FLOAT) * 100 AS Percentage
    FROM c2
    LEFT JOIN c1
        ON c1.name = c2.name
    GROUP BY c2.id
)
-- Candidate rows together with their match percentage.
SELECT t.*, c3.Percentage
FROM tbl_str_match_4 AS t
INNER JOIN c3
    ON t.id = c3.id
对于DEMO
我有以下两个table:
Table 1:
-- Source table: enumber is the number passed to the lookup queries,
-- ename is the name that is then searched for in tbl_str_match_4.
CREATE TABLE tbl_str_match_1
(
    enumber  int,
    ename    varchar(100),
    eaddress varchar(500)
);

-- Sample rows for the four lookup scenarios (enumber 1 to 4).
INSERT INTO tbl_str_match_1 (enumber, ename, eaddress)
VALUES
    (1, 'John Mak',     'Hno 12 Street Road, USA'),
    (2, 'Shai Lee',     'UK'),
    (3, 'Smith Watson', 'Street X01 UAE'),
    (4, 'Ray Gibbs',    'SA 124');
Table 2:
-- Lookup table that is searched by name; its names may contain the same
-- words as tbl_str_match_1.ename in a different order or with extra words.
CREATE TABLE tbl_str_match_4
(
    name      varchar(100),
    [address] varchar(500)
);

-- Explicit column lists keep the inserts valid if columns are added later.
INSERT INTO tbl_str_match_4 (name, [address]) VALUES ('Mak John', 'Street Road, Hno 12, USA');
INSERT INTO tbl_str_match_4 (name, [address]) VALUES ('Shai A Lee', 'UK');
-- The trailing space in this name is part of the original sample data.
INSERT INTO tbl_str_match_4 (name, [address]) VALUES ('A watson Smeeth ', 'UAE Street X01');
-- Fixed: this row used to target tbl_str_match_1 (three columns) with only
-- two values, which fails with a column-count error; it belongs in this table.
INSERT INTO tbl_str_match_4 (name, [address]) VALUES ('Henry Jay', 'RUS OP124');
我想用传入的编号(enumber)先在表 tbl_str_match_1 中查出姓名,
然后把该姓名作为输入进行第二次搜索,在另一张表 tbl_str_match_4 中查找匹配的姓名和地址。
注意:
姓名的各部分可以按任意顺序排列,例如 first mid last name、mid last first name 或 last first mid name,任何排列都有可能。
我想从第二个 table 中找到名称和地址,并增加一列,即字符串的百分比匹配。
将进行两次搜索:第一次在表 tbl_str_match_1 中获取姓名,第二次在表 tbl_str_match_4 中获取姓名和地址。
对于第一条记录 John Mak,它应该显示与 Mak John 100% 匹配。
对于第二条记录 Shai Lee,应显示与 Shai A Lee 90% 匹配,因为后者多了中间名 A。
最后一条记录 Ray Gibbs 不会显示在结果集中,因为它与另一张表中的任何值都不匹配。
-- Query: fetch the name stored for enumber = 1, then return the rows of
-- tbl_str_match_4 whose name matches the pattern '%<that name>%'.
WITH wanted_name AS
(
    SELECT src.ename
    FROM tbl_str_match_1 AS src
    WHERE src.enumber = 1
)
SELECT
    tgt.name,
    tgt.[address]
FROM tbl_str_match_4 AS tgt
WHERE tgt.name LIKE '%' + (SELECT w.ename FROM wanted_name AS w) + '%'
预期结果:
场景 1:如果我通过 enumber = 1
那么结果应该是:
Name Address Matching Percentage
------------------------------------------------------------
Mak John Street Road, Hno 12, USA 100
场景 2:如果我通过 enumber = 2
那么结果应该是:
Name Address Matching Percentage
------------------------------------------------------------
Shai A Lee UK 90
场景 3:如果我通过 enumber = 3
那么结果应该是:
Name Address Matching Percentage
------------------------------------------------------------
A watson Smeeth UAE Street X01 70
场景 4:如果我通过 enumber = 4
那么结果应该是:
没有结果,因为我们没有任何相关匹配。
Name Address Matching Percentage
------------------------------------------------------------
希望对您有所帮助。
-- Splits every name into first / middle / last parts using the positions of
-- its first two spaces, then returns the tbl_str_match_4 rows whose first and
-- last parts equal those of the tbl_str_match_1 row with enumber = 1.
-- sp1 = position of the first space (0 when none),
-- sp2 = position of the second space (0 when none).
WITH CTE1 AS
(
    SELECT
        s.enumber,
        -- Text up to and including the first space; first 1000 characters when there is no space.
        LTRIM(SUBSTRING(s.ename, 1, ISNULL(NULLIF(p.sp1, 0), 1000))) AS Firstename,
        -- Text between the first and the second space; empty when there is no second space.
        LTRIM(SUBSTRING(s.ename, p.sp1,
                        CASE WHEN (p.sp2 - p.sp1) <= 0 THEN 0 ELSE p.sp2 - p.sp1 END)) AS Middleename,
        -- Text from the second space (or the first, when there is only one) to the end;
        -- empty when the name contains no space.
        LTRIM(SUBSTRING(s.ename, ISNULL(NULLIF(p.sp2, 0), p.sp1),
                        CASE WHEN p.sp1 = 0 THEN 0 ELSE LEN(s.ename) END)) AS Lastename
    FROM tbl_str_match_1 AS s
    CROSS APPLY
    (
        SELECT
            CHARINDEX(' ', s.ename) AS sp1,
            CHARINDEX(' ', s.ename, CHARINDEX(' ', s.ename) + 1) AS sp2
    ) AS p
),
CTE2 AS
(
    SELECT
        t.name,
        t.[address],
        LTRIM(SUBSTRING(t.name, 1, ISNULL(NULLIF(p.sp1, 0), 1000))) AS FirstName,
        LTRIM(SUBSTRING(t.name, p.sp1,
                        CASE WHEN (p.sp2 - p.sp1) <= 0 THEN 0 ELSE p.sp2 - p.sp1 END)) AS MiddleName,
        LTRIM(SUBSTRING(t.name, ISNULL(NULLIF(p.sp2, 0), p.sp1),
                        CASE WHEN p.sp1 = 0 THEN 0 ELSE LEN(t.name) END)) AS LastName
    FROM tbl_str_match_4 AS t
    CROSS APPLY
    (
        SELECT
            CHARINDEX(' ', t.name) AS sp1,
            CHARINDEX(' ', t.name, CHARINDEX(' ', t.name) + 1) AS sp2
    ) AS p
)
SELECT
    CTE2.name,
    CTE2.[address]
FROM CTE1
INNER JOIN CTE2
    -- Same order: first = first and last = last.
    ON (CTE1.Firstename = CTE2.FirstName AND CTE1.Lastename = CTE2.LastName)
    -- Fixed: also accept the swapped order, so 'John Mak' matches 'Mak John'.
    -- The first part keeps its trailing space, but T-SQL "=" ignores trailing
    -- spaces on varchar values, so 'John ' still equals 'John'.
    OR (CTE1.Firstename = CTE2.LastName AND CTE1.Lastename = CTE2.FirstName)
WHERE CTE1.enumber = 1
希望以下内容对您有所帮助。
我首先将 tbl_str_match_1 和 tbl_str_match_4 中的姓名拆分为单个标记(token),
之后将 tbl_str_match_1 的标记与 tbl_str_match_4 的标记进行比较。
另外,关于匹配率有一个疑问:
在 "Shai A Lee" 的示例中,有 2 个匹配项("Shai"、"Lee"),总共 3 个标记("Shai"、"A"、"Lee"),那么匹配百分比不应该是 66.67 吗?
-- Tokenises the names of both tables on single spaces (by casting them to
-- XML, one <M> element per word) and reports, per tbl_str_match_1 name, how
-- many of its tokens also occur as tokens in tbl_str_match_4.
-- Fixed: '&', '<' and '>' are escaped before the cast; previously a name
-- containing any of them made CAST(... AS XML) fail. value('.') returns the
-- unescaped text, so the tokens themselves are unchanged.
WITH split_ename_1 AS
(
    SELECT
        src.enumber,
        src.ename,
        src.eaddress,
        parts.tok.value('.', 'VARCHAR(100)') AS data
    FROM
    (
        SELECT
            enumber,
            ename,
            eaddress,
            CAST('<M>'
                 + REPLACE(
                       REPLACE(REPLACE(REPLACE(RTRIM(ename), '&', '&amp;'), '<', '&lt;'), '>', '&gt;'),
                       ' ', '</M><M>')
                 + '</M>' AS XML) AS name_xml
        FROM tbl_str_match_1
    ) AS src
    CROSS APPLY src.name_xml.nodes('/M') AS parts(tok)
),
split_ename_4 AS
(
    SELECT
        tgt.name,
        tgt.address,
        parts.tok.value('.', 'VARCHAR(100)') AS data,
        -- Number of tokens in this tbl_str_match_4 name.
        COUNT(*) OVER (PARTITION BY tgt.name) AS tot_cnt
    FROM
    (
        SELECT
            name,
            address,
            CAST('<M>'
                 + REPLACE(
                       REPLACE(REPLACE(REPLACE(RTRIM(name), '&', '&amp;'), '<', '&lt;'), '>', '&gt;'),
                       ' ', '</M><M>')
                 + '</M>' AS XML) AS name_xml
        FROM tbl_str_match_4
    ) AS tgt
    CROSS APPLY tgt.name_xml.nodes('/M') AS parts(tok)
)
SELECT
    s1.ename,
    COUNT(s1.data)  AS tokens_1,
    COUNT(s4.data)  AS tokens_4,
    MAX(s4.tot_cnt) AS tot_tokens_4,
    -- A fraction between 0 and 1 (not multiplied by 100).
    CASE
        WHEN COUNT(s4.data) = 0 THEN 0
        ELSE COUNT(s4.data) * 1.00 / MAX(s4.tot_cnt) * 1.00
    END             AS matching_percentage
FROM split_ename_1 AS s1
LEFT JOIN split_ename_4 AS s4
    ON s1.data = s4.data
-- NOTE(review): grouping is by ename only, so tokens matched in different
-- tbl_str_match_4 names are counted together - confirm this is intended.
GROUP BY s1.ename
您可以结合使用 CTE 和 STRING_SPLIT 来完成这项工作。
我在 tbl_str_match_4 中添加了一个标识列(identity column,即下面查询中的 id)以简化处理。
-- Computes, for the name selected by @enumber, the percentage of words in each
-- candidate tbl_str_match_4 name that also appear in the searched name.
-- Requires STRING_SPLIT (SQL Server 2016+, compatibility level 130) and an
-- id column on tbl_str_match_4, which the CREATE TABLE in this file does not
-- define - NOTE(review): add it before running.
DECLARE @enumber INT = 2;
WITH c1 AS
(
    -- Words of the ename selected by @enumber, one row per word.
    -- Fixed: empty tokens (from trailing or doubled spaces) are dropped;
    -- an empty token would make the LIKE below match every row.
    SELECT s.value AS name
    FROM tbl_str_match_1 AS t
    CROSS APPLY STRING_SPLIT(t.ename, ' ') AS s
    WHERE t.enumber = @enumber
      AND s.value <> ''
)
,c2 AS
(
    -- Words of every tbl_str_match_4 name that contains at least one searched word.
    -- Fixed: empty tokens are dropped so they do not inflate the word count
    -- (e.g. 'A watson Smeeth ' now counts 3 words instead of 4).
    SELECT t.id, s.value AS name
    FROM tbl_str_match_4 AS t
    CROSS APPLY STRING_SPLIT(t.name, ' ') AS s
    WHERE s.value <> ''
      AND EXISTS (SELECT 1 FROM c1 AS c WHERE t.name LIKE '%' + c.name + '%')
)
,c3 AS
(
    -- Percentage = matched words / total words of the candidate name * 100.
    SELECT
        c2.id,
        CAST(COUNT(c1.name) AS FLOAT) / CAST(COUNT(c2.name) AS FLOAT) * 100 AS Percentage
    FROM c2
    LEFT JOIN c1
        ON c1.name = c2.name
    GROUP BY c2.id
)
-- Candidate rows together with their match percentage.
SELECT t.*, c3.Percentage
FROM tbl_str_match_4 AS t
INNER JOIN c3
    ON t.id = c3.id
对于DEMO