从 SQL 服务器记录中解析 #Hashtag 评论
Parse #Hashtag Comments From SQL Server Record
我正在尝试从 SQL Server 2014 数据库的文本字段中提取所有“#”注释。我正在使用在此 MSDN thread 上找到的代码在一条记录中查找多个主题标签,并对其进行了轻微修改以满足我的需要,但我在结果集中看到了一些意想不到的结果。
我的 T-SQL 到目前为止:
-- Reproduction script: counts the '#hashtag' tokens found in dbo.Trending.ticketDescription.
-- Drop leftovers first so the script can be re-run in the same session.
IF OBJECT_ID('tempdb..#hashtagcounts') IS NOT NULL DROP TABLE #hashtagcounts;
IF OBJECT_ID('tempdb..#numbers') IS NOT NULL DROP TABLE #numbers;
IF OBJECT_ID('tempdb..#hashtag') IS NOT NULL DROP TABLE #hashtag;

-- Tally table holding the integers 1..499; each value is a candidate character
-- position, so a '#' located beyond position 498 is never examined.
CREATE TABLE #numbers ( N INT );

DECLARE @pos INT = 1;
WHILE @pos <= 499
BEGIN
    INSERT INTO #numbers ( N ) VALUES ( @pos );
    SET @pos += 1;
END;

-- Candidate rows: descriptions that contain '#', excluding those with '# '
-- (hash followed by a space), 'BATCH #', or '#' followed by '.', ':', an
-- apostrophe or a digit 1-9.
CREATE TABLE #hashtag ( tkt_desc VARCHAR(MAX) );

INSERT INTO #hashtag ( tkt_desc )
SELECT cst.ticketDescription
FROM dbo.Trending AS [cst]
WHERE cst.ticketDescription LIKE '%#%'
    AND cst.ticketDescription NOT LIKE '%BATCH #%'
    AND cst.ticketDescription NOT LIKE '%#[.:''1-9]%'
    AND LTRIM(RTRIM(cst.ticketDescription)) NOT LIKE '%# %';

-- For every position N that directly follows a '#', take the text from N up to
-- (not including) the next space character; when no space follows, take the rest.
-- Only a space ends a token here: other whitespace (CR, LF, tab) stays inside it.
SELECT LOWER('#' + SUBSTRING(h.tkt_desc, n.N,
                             CASE
                                 WHEN sp.next_space > 0 THEN sp.next_space - n.N
                                 ELSE LEN(h.tkt_desc)
                             END)) AS tkt_desc
INTO #hashtagcounts
FROM #hashtag AS h
CROSS JOIN #numbers AS n
CROSS APPLY ( SELECT CHARINDEX(' ', h.tkt_desc, n.N) AS next_space ) AS sp
WHERE SUBSTRING(h.tkt_desc, n.N - 1, 1) = '#'
    AND n.N <= LEN(h.tkt_desc);

-- Tally the extracted tokens, most frequent first, ties in alphabetical order.
SELECT hc.tkt_desc, COUNT(*) AS [Count]
FROM #hashtagcounts AS hc
GROUP BY hc.tkt_desc
ORDER BY [Count] DESC, hc.tkt_desc;
我的数据集如下:
tkt_desc Count
#updateinfo 6
#update 4
#update update 3
#update updated 3
#reprint 2
#callback 1
#nochargereprint 1
#nocostreprint 1
#notes update 1
#paperlicense please 1
我遇到了 #update 标记以三种不同方式表示的问题。理想情况下,我不希望在主题标签中包含空白 space ' ' 之后的任何内容 - #update 标签的计数应该为 10。
我最初的想法是,由于这是一个文本字段,该字段中可能存在换行符或回车 return,因此我尝试通过将 SELECT cst.ticketDescription
替换为去除回车/换行符的表达式(例如 SELECT REPLACE(REPLACE(cst.ticketDescription, CHAR(13), ''), CHAR(10), ''))来解决这个问题,但这只是将单独的词组合成了一个标签。请参见下面的示例:
#updateinfo 6
#update 4
#updateupdate 3
#updateupdated 3
关于如何实现我想要的结果有什么建议吗?我在下面包含了一些样本数据,以防有人想试验。
我决定使用两个函数/交叉应用并清除 LIKE 和 NOT LIKE 语句的查询:
-- Counts hashtags in support-ticket descriptions created within
-- [@paramStartDate, @paramEndDate] (both compared as whole dates, inclusive).
-- udf-Str-Strip-Control turns control characters into spaces so a tag followed by
-- a line break ends at the break; udf-Str-Extract then returns one row per token
-- found between '#' and the next space. The appended ' ' terminates a tag that
-- sits at the very end of the text.
SELECT '#' + LOWER(B.RetVal) AS [HashTag] ,
       COUNT(*) AS [Cnt]
FROM dbo.Common_SupportTickets AS [cst]
CROSS APPLY [dbo].[udf-Str-Extract]([dbo].[udf-Str-Strip-Control](cst.ticketDescription) + ' ', '#', ' ') AS [B]
WHERE cst.ticketDescription LIKE '%#%'
  -- Skip descriptions that end with a bare '#' once CR/LF pairs are removed.
  AND LTRIM(RTRIM(REPLACE(cst.ticketDescription,CHAR(13)+CHAR(10),''))) NOT LIKE '%#'
  AND cst.ticketDescription NOT LIKE '%BATCH #%'
  -- NOTE(review): the digit range starts at 1, so '#0...' (e.g. 'APT #01') is not excluded here.
  AND cst.ticketDescription NOT LIKE '%#[.:'')1-9]%'
  AND CAST(cst.createDate AS DATE) >= CAST( @paramStartDate AS DATE )
  AND CAST(cst.createDate AS DATE) <= CAST( @paramEndDate AS DATE )
-- Group on the lower-cased value that is displayed, so '#Update' and '#update'
-- collapse into one row even when the collation in effect is case-sensitive.
GROUP BY LOWER(B.RetVal)
示例数据和记录:
-- Sample table used by the examples: one free-text ticket description per row.
USE [Sandbox];
GO
SET ANSI_NULLS ON;
GO
SET QUOTED_IDENTIFIER ON;
GO
CREATE TABLE [dbo].[Trending]
(
    [TicketDescription] VARCHAR(MAX) NULL
)
ON [PRIMARY] TEXTIMAGE_ON [PRIMARY];
GO
INSERT [dbo].[Trending] ([TicketDescription]) VALUES (N'#registrationnotreceived customer has not received registration for boat...')
GO
INSERT [dbo].[Trending] ([TicketDescription]) VALUES (N'#registrationnoreceived Customer called and still has not received duplicate registration...')
GO
INSERT [dbo].[Trending] ([TicketDescription]) VALUES (N'#callback (111) 111-1111
Agent''s POS is briefly turning on before "going to sleep" and entering sleep mode. Agent claims POS will not stay active for any length of time.')
GO
INSERT [dbo].[Trending] ([TicketDescription]) VALUES (N'#Update
Corrected last name and driver''s license number.')
GO
INSERT [dbo].[Trending] ([TicketDescription]) VALUES (N'#Update
Update customer''s last name.')
GO
INSERT [dbo].[Trending] ([TicketDescription]) VALUES (N'#Update
Update last name, address')
GO
INSERT [dbo].[Trending] ([TicketDescription]) VALUES (N'#update - Profile updated. Corrected last name.')
GO
INSERT [dbo].[Trending] ([TicketDescription]) VALUES (N'#updateinfo')
GO
INSERT [dbo].[Trending] ([TicketDescription]) VALUES (N'#updateinfo')
GO
INSERT [dbo].[Trending] ([TicketDescription]) VALUES (N'#updateinfo')
GO
INSERT [dbo].[Trending] ([TicketDescription]) VALUES (N'#updateinfo')
GO
INSERT [dbo].[Trending] ([TicketDescription]) VALUES (N'#question')
GO
INSERT [dbo].[Trending] ([TicketDescription]) VALUES (N'#Update
Update residency status')
GO
INSERT [dbo].[Trending] ([TicketDescription]) VALUES (N'#update #SSNdiscrepancy
John Doe called in claiming this was their SSN, please advise. Please contact John Doe at this number (111-111-2222) when the issue is resolved. He wishes to create an account once the issue is resolved.')
GO
INSERT [dbo].[Trending] ([TicketDescription]) VALUES (N'#Notes
Update Customer''s Hunter certificate number')
GO
INSERT [dbo].[Trending] ([TicketDescription]) VALUES (N'#update
Updated residency status')
GO
INSERT [dbo].[Trending] ([TicketDescription]) VALUES (N'#Update
Changed residency from in-state to out-of-state, likely didn''t update.')
GO
INSERT [dbo].[Trending] ([TicketDescription]) VALUES (N'#Update
Updated Customer''s last name')
GO
INSERT [dbo].[Trending] ([TicketDescription]) VALUES (N'#updateinfo')
GO
INSERT [dbo].[Trending] ([TicketDescription]) VALUES (N'#updateinfo')
GO
INSERT [dbo].[Trending] ([TicketDescription]) VALUES (N'#void - Agent called in asking to void a duplicate license sale.')
GO
INSERT [dbo].[Trending] ([TicketDescription]) VALUES (N'#Update - updated customer''s last name')
GO
INSERT [dbo].[Trending] ([TicketDescription]) VALUES (N'#permissions
Changed agent role from AGENT CLERK to AGENT MANAGER in order to order supplies.')
GO
INSERT [dbo].[Trending] ([TicketDescription]) VALUES (N'#registrationreprint customer didn''t receive registration I sent to Twra It for reprint. Told to call if he has not received in 10days ')
GO
INSERT [dbo].[Trending] ([TicketDescription]) VALUES (N'#printerissue')
GO
INSERT [dbo].[Trending] ([TicketDescription]) VALUES (N'#update - Profile updated. Religious Exempt.')
GO
INSERT [dbo].[Trending] ([TicketDescription]) VALUES (N'#reprint this is 2nd call from customer that they have not received there boat registration...')
GO
INSERT [dbo].[Trending] ([TicketDescription]) VALUES (N'#registratedincorrectly He send in check and info from Clerks office beginning of Dec, ')
GO
INSERT [dbo].[Trending] ([TicketDescription]) VALUES (N'#reprint #paperlicense Please reprint this license for the customer, he claims he has not yet received it. ')
GO
INSERT [dbo].[Trending] ([TicketDescription]) VALUES (N'#nocostreprint customer did not receive boat regst')
GO
INSERT [dbo].[Trending] ([TicketDescription]) VALUES (N'#Update
Updated customer''s address over the phone')
GO
INSERT [dbo].[Trending] ([TicketDescription]) VALUES (N'#nochargereprint ')
GO
当我在不同的日期范围(date range)上运行同一脚本时,我刚刚注意到一个奇怪的行为——它会生成并不存在的 #hashtag。例如,它返回了 10 次 Order:,但字段中并没有文本 #order:
示例数据:
CREATE TABLE #temptable ( [ticketDescription] varchar(max), [RetVal] varchar(max) )
INSERT INTO #temptable
VALUES
( 'DURABLE HARD CARD RETURN-WAS GOING TO CALL TO PICK UP NO# NUMBER ', 'DURABLE' ),
( 'Order: 30341143OrderItemId: 30517890License for: NATHAN TIMOTHY SUMNER Printed on: 10 Apr 2017 06:43:57:857 Shipped to: 7650 KIOWA ST, APT #01 MILLINGTON TN 38053 - 3219', 'Order:' ),
( 'Order: 30341143OrderItemId: 30517890License for: NATHAN TIMOTHY SUMNER Printed on: 10 Apr 2017 06:43:57:857 Shipped to: 7650 KIOWA ST, APT #01 MILLINGTON TN 38053 - 3219', '01' ),
( 'Order: 30346281OrderItemId: 30526511License for: STANLEY R ROWLAND Printed on: 07 Apr 2017 06:22:23:417 Shipped to: 25 COUNTRY WOOD LN # 601 WALNUT MS 38683 - 5367', 'Order:' ),
( 'Order: 30347906OrderItemId: 30529325License for: DOUGLAS R EASTRIDGE Printed on: 07 Apr 2017 09:29:51:643 Shipped to: 7980 HUFFS FERRY RD N # R LOUDON TN 37774 - 5910', 'Order:' ),
( 'Order: 30361947OrderItemId: 30554547License for: BLAKE R HADDON Printed on: 07 Apr 2017 14:01:42:637 Shipped to: 146 14TH AVE NW # 146 WINCHESTER TN 37398 - 1079', 'Order:' ),
( 'Order: 30362075OrderItemId: 30554740License for: KYLE JACKSON Printed on: 07 Apr 2017 14:04:23:473 Shipped to: 3765 E ANDREW JOHNSON HWY, APT # A4 MORRISTOWN TN 37814 - 6200', 'Order:' ),
( 'Order: 30369152OrderItemId: 30565137License for: DANIEL JAMES SOLA Printed on: 09 Apr 2017 07:08:28:683 Shipped to: 3212 MAYES LOOP RD # 1 PIGEON FORGE TN 37863 - 7721', 'Order:' ),
( 'Order: 30370097OrderItemId: 30566543License for: JAMES D JOHNSON Printed on: 09 Apr 2017 11:51:37:170 Shipped to: 230 FRANKLIN RD # 907 FRANKLIN TN 37064 - 2256', 'Order:' ),
( 'Order: 30372876OrderItemId: 30571083License for: MARCOS CLAUDIO P POLONIATO Printed on: 06 Apr 2017 08:15:01:093 Shipped to: 295 WINDING RIVER DR, # J SANDY SPRINGS GA 30350 - 1926', 'Order:' ),
( 'Order: 30396415OrderItemId: 30604206License for: GARY T GOODMAN Printed on: 07 Apr 2017 15:11:20:317 Shipped to: 1046 GREENBRIAR RD # RD.423 TALBOTT TN 37877 - 9055', 'Order:' ),
( 'Order: 30405689OrderItemId: 30617970License for: VANCE K JOHNSON Printed on: 09 Apr 2017 09:25:48:670 Shipped to: 614 GARRISON HOLLOW RD, LOT # 11 ELIZABETHTON TN 37643 - 4897', 'Order:' )
SELECT * FROM #temptable;
DROP TABLE #temptable
EDIT - To Fix
-- Prepending the sentinel '#>>> ' makes every text start with the opening
-- delimiter, so whatever precedes the first real '#' is attributed to the
-- sentinel token '>>>' (discarded below) instead of being reported as a hashtag.
Select HashTag = '#' + B.RetVal
      ,Cnt     = Count(*)
From  #temptable A
Cross Apply [dbo].[udf-Str-Extract]([dbo].[udf-Str-Strip-Control]('#>>> ' + A.TicketDescription) + ' ', '#', ' ') B
Where B.RetVal <> '>>>'
  -- Keep only tokens containing at least one non-digit character, so purely
  -- numeric references such as 'APT #01' are dropped whatever their length
  -- (the previous pattern '[0-9][0-9]' rejected exactly two digits only).
  and B.RetVal Like '%[^0-9]%'
Group By B.RetVal
Order By HashTag
借助两个函数和一个 CROSS APPLY,以下内容可能会有所帮助。
第一个函数会去除所有控制字符并将其替换为空格,这样 John{13}{10}Smith 之类的文本就不会被连在一起,
而是返回 John Smith
第二个函数是经过修改的解析函数,可以接受两个不同的定界符 (begin/end)。在你的例子中是 # 和 space。 如果有多个hashtag,会return多条记录。
例子
-- Counts every hashtag in the trending table: control characters are replaced by
-- spaces, a trailing space is appended so a tag at the end of the text is still
-- terminated, and each token between '#' and the next space becomes one row.
Select HashTag = '#' + B.RetVal
      ,Cnt     = Count(*)
From  trending As A
Cross Apply [dbo].[udf-Str-Extract](
                [dbo].[udf-Str-Strip-Control](A.TicketDescription) + ' '
               ,'#'
               ,' '
            ) As B
Group By B.RetVal
Order By HashTag
Returns
HashTag Cnt
#callback 1
#nochargereprint 1
#nocostreprint 1
#Notes 1
#paperlicense 1 ---<< 2nd hashtag in text
#permissions 1
#printerissue 1
#question 1
#registratedincorrectly 1
#registrationnoreceived 1
#registrationnotreceived 1
#registrationreprint 1
#reprint 2
#SSNdiscrepancy 1 ---<< 2nd hashtag in text
#Update 12
#updateinfo 6
#void 1
如果有兴趣,以下是这两个 UDF
-- udf-Str-Strip-Control
-- Purpose : replaces every ASCII control character (codes 0-31, which includes
--           tab, LF and CR) with a space, collapses runs of spaces into a single
--           space and trims leading/trailing spaces.
-- @S      : text to clean; NULL is returned unchanged as NULL.
-- Returns : the cleaned text as varchar(max).
CREATE FUNCTION [dbo].[udf-Str-Strip-Control](@S varchar(max))
Returns varchar(max)
As
Begin
    Declare @Code int = 0;

    -- An explicit loop applies all 32 replacements deterministically; assigning a
    -- variable from a multi-row SELECT is not guaranteed to visit every row.
    -- NOTE(review): Replace() with Char(0) is collation-sensitive - confirm it
    -- behaves as expected under the database collation in use.
    While @Code < 32
    Begin
        Set @S = Replace(@S, Char(@Code), ' ');
        Set @Code += 1;
    End;

    -- Collapse repeated spaces without marker strings, so text that literally
    -- contains '<>' or '><' is left untouched. Each pass shortens the string,
    -- so the loop always terminates.
    While CharIndex('  ', @S) > 0
        Set @S = Replace(@S, '  ', ' ');

    Return LTrim(RTrim(@S));
End
--Select [dbo].[udf-Str-Strip-Control]('Michael '+char(13)+char(10)+'LastName') --Returns: Michael LastName
-- udf-Str-Extract
-- Purpose     : returns every non-empty piece of @String that starts right after an
--               occurrence of @Delimiter1 and ends just before the next @Delimiter2.
-- @String     : text to scan (positions are generated for up to 1,000,000 characters).
-- @Delimiter1 : opening delimiter, e.g. '#'.
-- @Delimiter2 : closing delimiter, e.g. ' '.
-- Returns     : RetSeq (1-based order of the match), RetPos (position of the first
--               character after the opening delimiter), RetVal (the extracted text).
-- Text located before the first @Delimiter1 is never reported as a match.
CREATE FUNCTION [dbo].[udf-Str-Extract] (@String varchar(max),@Delimiter1 varchar(100),@Delimiter2 varchar(100))
Returns Table
As
Return (
    -- cte1: ten rows, the building block for the number generator.
    with cte1(N) As (Select 1 From (Values(1),(1),(1),(1),(1),(1),(1),(1),(1),(1)) N(N)),
    -- cte2: the numbers 1..DataLength(@String), capped at 10^6 by the six-way cross join.
         cte2(N) As (Select Top (IsNull(DataLength(@String),0)) Row_Number() over (Order By (Select NULL))
                     From (Select N=1 From cte1 N1 Cross Join cte1 N2 Cross Join cte1 N3
                                           Cross Join cte1 N4 Cross Join cte1 N5 Cross Join cte1 N6) A ),
    -- cte3: start position of each segment = first character after an opening delimiter.
    --       There is deliberately no unconditional seed row for position 1: it made the
    --       text preceding the first opening delimiter show up as a match.
    --       DataLength (not Len) is used so a delimiter made of spaces keeps its length.
         cte3(N) As (Select t.N+DataLength(@Delimiter1) From cte2 t Where Substring(@String,t.N,DataLength(@Delimiter1)) = @Delimiter1),
    -- cte4: segment length = distance to the next opening delimiter; the last
    --       segment (no further delimiter) is capped at 8000 characters.
         cte4(N,L) As (Select S.N,IsNull(NullIf(CharIndex(@Delimiter1,@String,S.N),0)-S.N,8000) From cte3 S)
    Select RetSeq = Row_Number() over (Order By N)
          ,RetPos = N
          ,RetVal = left(RetVal,charindex(@Delimiter2,RetVal)-1)
    From (Select *,RetVal = Substring(@String, N, L) From cte4) A
    -- >1 drops segments without a closing delimiter and empty matches.
    Where charindex(@Delimiter2,RetVal)>1
)
/*
Max Length of String 1MM characters
Declare @String varchar(max) = 'Dear [[FirstName]] [[LastName]], ...'
Select * From [dbo].[udf-Str-Extract] (@String,'[[',']]')
*/
EDIT - May Help with the Visualization
如果您运行查询没有任何聚合
-- Un-aggregated view: each source row next to every token extracted from it.
-- Every '#' is prefixed with the marker '|||', which then acts as the opening
-- delimiter, so the returned values keep their leading '#'.
Select A.*
      ,B.*
From  trending As A
Cross Apply [dbo].[udf-Str-Extract](
                [dbo].[udf-Str-Strip-Control](' ' + replace(A.TicketDescription, '#', '|||#')) + ' .'
               ,'|||'
               ,' '
            ) As B
-- TicketDescription is the first (and only) column of the trending table.
Order By A.TicketDescription
你会得到
我正在尝试从 SQL Server 2014 数据库的文本字段中提取所有“#”注释。我正在使用在此 MSDN thread 上找到的代码在一条记录中查找多个主题标签,并对其进行了轻微修改以满足我的需要,但我在结果集中看到了一些意想不到的结果。
我的 T-SQL 到目前为止:
-- Reproduction script: counts the '#hashtag' tokens found in dbo.Trending.ticketDescription.
-- Drop leftovers first so the script can be re-run in the same session.
IF OBJECT_ID('tempdb..#hashtagcounts') IS NOT NULL DROP TABLE #hashtagcounts;
IF OBJECT_ID('tempdb..#numbers') IS NOT NULL DROP TABLE #numbers;
IF OBJECT_ID('tempdb..#hashtag') IS NOT NULL DROP TABLE #hashtag;

-- Tally table holding the integers 1..499; each value is a candidate character
-- position, so a '#' located beyond position 498 is never examined.
CREATE TABLE #numbers ( N INT );

DECLARE @pos INT = 1;
WHILE @pos <= 499
BEGIN
    INSERT INTO #numbers ( N ) VALUES ( @pos );
    SET @pos += 1;
END;

-- Candidate rows: descriptions that contain '#', excluding those with '# '
-- (hash followed by a space), 'BATCH #', or '#' followed by '.', ':', an
-- apostrophe or a digit 1-9.
CREATE TABLE #hashtag ( tkt_desc VARCHAR(MAX) );

INSERT INTO #hashtag ( tkt_desc )
SELECT cst.ticketDescription
FROM dbo.Trending AS [cst]
WHERE cst.ticketDescription LIKE '%#%'
    AND cst.ticketDescription NOT LIKE '%BATCH #%'
    AND cst.ticketDescription NOT LIKE '%#[.:''1-9]%'
    AND LTRIM(RTRIM(cst.ticketDescription)) NOT LIKE '%# %';

-- For every position N that directly follows a '#', take the text from N up to
-- (not including) the next space character; when no space follows, take the rest.
-- Only a space ends a token here: other whitespace (CR, LF, tab) stays inside it.
SELECT LOWER('#' + SUBSTRING(h.tkt_desc, n.N,
                             CASE
                                 WHEN sp.next_space > 0 THEN sp.next_space - n.N
                                 ELSE LEN(h.tkt_desc)
                             END)) AS tkt_desc
INTO #hashtagcounts
FROM #hashtag AS h
CROSS JOIN #numbers AS n
CROSS APPLY ( SELECT CHARINDEX(' ', h.tkt_desc, n.N) AS next_space ) AS sp
WHERE SUBSTRING(h.tkt_desc, n.N - 1, 1) = '#'
    AND n.N <= LEN(h.tkt_desc);

-- Tally the extracted tokens, most frequent first, ties in alphabetical order.
SELECT hc.tkt_desc, COUNT(*) AS [Count]
FROM #hashtagcounts AS hc
GROUP BY hc.tkt_desc
ORDER BY [Count] DESC, hc.tkt_desc;
我的数据集如下:
tkt_desc Count
#updateinfo 6
#update 4
#update update 3
#update updated 3
#reprint 2
#callback 1
#nochargereprint 1
#nocostreprint 1
#notes update 1
#paperlicense please 1
我遇到了 #update 标记以三种不同方式表示的问题。理想情况下,我不希望在主题标签中包含空白 space ' ' 之后的任何内容 - #update 标签的计数应该为 10。
我最初的想法是,由于这是一个文本字段,该字段中可能存在换行符或回车 return,因此我尝试通过将 SELECT cst.ticketDescription
替换为去除回车/换行符的表达式(例如 SELECT REPLACE(REPLACE(cst.ticketDescription, CHAR(13), ''), CHAR(10), ''))来解决这个问题,但这只是将单独的词组合成了一个标签。请参见下面的示例:
#updateinfo 6
#update 4
#updateupdate 3
#updateupdated 3
关于如何实现我想要的结果有什么建议吗?我在下面包含了一些样本数据,以防有人想试验。
我决定使用两个函数/交叉应用并清除 LIKE 和 NOT LIKE 语句的查询:
-- Counts hashtags in support-ticket descriptions created within
-- [@paramStartDate, @paramEndDate] (both compared as whole dates, inclusive).
-- udf-Str-Strip-Control turns control characters into spaces so a tag followed by
-- a line break ends at the break; udf-Str-Extract then returns one row per token
-- found between '#' and the next space. The appended ' ' terminates a tag that
-- sits at the very end of the text.
SELECT '#' + LOWER(B.RetVal) AS [HashTag] ,
       COUNT(*) AS [Cnt]
FROM dbo.Common_SupportTickets AS [cst]
CROSS APPLY [dbo].[udf-Str-Extract]([dbo].[udf-Str-Strip-Control](cst.ticketDescription) + ' ', '#', ' ') AS [B]
WHERE cst.ticketDescription LIKE '%#%'
  -- Skip descriptions that end with a bare '#' once CR/LF pairs are removed.
  AND LTRIM(RTRIM(REPLACE(cst.ticketDescription,CHAR(13)+CHAR(10),''))) NOT LIKE '%#'
  AND cst.ticketDescription NOT LIKE '%BATCH #%'
  -- NOTE(review): the digit range starts at 1, so '#0...' (e.g. 'APT #01') is not excluded here.
  AND cst.ticketDescription NOT LIKE '%#[.:'')1-9]%'
  AND CAST(cst.createDate AS DATE) >= CAST( @paramStartDate AS DATE )
  AND CAST(cst.createDate AS DATE) <= CAST( @paramEndDate AS DATE )
-- Group on the lower-cased value that is displayed, so '#Update' and '#update'
-- collapse into one row even when the collation in effect is case-sensitive.
GROUP BY LOWER(B.RetVal)
示例数据和记录:
-- Sample table used by the examples: one free-text ticket description per row.
USE [Sandbox];
GO
SET ANSI_NULLS ON;
GO
SET QUOTED_IDENTIFIER ON;
GO
CREATE TABLE [dbo].[Trending]
(
    [TicketDescription] VARCHAR(MAX) NULL
)
ON [PRIMARY] TEXTIMAGE_ON [PRIMARY];
GO
INSERT [dbo].[Trending] ([TicketDescription]) VALUES (N'#registrationnotreceived customer has not received registration for boat...')
GO
INSERT [dbo].[Trending] ([TicketDescription]) VALUES (N'#registrationnoreceived Customer called and still has not received duplicate registration...')
GO
INSERT [dbo].[Trending] ([TicketDescription]) VALUES (N'#callback (111) 111-1111
Agent''s POS is briefly turning on before "going to sleep" and entering sleep mode. Agent claims POS will not stay active for any length of time.')
GO
INSERT [dbo].[Trending] ([TicketDescription]) VALUES (N'#Update
Corrected last name and driver''s license number.')
GO
INSERT [dbo].[Trending] ([TicketDescription]) VALUES (N'#Update
Update customer''s last name.')
GO
INSERT [dbo].[Trending] ([TicketDescription]) VALUES (N'#Update
Update last name, address')
GO
INSERT [dbo].[Trending] ([TicketDescription]) VALUES (N'#update - Profile updated. Corrected last name.')
GO
INSERT [dbo].[Trending] ([TicketDescription]) VALUES (N'#updateinfo')
GO
INSERT [dbo].[Trending] ([TicketDescription]) VALUES (N'#updateinfo')
GO
INSERT [dbo].[Trending] ([TicketDescription]) VALUES (N'#updateinfo')
GO
INSERT [dbo].[Trending] ([TicketDescription]) VALUES (N'#updateinfo')
GO
INSERT [dbo].[Trending] ([TicketDescription]) VALUES (N'#question')
GO
INSERT [dbo].[Trending] ([TicketDescription]) VALUES (N'#Update
Update residency status')
GO
INSERT [dbo].[Trending] ([TicketDescription]) VALUES (N'#update #SSNdiscrepancy
John Doe called in claiming this was their SSN, please advise. Please contact John Doe at this number (111-111-2222) when the issue is resolved. He wishes to create an account once the issue is resolved.')
GO
INSERT [dbo].[Trending] ([TicketDescription]) VALUES (N'#Notes
Update Customer''s Hunter certificate number')
GO
INSERT [dbo].[Trending] ([TicketDescription]) VALUES (N'#update
Updated residency status')
GO
INSERT [dbo].[Trending] ([TicketDescription]) VALUES (N'#Update
Changed residency from in-state to out-of-state, likely didn''t update.')
GO
INSERT [dbo].[Trending] ([TicketDescription]) VALUES (N'#Update
Updated Customer''s last name')
GO
INSERT [dbo].[Trending] ([TicketDescription]) VALUES (N'#updateinfo')
GO
INSERT [dbo].[Trending] ([TicketDescription]) VALUES (N'#updateinfo')
GO
INSERT [dbo].[Trending] ([TicketDescription]) VALUES (N'#void - Agent called in asking to void a duplicate license sale.')
GO
INSERT [dbo].[Trending] ([TicketDescription]) VALUES (N'#Update - updated customer''s last name')
GO
INSERT [dbo].[Trending] ([TicketDescription]) VALUES (N'#permissions
Changed agent role from AGENT CLERK to AGENT MANAGER in order to order supplies.')
GO
INSERT [dbo].[Trending] ([TicketDescription]) VALUES (N'#registrationreprint customer didn''t receive registration I sent to Twra It for reprint. Told to call if he has not received in 10days ')
GO
INSERT [dbo].[Trending] ([TicketDescription]) VALUES (N'#printerissue')
GO
INSERT [dbo].[Trending] ([TicketDescription]) VALUES (N'#update - Profile updated. Religious Exempt.')
GO
INSERT [dbo].[Trending] ([TicketDescription]) VALUES (N'#reprint this is 2nd call from customer that they have not received there boat registration...')
GO
INSERT [dbo].[Trending] ([TicketDescription]) VALUES (N'#registratedincorrectly He send in check and info from Clerks office beginning of Dec, ')
GO
INSERT [dbo].[Trending] ([TicketDescription]) VALUES (N'#reprint #paperlicense Please reprint this license for the customer, he claims he has not yet received it. ')
GO
INSERT [dbo].[Trending] ([TicketDescription]) VALUES (N'#nocostreprint customer did not receive boat regst')
GO
INSERT [dbo].[Trending] ([TicketDescription]) VALUES (N'#Update
Updated customer''s address over the phone')
GO
INSERT [dbo].[Trending] ([TicketDescription]) VALUES (N'#nochargereprint ')
GO
当我在不同的日期范围(date range)上运行同一脚本时,我刚刚注意到一个奇怪的行为——它会生成并不存在的 #hashtag。例如,它返回了 10 次 Order:,但字段中并没有文本 #order:
示例数据:
CREATE TABLE #temptable ( [ticketDescription] varchar(max), [RetVal] varchar(max) )
INSERT INTO #temptable
VALUES
( 'DURABLE HARD CARD RETURN-WAS GOING TO CALL TO PICK UP NO# NUMBER ', 'DURABLE' ),
( 'Order: 30341143OrderItemId: 30517890License for: NATHAN TIMOTHY SUMNER Printed on: 10 Apr 2017 06:43:57:857 Shipped to: 7650 KIOWA ST, APT #01 MILLINGTON TN 38053 - 3219', 'Order:' ),
( 'Order: 30341143OrderItemId: 30517890License for: NATHAN TIMOTHY SUMNER Printed on: 10 Apr 2017 06:43:57:857 Shipped to: 7650 KIOWA ST, APT #01 MILLINGTON TN 38053 - 3219', '01' ),
( 'Order: 30346281OrderItemId: 30526511License for: STANLEY R ROWLAND Printed on: 07 Apr 2017 06:22:23:417 Shipped to: 25 COUNTRY WOOD LN # 601 WALNUT MS 38683 - 5367', 'Order:' ),
( 'Order: 30347906OrderItemId: 30529325License for: DOUGLAS R EASTRIDGE Printed on: 07 Apr 2017 09:29:51:643 Shipped to: 7980 HUFFS FERRY RD N # R LOUDON TN 37774 - 5910', 'Order:' ),
( 'Order: 30361947OrderItemId: 30554547License for: BLAKE R HADDON Printed on: 07 Apr 2017 14:01:42:637 Shipped to: 146 14TH AVE NW # 146 WINCHESTER TN 37398 - 1079', 'Order:' ),
( 'Order: 30362075OrderItemId: 30554740License for: KYLE JACKSON Printed on: 07 Apr 2017 14:04:23:473 Shipped to: 3765 E ANDREW JOHNSON HWY, APT # A4 MORRISTOWN TN 37814 - 6200', 'Order:' ),
( 'Order: 30369152OrderItemId: 30565137License for: DANIEL JAMES SOLA Printed on: 09 Apr 2017 07:08:28:683 Shipped to: 3212 MAYES LOOP RD # 1 PIGEON FORGE TN 37863 - 7721', 'Order:' ),
( 'Order: 30370097OrderItemId: 30566543License for: JAMES D JOHNSON Printed on: 09 Apr 2017 11:51:37:170 Shipped to: 230 FRANKLIN RD # 907 FRANKLIN TN 37064 - 2256', 'Order:' ),
( 'Order: 30372876OrderItemId: 30571083License for: MARCOS CLAUDIO P POLONIATO Printed on: 06 Apr 2017 08:15:01:093 Shipped to: 295 WINDING RIVER DR, # J SANDY SPRINGS GA 30350 - 1926', 'Order:' ),
( 'Order: 30396415OrderItemId: 30604206License for: GARY T GOODMAN Printed on: 07 Apr 2017 15:11:20:317 Shipped to: 1046 GREENBRIAR RD # RD.423 TALBOTT TN 37877 - 9055', 'Order:' ),
( 'Order: 30405689OrderItemId: 30617970License for: VANCE K JOHNSON Printed on: 09 Apr 2017 09:25:48:670 Shipped to: 614 GARRISON HOLLOW RD, LOT # 11 ELIZABETHTON TN 37643 - 4897', 'Order:' )
SELECT * FROM #temptable;
DROP TABLE #temptable
EDIT - To Fix
-- Prepending the sentinel '#>>> ' makes every text start with the opening
-- delimiter, so whatever precedes the first real '#' is attributed to the
-- sentinel token '>>>' (discarded below) instead of being reported as a hashtag.
Select HashTag = '#' + B.RetVal
      ,Cnt     = Count(*)
From  #temptable A
Cross Apply [dbo].[udf-Str-Extract]([dbo].[udf-Str-Strip-Control]('#>>> ' + A.TicketDescription) + ' ', '#', ' ') B
Where B.RetVal <> '>>>'
  -- Keep only tokens containing at least one non-digit character, so purely
  -- numeric references such as 'APT #01' are dropped whatever their length
  -- (the previous pattern '[0-9][0-9]' rejected exactly two digits only).
  and B.RetVal Like '%[^0-9]%'
Group By B.RetVal
Order By HashTag
借助两个函数和一个 CROSS APPLY,以下内容可能会有所帮助。
第一个函数会去除所有控制字符并将其替换为空格,这样 John{13}{10}Smith 之类的文本就不会被连在一起,
而是返回 John Smith
第二个函数是经过修改的解析函数,可以接受两个不同的定界符 (begin/end)。在你的例子中是 # 和 space。 如果有多个hashtag,会return多条记录。
例子
-- Counts every hashtag in the trending table: control characters are replaced by
-- spaces, a trailing space is appended so a tag at the end of the text is still
-- terminated, and each token between '#' and the next space becomes one row.
Select HashTag = '#' + B.RetVal
      ,Cnt     = Count(*)
From  trending As A
Cross Apply [dbo].[udf-Str-Extract](
                [dbo].[udf-Str-Strip-Control](A.TicketDescription) + ' '
               ,'#'
               ,' '
            ) As B
Group By B.RetVal
Order By HashTag
Returns
HashTag Cnt
#callback 1
#nochargereprint 1
#nocostreprint 1
#Notes 1
#paperlicense 1 ---<< 2nd hashtag in text
#permissions 1
#printerissue 1
#question 1
#registratedincorrectly 1
#registrationnoreceived 1
#registrationnotreceived 1
#registrationreprint 1
#reprint 2
#SSNdiscrepancy 1 ---<< 2nd hashtag in text
#Update 12
#updateinfo 6
#void 1
如果有兴趣,以下是这两个 UDF
-- udf-Str-Strip-Control
-- Purpose : replaces every ASCII control character (codes 0-31, which includes
--           tab, LF and CR) with a space, collapses runs of spaces into a single
--           space and trims leading/trailing spaces.
-- @S      : text to clean; NULL is returned unchanged as NULL.
-- Returns : the cleaned text as varchar(max).
CREATE FUNCTION [dbo].[udf-Str-Strip-Control](@S varchar(max))
Returns varchar(max)
As
Begin
    Declare @Code int = 0;

    -- An explicit loop applies all 32 replacements deterministically; assigning a
    -- variable from a multi-row SELECT is not guaranteed to visit every row.
    -- NOTE(review): Replace() with Char(0) is collation-sensitive - confirm it
    -- behaves as expected under the database collation in use.
    While @Code < 32
    Begin
        Set @S = Replace(@S, Char(@Code), ' ');
        Set @Code += 1;
    End;

    -- Collapse repeated spaces without marker strings, so text that literally
    -- contains '<>' or '><' is left untouched. Each pass shortens the string,
    -- so the loop always terminates.
    While CharIndex('  ', @S) > 0
        Set @S = Replace(@S, '  ', ' ');

    Return LTrim(RTrim(@S));
End
--Select [dbo].[udf-Str-Strip-Control]('Michael '+char(13)+char(10)+'LastName') --Returns: Michael LastName
-- udf-Str-Extract
-- Purpose     : returns every non-empty piece of @String that starts right after an
--               occurrence of @Delimiter1 and ends just before the next @Delimiter2.
-- @String     : text to scan (positions are generated for up to 1,000,000 characters).
-- @Delimiter1 : opening delimiter, e.g. '#'.
-- @Delimiter2 : closing delimiter, e.g. ' '.
-- Returns     : RetSeq (1-based order of the match), RetPos (position of the first
--               character after the opening delimiter), RetVal (the extracted text).
-- Text located before the first @Delimiter1 is never reported as a match.
CREATE FUNCTION [dbo].[udf-Str-Extract] (@String varchar(max),@Delimiter1 varchar(100),@Delimiter2 varchar(100))
Returns Table
As
Return (
    -- cte1: ten rows, the building block for the number generator.
    with cte1(N) As (Select 1 From (Values(1),(1),(1),(1),(1),(1),(1),(1),(1),(1)) N(N)),
    -- cte2: the numbers 1..DataLength(@String), capped at 10^6 by the six-way cross join.
         cte2(N) As (Select Top (IsNull(DataLength(@String),0)) Row_Number() over (Order By (Select NULL))
                     From (Select N=1 From cte1 N1 Cross Join cte1 N2 Cross Join cte1 N3
                                           Cross Join cte1 N4 Cross Join cte1 N5 Cross Join cte1 N6) A ),
    -- cte3: start position of each segment = first character after an opening delimiter.
    --       There is deliberately no unconditional seed row for position 1: it made the
    --       text preceding the first opening delimiter show up as a match.
    --       DataLength (not Len) is used so a delimiter made of spaces keeps its length.
         cte3(N) As (Select t.N+DataLength(@Delimiter1) From cte2 t Where Substring(@String,t.N,DataLength(@Delimiter1)) = @Delimiter1),
    -- cte4: segment length = distance to the next opening delimiter; the last
    --       segment (no further delimiter) is capped at 8000 characters.
         cte4(N,L) As (Select S.N,IsNull(NullIf(CharIndex(@Delimiter1,@String,S.N),0)-S.N,8000) From cte3 S)
    Select RetSeq = Row_Number() over (Order By N)
          ,RetPos = N
          ,RetVal = left(RetVal,charindex(@Delimiter2,RetVal)-1)
    From (Select *,RetVal = Substring(@String, N, L) From cte4) A
    -- >1 drops segments without a closing delimiter and empty matches.
    Where charindex(@Delimiter2,RetVal)>1
)
/*
Max Length of String 1MM characters
Declare @String varchar(max) = 'Dear [[FirstName]] [[LastName]], ...'
Select * From [dbo].[udf-Str-Extract] (@String,'[[',']]')
*/
EDIT - May Help with the Visualization
如果您运行查询没有任何聚合
-- Un-aggregated view: each source row next to every token extracted from it.
-- Every '#' is prefixed with the marker '|||', which then acts as the opening
-- delimiter, so the returned values keep their leading '#'.
Select A.*
      ,B.*
From  trending As A
Cross Apply [dbo].[udf-Str-Extract](
                [dbo].[udf-Str-Strip-Control](' ' + replace(A.TicketDescription, '#', '|||#')) + ' .'
               ,'|||'
               ,' '
            ) As B
-- TicketDescription is the first (and only) column of the trending table.
Order By A.TicketDescription
你会得到