在 MS SQL Server 中,当组重复出现(连续)时,按组删除除第一个和最后一个值以外的所有值
Removing All but the first and last values by group when the group is repeated in MS SQL Server (contiguous)
我们有一个聊天系统,有时每秒为聊天期间的每个事件生成多个事件日志。问题是这些消耗了大量的数据存储(这在该平台上非常昂贵),我们希望简化我们实际存储的内容并删除真正不需要的东西。
为此,有一种事件类型表示聊天在队列中的位置。我们并不关心每一个位置记录,只要这些记录之间没有穿插该聊天的其他事件即可。因此,对于每个中间没有其他事件类型的连续分组,我们只想保留其中的第一条和最后一条记录,以便得到该时间段的 "total time in queue"(排队总时长)。
更复杂的是,客户在部门之间转接时可能多次进出队列,因此同一个聊天可以有多个队列位置记录块。我试过使用 FIRST_VALUE 和 LAST_VALUE,它基本上能解决问题,但当同一个聊天中出现两个不同的此类事件块时就失败了。
这是生成测试数据的脚本:
<!-- language: lang-sql -->
-- Sample fixture: the event log of a single chat transcript.
-- In these rows `name` is a numeric-looking string that increases from row to
-- row, and `groupid` is the transcript id immediately followed by the event type.
-- The 'Enqueue' events form three separate runs (4 rows, 1 row, 1 row), each
-- separated by a 'PushAssignment' event.
CREATE TABLE #testdata (
    id                   varchar(18),
    name                 varchar(8),
    [type]               varchar(20),
    livechattranscriptid varchar(18),
    groupid              varchar(40)
);

INSERT INTO #testdata
    (id, name, [type], livechattranscriptid, groupid)
VALUES
    ('0DZ14000003I2pOGAS', '34128314', 'ChatRequest',    '57014000000ltfIAAQ', '57014000000ltfIAAQChatRequest'),
    ('0DZ14000003IGmQGAW', '34181980', 'Enqueue',        '57014000000ltfIAAQ', '57014000000ltfIAAQEnqueue'),
    ('0DZ14000003IHbqGAG', '34185171', 'Enqueue',        '57014000000ltfIAAQ', '57014000000ltfIAAQEnqueue'),
    ('0DZ14000003ILuHGAW', '34201743', 'Enqueue',        '57014000000ltfIAAQ', '57014000000ltfIAAQEnqueue'),
    ('0DZ14000003IQ6cGAG', '34217778', 'Enqueue',        '57014000000ltfIAAQ', '57014000000ltfIAAQEnqueue'),
    ('0DZ14000003IR7JGAW', '34221794', 'PushAssignment', '57014000000ltfIAAQ', '57014000000ltfIAAQPushAssignment'),
    ('0DZ14000003IiDnGAK', '34287448', 'Enqueue',        '57014000000ltfIAAQ', '57014000000ltfIAAQEnqueue'),
    ('0DZ14000003IiDoGAK', '34287545', 'PushAssignment', '57014000000ltfIAAQ', '57014000000ltfIAAQPushAssignment'),
    ('0DZ14000003Iut5GAC', '34336044', 'Enqueue',        '57014000000ltfIAAQ', '57014000000ltfIAAQEnqueue'),
    ('0DZ14000003Iv7HGAS', '34336906', 'Accept',         '57014000000ltfIAAQ', '57014000000ltfIAAQAccept');
这里尝试识别任何按名称字段排序并按转录 ID 分组的组的第一个和最后一个 ID:
-- For every row, report the first and last id inside its groupid partition
-- (rows ordered by transcript id, then name).
-- The partition key is groupid alone, so all rows sharing a groupid are treated
-- as one group even when other event types sit between them.
SELECT
    td.id,
    td.name,
    td.[type],
    td.livechattranscriptid,
    td.groupid,
    FIRST_VALUE(td.id) OVER (
        PARTITION BY td.groupid
        ORDER BY td.livechattranscriptid, td.name
    ) AS firstinstancegroup,
    -- Explicit frame: the default frame stops at the current row, which would
    -- make LAST_VALUE return the current row's id.
    LAST_VALUE(td.id) OVER (
        PARTITION BY td.groupid
        ORDER BY td.livechattranscriptid, td.name
        RANGE BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING
    ) AS lastinstancegroup
FROM #testdata AS td
ORDER BY
    td.livechattranscriptid,
    td.name;
问题是,它为整个组的所有记录提供了相同的第一个和最后一个 ID,而不是将每组 Enqueue 记录视为一个不同的组。我如何将 Enqueue 的每个不同的分组实例视为一个唯一的组?
这是一个类似的解决方案:Grouping contiguous table data
写得不算漂亮,但其逻辑是基于 OP(提问者)的需求的:找出同一列上的连续数据。
-- Sample fixture: the event log of a single chat transcript.
DECLARE @mytable TABLE (
    id                   varchar(18),
    name                 varchar(8),
    [type]               varchar(20),
    livechattranscriptid varchar(18),
    groupid              varchar(100)
);

INSERT INTO @mytable
    (id, name, [type], livechattranscriptid, groupid)
VALUES
    ('0DZ14000003I2pOGAS', '34128314', 'ChatRequest',    '57014000000ltfIAAQ', '57014000000ltfIAAQChatRequest'),
    ('0DZ14000003IGmQGAW', '34181980', 'Enqueue',        '57014000000ltfIAAQ', '57014000000ltfIAAQEnqueue'),
    ('0DZ14000003IHbqGAG', '34185171', 'Enqueue',        '57014000000ltfIAAQ', '57014000000ltfIAAQEnqueue'),
    ('0DZ14000003ILuHGAW', '34201743', 'Enqueue',        '57014000000ltfIAAQ', '57014000000ltfIAAQEnqueue'),
    ('0DZ14000003IQ6cGAG', '34217778', 'Enqueue',        '57014000000ltfIAAQ', '57014000000ltfIAAQEnqueue'),
    ('0DZ14000003IR7JGAW', '34221794', 'PushAssignment', '57014000000ltfIAAQ', '57014000000ltfIAAQPushAssignment'),
    ('0DZ14000003IiDnGAK', '34287448', 'Enqueue',        '57014000000ltfIAAQ', '57014000000ltfIAAQEnqueue'),
    ('0DZ14000003IiDoGAK', '34287545', 'PushAssignment', '57014000000ltfIAAQ', '57014000000ltfIAAQPushAssignment'),
    ('0DZ14000003Iut5GAC', '34336044', 'Enqueue',        '57014000000ltfIAAQ', '57014000000ltfIAAQEnqueue'),
    ('0DZ14000003Iv7HGAS', '34336906', 'Accept',         '57014000000ltfIAAQ', '57014000000ltfIAAQAccept');

-- One row per contiguous run ("island") of identical groupid values:
-- the run's first row (startid, name, type, livechattranscriptid) plus the id
-- of the run's last row (lastid).
DECLARE @islands TABLE (
    startid              varchar(18),
    name                 varchar(8),
    [type]               varchar(20),
    livechattranscriptid varchar(18),
    lastid               varchar(18)
);

-- Gaps-and-islands, evaluated independently for each transcript so that
-- interleaved events from different chats cannot break or merge runs.
-- Rows are ordered by name (id breaks ties so the result is deterministic).
-- NOTE(review): name is a varchar and is compared as a string; this assumes
-- fixed-width sequence numbers -- confirm against the real data.
WITH flagged AS (
    -- Mark a row as the start of a run when the previous row of the same
    -- transcript has a different groupid, or when there is no previous row.
    -- A NULL groupid never equals its neighbour, so such rows each form their
    -- own single-row run instead of being silently skipped.
    SELECT
        t.id,
        t.name,
        t.[type],
        t.livechattranscriptid,
        CASE
            WHEN LAG(t.groupid) OVER (
                     PARTITION BY t.livechattranscriptid
                     ORDER BY t.name, t.id
                 ) = t.groupid
            THEN 0
            ELSE 1
        END AS is_run_start
    FROM @mytable AS t
),
numbered AS (
    -- Running count of run starts = run number within the transcript.
    -- ROWS framing avoids the default RANGE frame, which lumps tied rows.
    SELECT
        f.id,
        f.name,
        f.[type],
        f.livechattranscriptid,
        SUM(f.is_run_start) OVER (
            PARTITION BY f.livechattranscriptid
            ORDER BY f.name, f.id
            ROWS UNBOUNDED PRECEDING
        ) AS run_no
    FROM flagged AS f
),
bounded AS (
    -- Attach the run's last id to every row and number rows inside the run.
    SELECT
        n.id,
        n.name,
        n.[type],
        n.livechattranscriptid,
        ROW_NUMBER() OVER (
            PARTITION BY n.livechattranscriptid, n.run_no
            ORDER BY n.name, n.id
        ) AS pos_in_run,
        LAST_VALUE(n.id) OVER (
            PARTITION BY n.livechattranscriptid, n.run_no
            ORDER BY n.name, n.id
            ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING
        ) AS lastid
    FROM numbered AS n
)
INSERT INTO @islands
    (startid, name, [type], livechattranscriptid, lastid)
SELECT
    b.id,
    b.name,
    b.[type],
    b.livechattranscriptid,
    b.lastid
FROM bounded AS b
WHERE b.pos_in_run = 1;   -- keep only the first row of each run

SELECT
    i.startid,
    i.name,
    i.[type],
    i.livechattranscriptid,
    i.lastid
FROM @islands AS i
ORDER BY
    i.livechattranscriptid,
    i.name;
我们有一个聊天系统,有时每秒为聊天期间的每个事件生成多个事件日志。问题是这些消耗了大量的数据存储(这在该平台上非常昂贵),我们希望简化我们实际存储的内容并删除真正不需要的东西。
为此,有一种事件类型表示聊天在队列中的位置。我们并不关心每一个位置记录,只要这些记录之间没有穿插该聊天的其他事件即可。因此,对于每个中间没有其他事件类型的连续分组,我们只想保留其中的第一条和最后一条记录,以便得到该时间段的 "total time in queue"(排队总时长)。
更复杂的是,客户在部门之间转接时可能多次进出队列,因此同一个聊天可以有多个队列位置记录块。我试过使用 FIRST_VALUE 和 LAST_VALUE,它基本上能解决问题,但当同一个聊天中出现两个不同的此类事件块时就失败了。
这是生成测试数据的脚本:
<!-- language: lang-sql -->
-- Sample fixture: the event log of a single chat transcript.
-- In these rows `name` is a numeric-looking string that increases from row to
-- row, and `groupid` is the transcript id immediately followed by the event type.
-- The 'Enqueue' events form three separate runs (4 rows, 1 row, 1 row), each
-- separated by a 'PushAssignment' event.
CREATE TABLE #testdata (
    id                   varchar(18),
    name                 varchar(8),
    [type]               varchar(20),
    livechattranscriptid varchar(18),
    groupid              varchar(40)
);

INSERT INTO #testdata
    (id, name, [type], livechattranscriptid, groupid)
VALUES
    ('0DZ14000003I2pOGAS', '34128314', 'ChatRequest',    '57014000000ltfIAAQ', '57014000000ltfIAAQChatRequest'),
    ('0DZ14000003IGmQGAW', '34181980', 'Enqueue',        '57014000000ltfIAAQ', '57014000000ltfIAAQEnqueue'),
    ('0DZ14000003IHbqGAG', '34185171', 'Enqueue',        '57014000000ltfIAAQ', '57014000000ltfIAAQEnqueue'),
    ('0DZ14000003ILuHGAW', '34201743', 'Enqueue',        '57014000000ltfIAAQ', '57014000000ltfIAAQEnqueue'),
    ('0DZ14000003IQ6cGAG', '34217778', 'Enqueue',        '57014000000ltfIAAQ', '57014000000ltfIAAQEnqueue'),
    ('0DZ14000003IR7JGAW', '34221794', 'PushAssignment', '57014000000ltfIAAQ', '57014000000ltfIAAQPushAssignment'),
    ('0DZ14000003IiDnGAK', '34287448', 'Enqueue',        '57014000000ltfIAAQ', '57014000000ltfIAAQEnqueue'),
    ('0DZ14000003IiDoGAK', '34287545', 'PushAssignment', '57014000000ltfIAAQ', '57014000000ltfIAAQPushAssignment'),
    ('0DZ14000003Iut5GAC', '34336044', 'Enqueue',        '57014000000ltfIAAQ', '57014000000ltfIAAQEnqueue'),
    ('0DZ14000003Iv7HGAS', '34336906', 'Accept',         '57014000000ltfIAAQ', '57014000000ltfIAAQAccept');
这里尝试识别任何按名称字段排序并按转录 ID 分组的组的第一个和最后一个 ID:
-- For every row, report the first and last id inside its groupid partition
-- (rows ordered by transcript id, then name).
-- The partition key is groupid alone, so all rows sharing a groupid are treated
-- as one group even when other event types sit between them.
SELECT
    td.id,
    td.name,
    td.[type],
    td.livechattranscriptid,
    td.groupid,
    FIRST_VALUE(td.id) OVER (
        PARTITION BY td.groupid
        ORDER BY td.livechattranscriptid, td.name
    ) AS firstinstancegroup,
    -- Explicit frame: the default frame stops at the current row, which would
    -- make LAST_VALUE return the current row's id.
    LAST_VALUE(td.id) OVER (
        PARTITION BY td.groupid
        ORDER BY td.livechattranscriptid, td.name
        RANGE BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING
    ) AS lastinstancegroup
FROM #testdata AS td
ORDER BY
    td.livechattranscriptid,
    td.name;
问题是,它为整个组的所有记录提供了相同的第一个和最后一个 ID,而不是将每组 Enqueue 记录视为一个不同的组。我如何将 Enqueue 的每个不同的分组实例视为一个唯一的组?
这是一个类似的解决方案:Grouping contiguous table data
写得不算漂亮,但其逻辑是基于 OP(提问者)的需求的:找出同一列上的连续数据。
-- Sample fixture: the event log of a single chat transcript.
DECLARE @mytable TABLE (
    id                   varchar(18),
    name                 varchar(8),
    [type]               varchar(20),
    livechattranscriptid varchar(18),
    groupid              varchar(100)
);

INSERT INTO @mytable
    (id, name, [type], livechattranscriptid, groupid)
VALUES
    ('0DZ14000003I2pOGAS', '34128314', 'ChatRequest',    '57014000000ltfIAAQ', '57014000000ltfIAAQChatRequest'),
    ('0DZ14000003IGmQGAW', '34181980', 'Enqueue',        '57014000000ltfIAAQ', '57014000000ltfIAAQEnqueue'),
    ('0DZ14000003IHbqGAG', '34185171', 'Enqueue',        '57014000000ltfIAAQ', '57014000000ltfIAAQEnqueue'),
    ('0DZ14000003ILuHGAW', '34201743', 'Enqueue',        '57014000000ltfIAAQ', '57014000000ltfIAAQEnqueue'),
    ('0DZ14000003IQ6cGAG', '34217778', 'Enqueue',        '57014000000ltfIAAQ', '57014000000ltfIAAQEnqueue'),
    ('0DZ14000003IR7JGAW', '34221794', 'PushAssignment', '57014000000ltfIAAQ', '57014000000ltfIAAQPushAssignment'),
    ('0DZ14000003IiDnGAK', '34287448', 'Enqueue',        '57014000000ltfIAAQ', '57014000000ltfIAAQEnqueue'),
    ('0DZ14000003IiDoGAK', '34287545', 'PushAssignment', '57014000000ltfIAAQ', '57014000000ltfIAAQPushAssignment'),
    ('0DZ14000003Iut5GAC', '34336044', 'Enqueue',        '57014000000ltfIAAQ', '57014000000ltfIAAQEnqueue'),
    ('0DZ14000003Iv7HGAS', '34336906', 'Accept',         '57014000000ltfIAAQ', '57014000000ltfIAAQAccept');

-- One row per contiguous run ("island") of identical groupid values:
-- the run's first row (startid, name, type, livechattranscriptid) plus the id
-- of the run's last row (lastid).
DECLARE @islands TABLE (
    startid              varchar(18),
    name                 varchar(8),
    [type]               varchar(20),
    livechattranscriptid varchar(18),
    lastid               varchar(18)
);

-- Gaps-and-islands, evaluated independently for each transcript so that
-- interleaved events from different chats cannot break or merge runs.
-- Rows are ordered by name (id breaks ties so the result is deterministic).
-- NOTE(review): name is a varchar and is compared as a string; this assumes
-- fixed-width sequence numbers -- confirm against the real data.
WITH flagged AS (
    -- Mark a row as the start of a run when the previous row of the same
    -- transcript has a different groupid, or when there is no previous row.
    -- A NULL groupid never equals its neighbour, so such rows each form their
    -- own single-row run instead of being silently skipped.
    SELECT
        t.id,
        t.name,
        t.[type],
        t.livechattranscriptid,
        CASE
            WHEN LAG(t.groupid) OVER (
                     PARTITION BY t.livechattranscriptid
                     ORDER BY t.name, t.id
                 ) = t.groupid
            THEN 0
            ELSE 1
        END AS is_run_start
    FROM @mytable AS t
),
numbered AS (
    -- Running count of run starts = run number within the transcript.
    -- ROWS framing avoids the default RANGE frame, which lumps tied rows.
    SELECT
        f.id,
        f.name,
        f.[type],
        f.livechattranscriptid,
        SUM(f.is_run_start) OVER (
            PARTITION BY f.livechattranscriptid
            ORDER BY f.name, f.id
            ROWS UNBOUNDED PRECEDING
        ) AS run_no
    FROM flagged AS f
),
bounded AS (
    -- Attach the run's last id to every row and number rows inside the run.
    SELECT
        n.id,
        n.name,
        n.[type],
        n.livechattranscriptid,
        ROW_NUMBER() OVER (
            PARTITION BY n.livechattranscriptid, n.run_no
            ORDER BY n.name, n.id
        ) AS pos_in_run,
        LAST_VALUE(n.id) OVER (
            PARTITION BY n.livechattranscriptid, n.run_no
            ORDER BY n.name, n.id
            ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING
        ) AS lastid
    FROM numbered AS n
)
INSERT INTO @islands
    (startid, name, [type], livechattranscriptid, lastid)
SELECT
    b.id,
    b.name,
    b.[type],
    b.livechattranscriptid,
    b.lastid
FROM bounded AS b
WHERE b.pos_in_run = 1;   -- keep only the first row of each run

SELECT
    i.startid,
    i.name,
    i.[type],
    i.livechattranscriptid,
    i.lastid
FROM @islands AS i
ORDER BY
    i.livechattranscriptid,
    i.name;