T/SQL - 时间相关计算
T/SQL - Time related calculations
-- Fixture for the duplicate/overlap examples: (re)creates #TempTable and loads 14 sample rows.
-- A record starts at [Date] + [Time]; the queries in this file add [Duration] to that start as seconds.
-- NOTE(review): 'tempdb' is spelled in lowercase (the system database's actual name) so that the
-- lookup also resolves if the instance collation is case-sensitive -- confirm for your server.
IF OBJECT_ID('tempdb..#TempTable') IS NOT NULL
    DROP TABLE #TempTable;

CREATE TABLE #TempTable
(
    [ID]       INT         NOT NULL,
    [Value]    VARCHAR(50) NULL,
    [Date]     DATE        NULL,
    [Time]     TIME(7)     NULL,
    [Duration] INT         NULL,
    [srcFile]  VARCHAR(50) NULL
);

-- String literals are plain (non-Unicode) because the target columns are VARCHAR.
INSERT INTO #TempTable
    ([ID], [Value], [Date], [Time], [Duration], [srcFile])
VALUES
    ( 1, 'One', CAST('2014-07-29' AS DATE), CAST('23:34:00' AS TIME), 1710, 'sF1'),
    ( 2, 'One', CAST('2014-07-30' AS DATE), CAST('00:00:10' AS TIME), 1710, 'sF1'),
    ( 3, 'One', CAST('2014-07-30' AS DATE), CAST('01:30:00' AS TIME), 1710, 'sF1'),
    ( 4, 'One', CAST('2014-07-30' AS DATE), CAST('01:54:00' AS TIME), 1710, 'sF1'),
    ( 5, 'One', CAST('2014-07-30' AS DATE), CAST('13:30:00' AS TIME), 1710, 'sF1'),
    ( 6, 'One', CAST('2014-07-30' AS DATE), CAST('13:57:00' AS TIME), 1710, 'sF2'),
    ( 7, 'One', CAST('2014-07-30' AS DATE), CAST('23:34:00' AS TIME), 1710, 'sF1'),
    ( 8, 'One', CAST('2014-07-31' AS DATE), CAST('00:00:10' AS TIME), 1710, 'sF2'),
    ( 9, 'One', CAST('2014-07-31' AS DATE), CAST('00:10:10' AS TIME), 1710, 'sF3'),
    (10, 'One', CAST('2014-08-01' AS DATE), CAST('00:00:00' AS TIME), 1710, 'sF2'),
    (11, 'One', CAST('2014-08-01' AS DATE), CAST('00:00:00' AS TIME), 1710, 'sF1'),
    (12, 'One', CAST('2014-08-01' AS DATE), CAST('01:00:00' AS TIME), 1710, 'sF3'),
    (13, 'One', CAST('2014-08-01' AS DATE), CAST('01:00:00' AS TIME), 1710, 'sF4'),
    (14, 'Two', CAST('2014-08-01' AS DATE), CAST('00:01:00' AS TIME), 1710, 'sF2');

-- Show the loaded rows in a stable order.
SELECT [ID], [Value], [Date], [Time], [Duration], [srcFile]
FROM #TempTable
ORDER BY [ID];
基础表(Base Table)
ID Value Date Time Duration srcFile
1 One 7/29/2014 23:34:00 1710 sF1
2 One 7/30/2014 0:00:10 1710 sF1
3 One 7/30/2014 1:30:00 1710 sF1
4 One 7/30/2014 1:54:00 1710 sF1
5 One 7/30/2014 13:30:00 1710 sF1
6 One 7/30/2014 13:57:00 1710 sF2
7 One 7/30/2014 23:34:00 1710 sF1
8 One 7/31/2014 0:00:10 1710 sF2
9 One 8/1/2014 0:00:00 1710 sF2
10 Two 8/1/2014 0:01:00 1710 sF2
11 One 8/1/2014 0:00:00 1710 sF1
要求:
当 [Value] + [Date] + [Time] 匹配时 Dup
输出:用 1 标记 isDup 标志,用 srcFile 标记 dupFIle,用于 dup 条件匹配的两个或多个记录。
当[Value]匹配且任意两条或多条记录的[Date]+[Time]都在[Date]+[Time]PLUS(+)[Duration]内时Overlap(注:当所有匹配的记录都是DUP......它们也不能重叠......但重叠可以有至少一个独特的记录和落在持续时间范围内的多个重复)。
输出:用 1 标记 isOverlap 标志,用 srcFile 标记 overlapFile,用于重叠条件匹配的两个或多个记录。
这是我试过的
-- Asker's attempt at flagging duplicates and overlaps in #TempTable (behaviour unchanged).
-- NOTE(review): the overlapFiles subquery filters T only on Value; its time predicate compares
-- A with B and never T, so every srcFile sharing the Value is listed. Rows that are neither a
-- duplicate nor an overlap are removed by the final WHERE, and a row with several overlap
-- partners is returned once per partner.
;WITH stamped AS (
    -- Combine [Date] and [Time] into a single DATETIME2 start value, once per row.
    SELECT
        ID,
        Value,
        [Date],
        [Time],
        Duration,
        srcFile,
        CAST(CAST([Date] AS VARCHAR(10)) + ' ' + CAST([Time] AS VARCHAR(16)) AS DATETIME2) AS StartAt
    FROM #TempTable
)
, dupCTE AS (
    -- A row is a duplicate when another row shares its Value, [Date] and [Time].
    SELECT
        TT.ID,
        TT.Value,
        TT.[Date],
        TT.[Time],
        TT.Duration,
        TT.srcFile,
        TT.StartAt,
        CASE
            WHEN COUNT(*) OVER (PARTITION BY TT.Value, TT.[Date], TT.[Time]) > 1 THEN 1
            ELSE 0
        END AS isDup,
        CASE
            WHEN COUNT(*) OVER (PARTITION BY TT.Value, TT.[Date], TT.[Time]) > 1
                THEN STUFF((SELECT ' - ' + T.srcFile
                            FROM #TempTable T
                            WHERE T.Value = TT.Value
                              AND T.[Date] = TT.[Date]
                              AND T.[Time] = TT.[Time]
                            FOR XML PATH('')), 1, 3, '')
            ELSE NULL
        END AS dupFIle
    FROM stamped TT
)
, overlapCTE AS (
    -- Pair each row A with every row B of the same Value whose start lies strictly inside
    -- A's [start, start + Duration seconds] window, or vice versa.
    SELECT
        A.ID,
        A.Value,
        A.[Date],
        A.[Time],
        A.Duration,
        A.srcFile,
        A.isDup,
        A.dupFIle,
        CASE WHEN B.ID IS NOT NULL THEN 1 ELSE 0 END AS isOverlap,
        CASE
            WHEN B.ID IS NOT NULL
                THEN STUFF((SELECT ' - ' + T.srcFile
                            FROM #TempTable T
                            WHERE T.Value = A.Value
                              AND (   (B.StartAt > A.StartAt AND B.StartAt < DATEADD(SECOND, A.Duration, A.StartAt))
                                   OR (A.StartAt > B.StartAt AND A.StartAt < DATEADD(SECOND, B.Duration, B.StartAt)))
                            FOR XML PATH('')), 1, 3, '')
            ELSE NULL
        END AS overlapFiles
    FROM dupCTE A
    LEFT JOIN dupCTE B
        ON A.Value = B.Value
       AND (   (B.StartAt > A.StartAt AND B.StartAt < DATEADD(SECOND, A.Duration, A.StartAt))
            OR (A.StartAt > B.StartAt AND A.StartAt < DATEADD(SECOND, B.Duration, B.StartAt)))
    WHERE A.isDup = 1
       OR B.ID IS NOT NULL
)
SELECT ID, Value, [Date], [Time], Duration, srcFile, isDup, dupFIle, isOverlap, overlapFiles
FROM overlapCTE;

DROP TABLE #TempTable;
当前输出
ID Value Date Time Duration srcFile isDup dupFIle isOverlap overlapFiles
1 One 2014-07-29 23:34:00 1710 sF1 0 NULL 1 sF1 - sF1 - sF1 - sF1 - sF1 - sF2 - sF1 - sF2 - sF2 - sF1
2 One 2014-07-30 00:00:10 1710 sF1 0 NULL 1 sF1 - sF1 - sF1 - sF1 - sF1 - sF2 - sF1 - sF2 - sF2 - sF1
3 One 2014-07-30 01:30:00 1710 sF1 0 NULL 1 sF1 - sF1 - sF1 - sF1 - sF1 - sF2 - sF1 - sF2 - sF2 - sF1
4 One 2014-07-30 01:54:00 1710 sF1 0 NULL 1 sF1 - sF1 - sF1 - sF1 - sF1 - sF2 - sF1 - sF2 - sF2 - sF1
5 One 2014-07-30 13:30:00 1710 sF1 0 NULL 1 sF1 - sF1 - sF1 - sF1 - sF1 - sF2 - sF1 - sF2 - sF2 - sF1
6 One 2014-07-30 13:57:00 1710 sF2 0 NULL 1 sF1 - sF1 - sF1 - sF1 - sF1 - sF2 - sF1 - sF2 - sF2 - sF1
7 One 2014-07-30 23:34:00 1710 sF1 0 NULL 1 sF1 - sF1 - sF1 - sF1 - sF1 - sF2 - sF1 - sF2 - sF2 - sF1
8 One 2014-07-31 00:00:10 1710 sF2 0 NULL 1 sF1 - sF1 - sF1 - sF1 - sF1 - sF2 - sF1 - sF2 - sF2 - sF1
9 One 2014-08-01 00:00:00 1710 sF2 1 sF2 - sF1 0 NULL
11 One 2014-08-01 00:00:00 1710 sF1 1 sF2 - sF1 0 NULL
期望的输出
ID Value Date Time Duration srcFile isDup dupFIle isOverLap overlapFile
1 One 2014-07-29 23:34:00 1710 sF1 0 NULL 1 sF1 - sF1
2 One 2014-07-30 00:00:10 1710 sF1 0 NULL 1 sF1 - sF1
3 One 2014-07-30 01:30:00 1710 sF1 0 NULL 1 sF1 - sF1
4 One 2014-07-30 01:54:00 1710 sF1 0 NULL 1 sF1 - sF1
5 One 2014-07-30 13:30:00 1710 sF1 0 NULL 1 sF1 - sF2
6 One 2014-07-30 13:57:00 1710 sF2 0 NULL 1 sF2 - sF1
7 One 2014-07-30 23:34:00 1710 sF1 0 NULL 1 sF1 - sF2
8 One 2014-07-31 00:00:10 1710 sF2 0 NULL 1 sF2 - sF1
9 One 2014-08-01 00:00:00 1710 sF2 1 sF2 - sF1 0 NULL
10 Two 2014-08-01 00:01:00 1710 sF2 0 NULL 0 NULL
11 One 2014-08-01 00:00:00 1710 sF1 1 sF1 - sF2 0 NULL
我不符合要求。任何帮助将不胜感激。
谢谢
更新:
添加了当前输出
更新2:
在 Dup CTE 中发现错误(使用 ID 而不是 Value)。
所需的输出仍然有待改进。
更新3:
有进展了,各位,我们已经非常接近了。现在重叠逻辑已经 "working." 一个主要问题是重叠文件列(overlapFiles)。它应该只列出相互重叠的记录的文件(现在,它列出了 overlapCTE 输出的所有文件,而不是只列出那些在 STUFF 查询中满足 WHERE 条件的文件)。另外,有没有办法把那条唯一的记录也列出来?
更新4:添加了更多记录以查看重复和重叠查询是否可以容纳不止两条记录。
我觉得我被吸引到在可能不需要的地方使用 CTE。我不知道你的用例,但这个 table 结构在我看来很奇怪;这是作业吗?无论如何,这应该可以解决问题:
-- Flags duplicate and overlapping records in #TempTable, one output row per record.
--   isDup / dupFile         : another record has the same Value and the same start date-time;
--                             dupFile lists the distinct srcFile values of those other records.
--   isOverlap / overlapFile : another record of the same Value starts strictly inside this
--                             record's [start, start + Duration seconds] window, or this record
--                             starts strictly inside the other's; overlapFile lists the distinct
--                             srcFile values of those other records.
-- The leading semicolon terminates any preceding statement, which WITH requires.
;WITH tFullCTE AS (
    -- Source rows plus the combined start date-time.
    SELECT
        TT.ID,
        TT.Value,
        TT.[Date],
        TT.[Time],
        CAST(CAST(TT.[Date] AS VARCHAR(10)) + ' ' + CAST(TT.[Time] AS VARCHAR(16)) AS DATETIME2) AS [DateTime],
        TT.Duration,
        TT.srcFile
    FROM #TempTable TT
)
, dupCTE AS (
    -- Ordered pairs (FirstID, SecondID) of distinct records sharing Value and start.
    SELECT
        main.ID AS FirstID,
        dups.ID AS SecondID
    FROM tFullCTE main
    INNER JOIN tFullCTE dups
        ON main.Value = dups.Value
       AND main.[DateTime] = dups.[DateTime]
       AND main.ID <> dups.ID
)
, overlapCTE AS (
    -- Ordered pairs of distinct records where one starts inside the other's window.
    SELECT
        main.ID AS FirstID,
        ovlp.ID AS SecondID
    FROM tFullCTE main
    INNER JOIN tFullCTE ovlp
        ON main.Value = ovlp.Value
       AND (   (ovlp.[DateTime] > main.[DateTime] AND ovlp.[DateTime] < DATEADD(SECOND, main.Duration, main.[DateTime]))
            OR (main.[DateTime] > ovlp.[DateTime] AND main.[DateTime] < DATEADD(SECOND, ovlp.Duration, ovlp.[DateTime])))
       AND main.ID <> ovlp.ID
)
SELECT
    main.ID,
    main.Value,
    main.[Date],
    main.[Time],
    main.[DateTime],
    main.Duration,
    main.srcFile,
    CASE WHEN EXISTS (SELECT 1 FROM dupCTE dup WHERE dup.FirstID = main.ID) THEN 1 ELSE 0 END AS isDup,
    -- GROUP BY de-duplicates the file names and ORDER BY fixes their order; TYPE + .value()
    -- returns the text un-escaped, so characters such as '&' or '<' in srcFile are not
    -- turned into XML entities. STUFF strips the leading ' - '; no partner rows yield NULL.
    STUFF((SELECT ' - ' + dupDetails.srcFile
           FROM dupCTE dup
           INNER JOIN tFullCTE dupDetails
               ON dup.SecondID = dupDetails.ID
           WHERE dup.FirstID = main.ID
           GROUP BY dupDetails.srcFile
           ORDER BY dupDetails.srcFile
           FOR XML PATH(''), TYPE).value('.', 'VARCHAR(MAX)'), 1, 3, '') AS dupFile,
    CASE WHEN EXISTS (SELECT 1 FROM overlapCTE ovlp WHERE ovlp.FirstID = main.ID) THEN 1 ELSE 0 END AS isOverlap,
    STUFF((SELECT ' - ' + ovlpDetails.srcFile
           FROM overlapCTE ovlp
           INNER JOIN tFullCTE ovlpDetails
               ON ovlp.SecondID = ovlpDetails.ID
           WHERE ovlp.FirstID = main.ID
           GROUP BY ovlpDetails.srcFile
           ORDER BY ovlpDetails.srcFile
           FOR XML PATH(''), TYPE).value('.', 'VARCHAR(MAX)'), 1, 3, '') AS overlapFile
FROM tFullCTE main
ORDER BY main.ID;
-- Flags duplicate and overlapping records in #TempTable, one output row per record.
--   isDup / dupFile         : more than one record shares this Value and start date-time;
--                             dupFile lists the srcFile of every record in that group.
--   isOverlap / overlapFile : another record of the same Value starts strictly inside this
--                             record's window, or this record starts strictly inside the
--                             other's; overlapFile is this record's srcFile followed by the
--                             srcFile of each overlapping record.
-- The flags are computed without a GROUP BY so that the window COUNT(*) counts source rows.
;WITH ModifiedData AS (
    -- [DateTime] is the start (whole seconds); DateTimeWithDuration is start + Duration seconds.
    SELECT
        ID,
        Value,
        [Date],
        [Time],
        Duration,
        srcFile,
        [DateTime] = DATEADD(S, DATEDIFF(S, 0, [Time]), CAST([Date] AS DATETIME)),
        DateTimeWithDuration = DATEADD(S, Duration, DATEADD(S, DATEDIFF(S, 0, [Time]), CAST([Date] AS DATETIME)))
    FROM #TempTable
)
, Flags AS (
    SELECT
        m.ID,
        m.Value,
        m.[Date],
        m.[Time],
        m.Duration,
        m.srcFile,
        m.[DateTime],
        m.DateTimeWithDuration,
        isDup = CASE
                    WHEN COUNT(*) OVER (PARTITION BY m.Value, m.[DateTime]) > 1 THEN 1
                    ELSE 0
                END,
        -- EXISTS keeps one row per record no matter how many partners overlap it.
        isOverlap = CASE
                        WHEN EXISTS (SELECT 1
                                     FROM ModifiedData m2
                                     WHERE m2.Value = m.Value
                                       AND m2.ID <> m.ID
                                       AND (   (m2.[DateTime] > m.[DateTime] AND m2.[DateTime] < m.DateTimeWithDuration)
                                            OR (m.[DateTime] > m2.[DateTime] AND m.[DateTime] < m2.DateTimeWithDuration)))
                            THEN 1
                        ELSE 0
                    END
    FROM ModifiedData m
)
SELECT
    f.ID,
    f.Value,
    f.[Date],
    f.[Time],
    f.Duration,
    f.srcFile,
    f.isDup,
    -- TYPE + .value() returns the concatenation un-escaped ('&', '<', '>' stay literal);
    -- STUFF strips the leading ' - '.
    dupFile = CASE
                  WHEN f.isDup = 1 THEN
                      STUFF((SELECT ' - ' + d.srcFile
                             FROM ModifiedData d
                             WHERE d.Value = f.Value
                               AND d.[DateTime] = f.[DateTime]
                             ORDER BY d.ID
                             FOR XML PATH(''), TYPE).value('.', 'VARCHAR(MAX)'), 1, 3, '')
                  ELSE NULL
              END,
    f.isOverlap,
    -- Own file first, then each overlapping record's file, each preceded by ' - '.
    overlapFile = CASE
                      WHEN f.isOverlap = 1 THEN
                          f.srcFile
                          + (SELECT ' - ' + o.srcFile
                             FROM ModifiedData o
                             WHERE o.Value = f.Value
                               AND o.ID <> f.ID
                               AND (   (o.[DateTime] > f.[DateTime] AND o.[DateTime] < f.DateTimeWithDuration)
                                    OR (f.[DateTime] > o.[DateTime] AND f.[DateTime] < o.DateTimeWithDuration))
                             ORDER BY o.[DateTime], o.ID
                             FOR XML PATH(''), TYPE).value('.', 'VARCHAR(MAX)')
                      ELSE NULL
                  END
FROM Flags f
ORDER BY f.ID;
这应该能准确地告诉您您想要什么:
-- Flags duplicate and overlapping records in #TempTable, one output row per record.
--   isDup / dupFile         : comma-separated 'own - other' srcFile pairs, one per other record
--                             with the same Value and the same start date-time.
--   isOverLap / OverlapFile : comma-separated 'own - other' srcFile pairs, one per record of the
--                             same Value whose [start, start + Duration seconds) window overlaps.
-- Both flags are returned as integers (0/1).
;WITH Stamped AS (
    -- Start date-time of each record as a DATETIME ([Date] and [Time] added together).
    SELECT
        ID,
        Value,
        Duration,
        srcFile,
        CAST([Date] AS DATETIME) + CAST([Time] AS DATETIME) AS StartAt
    FROM #TempTable
)
, CTE AS (
    -- Duplicate pairs: ID1 is the row being described, over1 is 'own - other'.
    SELECT
        T.ID AS ID1,
        T.srcFile + ' - ' + c.srcFile AS over1
    FROM Stamped T
    INNER JOIN Stamped c
        ON T.Value = c.Value
       AND c.ID <> T.ID
       AND c.StartAt = T.StartAt
)
, CTE2 AS (
    -- Overlap pairs, stored once per pair: c (ID2) starts first and is still running when
    -- T (ID1) starts. over1 reads from ID1's side, over2 from ID2's side.
    SELECT
        T.ID AS ID1,
        c.ID AS ID2,
        T.srcFile + ' - ' + c.srcFile AS over1,
        c.srcFile + ' - ' + T.srcFile AS over2
    FROM Stamped T
    INNER JOIN Stamped c
        ON T.Value = c.Value
       AND c.ID <> T.ID
    WHERE DATEADD(SECOND, c.Duration, c.StartAt) > T.StartAt
      AND c.StartAt < T.StartAt
)
SELECT
    T.ID,
    T.Value,
    T.[Date],
    T.[Time],
    T.Duration,
    T.srcFile,
    CASE WHEN EXISTS (SELECT 1 FROM CTE d WHERE d.ID1 = T.ID) THEN 1 ELSE 0 END AS isDup,
    -- TYPE + .value() keeps '&', '<', '>' literal and imposes no length cap on the list;
    -- STUFF strips the leading comma.
    STUFF((SELECT ',' + d.over1
           FROM CTE d
           WHERE d.ID1 = T.ID
           FOR XML PATH(''), TYPE).value('.', 'VARCHAR(MAX)'), 1, 1, '') AS dupFile,
    CASE WHEN EXISTS (SELECT 1 FROM CTE2 o WHERE o.ID1 = T.ID OR o.ID2 = T.ID) THEN 1 ELSE 0 END AS isOverLap,
    -- Pick the pair text written from this row's side so its own file always comes first.
    STUFF((SELECT ',' + CASE WHEN T.ID = o.ID1 THEN o.over1 ELSE o.over2 END
           FROM CTE2 o
           WHERE o.ID1 = T.ID
              OR o.ID2 = T.ID
           FOR XML PATH(''), TYPE).value('.', 'VARCHAR(MAX)'), 1, 1, '') AS OverlapFile
FROM #TempTable T
ORDER BY T.ID;
代码符合您的要求(希望如此)。
我通过添加更多的重叠和重复来测试它,它不仅适用于 2 个重复的重叠文件(例如 srcFile='sF3'),而且具有以下观察结果:
- DupFile - 始终按文件名排序的列表
- overlapfile - 如果只有一个文件,则没有一对 "sF1 - sF1",只有 "sF1" - 我不确定这是否是生产目的所必需的,但可以进行调整(在这种情况下还没有)
-- Flags duplicate and overlapping records in #TempTable, one output row per record.
--   isDup / dupFile          : two or more records share Value, [Date] and [Time]; dupFile lists
--                              the srcFile of every record in the group, sorted by file name.
--   isoverlap / overlapfiles : a non-duplicate record whose start lies inside another record's
--                              [start, start + Duration seconds) window, or vice versa;
--                              overlapfiles lists the distinct srcFile values of the record
--                              itself and of every record it overlaps, sorted by file name.
-- Records that are duplicates are never flagged as overlaps.
;WITH src_rows AS (
    -- Each record with its start and end (start + Duration seconds) as DATETIME2.
    SELECT
        [ID],
        [Value],
        [Date],
        [Time],
        [Duration],
        [srcFile],
        CAST(CAST([Date] AS VARCHAR(10)) + ' ' + CAST([Time] AS VARCHAR(16)) AS DATETIME2) AS start_at,
        DATEADD(SECOND, [Duration],
                CAST(CAST([Date] AS VARCHAR(10)) + ' ' + CAST([Time] AS VARCHAR(16)) AS DATETIME2)) AS end_at
    FROM #TempTable
)
, dups AS (
    -- Keys that occur more than once.
    SELECT [Value], [Date], [Time]
    FROM src_rows
    GROUP BY [Value], [Date], [Time]
    HAVING COUNT(*) > 1
)
, dups_files AS (
    -- File list per duplicate key. TYPE + .value() keeps '&', '<', '>' literal;
    -- STUFF strips the leading ' - '.
    SELECT
        g.[Value],
        g.[Date],
        g.[Time],
        STUFF((SELECT ' - ' + m.[srcFile]
               FROM src_rows AS m
               WHERE m.[Value] = g.[Value]
                 AND m.[Date] = g.[Date]
                 AND m.[Time] = g.[Time]
               ORDER BY m.[srcFile], m.[ID]
               FOR XML PATH(''), TYPE).value('.', 'VARCHAR(MAX)'), 1, 3, '') AS dupFile
    FROM dups AS g
)
, duplicities AS (
    -- Every record that belongs to a duplicate key, with that key's file list.
    SELECT a.[ID], d.dupFile
    FROM src_rows AS a
    INNER JOIN dups_files AS d
        ON a.[Value] = d.[Value]
       AND a.[Date] = d.[Date]
       AND a.[Time] = d.[Time]
)
, overlaps_pairs AS (
    -- (id_a, id_b): non-duplicate record id_a overlaps record id_b. Each side's window uses
    -- that side's own Duration; ranges are half-open so a record that starts exactly when
    -- another ends does not count as an overlap.
    SELECT
        f.[ID] AS id_a,
        d.[ID] AS id_b,
        d.[srcFile] AS srcfile_b
    FROM src_rows AS f
    INNER JOIN src_rows AS d
        ON f.[ID] <> d.[ID]
       AND f.[Value] = d.[Value]
       AND (   (d.start_at >= f.start_at AND d.start_at < f.end_at)
            OR (f.start_at >= d.start_at AND f.start_at < d.end_at))
    WHERE NOT EXISTS (SELECT 1
                      FROM duplicities AS du
                      WHERE du.[ID] = f.[ID])
)
, overlaps AS (
    -- Exactly one row per overlapping record. UNION removes repeated file names.
    SELECT
        p.id_a AS [ID],
        STUFF((SELECT ' - ' + x.[srcFile]
               FROM (SELECT self.[srcFile]
                     FROM src_rows AS self
                     WHERE self.[ID] = p.id_a
                     UNION
                     SELECT q.srcfile_b
                     FROM overlaps_pairs AS q
                     WHERE q.id_a = p.id_a) AS x
               ORDER BY x.[srcFile]
               FOR XML PATH(''), TYPE).value('.', 'VARCHAR(MAX)'), 1, 3, '') AS overlapfiles
    FROM (SELECT DISTINCT id_a FROM overlaps_pairs) AS p
)
SELECT
    a.[ID],
    a.[Value],
    a.[Date],
    a.[Time],
    a.[Duration],
    a.[srcFile],
    CASE WHEN d.[ID] IS NOT NULL THEN 1 ELSE 0 END AS isDup,
    d.dupFile,
    CASE WHEN o.[ID] IS NOT NULL THEN 1 ELSE 0 END AS isoverlap,
    o.overlapfiles
FROM src_rows AS a
LEFT JOIN overlaps AS o
    ON a.[ID] = o.[ID]
LEFT JOIN duplicities AS d
    ON a.[ID] = d.[ID]
ORDER BY a.[ID];
-- Fixture for the duplicate/overlap examples: (re)creates #TempTable and loads 14 sample rows.
-- A record starts at [Date] + [Time]; the queries in this file add [Duration] to that start as seconds.
-- NOTE(review): 'tempdb' is spelled in lowercase (the system database's actual name) so that the
-- lookup also resolves if the instance collation is case-sensitive -- confirm for your server.
IF OBJECT_ID('tempdb..#TempTable') IS NOT NULL
    DROP TABLE #TempTable;

CREATE TABLE #TempTable
(
    [ID]       INT         NOT NULL,
    [Value]    VARCHAR(50) NULL,
    [Date]     DATE        NULL,
    [Time]     TIME(7)     NULL,
    [Duration] INT         NULL,
    [srcFile]  VARCHAR(50) NULL
);

-- String literals are plain (non-Unicode) because the target columns are VARCHAR.
INSERT INTO #TempTable
    ([ID], [Value], [Date], [Time], [Duration], [srcFile])
VALUES
    ( 1, 'One', CAST('2014-07-29' AS DATE), CAST('23:34:00' AS TIME), 1710, 'sF1'),
    ( 2, 'One', CAST('2014-07-30' AS DATE), CAST('00:00:10' AS TIME), 1710, 'sF1'),
    ( 3, 'One', CAST('2014-07-30' AS DATE), CAST('01:30:00' AS TIME), 1710, 'sF1'),
    ( 4, 'One', CAST('2014-07-30' AS DATE), CAST('01:54:00' AS TIME), 1710, 'sF1'),
    ( 5, 'One', CAST('2014-07-30' AS DATE), CAST('13:30:00' AS TIME), 1710, 'sF1'),
    ( 6, 'One', CAST('2014-07-30' AS DATE), CAST('13:57:00' AS TIME), 1710, 'sF2'),
    ( 7, 'One', CAST('2014-07-30' AS DATE), CAST('23:34:00' AS TIME), 1710, 'sF1'),
    ( 8, 'One', CAST('2014-07-31' AS DATE), CAST('00:00:10' AS TIME), 1710, 'sF2'),
    ( 9, 'One', CAST('2014-07-31' AS DATE), CAST('00:10:10' AS TIME), 1710, 'sF3'),
    (10, 'One', CAST('2014-08-01' AS DATE), CAST('00:00:00' AS TIME), 1710, 'sF2'),
    (11, 'One', CAST('2014-08-01' AS DATE), CAST('00:00:00' AS TIME), 1710, 'sF1'),
    (12, 'One', CAST('2014-08-01' AS DATE), CAST('01:00:00' AS TIME), 1710, 'sF3'),
    (13, 'One', CAST('2014-08-01' AS DATE), CAST('01:00:00' AS TIME), 1710, 'sF4'),
    (14, 'Two', CAST('2014-08-01' AS DATE), CAST('00:01:00' AS TIME), 1710, 'sF2');

-- Show the loaded rows in a stable order.
SELECT [ID], [Value], [Date], [Time], [Duration], [srcFile]
FROM #TempTable
ORDER BY [ID];
基础表(Base Table)
ID Value Date Time Duration srcFile
1 One 7/29/2014 23:34:00 1710 sF1
2 One 7/30/2014 0:00:10 1710 sF1
3 One 7/30/2014 1:30:00 1710 sF1
4 One 7/30/2014 1:54:00 1710 sF1
5 One 7/30/2014 13:30:00 1710 sF1
6 One 7/30/2014 13:57:00 1710 sF2
7 One 7/30/2014 23:34:00 1710 sF1
8 One 7/31/2014 0:00:10 1710 sF2
9 One 8/1/2014 0:00:00 1710 sF2
10 Two 8/1/2014 0:01:00 1710 sF2
11 One 8/1/2014 0:00:00 1710 sF1
要求:
当 [Value] + [Date] + [Time] 匹配时 Dup 输出:用 1 标记 isDup 标志,用 srcFile 标记 dupFIle,用于 dup 条件匹配的两个或多个记录。
当[Value]匹配且任意两条或多条记录的[Date]+[Time]都在[Date]+[Time]PLUS(+)[Duration]内时Overlap(注:当所有匹配的记录都是DUP......它们也不能重叠......但重叠可以有至少一个独特的记录和落在持续时间范围内的多个重复)。 输出:用 1 标记 isOverlap 标志,用 srcFile 标记 overlapFile,用于重叠条件匹配的两个或多个记录。
这是我试过的
-- Asker's attempt at flagging duplicates and overlaps in #TempTable (behaviour unchanged).
-- NOTE(review): the overlapFiles subquery filters T only on Value; its time predicate compares
-- A with B and never T, so every srcFile sharing the Value is listed. Rows that are neither a
-- duplicate nor an overlap are removed by the final WHERE, and a row with several overlap
-- partners is returned once per partner.
;WITH stamped AS (
    -- Combine [Date] and [Time] into a single DATETIME2 start value, once per row.
    SELECT
        ID,
        Value,
        [Date],
        [Time],
        Duration,
        srcFile,
        CAST(CAST([Date] AS VARCHAR(10)) + ' ' + CAST([Time] AS VARCHAR(16)) AS DATETIME2) AS StartAt
    FROM #TempTable
)
, dupCTE AS (
    -- A row is a duplicate when another row shares its Value, [Date] and [Time].
    SELECT
        TT.ID,
        TT.Value,
        TT.[Date],
        TT.[Time],
        TT.Duration,
        TT.srcFile,
        TT.StartAt,
        CASE
            WHEN COUNT(*) OVER (PARTITION BY TT.Value, TT.[Date], TT.[Time]) > 1 THEN 1
            ELSE 0
        END AS isDup,
        CASE
            WHEN COUNT(*) OVER (PARTITION BY TT.Value, TT.[Date], TT.[Time]) > 1
                THEN STUFF((SELECT ' - ' + T.srcFile
                            FROM #TempTable T
                            WHERE T.Value = TT.Value
                              AND T.[Date] = TT.[Date]
                              AND T.[Time] = TT.[Time]
                            FOR XML PATH('')), 1, 3, '')
            ELSE NULL
        END AS dupFIle
    FROM stamped TT
)
, overlapCTE AS (
    -- Pair each row A with every row B of the same Value whose start lies strictly inside
    -- A's [start, start + Duration seconds] window, or vice versa.
    SELECT
        A.ID,
        A.Value,
        A.[Date],
        A.[Time],
        A.Duration,
        A.srcFile,
        A.isDup,
        A.dupFIle,
        CASE WHEN B.ID IS NOT NULL THEN 1 ELSE 0 END AS isOverlap,
        CASE
            WHEN B.ID IS NOT NULL
                THEN STUFF((SELECT ' - ' + T.srcFile
                            FROM #TempTable T
                            WHERE T.Value = A.Value
                              AND (   (B.StartAt > A.StartAt AND B.StartAt < DATEADD(SECOND, A.Duration, A.StartAt))
                                   OR (A.StartAt > B.StartAt AND A.StartAt < DATEADD(SECOND, B.Duration, B.StartAt)))
                            FOR XML PATH('')), 1, 3, '')
            ELSE NULL
        END AS overlapFiles
    FROM dupCTE A
    LEFT JOIN dupCTE B
        ON A.Value = B.Value
       AND (   (B.StartAt > A.StartAt AND B.StartAt < DATEADD(SECOND, A.Duration, A.StartAt))
            OR (A.StartAt > B.StartAt AND A.StartAt < DATEADD(SECOND, B.Duration, B.StartAt)))
    WHERE A.isDup = 1
       OR B.ID IS NOT NULL
)
SELECT ID, Value, [Date], [Time], Duration, srcFile, isDup, dupFIle, isOverlap, overlapFiles
FROM overlapCTE;

DROP TABLE #TempTable;
当前输出
ID Value Date Time Duration srcFile isDup dupFIle isOverlap overlapFiles
1 One 2014-07-29 23:34:00 1710 sF1 0 NULL 1 sF1 - sF1 - sF1 - sF1 - sF1 - sF2 - sF1 - sF2 - sF2 - sF1
2 One 2014-07-30 00:00:10 1710 sF1 0 NULL 1 sF1 - sF1 - sF1 - sF1 - sF1 - sF2 - sF1 - sF2 - sF2 - sF1
3 One 2014-07-30 01:30:00 1710 sF1 0 NULL 1 sF1 - sF1 - sF1 - sF1 - sF1 - sF2 - sF1 - sF2 - sF2 - sF1
4 One 2014-07-30 01:54:00 1710 sF1 0 NULL 1 sF1 - sF1 - sF1 - sF1 - sF1 - sF2 - sF1 - sF2 - sF2 - sF1
5 One 2014-07-30 13:30:00 1710 sF1 0 NULL 1 sF1 - sF1 - sF1 - sF1 - sF1 - sF2 - sF1 - sF2 - sF2 - sF1
6 One 2014-07-30 13:57:00 1710 sF2 0 NULL 1 sF1 - sF1 - sF1 - sF1 - sF1 - sF2 - sF1 - sF2 - sF2 - sF1
7 One 2014-07-30 23:34:00 1710 sF1 0 NULL 1 sF1 - sF1 - sF1 - sF1 - sF1 - sF2 - sF1 - sF2 - sF2 - sF1
8 One 2014-07-31 00:00:10 1710 sF2 0 NULL 1 sF1 - sF1 - sF1 - sF1 - sF1 - sF2 - sF1 - sF2 - sF2 - sF1
9 One 2014-08-01 00:00:00 1710 sF2 1 sF2 - sF1 0 NULL
11 One 2014-08-01 00:00:00 1710 sF1 1 sF2 - sF1 0 NULL
期望的输出
ID Value Date Time Duration srcFile isDup dupFIle isOverLap overlapFile
1 One 2014-07-29 23:34:00 1710 sF1 0 NULL 1 sF1 - sF1
2 One 2014-07-30 00:00:10 1710 sF1 0 NULL 1 sF1 - sF1
3 One 2014-07-30 01:30:00 1710 sF1 0 NULL 1 sF1 - sF1
4 One 2014-07-30 01:54:00 1710 sF1 0 NULL 1 sF1 - sF1
5 One 2014-07-30 13:30:00 1710 sF1 0 NULL 1 sF1 - sF2
6 One 2014-07-30 13:57:00 1710 sF2 0 NULL 1 sF2 - sF1
7 One 2014-07-30 23:34:00 1710 sF1 0 NULL 1 sF1 - sF2
8 One 2014-07-31 00:00:10 1710 sF2 0 NULL 1 sF2 - sF1
9 One 2014-08-01 00:00:00 1710 sF2 1 sF2 - sF1 0 NULL
10 Two 2014-08-01 00:01:00 1710 sF2 0 NULL 0 NULL
11 One 2014-08-01 00:00:00 1710 sF1 1 sF1 - sF2 0 NULL
我不符合要求。任何帮助将不胜感激。
谢谢
更新: 添加了当前输出
更新2: 在 Dup CTE 中发现错误(使用 ID 而不是 Value)。 所需的输出仍然有待改进。
更新3: 有进展了,各位,我们已经非常接近了。现在重叠逻辑已经 "working." 一个主要问题是重叠文件列(overlapFiles)。它应该只列出相互重叠的记录的文件(现在,它列出了 overlapCTE 输出的所有文件,而不是只列出那些在 STUFF 查询中满足 WHERE 条件的文件)。另外,有没有办法把那条唯一的记录也列出来?
更新4:添加了更多记录以查看重复和重叠查询是否可以容纳不止两条记录。
我觉得我被吸引到在可能不需要的地方使用 CTE。我不知道你的用例,但这个 table 结构在我看来很奇怪;这是作业吗?无论如何,这应该可以解决问题:
-- Flags duplicate and overlapping records in #TempTable, one output row per record.
--   isDup / dupFile         : another record has the same Value and the same start date-time;
--                             dupFile lists the distinct srcFile values of those other records.
--   isOverlap / overlapFile : another record of the same Value starts strictly inside this
--                             record's [start, start + Duration seconds] window, or this record
--                             starts strictly inside the other's; overlapFile lists the distinct
--                             srcFile values of those other records.
-- The leading semicolon terminates any preceding statement, which WITH requires.
;WITH tFullCTE AS (
    -- Source rows plus the combined start date-time.
    SELECT
        TT.ID,
        TT.Value,
        TT.[Date],
        TT.[Time],
        CAST(CAST(TT.[Date] AS VARCHAR(10)) + ' ' + CAST(TT.[Time] AS VARCHAR(16)) AS DATETIME2) AS [DateTime],
        TT.Duration,
        TT.srcFile
    FROM #TempTable TT
)
, dupCTE AS (
    -- Ordered pairs (FirstID, SecondID) of distinct records sharing Value and start.
    SELECT
        main.ID AS FirstID,
        dups.ID AS SecondID
    FROM tFullCTE main
    INNER JOIN tFullCTE dups
        ON main.Value = dups.Value
       AND main.[DateTime] = dups.[DateTime]
       AND main.ID <> dups.ID
)
, overlapCTE AS (
    -- Ordered pairs of distinct records where one starts inside the other's window.
    SELECT
        main.ID AS FirstID,
        ovlp.ID AS SecondID
    FROM tFullCTE main
    INNER JOIN tFullCTE ovlp
        ON main.Value = ovlp.Value
       AND (   (ovlp.[DateTime] > main.[DateTime] AND ovlp.[DateTime] < DATEADD(SECOND, main.Duration, main.[DateTime]))
            OR (main.[DateTime] > ovlp.[DateTime] AND main.[DateTime] < DATEADD(SECOND, ovlp.Duration, ovlp.[DateTime])))
       AND main.ID <> ovlp.ID
)
SELECT
    main.ID,
    main.Value,
    main.[Date],
    main.[Time],
    main.[DateTime],
    main.Duration,
    main.srcFile,
    CASE WHEN EXISTS (SELECT 1 FROM dupCTE dup WHERE dup.FirstID = main.ID) THEN 1 ELSE 0 END AS isDup,
    -- GROUP BY de-duplicates the file names and ORDER BY fixes their order; TYPE + .value()
    -- returns the text un-escaped, so characters such as '&' or '<' in srcFile are not
    -- turned into XML entities. STUFF strips the leading ' - '; no partner rows yield NULL.
    STUFF((SELECT ' - ' + dupDetails.srcFile
           FROM dupCTE dup
           INNER JOIN tFullCTE dupDetails
               ON dup.SecondID = dupDetails.ID
           WHERE dup.FirstID = main.ID
           GROUP BY dupDetails.srcFile
           ORDER BY dupDetails.srcFile
           FOR XML PATH(''), TYPE).value('.', 'VARCHAR(MAX)'), 1, 3, '') AS dupFile,
    CASE WHEN EXISTS (SELECT 1 FROM overlapCTE ovlp WHERE ovlp.FirstID = main.ID) THEN 1 ELSE 0 END AS isOverlap,
    STUFF((SELECT ' - ' + ovlpDetails.srcFile
           FROM overlapCTE ovlp
           INNER JOIN tFullCTE ovlpDetails
               ON ovlp.SecondID = ovlpDetails.ID
           WHERE ovlp.FirstID = main.ID
           GROUP BY ovlpDetails.srcFile
           ORDER BY ovlpDetails.srcFile
           FOR XML PATH(''), TYPE).value('.', 'VARCHAR(MAX)'), 1, 3, '') AS overlapFile
FROM tFullCTE main
ORDER BY main.ID;
-- Flags duplicate and overlapping records in #TempTable, one output row per record.
--   isDup / dupFile         : more than one record shares this Value and start date-time;
--                             dupFile lists the srcFile of every record in that group.
--   isOverlap / overlapFile : another record of the same Value starts strictly inside this
--                             record's window, or this record starts strictly inside the
--                             other's; overlapFile is this record's srcFile followed by the
--                             srcFile of each overlapping record.
-- The flags are computed without a GROUP BY so that the window COUNT(*) counts source rows.
;WITH ModifiedData AS (
    -- [DateTime] is the start (whole seconds); DateTimeWithDuration is start + Duration seconds.
    SELECT
        ID,
        Value,
        [Date],
        [Time],
        Duration,
        srcFile,
        [DateTime] = DATEADD(S, DATEDIFF(S, 0, [Time]), CAST([Date] AS DATETIME)),
        DateTimeWithDuration = DATEADD(S, Duration, DATEADD(S, DATEDIFF(S, 0, [Time]), CAST([Date] AS DATETIME)))
    FROM #TempTable
)
, Flags AS (
    SELECT
        m.ID,
        m.Value,
        m.[Date],
        m.[Time],
        m.Duration,
        m.srcFile,
        m.[DateTime],
        m.DateTimeWithDuration,
        isDup = CASE
                    WHEN COUNT(*) OVER (PARTITION BY m.Value, m.[DateTime]) > 1 THEN 1
                    ELSE 0
                END,
        -- EXISTS keeps one row per record no matter how many partners overlap it.
        isOverlap = CASE
                        WHEN EXISTS (SELECT 1
                                     FROM ModifiedData m2
                                     WHERE m2.Value = m.Value
                                       AND m2.ID <> m.ID
                                       AND (   (m2.[DateTime] > m.[DateTime] AND m2.[DateTime] < m.DateTimeWithDuration)
                                            OR (m.[DateTime] > m2.[DateTime] AND m.[DateTime] < m2.DateTimeWithDuration)))
                            THEN 1
                        ELSE 0
                    END
    FROM ModifiedData m
)
SELECT
    f.ID,
    f.Value,
    f.[Date],
    f.[Time],
    f.Duration,
    f.srcFile,
    f.isDup,
    -- TYPE + .value() returns the concatenation un-escaped ('&', '<', '>' stay literal);
    -- STUFF strips the leading ' - '.
    dupFile = CASE
                  WHEN f.isDup = 1 THEN
                      STUFF((SELECT ' - ' + d.srcFile
                             FROM ModifiedData d
                             WHERE d.Value = f.Value
                               AND d.[DateTime] = f.[DateTime]
                             ORDER BY d.ID
                             FOR XML PATH(''), TYPE).value('.', 'VARCHAR(MAX)'), 1, 3, '')
                  ELSE NULL
              END,
    f.isOverlap,
    -- Own file first, then each overlapping record's file, each preceded by ' - '.
    overlapFile = CASE
                      WHEN f.isOverlap = 1 THEN
                          f.srcFile
                          + (SELECT ' - ' + o.srcFile
                             FROM ModifiedData o
                             WHERE o.Value = f.Value
                               AND o.ID <> f.ID
                               AND (   (o.[DateTime] > f.[DateTime] AND o.[DateTime] < f.DateTimeWithDuration)
                                    OR (f.[DateTime] > o.[DateTime] AND f.[DateTime] < o.DateTimeWithDuration))
                             ORDER BY o.[DateTime], o.ID
                             FOR XML PATH(''), TYPE).value('.', 'VARCHAR(MAX)')
                      ELSE NULL
                  END
FROM Flags f
ORDER BY f.ID;
这应该能准确地告诉您您想要什么:
-- Flags duplicate and overlapping records in #TempTable, one output row per record.
--   isDup / dupFile         : comma-separated 'own - other' srcFile pairs, one per other record
--                             with the same Value and the same start date-time.
--   isOverLap / OverlapFile : comma-separated 'own - other' srcFile pairs, one per record of the
--                             same Value whose [start, start + Duration seconds) window overlaps.
-- Both flags are returned as integers (0/1).
;WITH Stamped AS (
    -- Start date-time of each record as a DATETIME ([Date] and [Time] added together).
    SELECT
        ID,
        Value,
        Duration,
        srcFile,
        CAST([Date] AS DATETIME) + CAST([Time] AS DATETIME) AS StartAt
    FROM #TempTable
)
, CTE AS (
    -- Duplicate pairs: ID1 is the row being described, over1 is 'own - other'.
    SELECT
        T.ID AS ID1,
        T.srcFile + ' - ' + c.srcFile AS over1
    FROM Stamped T
    INNER JOIN Stamped c
        ON T.Value = c.Value
       AND c.ID <> T.ID
       AND c.StartAt = T.StartAt
)
, CTE2 AS (
    -- Overlap pairs, stored once per pair: c (ID2) starts first and is still running when
    -- T (ID1) starts. over1 reads from ID1's side, over2 from ID2's side.
    SELECT
        T.ID AS ID1,
        c.ID AS ID2,
        T.srcFile + ' - ' + c.srcFile AS over1,
        c.srcFile + ' - ' + T.srcFile AS over2
    FROM Stamped T
    INNER JOIN Stamped c
        ON T.Value = c.Value
       AND c.ID <> T.ID
    WHERE DATEADD(SECOND, c.Duration, c.StartAt) > T.StartAt
      AND c.StartAt < T.StartAt
)
SELECT
    T.ID,
    T.Value,
    T.[Date],
    T.[Time],
    T.Duration,
    T.srcFile,
    CASE WHEN EXISTS (SELECT 1 FROM CTE d WHERE d.ID1 = T.ID) THEN 1 ELSE 0 END AS isDup,
    -- TYPE + .value() keeps '&', '<', '>' literal and imposes no length cap on the list;
    -- STUFF strips the leading comma.
    STUFF((SELECT ',' + d.over1
           FROM CTE d
           WHERE d.ID1 = T.ID
           FOR XML PATH(''), TYPE).value('.', 'VARCHAR(MAX)'), 1, 1, '') AS dupFile,
    CASE WHEN EXISTS (SELECT 1 FROM CTE2 o WHERE o.ID1 = T.ID OR o.ID2 = T.ID) THEN 1 ELSE 0 END AS isOverLap,
    -- Pick the pair text written from this row's side so its own file always comes first.
    STUFF((SELECT ',' + CASE WHEN T.ID = o.ID1 THEN o.over1 ELSE o.over2 END
           FROM CTE2 o
           WHERE o.ID1 = T.ID
              OR o.ID2 = T.ID
           FOR XML PATH(''), TYPE).value('.', 'VARCHAR(MAX)'), 1, 1, '') AS OverlapFile
FROM #TempTable T
ORDER BY T.ID;
代码符合您的要求(希望如此)。 我通过添加更多的重叠和重复来测试它,它不仅适用于 2 个重复的重叠文件(例如 srcFile='sF3'),而且具有以下观察结果:
- DupFile - 始终按文件名排序的列表
- overlapfile - 如果只有一个文件,则没有一对 "sF1 - sF1",只有 "sF1" - 我不确定这是否是生产目的所必需的,但可以进行调整(在这种情况下还没有)
-- Flags duplicate and overlapping records in #TempTable, one output row per record.
--   isDup / dupFile          : two or more records share Value, [Date] and [Time]; dupFile lists
--                              the srcFile of every record in the group, sorted by file name.
--   isoverlap / overlapfiles : a non-duplicate record whose start lies inside another record's
--                              [start, start + Duration seconds) window, or vice versa;
--                              overlapfiles lists the distinct srcFile values of the record
--                              itself and of every record it overlaps, sorted by file name.
-- Records that are duplicates are never flagged as overlaps.
;WITH src_rows AS (
    -- Each record with its start and end (start + Duration seconds) as DATETIME2.
    SELECT
        [ID],
        [Value],
        [Date],
        [Time],
        [Duration],
        [srcFile],
        CAST(CAST([Date] AS VARCHAR(10)) + ' ' + CAST([Time] AS VARCHAR(16)) AS DATETIME2) AS start_at,
        DATEADD(SECOND, [Duration],
                CAST(CAST([Date] AS VARCHAR(10)) + ' ' + CAST([Time] AS VARCHAR(16)) AS DATETIME2)) AS end_at
    FROM #TempTable
)
, dups AS (
    -- Keys that occur more than once.
    SELECT [Value], [Date], [Time]
    FROM src_rows
    GROUP BY [Value], [Date], [Time]
    HAVING COUNT(*) > 1
)
, dups_files AS (
    -- File list per duplicate key. TYPE + .value() keeps '&', '<', '>' literal;
    -- STUFF strips the leading ' - '.
    SELECT
        g.[Value],
        g.[Date],
        g.[Time],
        STUFF((SELECT ' - ' + m.[srcFile]
               FROM src_rows AS m
               WHERE m.[Value] = g.[Value]
                 AND m.[Date] = g.[Date]
                 AND m.[Time] = g.[Time]
               ORDER BY m.[srcFile], m.[ID]
               FOR XML PATH(''), TYPE).value('.', 'VARCHAR(MAX)'), 1, 3, '') AS dupFile
    FROM dups AS g
)
, duplicities AS (
    -- Every record that belongs to a duplicate key, with that key's file list.
    SELECT a.[ID], d.dupFile
    FROM src_rows AS a
    INNER JOIN dups_files AS d
        ON a.[Value] = d.[Value]
       AND a.[Date] = d.[Date]
       AND a.[Time] = d.[Time]
)
, overlaps_pairs AS (
    -- (id_a, id_b): non-duplicate record id_a overlaps record id_b. Each side's window uses
    -- that side's own Duration; ranges are half-open so a record that starts exactly when
    -- another ends does not count as an overlap.
    SELECT
        f.[ID] AS id_a,
        d.[ID] AS id_b,
        d.[srcFile] AS srcfile_b
    FROM src_rows AS f
    INNER JOIN src_rows AS d
        ON f.[ID] <> d.[ID]
       AND f.[Value] = d.[Value]
       AND (   (d.start_at >= f.start_at AND d.start_at < f.end_at)
            OR (f.start_at >= d.start_at AND f.start_at < d.end_at))
    WHERE NOT EXISTS (SELECT 1
                      FROM duplicities AS du
                      WHERE du.[ID] = f.[ID])
)
, overlaps AS (
    -- Exactly one row per overlapping record. UNION removes repeated file names.
    SELECT
        p.id_a AS [ID],
        STUFF((SELECT ' - ' + x.[srcFile]
               FROM (SELECT self.[srcFile]
                     FROM src_rows AS self
                     WHERE self.[ID] = p.id_a
                     UNION
                     SELECT q.srcfile_b
                     FROM overlaps_pairs AS q
                     WHERE q.id_a = p.id_a) AS x
               ORDER BY x.[srcFile]
               FOR XML PATH(''), TYPE).value('.', 'VARCHAR(MAX)'), 1, 3, '') AS overlapfiles
    FROM (SELECT DISTINCT id_a FROM overlaps_pairs) AS p
)
SELECT
    a.[ID],
    a.[Value],
    a.[Date],
    a.[Time],
    a.[Duration],
    a.[srcFile],
    CASE WHEN d.[ID] IS NOT NULL THEN 1 ELSE 0 END AS isDup,
    d.dupFile,
    CASE WHEN o.[ID] IS NOT NULL THEN 1 ELSE 0 END AS isoverlap,
    o.overlapfiles
FROM src_rows AS a
LEFT JOIN overlaps AS o
    ON a.[ID] = o.[ID]
LEFT JOIN duplicities AS d
    ON a.[ID] = d.[ID]
ORDER BY a.[ID];