按时间戳合并事实记录
Consolidating fact records by timestamp
我使用的是 Microsoft SQL Server 数据库。我想合并员工记录的开始时间和结束时间。例如,给定下面这张简单的表:
-- Sample table used by the examples below: an employee key, an effective
-- date range (start/end), and the account name for that range.
CREATE TABLE dbo.foo
(
    employee_key         BIGINT,
    effective_start_date DATE,
    effective_end_date   DATE,
    account_name         VARCHAR(100)
);
-- Seed four back-to-back periods for employee 1:
-- Google, Apple, then two adjacent Google rows (the last one open-ended at 9999-12-31).
INSERT INTO foo
    (employee_key, effective_start_date, effective_end_date, account_name)
VALUES
    (1, '2017-01-01', '2017-01-31', 'Google'),
    (1, '2017-02-01', '2017-02-28', 'Apple'),
    (1, '2017-03-01', '2017-03-31', 'Google'),
    (1, '2017-04-01', '9999-12-31', 'Google');
该员工多次更换过帐户。我希望保留 2 月份换到 Apple、之后又换回 Google 的变更记录。但是,我不希望最后出现 2 条连续的 Google 记录;相反,我希望 Google 的任期合并为一条记录,从 2017-03-01 持续到 9999-12-31。
顺便说一句,这只是一个例子。也就是说,这种被拆分("broken")的记录并不总是出现在员工任期的末尾。
预期结果:
employee_key | effective_start_date | effective_end_date | account_name
1 | 2017-01-01 | 2017-01-31 | Google
1 | 2017-02-01 | 2017-02-28 | Apple
1 | 2017-03-01 | 9999-12-31 | Google
你可以试试这个SQL
-- Consolidate consecutive rows that share employee_key and account_name into a
-- single period per run ("gaps and islands").
--
-- Steps:
--   1. ordered : number the rows of dbo.foo by employee, start date, end date.
--   2. flagged : mark a row with 1 when it starts a new run, i.e. when the
--                previous row (by RN) has a different employee_key or
--                account_name, or when there is no previous row.
--   3. #tmp    : a running total of those flags is the group number (GroupBy).
--   4. Output  : one row per group with the earliest start and latest end date.
--
-- The group number is computed with LAG/SUM window functions instead of an
-- UPDATE that carries state in variables from row to row: the order in which
-- UPDATE visits rows is not guaranteed, so that technique can assign wrong
-- groups. The source is dbo.foo, the table created by the sample DDL.

-- Table to group by
;WITH ordered AS (
    SELECT ROW_NUMBER() OVER (
               ORDER BY employee_key, effective_start_date, effective_end_date
           ) AS RN,
           employee_key,
           effective_start_date,
           effective_end_date,
           account_name
    FROM dbo.foo
),
flagged AS (
    SELECT RN,
           employee_key,
           effective_start_date,
           effective_end_date,
           account_name,
           -- LAG returns NULL on the first row, so the comparison is UNKNOWN
           -- and the ELSE branch opens the first group.
           CASE
               WHEN employee_key = LAG(employee_key) OVER (ORDER BY RN)
                    AND account_name = LAG(account_name) OVER (ORDER BY RN)
                   THEN 0
               ELSE 1
           END AS is_new_group
    FROM ordered
)
SELECT RN,
       employee_key,
       effective_start_date,
       effective_end_date,
       account_name,
       -- ROWS framing makes this a strict row-by-row running total.
       CAST(SUM(is_new_group) OVER (ORDER BY RN ROWS UNBOUNDED PRECEDING) AS BIGINT) AS GroupBy
INTO #tmp
FROM flagged;

--- Output
SELECT employee_key,
       MIN(effective_start_date) AS effective_start_date,
       MAX(effective_end_date)   AS effective_end_date,
       account_name
FROM #tmp
GROUP BY GroupBy,
         employee_key,
         account_name
-- Explicit ordering so the result is deterministic.
ORDER BY employee_key,
         effective_start_date;
结果是
employee_key | effective_start_date|effective_end_date | account_name
-------------| --------------------|------------------ | -------------
1 | 2017-01-01 |2017-01-31 | Google
1 | 2017-02-01 |2017-02-28 | Apple
1 | 2017-03-01 |9999-12-31 | Google
您也可以使用 row_number 得到同样的结果……我在这里用了两层子查询,但不用子查询也可以解决:
-- Merge consecutive rows of the same account per employee.
--   flagged  : AccN = 1 when account_name differs from the previous row of the
--              same employee (ordered by effective_start_date), else 0.
--   bucketed : Bucket = running sum of AccN per employee, so each run of the
--              same account gets its own bucket number.
-- The final SELECT keeps one row per (employee_key, Bucket): ROW_NUMBER() is 1
-- for exactly one row in each bucket, and TOP (1) WITH TIES returns every row
-- tied on that lowest ORDER BY value.
;WITH flagged AS (
    SELECT employee_key,
           effective_start_date,
           effective_end_date,
           account_name,
           CASE
               WHEN account_name <> LAG(account_name) OVER (
                        PARTITION BY employee_key
                        ORDER BY effective_start_date
                    )
                   THEN 1
               ELSE 0
           END AS AccN
    FROM foo
),
bucketed AS (
    SELECT employee_key,
           effective_start_date,
           effective_end_date,
           account_name,
           SUM(AccN) OVER (
               PARTITION BY employee_key
               ORDER BY effective_start_date
           ) AS Bucket
    FROM flagged
)
SELECT TOP (1) WITH TIES
       employee_key,
       MIN(effective_start_date) OVER (PARTITION BY employee_key, Bucket) AS [effective_start_date],
       MAX(effective_end_date)   OVER (PARTITION BY employee_key, Bucket) AS [effective_end_date],
       account_name
FROM bucketed
ORDER BY ROW_NUMBER() OVER (
             PARTITION BY employee_key, Bucket
             ORDER BY effective_start_date
         )
输出如下:
+--------------+----------------------+--------------------+--------------+
| employee_key | Effective_start_Date | Effective_End_date | account_name |
+--------------+----------------------+--------------------+--------------+
| 1 | 2017-01-01 | 2017-01-31 | Google |
| 1 | 2017-02-01 | 2017-02-28 | Apple |
| 1 | 2017-03-01 | 9999-12-31 | Google |
+--------------+----------------------+--------------------+--------------+
我使用的是 Microsoft SQL Server 数据库。我想合并员工记录的开始时间和结束时间。例如,给定下面这张简单的表:
-- Sample table used by the examples below: an employee key, an effective
-- date range (start/end), and the account name for that range.
CREATE TABLE dbo.foo
(
    employee_key         BIGINT,
    effective_start_date DATE,
    effective_end_date   DATE,
    account_name         VARCHAR(100)
);
-- Seed four back-to-back periods for employee 1:
-- Google, Apple, then two adjacent Google rows (the last one open-ended at 9999-12-31).
INSERT INTO foo
    (employee_key, effective_start_date, effective_end_date, account_name)
VALUES
    (1, '2017-01-01', '2017-01-31', 'Google'),
    (1, '2017-02-01', '2017-02-28', 'Apple'),
    (1, '2017-03-01', '2017-03-31', 'Google'),
    (1, '2017-04-01', '9999-12-31', 'Google');
该员工多次更换过帐户。我希望保留 2 月份换到 Apple、之后又换回 Google 的变更记录。但是,我不希望最后出现 2 条连续的 Google 记录;相反,我希望 Google 的任期合并为一条记录,从 2017-03-01 持续到 9999-12-31。
顺便说一句,这只是一个例子。也就是说,这种被拆分("broken")的记录并不总是出现在员工任期的末尾。
预期结果:
employee_key | effective_start_date | effective_end_date | account_name
1 | 2017-01-01 | 2017-01-31 | Google
1 | 2017-02-01 | 2017-02-28 | Apple
1 | 2017-03-01 | 9999-12-31 | Google
你可以试试这个SQL
-- Consolidate consecutive rows that share employee_key and account_name into a
-- single period per run ("gaps and islands").
--
-- Steps:
--   1. ordered : number the rows of dbo.foo by employee, start date, end date.
--   2. flagged : mark a row with 1 when it starts a new run, i.e. when the
--                previous row (by RN) has a different employee_key or
--                account_name, or when there is no previous row.
--   3. #tmp    : a running total of those flags is the group number (GroupBy).
--   4. Output  : one row per group with the earliest start and latest end date.
--
-- The group number is computed with LAG/SUM window functions instead of an
-- UPDATE that carries state in variables from row to row: the order in which
-- UPDATE visits rows is not guaranteed, so that technique can assign wrong
-- groups. The source is dbo.foo, the table created by the sample DDL.

-- Table to group by
;WITH ordered AS (
    SELECT ROW_NUMBER() OVER (
               ORDER BY employee_key, effective_start_date, effective_end_date
           ) AS RN,
           employee_key,
           effective_start_date,
           effective_end_date,
           account_name
    FROM dbo.foo
),
flagged AS (
    SELECT RN,
           employee_key,
           effective_start_date,
           effective_end_date,
           account_name,
           -- LAG returns NULL on the first row, so the comparison is UNKNOWN
           -- and the ELSE branch opens the first group.
           CASE
               WHEN employee_key = LAG(employee_key) OVER (ORDER BY RN)
                    AND account_name = LAG(account_name) OVER (ORDER BY RN)
                   THEN 0
               ELSE 1
           END AS is_new_group
    FROM ordered
)
SELECT RN,
       employee_key,
       effective_start_date,
       effective_end_date,
       account_name,
       -- ROWS framing makes this a strict row-by-row running total.
       CAST(SUM(is_new_group) OVER (ORDER BY RN ROWS UNBOUNDED PRECEDING) AS BIGINT) AS GroupBy
INTO #tmp
FROM flagged;

--- Output
SELECT employee_key,
       MIN(effective_start_date) AS effective_start_date,
       MAX(effective_end_date)   AS effective_end_date,
       account_name
FROM #tmp
GROUP BY GroupBy,
         employee_key,
         account_name
-- Explicit ordering so the result is deterministic.
ORDER BY employee_key,
         effective_start_date;
结果是
employee_key | effective_start_date|effective_end_date | account_name
-------------| --------------------|------------------ | -------------
1 | 2017-01-01 |2017-01-31 | Google
1 | 2017-02-01 |2017-02-28 | Apple
1 | 2017-03-01 |9999-12-31 | Google
您也可以使用 row_number 得到同样的结果……我在这里用了两层子查询,但不用子查询也可以解决:
-- Merge consecutive rows of the same account per employee.
--   flagged  : AccN = 1 when account_name differs from the previous row of the
--              same employee (ordered by effective_start_date), else 0.
--   bucketed : Bucket = running sum of AccN per employee, so each run of the
--              same account gets its own bucket number.
-- The final SELECT keeps one row per (employee_key, Bucket): ROW_NUMBER() is 1
-- for exactly one row in each bucket, and TOP (1) WITH TIES returns every row
-- tied on that lowest ORDER BY value.
;WITH flagged AS (
    SELECT employee_key,
           effective_start_date,
           effective_end_date,
           account_name,
           CASE
               WHEN account_name <> LAG(account_name) OVER (
                        PARTITION BY employee_key
                        ORDER BY effective_start_date
                    )
                   THEN 1
               ELSE 0
           END AS AccN
    FROM foo
),
bucketed AS (
    SELECT employee_key,
           effective_start_date,
           effective_end_date,
           account_name,
           SUM(AccN) OVER (
               PARTITION BY employee_key
               ORDER BY effective_start_date
           ) AS Bucket
    FROM flagged
)
SELECT TOP (1) WITH TIES
       employee_key,
       MIN(effective_start_date) OVER (PARTITION BY employee_key, Bucket) AS [effective_start_date],
       MAX(effective_end_date)   OVER (PARTITION BY employee_key, Bucket) AS [effective_end_date],
       account_name
FROM bucketed
ORDER BY ROW_NUMBER() OVER (
             PARTITION BY employee_key, Bucket
             ORDER BY effective_start_date
         )
输出如下:
+--------------+----------------------+--------------------+--------------+
| employee_key | Effective_start_Date | Effective_End_date | account_name |
+--------------+----------------------+--------------------+--------------+
| 1 | 2017-01-01 | 2017-01-31 | Google |
| 1 | 2017-02-01 | 2017-02-28 | Apple |
| 1 | 2017-03-01 | 9999-12-31 | Google |
+--------------+----------------------+--------------------+--------------+