在 SSIS / SQL Server 中识别 SCD 类型 2 表里发生更改的列
Identify the Column changed in SCD Type 2 in SSIS SQL server
我正在使用 SSIS 包跟踪我的 SCD table 中的数据,如下图所示。
我需要添加一个新列“ColumnUpdated”（已更新列，如上所示），用来表示第 N 次与第 N-1 次事务之间有哪些列被更新。这可以用游标（Cursor）来实现，但我想找一种更高效的做法。能否借助 SCD 组件或 SQL Server 的其他内置功能来完成？
添加脚本:
-- SCD Type 2 history table used as the test fixture: one row per version of
-- an employee (empid). In the sample rows below, the latest version of each
-- employee has Enddate = NULL.
-- ColumnUpdated is the new column that should list which columns changed
-- between a version and the one before it; it is loaded as '' here.
create table SCDtest
(
    id            int,
    empid         int,
    Deptid        varchar(10),
    Ename         varchar(50),
    DeptName      varchar(50),
    city          varchar(50),
    startdate     datetime,
    Enddate       datetime,
    ColumnUpdated varchar(500)
);

-- The column list is explicit so the load does not depend on column order.
-- Dates are written as unseparated yyyymmdd, which SQL Server interprets the
-- same way under every language / DATEFORMAT setting (m/d/yyyy literals fail
-- or are misread when the session uses dmy).
insert into SCDtest (id, empid, Deptid, Ename, DeptName, city, startdate, Enddate, ColumnUpdated)
values (1, 1, 'D1', 'Mike', 'Account',   'Atlanta',     '20200731', '20200803', ''),
       (2, 2, 'D2', 'Roy',  'IT',        'New York',    '20200731', '20200805', ''),
       (3, 1, 'D1', 'Ross', 'Account',   'Atlanta',     '20200804', '20200807', ''),
       (4, 2, 'D2', 'Roy',  'IT',        'Los angeles', '20200805', null,       ''),
       (5, 1, 'D1', 'John', 'Marketing', 'Boston',      '20200808', null,       '');
谢谢
老实说，我不太明白您为什么需要这个功能：如果确实需要查看变更，直接对比相邻的两行就能很容易地看出来。我从来没有用到过 ColumnUpdated 这一类的值，而且我认为，为生成该值所付出的处理开销以及保存它所占用的存储空间并不划算。
话虽如此，下面是一种根据给定测试数据计算出所需输出的方法。理想情况下，您应该把这一步放进 ETL 过程中以更高效的方式完成，即在每一行进入时就更新它，而不是一次性更新所有行。不过这显然需要了解您的 ETL 的具体情况，而您的问题中并没有提供这些信息：
查询
-- Sample SCD Type 2 history held in a table variable: one row per version of
-- an employee (empid).
declare @SCDtest table
(
    id        int,
    empid     int,
    Deptid    varchar(10),
    Ename     varchar(50),
    DeptName  varchar(50),
    city      varchar(50),
    startdate datetime,
    Enddate   datetime
);

-- Dates are written as unseparated yyyymmdd so they parse identically under
-- every language / DATEFORMAT setting.
insert into @SCDtest (id, empid, Deptid, Ename, DeptName, city, startdate, Enddate)
values (1, 1, 'D1', 'Mike', 'Account',   'Atlanta',     '20200731', '20200803'),
       (2, 2, 'D2', 'Roy',  'IT',        'New York',    '20200731', '20200805'),
       (3, 1, 'D1', 'Ross', 'Account',   'Atlanta',     '20200804', '20200807'),
       (4, 2, 'D2', 'Roy',  'IT',        'Los angeles', '20200805', null),
       (5, 1, 'D1', 'John', 'Marketing', 'Boston',      '20200808', null);

-- versions: every row plus the id of the row that precedes it for the same
-- employee. "Precedes" is decided by id order within empid.
-- NOTE(review): this assumes ids are assigned in version order - confirm
-- against the real table, otherwise order the window by startdate.
with versions as
(
    select id
          ,empid
          ,Deptid
          ,Ename
          ,DeptName
          ,city
          ,startdate
          ,Enddate
          ,lag(id, 1) over (partition by empid order by id) as prev_id
    from @SCDtest
)
-- Returns every version together with ColumnUpdated: a comma-separated list of
-- the tracked columns whose value differs from the previous version.
--
-- Each comparison uses EXISTS (select current EXCEPT select previous) instead
-- of <>: EXCEPT treats two NULLs as equal and NULL vs. a value as different,
-- so a change to or from NULL is reported, which <> would silently miss.
-- The "prev.id is not null" guard keeps the first version of each employee
-- (no previous row) from being reported as "everything changed".
--
-- concat() skips the NULLs produced by non-matching CASEs; stuff(..., 1, 2, '')
-- strips the leading ', ' and yields NULL when the list is empty.
select cur.id
      ,cur.empid
      ,cur.Deptid
      ,cur.Ename
      ,cur.DeptName
      ,cur.city
      ,cur.startdate
      ,cur.Enddate
      ,stuff(concat(case when prev.id is not null
                          and exists (select cur.Deptid   except select prev.Deptid)
                         then ', Deptid' end
                   ,case when prev.id is not null
                          and exists (select cur.Ename    except select prev.Ename)
                         then ', Ename' end
                   ,case when prev.id is not null
                          and exists (select cur.DeptName except select prev.DeptName)
                         then ', DeptName' end
                   ,case when prev.id is not null
                          and exists (select cur.city     except select prev.city)
                         then ', city' end
                   )
            ,1,2,''
            ) as ColumnUpdated
from versions as cur
left join @SCDtest as prev
    on cur.prev_id = prev.id
order by cur.empid
        ,cur.startdate;
输出
+----+-------+--------+-------+-----------+-------------+-------------------------+-------------------------+-----------------------+
| id | empid | Deptid | Ename | DeptName | city | startdate | Enddate | ColumnUpdated |
+----+-------+--------+-------+-----------+-------------+-------------------------+-------------------------+-----------------------+
| 1 | 1 | D1 | Mike | Account | Atlanta | 2020-07-31 00:00:00.000 | 2020-08-03 00:00:00.000 | NULL |
| 3 | 1 | D1 | Ross | Account | Atlanta | 2020-08-04 00:00:00.000 | 2020-08-07 00:00:00.000 | Ename |
| 5 | 1 | D1 | John | Marketing | Boston | 2020-08-08 00:00:00.000 | NULL | Ename, DeptName, city |
| 2 | 2 | D2 | Roy | IT | New York | 2020-07-31 00:00:00.000 | 2020-08-05 00:00:00.000 | NULL |
| 4 | 2 | D2 | Roy | IT | Los angeles | 2020-08-05 00:00:00.000 | NULL | city |
+----+-------+--------+-------+-----------+-------------+-------------------------+-------------------------+-----------------------+
我正在使用 SSIS 包跟踪我的 SCD table 中的数据,如下图所示。
我需要添加一个新列“ColumnUpdated”（已更新列，如上所示），用来表示第 N 次与第 N-1 次事务之间有哪些列被更新。这可以用游标（Cursor）来实现，但我想找一种更高效的做法。能否借助 SCD 组件或 SQL Server 的其他内置功能来完成？
添加脚本:
-- SCD Type 2 history table used as the test fixture: one row per version of
-- an employee (empid). In the sample rows below, the latest version of each
-- employee has Enddate = NULL.
-- ColumnUpdated is the new column that should list which columns changed
-- between a version and the one before it; it is loaded as '' here.
create table SCDtest
(
    id            int,
    empid         int,
    Deptid        varchar(10),
    Ename         varchar(50),
    DeptName      varchar(50),
    city          varchar(50),
    startdate     datetime,
    Enddate       datetime,
    ColumnUpdated varchar(500)
);

-- The column list is explicit so the load does not depend on column order.
-- Dates are written as unseparated yyyymmdd, which SQL Server interprets the
-- same way under every language / DATEFORMAT setting (m/d/yyyy literals fail
-- or are misread when the session uses dmy).
insert into SCDtest (id, empid, Deptid, Ename, DeptName, city, startdate, Enddate, ColumnUpdated)
values (1, 1, 'D1', 'Mike', 'Account',   'Atlanta',     '20200731', '20200803', ''),
       (2, 2, 'D2', 'Roy',  'IT',        'New York',    '20200731', '20200805', ''),
       (3, 1, 'D1', 'Ross', 'Account',   'Atlanta',     '20200804', '20200807', ''),
       (4, 2, 'D2', 'Roy',  'IT',        'Los angeles', '20200805', null,       ''),
       (5, 1, 'D1', 'John', 'Marketing', 'Boston',      '20200808', null,       '');
谢谢
老实说，我不太明白您为什么需要这个功能：如果确实需要查看变更，直接对比相邻的两行就能很容易地看出来。我从来没有用到过 ColumnUpdated 这一类的值，而且我认为，为生成该值所付出的处理开销以及保存它所占用的存储空间并不划算。
话虽如此，下面是一种根据给定测试数据计算出所需输出的方法。理想情况下，您应该把这一步放进 ETL 过程中以更高效的方式完成，即在每一行进入时就更新它，而不是一次性更新所有行。不过这显然需要了解您的 ETL 的具体情况，而您的问题中并没有提供这些信息：
查询
-- Sample SCD Type 2 history held in a table variable: one row per version of
-- an employee (empid).
declare @SCDtest table
(
    id        int,
    empid     int,
    Deptid    varchar(10),
    Ename     varchar(50),
    DeptName  varchar(50),
    city      varchar(50),
    startdate datetime,
    Enddate   datetime
);

-- Dates are written as unseparated yyyymmdd so they parse identically under
-- every language / DATEFORMAT setting.
insert into @SCDtest (id, empid, Deptid, Ename, DeptName, city, startdate, Enddate)
values (1, 1, 'D1', 'Mike', 'Account',   'Atlanta',     '20200731', '20200803'),
       (2, 2, 'D2', 'Roy',  'IT',        'New York',    '20200731', '20200805'),
       (3, 1, 'D1', 'Ross', 'Account',   'Atlanta',     '20200804', '20200807'),
       (4, 2, 'D2', 'Roy',  'IT',        'Los angeles', '20200805', null),
       (5, 1, 'D1', 'John', 'Marketing', 'Boston',      '20200808', null);

-- versions: every row plus the id of the row that precedes it for the same
-- employee. "Precedes" is decided by id order within empid.
-- NOTE(review): this assumes ids are assigned in version order - confirm
-- against the real table, otherwise order the window by startdate.
with versions as
(
    select id
          ,empid
          ,Deptid
          ,Ename
          ,DeptName
          ,city
          ,startdate
          ,Enddate
          ,lag(id, 1) over (partition by empid order by id) as prev_id
    from @SCDtest
)
-- Returns every version together with ColumnUpdated: a comma-separated list of
-- the tracked columns whose value differs from the previous version.
--
-- Each comparison uses EXISTS (select current EXCEPT select previous) instead
-- of <>: EXCEPT treats two NULLs as equal and NULL vs. a value as different,
-- so a change to or from NULL is reported, which <> would silently miss.
-- The "prev.id is not null" guard keeps the first version of each employee
-- (no previous row) from being reported as "everything changed".
--
-- concat() skips the NULLs produced by non-matching CASEs; stuff(..., 1, 2, '')
-- strips the leading ', ' and yields NULL when the list is empty.
select cur.id
      ,cur.empid
      ,cur.Deptid
      ,cur.Ename
      ,cur.DeptName
      ,cur.city
      ,cur.startdate
      ,cur.Enddate
      ,stuff(concat(case when prev.id is not null
                          and exists (select cur.Deptid   except select prev.Deptid)
                         then ', Deptid' end
                   ,case when prev.id is not null
                          and exists (select cur.Ename    except select prev.Ename)
                         then ', Ename' end
                   ,case when prev.id is not null
                          and exists (select cur.DeptName except select prev.DeptName)
                         then ', DeptName' end
                   ,case when prev.id is not null
                          and exists (select cur.city     except select prev.city)
                         then ', city' end
                   )
            ,1,2,''
            ) as ColumnUpdated
from versions as cur
left join @SCDtest as prev
    on cur.prev_id = prev.id
order by cur.empid
        ,cur.startdate;
输出
+----+-------+--------+-------+-----------+-------------+-------------------------+-------------------------+-----------------------+
| id | empid | Deptid | Ename | DeptName | city | startdate | Enddate | ColumnUpdated |
+----+-------+--------+-------+-----------+-------------+-------------------------+-------------------------+-----------------------+
| 1 | 1 | D1 | Mike | Account | Atlanta | 2020-07-31 00:00:00.000 | 2020-08-03 00:00:00.000 | NULL |
| 3 | 1 | D1 | Ross | Account | Atlanta | 2020-08-04 00:00:00.000 | 2020-08-07 00:00:00.000 | Ename |
| 5 | 1 | D1 | John | Marketing | Boston | 2020-08-08 00:00:00.000 | NULL | Ename, DeptName, city |
| 2 | 2 | D2 | Roy | IT | New York | 2020-07-31 00:00:00.000 | 2020-08-05 00:00:00.000 | NULL |
| 4 | 2 | D2 | Roy | IT | Los angeles | 2020-08-05 00:00:00.000 | NULL | city |
+----+-------+--------+-------+-----------+-------------+-------------------------+-------------------------+-----------------------+