在多个引用表中逐行插入大数据
Insert Large data row by row in multiple reference tables
I went through a lot of posts on SO. However, they do not fit my situation.
我们有一种情况:我们想要将 SQL Server 2017 上的大型数据集存储到多个引用表中。
我们已经尝试过 cursor(游标),它工作正常。但是,我们担心加载大数据(100 多万行)时的性能问题。
Example
T_Bulk is an input table, T_Bulk_orignal is the destination table, and T_Bulk_reference is a reference table for T_Bulk_orignal.
-- Input table: one row per incoming record that UseCursor distributes into
-- T_Bulk_orignal and T_Bulk_reference.
CREATE TABLE T_Bulk
(
    Id            uniqueidentifier,
    ElementType   nvarchar(max),
    [Description] nvarchar(max),
    -- UseCursor fetches a fourth value per row and writes it to
    -- T_Bulk_reference, so the input table has to supply it.
    Description2  nvarchar(max)
)
GO

-- Destination table. UseCursor uses (ElementType, Description) as the lookup
-- key when deciding whether a record already exists.
CREATE TABLE T_Bulk_orignal
(
    Id            uniqueidentifier,
    ElementType   nvarchar(max),
    [Description] nvarchar(max)
)
GO

-- Reference table: (Id, Description2) pairs, where Id is the Id of the
-- matching T_Bulk_orignal row.
CREATE TABLE T_Bulk_reference
(
    Id           uniqueidentifier,
    Description2 nvarchar(max)
)
GO

-- UseCursor
--   Walks dbo.T_Bulk row by row. For each row it:
--     1. reuses the Id of an existing T_Bulk_orignal row with the same
--        (ElementType, Description), or inserts a new T_Bulk_orignal row;
--     2. inserts (Id, Description2) into T_Bulk_reference unless that exact
--        pair is already present.
--   Each row runs in its own transaction: a failing row is rolled back on its
--   own, its error number is returned as a one-column result set, and the
--   loop continues with the next row.
--
--   @udtT_Bulk  Table-valued parameter kept for interface compatibility.
--               NOTE(review): it is not read; rows come from dbo.T_Bulk.
--               Confirm which source is intended.
CREATE PROC UseCursor
(
    @udtT_Bulk AS dbo.udt_T_Bulk READONLY
)
AS
BEGIN
    -- Types match the table columns (nvarchar(max)) so values are neither
    -- truncated nor converted to a non-Unicode code page.
    DECLARE @Id           uniqueidentifier,
            @ElementType  nvarchar(max),
            @Description  nvarchar(max),
            @Description2 nvarchar(max)

    DECLARE MY_CURSOR CURSOR
        LOCAL STATIC READ_ONLY FORWARD_ONLY
    FOR
        -- The select list must have exactly as many columns as the FETCH
        -- INTO list has variables.
        SELECT Id, ElementType, [Description], Description2
        FROM dbo.T_Bulk

    OPEN MY_CURSOR

    FETCH NEXT FROM MY_CURSOR INTO @Id, @ElementType, @Description, @Description2

    WHILE @@FETCH_STATUS = 0
    BEGIN
        BEGIN TRANSACTION Trans1

        BEGIN TRY
            IF EXISTS (SELECT Id
                       FROM T_Bulk_orignal
                       WHERE ElementType = @ElementType
                         AND [Description] = @Description)
                -- Record already stored: reuse its Id for the reference row.
                SELECT @Id = Id
                FROM T_Bulk_orignal
                WHERE ElementType = @ElementType
                  AND [Description] = @Description
            ELSE
            BEGIN
                INSERT T_Bulk_orignal (Id, ElementType, [Description])
                VALUES (@Id, @ElementType, @Description)
            END

            -- NOTE: a NULL Description2 never compares equal to an existing
            -- row, so such pairs are inserted every time.
            INSERT T_Bulk_reference (Id, Description2)
            SELECT F.Id, F.Description2
            FROM (SELECT @Id AS Id, @Description2 AS Description2) F
            WHERE NOT EXISTS (SELECT *
                              FROM T_Bulk_reference C
                              WHERE C.Id = F.Id
                                AND C.Description2 = F.Description2);

            COMMIT TRANSACTION Trans1
        END TRY
        BEGIN CATCH
            -- Undo only the current row's work.
            IF @@TRANCOUNT > 0
                ROLLBACK TRANSACTION Trans1

            -- ERROR_NUMBER() keeps the original error inside CATCH, whereas
            -- @@ERROR is reset to 0 by the successful ROLLBACK above.
            SELECT ERROR_NUMBER()
        END CATCH

        -- Advance outside TRY/CATCH so a failing row is skipped instead of
        -- being retried forever.
        FETCH NEXT FROM MY_CURSOR INTO @Id, @ElementType, @Description, @Description2
    END

    CLOSE MY_CURSOR
    DEALLOCATE MY_CURSOR
END
GO
We want this operation to execute in one go, like a bulk insert; however, we also need to cross-check for data discrepancies, and if a row cannot be inserted we need to roll back only that specific record.
批量插入的唯一问题是存在引用表数据。
请就此提出最佳方法。
这听起来像是 SSIS(SQL 服务器集成服务)的工作。
https://docs.microsoft.com/en-us/sql/integration-services/ssis-how-to-create-an-etl-package
在 SSIS 中,您可以创建一个可以进行引用检查的数据迁移作业。您可以将其设置为在每个阶段失败、警告或忽略错误。您可以在 Google 上查找有关 ETL 和 SSIS 的资源。
我已经在 5000 多万行的数据上完成过与你类似的工作。
当然需要一段时间,并且它会在出现错误时回滚所有内容(如果这样设置),但它是完成此类工作的最佳工具。
我找到了一个上传大文件的解决方案,就像批量插入一样。
SQL 中存在 MERGE 语句。
The MERGE statement is used to make changes in one table based on
values matched from another. It can be used to combine insert,
update, and delete operations into one statement.
因此我们可以使用 DataTable 将数据传递给 StoredProcedure,然后源将是您的 UserDefinedDataTable,目标将是您的实际 SQL Table。
I went through a lot of posts on SO. However, they do not fit my situation.
我们有一种情况:我们想要将 SQL Server 2017 上的大型数据集存储到多个引用表中。
我们已经尝试过 cursor(游标),它工作正常。但是,我们担心加载大数据(100 多万行)时的性能问题。
Example
T_Bulk is an input table, T_Bulk_orignal is the destination table, and T_Bulk_reference is a reference table for T_Bulk_orignal.
-- Input table: one row per incoming record that UseCursor distributes into
-- T_Bulk_orignal and T_Bulk_reference.
CREATE TABLE T_Bulk
(
    Id            uniqueidentifier,
    ElementType   nvarchar(max),
    [Description] nvarchar(max),
    -- UseCursor fetches a fourth value per row and writes it to
    -- T_Bulk_reference, so the input table has to supply it.
    Description2  nvarchar(max)
)
GO

-- Destination table. UseCursor uses (ElementType, Description) as the lookup
-- key when deciding whether a record already exists.
CREATE TABLE T_Bulk_orignal
(
    Id            uniqueidentifier,
    ElementType   nvarchar(max),
    [Description] nvarchar(max)
)
GO

-- Reference table: (Id, Description2) pairs, where Id is the Id of the
-- matching T_Bulk_orignal row.
CREATE TABLE T_Bulk_reference
(
    Id           uniqueidentifier,
    Description2 nvarchar(max)
)
GO

-- UseCursor
--   Walks dbo.T_Bulk row by row. For each row it:
--     1. reuses the Id of an existing T_Bulk_orignal row with the same
--        (ElementType, Description), or inserts a new T_Bulk_orignal row;
--     2. inserts (Id, Description2) into T_Bulk_reference unless that exact
--        pair is already present.
--   Each row runs in its own transaction: a failing row is rolled back on its
--   own, its error number is returned as a one-column result set, and the
--   loop continues with the next row.
--
--   @udtT_Bulk  Table-valued parameter kept for interface compatibility.
--               NOTE(review): it is not read; rows come from dbo.T_Bulk.
--               Confirm which source is intended.
CREATE PROC UseCursor
(
    @udtT_Bulk AS dbo.udt_T_Bulk READONLY
)
AS
BEGIN
    -- Types match the table columns (nvarchar(max)) so values are neither
    -- truncated nor converted to a non-Unicode code page.
    DECLARE @Id           uniqueidentifier,
            @ElementType  nvarchar(max),
            @Description  nvarchar(max),
            @Description2 nvarchar(max)

    DECLARE MY_CURSOR CURSOR
        LOCAL STATIC READ_ONLY FORWARD_ONLY
    FOR
        -- The select list must have exactly as many columns as the FETCH
        -- INTO list has variables.
        SELECT Id, ElementType, [Description], Description2
        FROM dbo.T_Bulk

    OPEN MY_CURSOR

    FETCH NEXT FROM MY_CURSOR INTO @Id, @ElementType, @Description, @Description2

    WHILE @@FETCH_STATUS = 0
    BEGIN
        BEGIN TRANSACTION Trans1

        BEGIN TRY
            IF EXISTS (SELECT Id
                       FROM T_Bulk_orignal
                       WHERE ElementType = @ElementType
                         AND [Description] = @Description)
                -- Record already stored: reuse its Id for the reference row.
                SELECT @Id = Id
                FROM T_Bulk_orignal
                WHERE ElementType = @ElementType
                  AND [Description] = @Description
            ELSE
            BEGIN
                INSERT T_Bulk_orignal (Id, ElementType, [Description])
                VALUES (@Id, @ElementType, @Description)
            END

            -- NOTE: a NULL Description2 never compares equal to an existing
            -- row, so such pairs are inserted every time.
            INSERT T_Bulk_reference (Id, Description2)
            SELECT F.Id, F.Description2
            FROM (SELECT @Id AS Id, @Description2 AS Description2) F
            WHERE NOT EXISTS (SELECT *
                              FROM T_Bulk_reference C
                              WHERE C.Id = F.Id
                                AND C.Description2 = F.Description2);

            COMMIT TRANSACTION Trans1
        END TRY
        BEGIN CATCH
            -- Undo only the current row's work.
            IF @@TRANCOUNT > 0
                ROLLBACK TRANSACTION Trans1

            -- ERROR_NUMBER() keeps the original error inside CATCH, whereas
            -- @@ERROR is reset to 0 by the successful ROLLBACK above.
            SELECT ERROR_NUMBER()
        END CATCH

        -- Advance outside TRY/CATCH so a failing row is skipped instead of
        -- being retried forever.
        FETCH NEXT FROM MY_CURSOR INTO @Id, @ElementType, @Description, @Description2
    END

    CLOSE MY_CURSOR
    DEALLOCATE MY_CURSOR
END
GO
We want this operation to execute in one go, like a bulk insert; however, we also need to cross-check for data discrepancies, and if a row cannot be inserted we need to roll back only that specific record.
批量插入的唯一问题是存在引用表数据。
请就此提出最佳方法。
这听起来像是 SSIS(SQL 服务器集成服务)的工作。 https://docs.microsoft.com/en-us/sql/integration-services/ssis-how-to-create-an-etl-package
在 SSIS 中,您可以创建一个可以进行引用检查的数据迁移作业。您可以将其设置为在每个阶段失败、警告或忽略错误。您可以在 Google 上查找有关 ETL 和 SSIS 的资源。
我已经在 5000 多万行的数据上完成过与你类似的工作。
当然需要一段时间,并且它会在出现错误时回滚所有内容(如果这样设置),但它是完成此类工作的最佳工具。
我找到了一个上传大文件的解决方案,就像批量插入一样。
SQL 中存在 MERGE 语句。
The MERGE statement is used to make changes in one table based on values matched from another. It can be used to combine insert, update, and delete operations into one statement.
因此我们可以使用 DataTable 将数据传递给 StoredProcedure,然后源将是您的 UserDefinedDataTable,目标将是您的实际 SQL Table。