在 SQL Server 中并行导入数据
Importing Data in Parallel in SQL Server
我有 100 多个文件要导入到 SQL Server 中,其中大部分约为 500 MB。我想利用 SQL Server 的并行导入功能,并阅读了许多网页,例如:
如何在 30 分钟内加载 1TB 数据
https://technet.microsoft.com/en-us/library/dd537533(v=sql.100).aspx
使用 Table 级别锁定并行导入数据
https://technet.microsoft.com/en-us/library/ms186341(v=sql.105).aspx
控制批量导入的锁定行为
https://technet.microsoft.com/en-us/library/ms180876(v=sql.105).aspx
以及 Stack Overflow 中的答案
Fastest way to insert in parallel to a single table
然而,它们都没有给出一个简单的代码示例。我知道如何使用 BULK INSERT/bcp,但不知道该从哪里开始做并行导入。有人可以帮我吗?
我的系统是 Windows,使用的是 SQL Server 2016。源数据文件是 txt 格式。
在此先感谢您的帮助!
贾森
将文件路径详细信息加载到跟踪表中
-- Tracking table: one row per source file, the loader session ("thread")
-- it is assigned to, and a flag telling whether it has been loaded (0 = not yet).
CREATE TABLE FileListCollection (
    Id       int IDENTITY(1,1),
    filepath VARCHAR(500),
    ThreadNo tinyint,
    isLoaded int
);

-- Staging table variable that receives the raw output lines of the DIR command.
DECLARE @FileListCollection TABLE (filepath VARCHAR(500));
DECLARE @folderpath NVARCHAR(500);
-- Wide enough for the 500-character folder path plus the command text around it,
-- so the command is never silently truncated.
DECLARE @cmd NVARCHAR(600);

SET @folderpath = '<FolderPath>';

-- /b    bare output (one path per line)
-- /s    recurse into sub-folders, which makes DIR print full paths
-- /a-d  files only, so sub-folder names are not queued as if they were data files
-- The folder path is double-quoted so that paths containing spaces work.
SET @cmd = 'dir "' + @folderpath + '" /b /s /a-d';

INSERT INTO @FileListCollection (filepath)
EXECUTE xp_cmdshell @cmd;

-- xp_cmdshell emits NULL rows (e.g. the trailing blank line); drop them.
-- NOTE(review): if the folder is missing or empty, DIR prints a message line
-- instead of a path and that line would be queued below - verify the result.
DELETE FROM @FileListCollection
WHERE filepath IS NULL;

-- Register every discovered file as "not loaded yet".
INSERT INTO FileListCollection (filepath, isLoaded)
SELECT filepath, 0
FROM @FileListCollection;
每个线程的调度
-- Spread the tracked files across the loader sessions.
-- @ThreadCount is the number of parallel sessions. Each row is assigned
-- Id modulo that count, so the assigned numbers run from 0 to @ThreadCount - 1.
DECLARE @ThreadCount int = 3;

-- Intentionally unfiltered: every row of the tracking table is (re)assigned.
UPDATE FileListCollection
SET ThreadNo = Id % @ThreadCount;
打开三个会话,并为每个会话分配一个线程号(0、1 或 2,对应上面 id % 3 的结果)
在每个会话中运行下面的脚本加载数据(把 @ThreadNo 设为该会话的线程号)
-- Loader script: run one copy per session. Each copy bulk-inserts, one file at a
-- time, every file assigned to its thread number that is still flagged isLoaded = 0.
DECLARE @filepath NVARCHAR(500);
DECLARE @bcpquery NVARCHAR(MAX);
-- Thread number handled by THIS session; change it in each session.
DECLARE @ThreadNo int = 1;

WHILE EXISTS (
    SELECT *
    FROM FileListCollection
    WHERE ThreadNo = @ThreadNo
      AND isLoaded = 0
)
BEGIN
    -- Pick the next pending file; ORDER BY makes the processing order deterministic.
    SELECT TOP (1) @filepath = filepath
    FROM FileListCollection
    WHERE ThreadNo = @ThreadNo
      AND isLoaded = 0
    ORDER BY Id;

    -- BULK INSERT does not accept a variable for the file name, hence dynamic SQL.
    -- Single quotes inside the path are doubled so they cannot break the statement.
    -- <Database>.dbo.Table is a placeholder for the real target table.
    -- NOTE(review): when the target is a heap, adding TABLOCK to the WITH clause is
    -- the documented way to let several sessions bulk load concurrently - confirm
    -- against the target table's indexes before enabling it.
    SET @bcpquery = N'bulk insert <Database>.dbo.Table from '''
        + REPLACE(@filepath, N'''', N'''''')
        + N''' with (fieldterminator = ''|'', rowterminator = ''\n'')';
    PRINT @bcpquery;

    -- Load the file. On failure, re-raise and stop this session: the file keeps
    -- isLoaded = 0, so it is not reported as loaded and is retried on the next run.
    BEGIN TRY
        EXECUTE sp_executesql @bcpquery;
    END TRY
    BEGIN CATCH
        THROW;
    END CATCH;

    -- Only reached when the load succeeded.
    UPDATE FileListCollection
    SET isLoaded = 1
    WHERE filepath = @filepath;
END
我有 100 多个文件要导入到 SQL Server 中,其中大部分约为 500 MB。我想利用 SQL Server 的并行导入功能,并阅读了许多网页,例如:
如何在 30 分钟内加载 1TB 数据
https://technet.microsoft.com/en-us/library/dd537533(v=sql.100).aspx
使用 Table 级别锁定并行导入数据
https://technet.microsoft.com/en-us/library/ms186341(v=sql.105).aspx
控制批量导入的锁定行为
https://technet.microsoft.com/en-us/library/ms180876(v=sql.105).aspx
以及 Stack Overflow 中的答案
Fastest way to insert in parallel to a single table
然而,它们都没有给出一个简单的代码示例。我知道如何使用 BULK INSERT/bcp,但不知道该从哪里开始做并行导入。有人可以帮我吗?
我的系统是 Windows,使用的是 SQL Server 2016。源数据文件是 txt 格式。
在此先感谢您的帮助!
贾森
将文件路径详细信息加载到跟踪表中
-- Tracking table: one row per source file, the loader session ("thread")
-- it is assigned to, and a flag telling whether it has been loaded (0 = not yet).
CREATE TABLE FileListCollection (
    Id       int IDENTITY(1,1),
    filepath VARCHAR(500),
    ThreadNo tinyint,
    isLoaded int
);

-- Staging table variable that receives the raw output lines of the DIR command.
DECLARE @FileListCollection TABLE (filepath VARCHAR(500));
DECLARE @folderpath NVARCHAR(500);
-- Wide enough for the 500-character folder path plus the command text around it,
-- so the command is never silently truncated.
DECLARE @cmd NVARCHAR(600);

SET @folderpath = '<FolderPath>';

-- /b    bare output (one path per line)
-- /s    recurse into sub-folders, which makes DIR print full paths
-- /a-d  files only, so sub-folder names are not queued as if they were data files
-- The folder path is double-quoted so that paths containing spaces work.
SET @cmd = 'dir "' + @folderpath + '" /b /s /a-d';

INSERT INTO @FileListCollection (filepath)
EXECUTE xp_cmdshell @cmd;

-- xp_cmdshell emits NULL rows (e.g. the trailing blank line); drop them.
-- NOTE(review): if the folder is missing or empty, DIR prints a message line
-- instead of a path and that line would be queued below - verify the result.
DELETE FROM @FileListCollection
WHERE filepath IS NULL;

-- Register every discovered file as "not loaded yet".
INSERT INTO FileListCollection (filepath, isLoaded)
SELECT filepath, 0
FROM @FileListCollection;
每个线程的调度
-- Spread the tracked files across the loader sessions.
-- @ThreadCount is the number of parallel sessions. Each row is assigned
-- Id modulo that count, so the assigned numbers run from 0 to @ThreadCount - 1.
DECLARE @ThreadCount int = 3;

-- Intentionally unfiltered: every row of the tracking table is (re)assigned.
UPDATE FileListCollection
SET ThreadNo = Id % @ThreadCount;
打开三个会话,并为每个会话分配一个线程号(0、1 或 2,对应上面 id % 3 的结果)
在每个会话中运行下面的脚本加载数据(把 @ThreadNo 设为该会话的线程号)
-- Loader script: run one copy per session. Each copy bulk-inserts, one file at a
-- time, every file assigned to its thread number that is still flagged isLoaded = 0.
DECLARE @filepath NVARCHAR(500);
DECLARE @bcpquery NVARCHAR(MAX);
-- Thread number handled by THIS session; change it in each session.
DECLARE @ThreadNo int = 1;

WHILE EXISTS (
    SELECT *
    FROM FileListCollection
    WHERE ThreadNo = @ThreadNo
      AND isLoaded = 0
)
BEGIN
    -- Pick the next pending file; ORDER BY makes the processing order deterministic.
    SELECT TOP (1) @filepath = filepath
    FROM FileListCollection
    WHERE ThreadNo = @ThreadNo
      AND isLoaded = 0
    ORDER BY Id;

    -- BULK INSERT does not accept a variable for the file name, hence dynamic SQL.
    -- Single quotes inside the path are doubled so they cannot break the statement.
    -- <Database>.dbo.Table is a placeholder for the real target table.
    -- NOTE(review): when the target is a heap, adding TABLOCK to the WITH clause is
    -- the documented way to let several sessions bulk load concurrently - confirm
    -- against the target table's indexes before enabling it.
    SET @bcpquery = N'bulk insert <Database>.dbo.Table from '''
        + REPLACE(@filepath, N'''', N'''''')
        + N''' with (fieldterminator = ''|'', rowterminator = ''\n'')';
    PRINT @bcpquery;

    -- Load the file. On failure, re-raise and stop this session: the file keeps
    -- isLoaded = 0, so it is not reported as loaded and is retried on the next run.
    BEGIN TRY
        EXECUTE sp_executesql @bcpquery;
    END TRY
    BEGIN CATCH
        THROW;
    END CATCH;

    -- Only reached when the load succeeded.
    UPDATE FileListCollection
    SET isLoaded = 1
    WHERE filepath = @filepath;
END