如何一次性将同一目录下、命名方式相似的多个 CSV 文件导入到同一张表中
How to import a bunch of csv that share a directory and similar naming scheme into a single table in one go
自学经验不足,还请多多包涵
我正在使用 SQL Server 2016。我编写了以下查询,用来把一些 CSV 文件导入 SQL Server。查询中创建的表就是这些文件要导入的目标表。
-- Creates the Lobbyists target table, loads a single comma-separated file
-- into it, and then lists what was loaded.
USE [open secrets]

CREATE TABLE [dbo].[Lobbyists]
(
    [uniqID]            VARCHAR(36)  NOT NULL,
    [lobbyist]          VARCHAR(50)  NULL,
    [lobbyist_raw]      VARCHAR(50)  NULL,
    [lobbyist_id]       CHAR(12)     NULL,
    [year]              CHAR(4)      NULL,
    [Official Position] VARCHAR(254) NULL,
    [cid]               CHAR(9)      NULL,
    [formercongmem]     CHAR(1)      NULL
) ON [PRIMARY]

-- The run of question marks is the file-name placeholder exactly as written
-- in the original statement; BULK INSERT reads one file per statement.
BULK INSERT Lobbyists
FROM 'C:\aaa open secrets\Lobby\LOB_LOBBYIST\??????????????????'
WITH (FIELDTERMINATOR = ',', ROWTERMINATOR = '\n')
GO

-- Show every column of every loaded row.
SELECT
    [uniqID],
    [lobbyist],
    [lobbyist_raw],
    [lobbyist_id],
    [year],
    [Official Position],
    [cid],
    [formercongmem]
FROM Lobbyists
以下是我要导入的文件:
如您所见,它们都位于同一个目录中,并且命名方式相似。我需要导入从 lob_lobbyist000 一直到 lob_lobbyist123 的所有文件,中间没有缺失的编号。
我的目标是一次导入它们,而不是只使用 SQL Server Management Studio 一次导入一个。请告诉我正确的查询来完成这个。对查询中发生的事情的解释也将不胜感激。
更新
当前查询:
-- Creates the Lobbyists table, collects the names of the *.txt files in one
-- directory through xp_cmdshell, and runs one BULK INSERT per file name.
USE [open secrets]
CREATE TABLE [dbo].[Lobbyists] (
[uniqID] [varchar] (36) NOT NULL,
[lobbyist] [varchar] (50) NULL,
[lobbyist_raw] [varchar] (50) NULL,
[lobbyist_id] [char] (12) NULL,
[year] [char] (4) NULL,
[Official Position] [varchar] (254) NULL,
[cid] [char] (9) NULL,
[formercongmem] [char] (1) NULL
) ON [PRIMARY]
-- Earlier single-file attempt and the table backup statement, left disabled:
--BULK
--INSERT Lobbyists
--FROM 'C:\aaa open secrets\Lobby\LOB_LOBBYIST\??????????????????'
--WITH
--(
--FIELDTERMINATOR = ',',
--ROWTERMINATOR = '\n'
--)
--GO
--SELECT*FROM Lobbyists
--SELECT * INTO Lobbyists_20170309 FROM Lobbyists
-- Work table: WHICHFILE receives one row per line printed by the dir command,
-- WHICHPATH is filled in afterwards with the directory prefix.
DECLARE @ALLFILENAMES TABLE (WHICHPATH VARCHAR(255),WHICHFILE varchar(255))
declare @filename varchar(255),
@path varchar(255),
@sql varchar(8000),
@cmd varchar(1000)
--get the list of files to process:
-- NOTE(review): @path starts with a double quote so that the dir command built
-- on the next line ends up quoted. The same value is reused further down as the
-- BULK INSERT file name, so the generated statement reads FROM '"C:\... and the
-- load is rejected with operating system error 123 (invalid file name syntax).
SET @path = '"C:\aaaopensecrets\LOB_LOBBYIST\'
-- dir /b prints bare file names, one per line.
SET @cmd = 'dir ' + @path + '*.txt" /b'
-- Capture each output line of the shell command as a row.
INSERT INTO @ALLFILENAMES(WHICHFILE)
EXEC Master..xp_cmdShell @cmd
-- Stamp every captured row with the directory prefix.
UPDATE @ALLFILENAMES SET WHICHPATH = @path where WHICHPATH is null
-- Diagnostic listing of what will be processed.
SELECT * FROM @ALLFILENAMES
--cursor loop
declare c1 cursor
for SELECT WHICHPATH,WHICHFILE
FROM @ALLFILENAMES
open c1
fetch next from c1 into @path,@filename
-- Keep going until FETCH reports status -1.
While @@fetch_status <> -1
begin
--bulk insert won't take a variable name, so make a sql and execute it instead:
set @sql = 'BULK INSERT Lobbyists FROM ''' + @path + @filename + ''' '
+ ' WITH (
FIELDTERMINATOR = '','',
ROWTERMINATOR = ''\n''
) '
-- Echo the generated statement before running it.
print @sql
exec (@sql)
fetch next from c1 into @path,@filename
end
close c1
deallocate c1
结果:
(125 row(s) affected)
(125 row(s) affected)
(125 row(s) affected)
BULK INSERT Lobbyists FROM '"C:\aaaopensecrets\LOB_LOBBYIST\lob_lobbyist000.txt' WITH (
FIELDTERMINATOR = ',',
ROWTERMINATOR = '\n'
)
Msg 4861, Level 16, State 1, Line 1
Cannot bulk load because the file ""C:\aaaopensecrets\LOB_LOBBYIST\lob_lobbyist000.txt" could not be opened. Operating system error code 123(The filename, directory name, or volume label syntax is incorrect.).
每个文件都会重复出现同样的错误。
改编自此处:
Import Multiple CSV Files to SQL Server from a Folder
在运行下面的代码之前,请先执行以下命令备份你的表:
-- Copy every row and column of Lobbyists into a new table named
-- Lobbyists_20170309, which SELECT ... INTO creates.
SELECT *
INTO Lobbyists_20170309
FROM Lobbyists;
完成后,您就拥有了该表的一份副本。
现在尝试运行以下代码:
-- Bulk-loads every .txt file found in one directory into the Lobbyists table.
--
-- How it works:
--   1. xp_cmdshell runs "dir /b" to list the bare file names in the directory.
--   2. The names are stored in a table variable together with the directory.
--   3. A cursor walks the names; because BULK INSERT does not accept a variable
--      as its file name, each statement is built as text and run with EXEC.
--
-- Prerequisites: xp_cmdshell must be enabled, and the directory must be
-- readable by the SQL Server service account.
DECLARE @ALLFILENAMES TABLE (WHICHPATH VARCHAR(255), WHICHFILE VARCHAR(255));
DECLARE @filename VARCHAR(255),
        @path     VARCHAR(255),
        @sql      VARCHAR(8000),
        @cmd      VARCHAR(1000);

-- Keep the directory itself free of quote characters. It is used twice: inside
-- the shell command (where it needs double quotes because of the spaces) and
-- inside the BULK INSERT file name (where a stray double quote makes Windows
-- reject the path with operating system error 123).
SET @path = 'C:\aaa open secrets\Lobby\LOB_LOBBYIST\';
SET @cmd  = 'dir "' + @path + '*.txt" /b';

-- Get the list of files to process: one row per line printed by dir.
INSERT INTO @ALLFILENAMES (WHICHFILE)
EXEC master.dbo.xp_cmdshell @cmd;

-- xp_cmdshell appends a NULL row after the command output, and dir prints a
-- message instead of names when nothing matches; keep only real .txt names.
DELETE FROM @ALLFILENAMES
WHERE WHICHFILE IS NULL
   OR WHICHFILE NOT LIKE '%.txt';

UPDATE @ALLFILENAMES
SET WHICHPATH = @path
WHERE WHICHPATH IS NULL;

-- Diagnostic listing of what is about to be loaded.
SELECT WHICHPATH, WHICHFILE
FROM @ALLFILENAMES
ORDER BY WHICHFILE;

-- Cursor loop: one BULK INSERT per file, in file-name order.
DECLARE c1 CURSOR LOCAL FAST_FORWARD FOR
    SELECT WHICHPATH, WHICHFILE
    FROM @ALLFILENAMES
    ORDER BY WHICHFILE;

OPEN c1;
FETCH NEXT FROM c1 INTO @path, @filename;

-- 0 is the only status that means "a row was fetched".
WHILE @@FETCH_STATUS = 0
BEGIN
    -- Double any single quote in the file name so it cannot terminate the
    -- string literal inside the generated statement.
    SET @sql = 'BULK INSERT Lobbyists FROM '''
             + REPLACE(@path + @filename, '''', '''''')
             + ''' WITH (FIELDTERMINATOR = '','', ROWTERMINATOR = ''\n'')';

    PRINT @sql;
    EXEC (@sql);

    FETCH NEXT FROM c1 INTO @path, @filename;
END;

CLOSE c1;
DEALLOCATE c1;
-- usp_ImportMultipleFiles
-- Bulk-loads every file in a directory that matches a wildcard pattern into
-- one table, and records each generated BULK INSERT statement in logtable.
--
-- Parameters:
--   @filepath   Directory containing the files, including the trailing
--               backslash (for example 'c:\myimport\').
--   @pattern    Wildcard passed to dir (for example '*.csv').
--   @TableName  Name of an existing table to load into.
--
-- Prerequisites: xp_cmdshell must be enabled, a table logtable(query) must
-- exist, and the directory must be readable by the SQL Server service account.
CREATE PROCEDURE usp_ImportMultipleFiles
    @filepath  VARCHAR(500),
    @pattern   VARCHAR(100),
    @TableName VARCHAR(128)
AS
BEGIN
    DECLARE @query    VARCHAR(8000),   -- wide enough for path + name + options
            @max1     INT,
            @count1   INT = 0,
            @filename VARCHAR(200);    -- same width as #x.name, so no truncation

    -- The table name is concatenated into dynamic SQL below, so accept it only
    -- when it resolves to an existing object (temp tables live in tempdb).
    IF OBJECT_ID(CASE WHEN @TableName LIKE '#%'
                      THEN 'tempdb..' + @TableName
                      ELSE @TableName
                 END) IS NULL
    BEGIN
        RAISERROR('Target table %s does not exist.', 16, 1, @TableName);
        RETURN;
    END;

    CREATE TABLE #x (name VARCHAR(200));

    -- Quote the path so directories containing spaces are listed correctly;
    -- /b makes dir print bare file names, one per line.
    SET @query = 'dir "' + @filepath + @pattern + '" /b';

    INSERT INTO #x (name)
    EXEC master.dbo.xp_cmdshell @query;

    -- Drop the NULL trailer row and the message dir prints when nothing matches.
    DELETE FROM #x
    WHERE name IS NULL
       OR name = 'File Not Found';

    -- Number the remaining names 1..n so they can be walked with a counter.
    SELECT IDENTITY(INT, 1, 1) AS ID, name
    INTO #y
    FROM #x;

    DROP TABLE #x;

    SET @max1 = (SELECT MAX(ID) FROM #y);

    -- The counter is incremented before use, so the loop must stop once it has
    -- reached @max1; with <= it would run one extra pass with no file name.
    -- When no files were found @max1 is NULL and the loop body never runs.
    WHILE @count1 < @max1
    BEGIN
        SET @count1 = @count1 + 1;
        SET @filename = (SELECT name FROM #y WHERE ID = @count1);

        -- Single-quoted literals work regardless of the QUOTED_IDENTIFIER
        -- setting; embedded single quotes in the file name are doubled.
        SET @query = 'BULK INSERT ' + @TableName
                   + ' FROM ''' + REPLACE(@filepath + @filename, '''', '''''') + ''''
                   + ' WITH (FIELDTERMINATOR = '','', ROWTERMINATOR = ''\n'')';

        EXEC (@query);

        -- Keep an audit trail of what was executed.
        INSERT INTO logtable (query)
        SELECT @query;
    END;
END
EXEC usp_ImportMultipleFiles 'c:\myimport\', '*.csv', 'Account'
自学经验不足,还请多多包涵
我正在使用 SQL Server 2016。我编写了以下查询,用来把一些 CSV 文件导入 SQL Server。查询中创建的表就是这些文件要导入的目标表。
-- Creates the Lobbyists target table, loads a single comma-separated file
-- into it, and then lists what was loaded.
USE [open secrets]

CREATE TABLE [dbo].[Lobbyists]
(
    [uniqID]            VARCHAR(36)  NOT NULL,
    [lobbyist]          VARCHAR(50)  NULL,
    [lobbyist_raw]      VARCHAR(50)  NULL,
    [lobbyist_id]       CHAR(12)     NULL,
    [year]              CHAR(4)      NULL,
    [Official Position] VARCHAR(254) NULL,
    [cid]               CHAR(9)      NULL,
    [formercongmem]     CHAR(1)      NULL
) ON [PRIMARY]

-- The run of question marks is the file-name placeholder exactly as written
-- in the original statement; BULK INSERT reads one file per statement.
BULK INSERT Lobbyists
FROM 'C:\aaa open secrets\Lobby\LOB_LOBBYIST\??????????????????'
WITH (FIELDTERMINATOR = ',', ROWTERMINATOR = '\n')
GO

-- Show every column of every loaded row.
SELECT
    [uniqID],
    [lobbyist],
    [lobbyist_raw],
    [lobbyist_id],
    [year],
    [Official Position],
    [cid],
    [formercongmem]
FROM Lobbyists
以下是我要导入的文件:
如您所见,它们都位于同一个目录中,并且命名方式相似。我需要导入从 lob_lobbyist000 一直到 lob_lobbyist123 的所有文件,中间没有缺失的编号。
我的目标是一次导入它们,而不是只使用 SQL Server Management Studio 一次导入一个。请告诉我正确的查询来完成这个。对查询中发生的事情的解释也将不胜感激。
更新 当前查询:
-- Creates the Lobbyists table, collects the names of the *.txt files in one
-- directory through xp_cmdshell, and runs one BULK INSERT per file name.
USE [open secrets]
CREATE TABLE [dbo].[Lobbyists] (
[uniqID] [varchar] (36) NOT NULL,
[lobbyist] [varchar] (50) NULL,
[lobbyist_raw] [varchar] (50) NULL,
[lobbyist_id] [char] (12) NULL,
[year] [char] (4) NULL,
[Official Position] [varchar] (254) NULL,
[cid] [char] (9) NULL,
[formercongmem] [char] (1) NULL
) ON [PRIMARY]
-- Earlier single-file attempt and the table backup statement, left disabled:
--BULK
--INSERT Lobbyists
--FROM 'C:\aaa open secrets\Lobby\LOB_LOBBYIST\??????????????????'
--WITH
--(
--FIELDTERMINATOR = ',',
--ROWTERMINATOR = '\n'
--)
--GO
--SELECT*FROM Lobbyists
--SELECT * INTO Lobbyists_20170309 FROM Lobbyists
-- Work table: WHICHFILE receives one row per line printed by the dir command,
-- WHICHPATH is filled in afterwards with the directory prefix.
DECLARE @ALLFILENAMES TABLE (WHICHPATH VARCHAR(255),WHICHFILE varchar(255))
declare @filename varchar(255),
@path varchar(255),
@sql varchar(8000),
@cmd varchar(1000)
--get the list of files to process:
-- NOTE(review): @path starts with a double quote so that the dir command built
-- on the next line ends up quoted. The same value is reused further down as the
-- BULK INSERT file name, so the generated statement reads FROM '"C:\... and the
-- load is rejected with operating system error 123 (invalid file name syntax).
SET @path = '"C:\aaaopensecrets\LOB_LOBBYIST\'
-- dir /b prints bare file names, one per line.
SET @cmd = 'dir ' + @path + '*.txt" /b'
-- Capture each output line of the shell command as a row.
INSERT INTO @ALLFILENAMES(WHICHFILE)
EXEC Master..xp_cmdShell @cmd
-- Stamp every captured row with the directory prefix.
UPDATE @ALLFILENAMES SET WHICHPATH = @path where WHICHPATH is null
-- Diagnostic listing of what will be processed.
SELECT * FROM @ALLFILENAMES
--cursor loop
declare c1 cursor
for SELECT WHICHPATH,WHICHFILE
FROM @ALLFILENAMES
open c1
fetch next from c1 into @path,@filename
-- Keep going until FETCH reports status -1.
While @@fetch_status <> -1
begin
--bulk insert won't take a variable name, so make a sql and execute it instead:
set @sql = 'BULK INSERT Lobbyists FROM ''' + @path + @filename + ''' '
+ ' WITH (
FIELDTERMINATOR = '','',
ROWTERMINATOR = ''\n''
) '
-- Echo the generated statement before running it.
print @sql
exec (@sql)
fetch next from c1 into @path,@filename
end
close c1
deallocate c1
结果:
(125 row(s) affected)
(125 row(s) affected)
(125 row(s) affected)
BULK INSERT Lobbyists FROM '"C:\aaaopensecrets\LOB_LOBBYIST\lob_lobbyist000.txt' WITH (
FIELDTERMINATOR = ',',
ROWTERMINATOR = '\n'
)
Msg 4861, Level 16, State 1, Line 1
Cannot bulk load because the file ""C:\aaaopensecrets\LOB_LOBBYIST\lob_lobbyist000.txt" could not be opened. Operating system error code 123(The filename, directory name, or volume label syntax is incorrect.).
每个文件都会重复出现同样的错误。
改编自此处:
Import Multiple CSV Files to SQL Server from a Folder
在运行下面的代码之前,请先执行以下命令备份你的表:
-- Copy every row and column of Lobbyists into a new table named
-- Lobbyists_20170309, which SELECT ... INTO creates.
SELECT *
INTO Lobbyists_20170309
FROM Lobbyists;
完成后,您就拥有了该表的一份副本。
现在尝试运行以下代码:
-- Bulk-loads every .txt file found in one directory into the Lobbyists table.
--
-- How it works:
--   1. xp_cmdshell runs "dir /b" to list the bare file names in the directory.
--   2. The names are stored in a table variable together with the directory.
--   3. A cursor walks the names; because BULK INSERT does not accept a variable
--      as its file name, each statement is built as text and run with EXEC.
--
-- Prerequisites: xp_cmdshell must be enabled, and the directory must be
-- readable by the SQL Server service account.
DECLARE @ALLFILENAMES TABLE (WHICHPATH VARCHAR(255), WHICHFILE VARCHAR(255));
DECLARE @filename VARCHAR(255),
        @path     VARCHAR(255),
        @sql      VARCHAR(8000),
        @cmd      VARCHAR(1000);

-- Keep the directory itself free of quote characters. It is used twice: inside
-- the shell command (where it needs double quotes because of the spaces) and
-- inside the BULK INSERT file name (where a stray double quote makes Windows
-- reject the path with operating system error 123).
SET @path = 'C:\aaa open secrets\Lobby\LOB_LOBBYIST\';
SET @cmd  = 'dir "' + @path + '*.txt" /b';

-- Get the list of files to process: one row per line printed by dir.
INSERT INTO @ALLFILENAMES (WHICHFILE)
EXEC master.dbo.xp_cmdshell @cmd;

-- xp_cmdshell appends a NULL row after the command output, and dir prints a
-- message instead of names when nothing matches; keep only real .txt names.
DELETE FROM @ALLFILENAMES
WHERE WHICHFILE IS NULL
   OR WHICHFILE NOT LIKE '%.txt';

UPDATE @ALLFILENAMES
SET WHICHPATH = @path
WHERE WHICHPATH IS NULL;

-- Diagnostic listing of what is about to be loaded.
SELECT WHICHPATH, WHICHFILE
FROM @ALLFILENAMES
ORDER BY WHICHFILE;

-- Cursor loop: one BULK INSERT per file, in file-name order.
DECLARE c1 CURSOR LOCAL FAST_FORWARD FOR
    SELECT WHICHPATH, WHICHFILE
    FROM @ALLFILENAMES
    ORDER BY WHICHFILE;

OPEN c1;
FETCH NEXT FROM c1 INTO @path, @filename;

-- 0 is the only status that means "a row was fetched".
WHILE @@FETCH_STATUS = 0
BEGIN
    -- Double any single quote in the file name so it cannot terminate the
    -- string literal inside the generated statement.
    SET @sql = 'BULK INSERT Lobbyists FROM '''
             + REPLACE(@path + @filename, '''', '''''')
             + ''' WITH (FIELDTERMINATOR = '','', ROWTERMINATOR = ''\n'')';

    PRINT @sql;
    EXEC (@sql);

    FETCH NEXT FROM c1 INTO @path, @filename;
END;

CLOSE c1;
DEALLOCATE c1;
-- usp_ImportMultipleFiles
-- Bulk-loads every file in a directory that matches a wildcard pattern into
-- one table, and records each generated BULK INSERT statement in logtable.
--
-- Parameters:
--   @filepath   Directory containing the files, including the trailing
--               backslash (for example 'c:\myimport\').
--   @pattern    Wildcard passed to dir (for example '*.csv').
--   @TableName  Name of an existing table to load into.
--
-- Prerequisites: xp_cmdshell must be enabled, a table logtable(query) must
-- exist, and the directory must be readable by the SQL Server service account.
CREATE PROCEDURE usp_ImportMultipleFiles
    @filepath  VARCHAR(500),
    @pattern   VARCHAR(100),
    @TableName VARCHAR(128)
AS
BEGIN
    DECLARE @query    VARCHAR(8000),   -- wide enough for path + name + options
            @max1     INT,
            @count1   INT = 0,
            @filename VARCHAR(200);    -- same width as #x.name, so no truncation

    -- The table name is concatenated into dynamic SQL below, so accept it only
    -- when it resolves to an existing object (temp tables live in tempdb).
    IF OBJECT_ID(CASE WHEN @TableName LIKE '#%'
                      THEN 'tempdb..' + @TableName
                      ELSE @TableName
                 END) IS NULL
    BEGIN
        RAISERROR('Target table %s does not exist.', 16, 1, @TableName);
        RETURN;
    END;

    CREATE TABLE #x (name VARCHAR(200));

    -- Quote the path so directories containing spaces are listed correctly;
    -- /b makes dir print bare file names, one per line.
    SET @query = 'dir "' + @filepath + @pattern + '" /b';

    INSERT INTO #x (name)
    EXEC master.dbo.xp_cmdshell @query;

    -- Drop the NULL trailer row and the message dir prints when nothing matches.
    DELETE FROM #x
    WHERE name IS NULL
       OR name = 'File Not Found';

    -- Number the remaining names 1..n so they can be walked with a counter.
    SELECT IDENTITY(INT, 1, 1) AS ID, name
    INTO #y
    FROM #x;

    DROP TABLE #x;

    SET @max1 = (SELECT MAX(ID) FROM #y);

    -- The counter is incremented before use, so the loop must stop once it has
    -- reached @max1; with <= it would run one extra pass with no file name.
    -- When no files were found @max1 is NULL and the loop body never runs.
    WHILE @count1 < @max1
    BEGIN
        SET @count1 = @count1 + 1;
        SET @filename = (SELECT name FROM #y WHERE ID = @count1);

        -- Single-quoted literals work regardless of the QUOTED_IDENTIFIER
        -- setting; embedded single quotes in the file name are doubled.
        SET @query = 'BULK INSERT ' + @TableName
                   + ' FROM ''' + REPLACE(@filepath + @filename, '''', '''''') + ''''
                   + ' WITH (FIELDTERMINATOR = '','', ROWTERMINATOR = ''\n'')';

        EXEC (@query);

        -- Keep an audit trail of what was executed.
        INSERT INTO logtable (query)
        SELECT @query;
    END;
END
EXEC usp_ImportMultipleFiles 'c:\myimport\', '*.csv', 'Account'