当省略结束列时,将忽略 widechar CSV 中的 BULK INSERT ROWTERMINATOR \n
BULK INSERT ROWTERMINATOR \n in widechar CSV is ignored when ending columns omitted
我有一个格式正确的 CSV:它是以制表符分隔的 Unicode CSV,行以 CRLF(十六进制 00 0D 00 0A)结束,请参见下面的 Notepad++ 截图:
当我在 SQL Server 2008 (10.0.2531.0) 上从 SSMS 2016 运行 SQL 命令时:
-- Bulk-load the file x:\ZCSD84.csv into the temporary table #ZCSD84.
-- Fields are split on tab and rows on '\n'; loading starts at the first row of the file
-- and the file is declared as 'widechar' (Unicode) data.
-- NOTE(review): CODEPAGE is documented as relevant to char/varchar/text columns;
-- confirm it has any effect together with DATAFILETYPE='widechar'.
BULK
INSERT #ZCSD84
FROM 'x:\ZCSD84.csv'
WITH
(
FIELDTERMINATOR = '\t',
ROWTERMINATOR = '\n',
FIRSTROW = 1,
CODEPAGE = 1250,
DATAFILETYPE='widechar'
)
除了 CSV 记录末尾省略了制表符的那些行之外,导入结果都是正确的。对于这些行,相邻的两行会被错误地合并为一行,如下所示。下一行第一列的值甚至会被追加到该行最后一列的值之后,形成类似 Jednorázový zákazník CB30 这样的值。
我曾尝试指定其他各种行终止符,例如 0x0A,但每次都会遇到 “The bulk load failed. The column is too long ...” 错误。
如何在遇到 CRLF 时强制开始新的表行?或者有没有其他解决方法,例如预先把 CSV 处理成更规范的格式?
我找到了一个回答,它表明这个问题很可能没有简单的解决办法。因此我决定在批量插入之前,先用 C# 控制台应用程序和 CsvHelper 修复 CSV,做法很简单:
/// <summary>
/// Console program that reads the tab-delimited file ZCSD84.csv through CsvHelper
/// and writes the parsed records back out to ZCSD84_out.csv in code page 1250.
/// </summary>
class Program
{
    /// <summary>
    /// One record of the ZCSD84 file. Every mapped column is carried as plain text.
    /// </summary>
    public class ZCSD84
    {
        public string PrOrg { get; set; }
        public string Odberatel { get; set; }
        public string Dodavatel { get; set; }
        public string Jmeno1 { get; set; }
        public string Jmeno2 { get; set; }
        public string Ulice { get; set; }
        public string PSC { get; set; }
        public string Misto { get; set; }
        public string MistniCast { get; set; }
        public string DIC { get; set; }
        public string ICO { get; set; }
        public string StareCislo { get; set; }
        public string HeadOffice { get; set; }
        public string HeadOfficeName { get; set; }
        public string Payer { get; set; }
        public string PayerName { get; set; }
    }

    /// <summary>
    /// Binds each <see cref="ZCSD84"/> property to a zero-based column position.
    /// </summary>
    public sealed class ZCSD84Map : CsvClassMap<ZCSD84>
    {
        public ZCSD84Map()
        {
            Map(row => row.PrOrg).Index(0);
            Map(row => row.Odberatel).Index(1);
            Map(row => row.Dodavatel).Index(2);
            Map(row => row.Jmeno1).Index(3);
            Map(row => row.Jmeno2).Index(4);
            Map(row => row.Ulice).Index(5);
            // Column 6 has no mapping.
            // NOTE(review): presumably skipped on purpose; confirm against the file layout.
            Map(row => row.PSC).Index(7);
            Map(row => row.Misto).Index(8);
            Map(row => row.MistniCast).Index(9);
            Map(row => row.DIC).Index(10);
            Map(row => row.ICO).Index(11);
            Map(row => row.StareCislo).Index(12);
            Map(row => row.HeadOffice).Index(13);
            Map(row => row.HeadOfficeName).Index(14);
            Map(row => row.Payer).Index(15);
            // Column 16 has no mapping either (same caveat as column 6).
            Map(row => row.PayerName).Index(17);
        }
    }

    /// <summary>
    /// Loads all records from the input file and rewrites them to the output file.
    /// </summary>
    /// <param name="args">Command-line arguments; not used.</param>
    /// <exception cref="FileNotFoundException">The input file does not exist.</exception>
    static void Main(string[] args)
    {
        const string inputPath = @"C:\JOBS\ZCSD84.csv";
        const string outputPath = @"C:\JOBS\ZCSD84_out.csv";

        if (File.Exists(inputPath) == false)
        {
            throw new FileNotFoundException(String.Format("Soubor csv nelze otevřít {0}.", inputPath));
        }

        // Code page 1250 is used for the configuration and for the output writer.
        Encoding windows1250 = System.Text.Encoding.GetEncoding(1250);

        // One configuration object is shared by the reader and the writer.
        var settings = new CsvConfiguration
        {
            Delimiter = "\t",
            IgnoreBlankLines = true,
            HasHeaderRecord = true,
            DetectColumnCountChanges = false,
            IgnoreReadingExceptions = false,
            SkipEmptyRecords = true,
            WillThrowOnMissingField = false,
            Encoding = windows1250,
            QuoteNoFields = true
        };
        settings.RegisterClassMap<ZCSD84Map>();

        Console.WriteLine(inputPath);
        List<ZCSD84> records;
        // No explicit encoding is passed to the StreamReader here.
        using (var csvIn = new CsvReader(new StreamReader(inputPath), settings))
        {
            // Materialize the records before the reader is disposed.
            records = csvIn.GetRecords<ZCSD84>().ToList();
        }

        Console.WriteLine(outputPath);
        // append: false, so an existing output file is overwritten.
        using (var csvOut = new CsvWriter(new StreamWriter(outputPath, false, windows1250), settings))
        {
            csvOut.WriteRecords(records);
        }
    }
}
我有一个格式正确的 CSV:它是以制表符分隔的 Unicode CSV,行以 CRLF(十六进制 00 0D 00 0A)结束,请参见下面的 Notepad++ 截图:
当我在 SQL Server 2008 (10.0.2531.0) 上从 SSMS 2016 运行 SQL 命令时:
-- Bulk-load the file x:\ZCSD84.csv into the temporary table #ZCSD84.
-- Fields are split on tab and rows on '\n'; loading starts at the first row of the file
-- and the file is declared as 'widechar' (Unicode) data.
-- NOTE(review): CODEPAGE is documented as relevant to char/varchar/text columns;
-- confirm it has any effect together with DATAFILETYPE='widechar'.
BULK
INSERT #ZCSD84
FROM 'x:\ZCSD84.csv'
WITH
(
FIELDTERMINATOR = '\t',
ROWTERMINATOR = '\n',
FIRSTROW = 1,
CODEPAGE = 1250,
DATAFILETYPE='widechar'
)
除了 CSV 记录末尾省略了制表符的那些行之外,导入结果都是正确的。对于这些行,相邻的两行会被错误地合并为一行,如下所示。下一行第一列的值甚至会被追加到该行最后一列的值之后,形成类似 Jednorázový zákazník CB30 这样的值。
我曾尝试指定其他各种行终止符,例如 0x0A,但每次都会遇到 “The bulk load failed. The column is too long ...” 错误。
如何在遇到 CRLF 时强制开始新的表行?或者有没有其他解决方法,例如预先把 CSV 处理成更规范的格式?
我找到了一个回答,它表明这个问题很可能没有简单的解决办法。因此我决定在批量插入之前,先用 C# 控制台应用程序和 CsvHelper 修复 CSV,做法很简单:
/// <summary>
/// Console program that reads the tab-delimited file ZCSD84.csv through CsvHelper
/// and writes the parsed records back out to ZCSD84_out.csv in code page 1250.
/// </summary>
class Program
{
    /// <summary>
    /// One record of the ZCSD84 file. Every mapped column is carried as plain text.
    /// </summary>
    public class ZCSD84
    {
        public string PrOrg { get; set; }
        public string Odberatel { get; set; }
        public string Dodavatel { get; set; }
        public string Jmeno1 { get; set; }
        public string Jmeno2 { get; set; }
        public string Ulice { get; set; }
        public string PSC { get; set; }
        public string Misto { get; set; }
        public string MistniCast { get; set; }
        public string DIC { get; set; }
        public string ICO { get; set; }
        public string StareCislo { get; set; }
        public string HeadOffice { get; set; }
        public string HeadOfficeName { get; set; }
        public string Payer { get; set; }
        public string PayerName { get; set; }
    }

    /// <summary>
    /// Binds each <see cref="ZCSD84"/> property to a zero-based column position.
    /// </summary>
    public sealed class ZCSD84Map : CsvClassMap<ZCSD84>
    {
        public ZCSD84Map()
        {
            Map(row => row.PrOrg).Index(0);
            Map(row => row.Odberatel).Index(1);
            Map(row => row.Dodavatel).Index(2);
            Map(row => row.Jmeno1).Index(3);
            Map(row => row.Jmeno2).Index(4);
            Map(row => row.Ulice).Index(5);
            // Column 6 has no mapping.
            // NOTE(review): presumably skipped on purpose; confirm against the file layout.
            Map(row => row.PSC).Index(7);
            Map(row => row.Misto).Index(8);
            Map(row => row.MistniCast).Index(9);
            Map(row => row.DIC).Index(10);
            Map(row => row.ICO).Index(11);
            Map(row => row.StareCislo).Index(12);
            Map(row => row.HeadOffice).Index(13);
            Map(row => row.HeadOfficeName).Index(14);
            Map(row => row.Payer).Index(15);
            // Column 16 has no mapping either (same caveat as column 6).
            Map(row => row.PayerName).Index(17);
        }
    }

    /// <summary>
    /// Loads all records from the input file and rewrites them to the output file.
    /// </summary>
    /// <param name="args">Command-line arguments; not used.</param>
    /// <exception cref="FileNotFoundException">The input file does not exist.</exception>
    static void Main(string[] args)
    {
        const string inputPath = @"C:\JOBS\ZCSD84.csv";
        const string outputPath = @"C:\JOBS\ZCSD84_out.csv";

        if (File.Exists(inputPath) == false)
        {
            throw new FileNotFoundException(String.Format("Soubor csv nelze otevřít {0}.", inputPath));
        }

        // Code page 1250 is used for the configuration and for the output writer.
        Encoding windows1250 = System.Text.Encoding.GetEncoding(1250);

        // One configuration object is shared by the reader and the writer.
        var settings = new CsvConfiguration
        {
            Delimiter = "\t",
            IgnoreBlankLines = true,
            HasHeaderRecord = true,
            DetectColumnCountChanges = false,
            IgnoreReadingExceptions = false,
            SkipEmptyRecords = true,
            WillThrowOnMissingField = false,
            Encoding = windows1250,
            QuoteNoFields = true
        };
        settings.RegisterClassMap<ZCSD84Map>();

        Console.WriteLine(inputPath);
        List<ZCSD84> records;
        // No explicit encoding is passed to the StreamReader here.
        using (var csvIn = new CsvReader(new StreamReader(inputPath), settings))
        {
            // Materialize the records before the reader is disposed.
            records = csvIn.GetRecords<ZCSD84>().ToList();
        }

        Console.WriteLine(outputPath);
        // append: false, so an existing output file is overwritten.
        using (var csvOut = new CsvWriter(new StreamWriter(outputPath, false, windows1250), settings))
        {
            csvOut.WriteRecords(records);
        }
    }
}