在 SSIS 脚本组件中使用 C# 将多个平面文件行提取为一个
Extract Multiple Flat File Lines as One Using C# in SSIS Script Component
在 SSIS 脚本组件的脚本中,我尝试从平面文件创建一条记录,如下所示:
起始数据:
HEADER|INVOICE_1|INVOICE_DATE|VENDOR_ID|
DETAIL|SKU|DESCRIPTION|AMOUNT|
DETAIL|SKU|DESCRIPTION|AMOUNT|
HEADER|INVOICE_2|INVOICE_DATE|VENDOR_ID|
DETAIL|SKU|DESCRIPTION|AMOUNT|
HEADER|INVOICE_3|INVOICE_DATE|VENDOR_ID|
DETAIL|SKU|DESCRIPTION|AMOUNT|
最终结果:
HEADER|INVOICE_1|INVOICE_DATE|VENDOR_ID|SKU|DESCRIPTION|AMOUNT|
HEADER|INVOICE_1|INVOICE_DATE|VENDOR_ID|SKU|DESCRIPTION|AMOUNT|
HEADER|INVOICE_2|INVOICE_DATE|VENDOR_ID|SKU|DESCRIPTION|AMOUNT|
HEADER|INVOICE_3|INVOICE_DATE|VENDOR_ID|SKU|DESCRIPTION|AMOUNT|
最终结果将映射到 SSIS 中的 OLE DB 目标。
我最初编写的脚本使用 StreamReader,将每种行类型(标题行 Header、明细行 Detail)分别提取到各自的输出中,但很快意识到没有任何标识符可以把它们重新关联起来。
这是我目前写的:
// Reads the flat file whose path is held in the varFileName variable and routes
// each pipe-delimited line to the Header or Detail output according to its
// first field ("000" = header, "001" = detail). Lines with any other first
// field are ignored.
//
// NOTE: rows written to the two outputs carry no shared key, so they cannot be
// re-joined downstream.
//
// The reader is wrapped in a using statement so the file handle is released
// even if a row assignment throws.
using (System.IO.StreamReader reader = new System.IO.StreamReader(this.Variables.varFileName))
{
    while (!reader.EndOfStream)
    {
        string line = reader.ReadLine();
        string[] items = line.Split('|');

        // HEADER = 000
        if (items[0] == "000")
        {
            HeaderBuffer.AddRow();
            HeaderBuffer.H1 = items[0];
            HeaderBuffer.H2 = items[1];
            HeaderBuffer.H3 = items[2];
            HeaderBuffer.H4 = items[3];
            HeaderBuffer.H5 = items[4];
            HeaderBuffer.H6 = items[5];
            HeaderBuffer.H7 = items[6];
            HeaderBuffer.H8 = items[7];
            HeaderBuffer.H9 = items[8];
            HeaderBuffer.H10 = items[9];
            HeaderBuffer.H11 = items[10];
            HeaderBuffer.H12 = items[11];
            HeaderBuffer.H13 = items[12];
        }
        // DETAIL = 001
        else if (items[0] == "001")
        {
            DetailBuffer.AddRow();
            DetailBuffer.D1 = items[0];
            DetailBuffer.D2 = items[1];
            DetailBuffer.D3 = items[2];
            DetailBuffer.D4 = items[3];
            DetailBuffer.D5 = items[4];
            DetailBuffer.D6 = items[5];
        }
    }
}
提前感谢您的帮助!如果您有任何问题,请告诉我。
无需创建两个输出缓冲区即可实现此目的。如果该行是标题行,例如:
HEADER|INVOICE_1|INVOICE_DATE|VENDOR_ID|
则将这些字段的值存储在变量中;然后,对于每一个明细行,例如:
DETAIL|SKU|DESCRIPTION|AMOUNT|
创建一个输出行,并使用变量中的值加上该行自身的值。
下次出现 HEADER 行时,只需清除变量值并为其分配新值,然后重复该过程。
// Flattens a header/detail flat file into a single output: every DETAIL line
// ("001") produces one row that combines its own fields (D1..D6) with the
// fields of the most recent HEADER line ("000") seen before it (H1..H13).
//
// The header fields are initialized to empty strings: C# rejects reading an
// unassigned local (CS0165), and a detail line that appears before any header
// then simply gets empty header columns.
string H1 = string.Empty;
string H2 = string.Empty;
string H3 = string.Empty;
string H4 = string.Empty;
string H5 = string.Empty;
string H6 = string.Empty;
string H7 = string.Empty;
string H8 = string.Empty;
string H9 = string.Empty;
string H10 = string.Empty;
string H11 = string.Empty;
string H12 = string.Empty;
string H13 = string.Empty;

// using guarantees the file handle is released even if a row assignment throws.
using (System.IO.StreamReader reader = new System.IO.StreamReader(this.Variables.varFileName))
{
    while (!reader.EndOfStream)
    {
        string line = reader.ReadLine();
        string[] items = line.Split('|');

        // HEADER = 000: remember the header fields; no row is emitted here.
        if (items[0] == "000")
        {
            H1 = items[0];
            H2 = items[1];
            H3 = items[2];
            H4 = items[3];
            H5 = items[4];
            H6 = items[5];
            H7 = items[6];
            H8 = items[7];
            H9 = items[8];
            H10 = items[9];
            H11 = items[10];
            H12 = items[11];
            H13 = items[12];
        }
        // DETAIL = 001: emit one row = remembered header fields + this line's fields.
        else if (items[0] == "001")
        {
            DetailBuffer.AddRow();
            DetailBuffer.D1 = items[0];
            DetailBuffer.D2 = items[1];
            DetailBuffer.D3 = items[2];
            DetailBuffer.D4 = items[3];
            DetailBuffer.D5 = items[4];
            DetailBuffer.D6 = items[5];
            // Each header column receives the header variable of the same
            // number, so H1 is not duplicated and H13 is not dropped.
            DetailBuffer.H1 = H1;
            DetailBuffer.H2 = H2;
            DetailBuffer.H3 = H3;
            DetailBuffer.H4 = H4;
            DetailBuffer.H5 = H5;
            DetailBuffer.H6 = H6;
            DetailBuffer.H7 = H7;
            DetailBuffer.H8 = H8;
            DetailBuffer.H9 = H9;
            DetailBuffer.H10 = H10;
            DetailBuffer.H11 = H11;
            DetailBuffer.H12 = H12;
            DetailBuffer.H13 = H13;
        }
    }
}
@Hadi 的回答完全符合你的要求。但是,从架构上讲,最好分别使用一个标题(header)表和一个明细(detail)表。我沿用了你最初的方法——其实你一开始已经非常接近了。
// Keeps separate Header and Detail outputs, but stamps every detail row with
// the key of the HEADER line that precedes it, so the two outputs can be
// joined downstream. The Detail output needs an extra Hkey column for this.

// Index of the header field used as the key.
// NOTE(review): placeholder value. 1 is the invoice-number position in the
// sample data; adjust to whichever field is the real key.
int keynode = 1;

// Added: declared OUTSIDE the loop so the value survives from a HEADER line to
// the DETAIL lines that follow it. Initialized because C# rejects reading an
// unassigned local (CS0165).
string Hkey = string.Empty;

// using guarantees the file handle is released even if a row assignment throws.
using (System.IO.StreamReader reader = new System.IO.StreamReader(this.Variables.varFileName))
{
    while (!reader.EndOfStream)
    {
        string line = reader.ReadLine();
        string[] items = line.Split('|');

        // HEADER = 000
        if (items[0] == "000")
        {
            HeaderBuffer.AddRow();
            HeaderBuffer.H1 = items[0];
            HeaderBuffer.H2 = items[1];
            HeaderBuffer.H3 = items[2];
            HeaderBuffer.H4 = items[3];
            HeaderBuffer.H5 = items[4];
            HeaderBuffer.H6 = items[5];
            HeaderBuffer.H7 = items[6];
            HeaderBuffer.H8 = items[7];
            HeaderBuffer.H9 = items[8];
            HeaderBuffer.H10 = items[9];
            HeaderBuffer.H11 = items[10];
            HeaderBuffer.H12 = items[11];
            HeaderBuffer.H13 = items[12];
            // Added: remember this header's key for the detail lines below it.
            Hkey = items[keynode];
        }
        // DETAIL = 001
        else if (items[0] == "001")
        {
            DetailBuffer.AddRow();
            // Added: link the detail row back to its header.
            DetailBuffer.Hkey = Hkey;
            DetailBuffer.D1 = items[0];
            DetailBuffer.D2 = items[1];
            DetailBuffer.D3 = items[2];
            DetailBuffer.D4 = items[3];
            DetailBuffer.D5 = items[4];
            DetailBuffer.D6 = items[5];
        }
    }
}
在 SSIS 脚本组件的脚本中,我尝试从平面文件创建一条记录,如下所示:
起始数据:
HEADER|INVOICE_1|INVOICE_DATE|VENDOR_ID|
DETAIL|SKU|DESCRIPTION|AMOUNT|
DETAIL|SKU|DESCRIPTION|AMOUNT|
HEADER|INVOICE_2|INVOICE_DATE|VENDOR_ID|
DETAIL|SKU|DESCRIPTION|AMOUNT|
HEADER|INVOICE_3|INVOICE_DATE|VENDOR_ID|
DETAIL|SKU|DESCRIPTION|AMOUNT|
最终结果:
HEADER|INVOICE_1|INVOICE_DATE|VENDOR_ID|SKU|DESCRIPTION|AMOUNT|
HEADER|INVOICE_1|INVOICE_DATE|VENDOR_ID|SKU|DESCRIPTION|AMOUNT|
HEADER|INVOICE_2|INVOICE_DATE|VENDOR_ID|SKU|DESCRIPTION|AMOUNT|
HEADER|INVOICE_3|INVOICE_DATE|VENDOR_ID|SKU|DESCRIPTION|AMOUNT|
最终结果将映射到 SSIS 中的 OLE DB 目标。
我最初编写的脚本使用 StreamReader,将每种行类型(标题行 Header、明细行 Detail)分别提取到各自的输出中,但很快意识到没有任何标识符可以把它们重新关联起来。
这是我目前写的:
// Reads the flat file whose path is held in the varFileName variable and routes
// each pipe-delimited line to the Header or Detail output according to its
// first field ("000" = header, "001" = detail). Lines with any other first
// field are ignored.
//
// NOTE: rows written to the two outputs carry no shared key, so they cannot be
// re-joined downstream.
//
// The reader is wrapped in a using statement so the file handle is released
// even if a row assignment throws.
using (System.IO.StreamReader reader = new System.IO.StreamReader(this.Variables.varFileName))
{
    while (!reader.EndOfStream)
    {
        string line = reader.ReadLine();
        string[] items = line.Split('|');

        // HEADER = 000
        if (items[0] == "000")
        {
            HeaderBuffer.AddRow();
            HeaderBuffer.H1 = items[0];
            HeaderBuffer.H2 = items[1];
            HeaderBuffer.H3 = items[2];
            HeaderBuffer.H4 = items[3];
            HeaderBuffer.H5 = items[4];
            HeaderBuffer.H6 = items[5];
            HeaderBuffer.H7 = items[6];
            HeaderBuffer.H8 = items[7];
            HeaderBuffer.H9 = items[8];
            HeaderBuffer.H10 = items[9];
            HeaderBuffer.H11 = items[10];
            HeaderBuffer.H12 = items[11];
            HeaderBuffer.H13 = items[12];
        }
        // DETAIL = 001
        else if (items[0] == "001")
        {
            DetailBuffer.AddRow();
            DetailBuffer.D1 = items[0];
            DetailBuffer.D2 = items[1];
            DetailBuffer.D3 = items[2];
            DetailBuffer.D4 = items[3];
            DetailBuffer.D5 = items[4];
            DetailBuffer.D6 = items[5];
        }
    }
}
提前感谢您的帮助!如果您有任何问题,请告诉我。
无需创建两个输出缓冲区即可实现此目的。如果该行是标题行,例如:
HEADER|INVOICE_1|INVOICE_DATE|VENDOR_ID|
则将这些字段的值存储在变量中;然后,对于每一个明细行,例如:
DETAIL|SKU|DESCRIPTION|AMOUNT|
创建一个输出行,并使用变量中的值加上该行自身的值。
下次出现 HEADER 行时,只需清除变量值并为其分配新值,然后重复该过程。
// Flattens a header/detail flat file into a single output: every DETAIL line
// ("001") produces one row that combines its own fields (D1..D6) with the
// fields of the most recent HEADER line ("000") seen before it (H1..H13).
//
// The header fields are initialized to empty strings: C# rejects reading an
// unassigned local (CS0165), and a detail line that appears before any header
// then simply gets empty header columns.
string H1 = string.Empty;
string H2 = string.Empty;
string H3 = string.Empty;
string H4 = string.Empty;
string H5 = string.Empty;
string H6 = string.Empty;
string H7 = string.Empty;
string H8 = string.Empty;
string H9 = string.Empty;
string H10 = string.Empty;
string H11 = string.Empty;
string H12 = string.Empty;
string H13 = string.Empty;

// using guarantees the file handle is released even if a row assignment throws.
using (System.IO.StreamReader reader = new System.IO.StreamReader(this.Variables.varFileName))
{
    while (!reader.EndOfStream)
    {
        string line = reader.ReadLine();
        string[] items = line.Split('|');

        // HEADER = 000: remember the header fields; no row is emitted here.
        if (items[0] == "000")
        {
            H1 = items[0];
            H2 = items[1];
            H3 = items[2];
            H4 = items[3];
            H5 = items[4];
            H6 = items[5];
            H7 = items[6];
            H8 = items[7];
            H9 = items[8];
            H10 = items[9];
            H11 = items[10];
            H12 = items[11];
            H13 = items[12];
        }
        // DETAIL = 001: emit one row = remembered header fields + this line's fields.
        else if (items[0] == "001")
        {
            DetailBuffer.AddRow();
            DetailBuffer.D1 = items[0];
            DetailBuffer.D2 = items[1];
            DetailBuffer.D3 = items[2];
            DetailBuffer.D4 = items[3];
            DetailBuffer.D5 = items[4];
            DetailBuffer.D6 = items[5];
            // Each header column receives the header variable of the same
            // number, so H1 is not duplicated and H13 is not dropped.
            DetailBuffer.H1 = H1;
            DetailBuffer.H2 = H2;
            DetailBuffer.H3 = H3;
            DetailBuffer.H4 = H4;
            DetailBuffer.H5 = H5;
            DetailBuffer.H6 = H6;
            DetailBuffer.H7 = H7;
            DetailBuffer.H8 = H8;
            DetailBuffer.H9 = H9;
            DetailBuffer.H10 = H10;
            DetailBuffer.H11 = H11;
            DetailBuffer.H12 = H12;
            DetailBuffer.H13 = H13;
        }
    }
}
@Hadi 的回答完全符合你的要求。但是,从架构上讲,最好分别使用一个标题(header)表和一个明细(detail)表。我沿用了你最初的方法——其实你一开始已经非常接近了。
// Keeps separate Header and Detail outputs, but stamps every detail row with
// the key of the HEADER line that precedes it, so the two outputs can be
// joined downstream. The Detail output needs an extra Hkey column for this.

// Index of the header field used as the key.
// NOTE(review): placeholder value. 1 is the invoice-number position in the
// sample data; adjust to whichever field is the real key.
int keynode = 1;

// Added: declared OUTSIDE the loop so the value survives from a HEADER line to
// the DETAIL lines that follow it. Initialized because C# rejects reading an
// unassigned local (CS0165).
string Hkey = string.Empty;

// using guarantees the file handle is released even if a row assignment throws.
using (System.IO.StreamReader reader = new System.IO.StreamReader(this.Variables.varFileName))
{
    while (!reader.EndOfStream)
    {
        string line = reader.ReadLine();
        string[] items = line.Split('|');

        // HEADER = 000
        if (items[0] == "000")
        {
            HeaderBuffer.AddRow();
            HeaderBuffer.H1 = items[0];
            HeaderBuffer.H2 = items[1];
            HeaderBuffer.H3 = items[2];
            HeaderBuffer.H4 = items[3];
            HeaderBuffer.H5 = items[4];
            HeaderBuffer.H6 = items[5];
            HeaderBuffer.H7 = items[6];
            HeaderBuffer.H8 = items[7];
            HeaderBuffer.H9 = items[8];
            HeaderBuffer.H10 = items[9];
            HeaderBuffer.H11 = items[10];
            HeaderBuffer.H12 = items[11];
            HeaderBuffer.H13 = items[12];
            // Added: remember this header's key for the detail lines below it.
            Hkey = items[keynode];
        }
        // DETAIL = 001
        else if (items[0] == "001")
        {
            DetailBuffer.AddRow();
            // Added: link the detail row back to its header.
            DetailBuffer.Hkey = Hkey;
            DetailBuffer.D1 = items[0];
            DetailBuffer.D2 = items[1];
            DetailBuffer.D3 = items[2];
            DetailBuffer.D4 = items[3];
            DetailBuffer.D5 = items[4];
            DetailBuffer.D6 = items[5];
        }
    }
}