导入不带分隔符的自定义文本格式
Import custom text format without separators
我想把这种格式的 .txt 文件导入 SQL Server 表,或者把每个文本块转换成一行以竖线(|)分隔的记录。
您建议使用哪些工具或 C# 方案来解决这个问题?
如有任何建议,我们将不胜感激。
谢谢。
=================
INPUT (.txt file)
=================
ID: 37
Name: Josephy Murphy
Email: jmurphy@email.com
Description: bla, bla, bla, bla...
ID: 38
Name: Paul Newman
Email: pnewman@email.com
Description: bla, bla, bla, bla...
:
:
=========================
OUTPUT (SQL Server Table)
=========================
ID | Name | Email | Description
37 | Josephy Murphy | jmurphy@email.com | bla, bla, bla, bla...
38 | Paul Newman | pnewman@email.com | bla, bla, bla, bla...
:
:
解析这个文件其实很简单,我做这类项目已经 40 年了。请参阅下面的代码,我把解析结果放入了一个 DataTable。
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Data;
using System.IO;
namespace ConsoleApplication1
{
    /// <summary>
    /// Console sample that scans a text file for the "OUTPUT (SQL Server Table)" banner
    /// and loads the pipe-delimited rows that follow it into a <see cref="DataTable"/>.
    /// </summary>
    class Program
    {
        const string FILENAME = @"c:\temp\test.txt";

        /// <summary>Parser states; Main moves through them in declaration order.</summary>
        enum States
        {
            FIND_OUTPUT,       // scanning for the "OUTPUT (SQL Server Table)" banner line
            GET_SEPERATOR,     // the next non-blank line (the rule under the banner) is consumed
            GET_TABLE_HEADER,  // the next non-blank line (the column-name row) is consumed
            GET_DATA_TABLE,    // every following non-blank line is treated as a data row
            END                // never assigned by Main; kept so the enum itself is unchanged
        }

        /// <summary>
        /// Reads <c>FILENAME</c> line by line and fills a four-column table
        /// (ID, Name, Email, Description) from the rows below the table header.
        /// Reading stops at the first blank line after the data rows have started.
        /// </summary>
        /// <param name="args">Command-line arguments; not used.</param>
        static void Main(string[] args)
        {
            DataTable dt = new DataTable();
            dt.Columns.Add("ID", typeof(int));
            dt.Columns.Add("Name", typeof(string));
            dt.Columns.Add("Email", typeof(string));
            dt.Columns.Add("Description", typeof(string));

            States state = States.FIND_OUTPUT;

            // The using block releases the file handle even if a row throws.
            using (StreamReader reader = new StreamReader(FILENAME))
            {
                string inputLine;
                while ((inputLine = reader.ReadLine()) != null)
                {
                    inputLine = inputLine.Trim();
                    if (inputLine.Length == 0)
                    {
                        if (state == States.GET_DATA_TABLE)
                        {
                            break; // blank row marks the end of the data table
                        }
                        continue;
                    }

                    switch (state)
                    {
                        case States.FIND_OUTPUT:
                            if (inputLine.StartsWith("OUTPUT (SQL Server Table)"))
                            {
                                state = States.GET_SEPERATOR;
                            }
                            break;

                        case States.GET_SEPERATOR:
                            state = States.GET_TABLE_HEADER;
                            break;

                        case States.GET_TABLE_HEADER:
                            state = States.GET_DATA_TABLE;
                            break;

                        case States.GET_DATA_TABLE:
                        {
                            // Limit the split to the column count so a '|' inside the last
                            // field (Description) stays part of that field, and trim the
                            // padding around each value.
                            string[] dataArray = inputLine
                                .Split(new char[] { '|' }, dt.Columns.Count)
                                .Select(x => x.Trim())
                                .ToArray();

                            int id;
                            if (dataArray.Length != dt.Columns.Count || !int.TryParse(dataArray[0], out id))
                            {
                                // Filler lines such as ":" or short rows cannot be stored in
                                // the typed table; report and skip them instead of crashing.
                                Console.Error.WriteLine("Skipping malformed row: " + inputLine);
                                break;
                            }

                            dt.Rows.Add(id, dataArray[1], dataArray[2], dataArray[3]);
                            break;
                        }
                    }
                }
            }
        }
    }
}
用 Python 很容易:
input='''\
ID: 37
Name: Josephy Murphy
Email: jmurphy@email.com
Description: bla, bla, bla, bla...
ID: 38
Name: Paul Newman
Email: pnewman@email.com
Description: bla, bla, bla, bla...'''
import re

fields=('ID', 'Name', 'Email', 'Description')


def parse_records(text, fields):
    """Collect the "Key: Value" lines of every record in ``text`` into per-field lists.

    A record starts at a line beginning with "ID" and runs up to the next such
    line or the end of the text.  Returns a dict mapping each name in ``fields``
    to the list of its values, in record order.

    Raises KeyError for a key that is not in ``fields`` and ValueError for a
    non-blank line that contains no colon.
    """
    out = {k: [] for k in fields}
    for m in re.finditer(r'(^ID.*?(?=^ID|\Z))', text, flags=re.S | re.M):
        for line in m.group(1).splitlines():
            if not line.strip():
                continue
            # Split on the first colon only, so values that themselves contain
            # ':' (URLs, times, ...) are kept whole instead of breaking the unpack.
            k, v = [part.strip() for part in line.split(':', 1)]
            out[k].append(v)
    return out


out = parse_records(input, fields)
# you now have all the data in a structure that could be used with SQL
# just print to show...
fmt='{:3}| {:20}| {:20}| {:20}'
# print(...) with a single argument behaves the same on Python 2 and Python 3
print(fmt.format(*fields))
for i in range(len(out['ID'])):
    print(fmt.format(*[out[k][i] for k in fields]))
打印:
ID | Name | Email | Description
37 | Josephy Murphy | jmurphy@email.com | bla, bla, bla, bla...
38 | Paul Newman | pnewman@email.com | bla, bla, bla, bla...
我现在改为直接写入 SQL Server,而不是写入 DataTable。
您需要填入连接字符串,并在 Insert SQL 语句中填入 SQL 表的名称。
如果您确实要插入那么多行,我会考虑使用 SQL Server 自带的 SQLCMD.EXE。它可以接受任意分隔符的数据,也可以接受字符串形式的 SQL 语句。我从未用它执行过 Insert,通常只用它执行 Select 查询。SQL Server 还附带许多其他命令行可执行文件,详见下面的网页:
https://msdn.microsoft.com/en-us/library/ms162816.aspx
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Data;
using System.IO;
using System.Data.SqlClient;
namespace ConsoleApplication1
{
    /// <summary>
    /// Console sample that scans a text file for the "OUTPUT (SQL Server Table)" banner
    /// and inserts each pipe-delimited row that follows it into the SQL table "table1".
    /// </summary>
    class Program
    {
        const string FILENAME = @"c:\temp\test.txt";

        /// <summary>Parser states; Main moves through them in declaration order.</summary>
        enum States
        {
            FIND_OUTPUT,       // scanning for the "OUTPUT (SQL Server Table)" banner line
            GET_SEPERATOR,     // the next non-blank line (the rule under the banner) is consumed
            GET_TABLE_HEADER,  // the next non-blank line supplies the column names
            GET_DATA_TABLE,    // every following non-blank line is inserted as a row
            END                // never assigned by Main; kept so the enum itself is unchanged
        }

        /// <summary>
        /// Reads <c>FILENAME</c> line by line, takes the column names from the header row
        /// and executes one parameterized INSERT per data row. Reading stops at the first
        /// blank line after the data rows have started.
        /// </summary>
        /// <param name="args">Command-line arguments; not used.</param>
        /// <exception cref="InvalidDataException">The header row contains no column names.</exception>
        static void Main(string[] args)
        {
            States state = States.FIND_OUTPUT;
            string connStr = "Enter your connection string here";
            string[] headers = null;

            // using blocks dispose the file, the connection and the command even when
            // an insert throws part-way through the file.
            using (StreamReader reader = new StreamReader(FILENAME))
            using (SqlConnection conn = new SqlConnection(connStr))
            using (SqlCommand cmd = new SqlCommand())
            {
                conn.Open();
                cmd.Connection = conn;

                string inputLine;
                while ((inputLine = reader.ReadLine()) != null)
                {
                    inputLine = inputLine.Trim();
                    if (inputLine.Length == 0)
                    {
                        if (state == States.GET_DATA_TABLE)
                        {
                            break; // blank row marks the end of the data table
                        }
                        continue;
                    }

                    switch (state)
                    {
                        case States.FIND_OUTPUT:
                            if (inputLine.StartsWith("OUTPUT (SQL Server Table)"))
                            {
                                state = States.GET_SEPERATOR;
                            }
                            break;

                        case States.GET_SEPERATOR:
                            state = States.GET_TABLE_HEADER;
                            break;

                        case States.GET_TABLE_HEADER:
                        {
                            headers = inputLine
                                .Split(new char[] { '|' }, StringSplitOptions.RemoveEmptyEntries)
                                .Select(x => x.Trim())
                                .Where(x => x.Length > 0)
                                .ToArray();
                            if (headers.Length == 0)
                            {
                                throw new InvalidDataException("Header row contains no column names.");
                            }

                            // Identifiers cannot be passed as parameters, so bracket-quote
                            // them (doubling any ']') to keep file content out of the SQL syntax.
                            string columnList = string.Join(",",
                                headers.Select(h => "[" + h.Replace("]", "]]") + "]").ToArray());
                            string parameterList = string.Join(",",
                                headers.Select((h, i) => "@p" + i).ToArray());

                            // The statement is built once; only parameter values change per row.
                            cmd.CommandText = string.Format("Insert into table1 ({0}) values ({1})", columnList, parameterList);
                            cmd.Parameters.Clear();
                            for (int i = 0; i < headers.Length; i++)
                            {
                                cmd.Parameters.Add("@p" + i, SqlDbType.NVarChar, -1);
                            }

                            state = States.GET_DATA_TABLE;
                            break;
                        }

                        case States.GET_DATA_TABLE:
                        {
                            // Keep empty fields (dropping them would shift later values into
                            // the wrong column) and cap the split at the column count so a '|'
                            // inside the last field stays part of that field.
                            string[] dataArray = inputLine
                                .Split(new char[] { '|' }, headers.Length)
                                .Select(x => x.Trim())
                                .ToArray();

                            if (dataArray.Length != headers.Length)
                            {
                                // Filler lines such as ":" or short rows do not match the
                                // header; report and skip them instead of sending bad SQL.
                                Console.Error.WriteLine("Skipping malformed row: " + inputLine);
                                break;
                            }

                            for (int i = 0; i < dataArray.Length; i++)
                            {
                                cmd.Parameters[i].Value = dataArray[i];
                            }
                            cmd.ExecuteNonQuery();
                            break;
                        }
                    }
                }
            }
        }
    }
}
我想把这种格式的 .txt 文件导入 SQL Server 表,或者把每个文本块转换成一行以竖线(|)分隔的记录。
您建议使用哪些工具或 C# 方案来解决这个问题?
如有任何建议,我们将不胜感激。
谢谢。
=================
INPUT (.txt file)
=================
ID: 37
Name: Josephy Murphy
Email: jmurphy@email.com
Description: bla, bla, bla, bla...
ID: 38
Name: Paul Newman
Email: pnewman@email.com
Description: bla, bla, bla, bla...
:
:
=========================
OUTPUT (SQL Server Table)
=========================
ID | Name | Email | Description
37 | Josephy Murphy | jmurphy@email.com | bla, bla, bla, bla...
38 | Paul Newman | pnewman@email.com | bla, bla, bla, bla...
:
:
解析这个文件其实很简单,我做这类项目已经 40 年了。请参阅下面的代码,我把解析结果放入了一个 DataTable。
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Data;
using System.IO;
namespace ConsoleApplication1
{
    /// <summary>
    /// Console sample that scans a text file for the "OUTPUT (SQL Server Table)" banner
    /// and loads the pipe-delimited rows that follow it into a <see cref="DataTable"/>.
    /// </summary>
    class Program
    {
        const string FILENAME = @"c:\temp\test.txt";

        /// <summary>Parser states; Main moves through them in declaration order.</summary>
        enum States
        {
            FIND_OUTPUT,       // scanning for the "OUTPUT (SQL Server Table)" banner line
            GET_SEPERATOR,     // the next non-blank line (the rule under the banner) is consumed
            GET_TABLE_HEADER,  // the next non-blank line (the column-name row) is consumed
            GET_DATA_TABLE,    // every following non-blank line is treated as a data row
            END                // never assigned by Main; kept so the enum itself is unchanged
        }

        /// <summary>
        /// Reads <c>FILENAME</c> line by line and fills a four-column table
        /// (ID, Name, Email, Description) from the rows below the table header.
        /// Reading stops at the first blank line after the data rows have started.
        /// </summary>
        /// <param name="args">Command-line arguments; not used.</param>
        static void Main(string[] args)
        {
            DataTable dt = new DataTable();
            dt.Columns.Add("ID", typeof(int));
            dt.Columns.Add("Name", typeof(string));
            dt.Columns.Add("Email", typeof(string));
            dt.Columns.Add("Description", typeof(string));

            States state = States.FIND_OUTPUT;

            // The using block releases the file handle even if a row throws.
            using (StreamReader reader = new StreamReader(FILENAME))
            {
                string inputLine;
                while ((inputLine = reader.ReadLine()) != null)
                {
                    inputLine = inputLine.Trim();
                    if (inputLine.Length == 0)
                    {
                        if (state == States.GET_DATA_TABLE)
                        {
                            break; // blank row marks the end of the data table
                        }
                        continue;
                    }

                    switch (state)
                    {
                        case States.FIND_OUTPUT:
                            if (inputLine.StartsWith("OUTPUT (SQL Server Table)"))
                            {
                                state = States.GET_SEPERATOR;
                            }
                            break;

                        case States.GET_SEPERATOR:
                            state = States.GET_TABLE_HEADER;
                            break;

                        case States.GET_TABLE_HEADER:
                            state = States.GET_DATA_TABLE;
                            break;

                        case States.GET_DATA_TABLE:
                        {
                            // Limit the split to the column count so a '|' inside the last
                            // field (Description) stays part of that field, and trim the
                            // padding around each value.
                            string[] dataArray = inputLine
                                .Split(new char[] { '|' }, dt.Columns.Count)
                                .Select(x => x.Trim())
                                .ToArray();

                            int id;
                            if (dataArray.Length != dt.Columns.Count || !int.TryParse(dataArray[0], out id))
                            {
                                // Filler lines such as ":" or short rows cannot be stored in
                                // the typed table; report and skip them instead of crashing.
                                Console.Error.WriteLine("Skipping malformed row: " + inputLine);
                                break;
                            }

                            dt.Rows.Add(id, dataArray[1], dataArray[2], dataArray[3]);
                            break;
                        }
                    }
                }
            }
        }
    }
}
用 Python 很容易:
input='''\
ID: 37
Name: Josephy Murphy
Email: jmurphy@email.com
Description: bla, bla, bla, bla...
ID: 38
Name: Paul Newman
Email: pnewman@email.com
Description: bla, bla, bla, bla...'''
import re

fields=('ID', 'Name', 'Email', 'Description')


def parse_records(text, fields):
    """Collect the "Key: Value" lines of every record in ``text`` into per-field lists.

    A record starts at a line beginning with "ID" and runs up to the next such
    line or the end of the text.  Returns a dict mapping each name in ``fields``
    to the list of its values, in record order.

    Raises KeyError for a key that is not in ``fields`` and ValueError for a
    non-blank line that contains no colon.
    """
    out = {k: [] for k in fields}
    for m in re.finditer(r'(^ID.*?(?=^ID|\Z))', text, flags=re.S | re.M):
        for line in m.group(1).splitlines():
            if not line.strip():
                continue
            # Split on the first colon only, so values that themselves contain
            # ':' (URLs, times, ...) are kept whole instead of breaking the unpack.
            k, v = [part.strip() for part in line.split(':', 1)]
            out[k].append(v)
    return out


out = parse_records(input, fields)
# you now have all the data in a structure that could be used with SQL
# just print to show...
fmt='{:3}| {:20}| {:20}| {:20}'
# print(...) with a single argument behaves the same on Python 2 and Python 3
print(fmt.format(*fields))
for i in range(len(out['ID'])):
    print(fmt.format(*[out[k][i] for k in fields]))
打印:
ID | Name | Email | Description
37 | Josephy Murphy | jmurphy@email.com | bla, bla, bla, bla...
38 | Paul Newman | pnewman@email.com | bla, bla, bla, bla...
我现在改为直接写入 SQL Server,而不是写入 DataTable。您需要填入连接字符串,并在 Insert SQL 语句中填入 SQL 表的名称。
如果您确实要插入那么多行,我会考虑使用 SQL Server 自带的 SQLCMD.EXE。它可以接受任意分隔符的数据,也可以接受字符串形式的 SQL 语句。我从未用它执行过 Insert,通常只用它执行 Select 查询。SQL Server 还附带许多其他命令行可执行文件,详见下面的网页: https://msdn.microsoft.com/en-us/library/ms162816.aspx
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Data;
using System.IO;
using System.Data.SqlClient;
namespace ConsoleApplication1
{
    /// <summary>
    /// Console sample that scans a text file for the "OUTPUT (SQL Server Table)" banner
    /// and inserts each pipe-delimited row that follows it into the SQL table "table1".
    /// </summary>
    class Program
    {
        const string FILENAME = @"c:\temp\test.txt";

        /// <summary>Parser states; Main moves through them in declaration order.</summary>
        enum States
        {
            FIND_OUTPUT,       // scanning for the "OUTPUT (SQL Server Table)" banner line
            GET_SEPERATOR,     // the next non-blank line (the rule under the banner) is consumed
            GET_TABLE_HEADER,  // the next non-blank line supplies the column names
            GET_DATA_TABLE,    // every following non-blank line is inserted as a row
            END                // never assigned by Main; kept so the enum itself is unchanged
        }

        /// <summary>
        /// Reads <c>FILENAME</c> line by line, takes the column names from the header row
        /// and executes one parameterized INSERT per data row. Reading stops at the first
        /// blank line after the data rows have started.
        /// </summary>
        /// <param name="args">Command-line arguments; not used.</param>
        /// <exception cref="InvalidDataException">The header row contains no column names.</exception>
        static void Main(string[] args)
        {
            States state = States.FIND_OUTPUT;
            string connStr = "Enter your connection string here";
            string[] headers = null;

            // using blocks dispose the file, the connection and the command even when
            // an insert throws part-way through the file.
            using (StreamReader reader = new StreamReader(FILENAME))
            using (SqlConnection conn = new SqlConnection(connStr))
            using (SqlCommand cmd = new SqlCommand())
            {
                conn.Open();
                cmd.Connection = conn;

                string inputLine;
                while ((inputLine = reader.ReadLine()) != null)
                {
                    inputLine = inputLine.Trim();
                    if (inputLine.Length == 0)
                    {
                        if (state == States.GET_DATA_TABLE)
                        {
                            break; // blank row marks the end of the data table
                        }
                        continue;
                    }

                    switch (state)
                    {
                        case States.FIND_OUTPUT:
                            if (inputLine.StartsWith("OUTPUT (SQL Server Table)"))
                            {
                                state = States.GET_SEPERATOR;
                            }
                            break;

                        case States.GET_SEPERATOR:
                            state = States.GET_TABLE_HEADER;
                            break;

                        case States.GET_TABLE_HEADER:
                        {
                            headers = inputLine
                                .Split(new char[] { '|' }, StringSplitOptions.RemoveEmptyEntries)
                                .Select(x => x.Trim())
                                .Where(x => x.Length > 0)
                                .ToArray();
                            if (headers.Length == 0)
                            {
                                throw new InvalidDataException("Header row contains no column names.");
                            }

                            // Identifiers cannot be passed as parameters, so bracket-quote
                            // them (doubling any ']') to keep file content out of the SQL syntax.
                            string columnList = string.Join(",",
                                headers.Select(h => "[" + h.Replace("]", "]]") + "]").ToArray());
                            string parameterList = string.Join(",",
                                headers.Select((h, i) => "@p" + i).ToArray());

                            // The statement is built once; only parameter values change per row.
                            cmd.CommandText = string.Format("Insert into table1 ({0}) values ({1})", columnList, parameterList);
                            cmd.Parameters.Clear();
                            for (int i = 0; i < headers.Length; i++)
                            {
                                cmd.Parameters.Add("@p" + i, SqlDbType.NVarChar, -1);
                            }

                            state = States.GET_DATA_TABLE;
                            break;
                        }

                        case States.GET_DATA_TABLE:
                        {
                            // Keep empty fields (dropping them would shift later values into
                            // the wrong column) and cap the split at the column count so a '|'
                            // inside the last field stays part of that field.
                            string[] dataArray = inputLine
                                .Split(new char[] { '|' }, headers.Length)
                                .Select(x => x.Trim())
                                .ToArray();

                            if (dataArray.Length != headers.Length)
                            {
                                // Filler lines such as ":" or short rows do not match the
                                // header; report and skip them instead of sending bad SQL.
                                Console.Error.WriteLine("Skipping malformed row: " + inputLine);
                                break;
                            }

                            for (int i = 0; i < dataArray.Length; i++)
                            {
                                cmd.Parameters[i].Value = dataArray[i];
                            }
                            cmd.ExecuteNonQuery();
                            break;
                        }
                    }
                }
            }
        }
    }
}