导入不带分隔符的自定义文本格式

Import custom text format without separators

我想将这种格式的 .txt 文件导入 SQL Server 表,或者将每个文本块转换为一行以竖线(|)分隔的文本。

您建议使用哪些工具或 C# 解决方案来解决此问题?

如有任何建议,我们将不胜感激。

谢谢。

=================
INPUT (.txt file)
=================
ID: 37
Name: Josephy Murphy
Email: jmurphy@email.com
Description: bla, bla, bla, bla...

ID: 38
Name: Paul Newman
Email: pnewman@email.com
Description: bla, bla, bla, bla...

:
:

=========================
OUTPUT (SQL Server Table)
=========================

ID | Name           | Email             | Description  
37 | Josephy Murphy | jmurphy@email.com | bla, bla, bla, bla...
38 | Paul Newman    | pnewman@email.com | bla, bla, bla, bla...

:
: 

解析这个文件其实很简单,我做这类项目已经 40 年了。请参阅下面的代码。我将结果放入 DataTable 中。

using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Data;
using System.IO;

namespace ConsoleApplication1
{
    // Reads the pipe-delimited table that follows the "OUTPUT (SQL Server Table)"
    // banner in FILENAME and loads its rows into an in-memory DataTable.
    class Program
    {
        const string FILENAME = @"c:\temp\test.txt";

        // Parser states, visited in declaration order.
        enum States
        {
            FIND_OUTPUT,      // scanning for the banner line
            GET_SEPERATOR,    // the next non-blank line is skipped
            GET_TABLE_HEADER, // the next non-blank line is skipped
            GET_DATA_TABLE,   // every non-blank line is split into a data row
            END               // not used by the parser
        }

        // Entry point: builds the four-column table and fills it from the file.
        // Blank lines are ignored until the data rows start; the first blank line
        // after that ends the table.
        static void Main(string[] args)
        {
            DataTable dt = new DataTable();
            dt.Columns.Add("ID", typeof(int));
            dt.Columns.Add("Name", typeof(string));
            dt.Columns.Add("Email", typeof(string));
            dt.Columns.Add("Description", typeof(string));

            States state = States.FIND_OUTPUT;

            // 'using' releases the file handle even when a malformed row makes
            // dt.Rows.Add throw.
            using (StreamReader reader = new StreamReader(FILENAME))
            {
                string inputLine;
                while ((inputLine = reader.ReadLine()) != null)
                {
                    inputLine = inputLine.Trim();
                    if (inputLine.Length > 0)
                    {
                        switch (state)
                        {
                            case States.FIND_OUTPUT:
                                // Ordinal: this is a fixed marker, not linguistic text.
                                if (inputLine.StartsWith("OUTPUT (SQL Server Table)", StringComparison.Ordinal))
                                    state = States.GET_SEPERATOR;
                                break;
                            case States.GET_SEPERATOR:
                                state = States.GET_TABLE_HEADER;
                                break;
                            case States.GET_TABLE_HEADER:
                                state = States.GET_DATA_TABLE;
                                break;
                            case States.GET_DATA_TABLE:
                                // Cells are padded with spaces to line up the columns;
                                // trim each one so the padding is not stored in the table.
                                object[] cells = inputLine
                                    .Split(new char[] { '|' })
                                    .Select(cell => (object)cell.Trim())
                                    .ToArray();
                                dt.Rows.Add(cells);
                                break;
                        }
                    }
                    else
                    {
                        if (state == States.GET_DATA_TABLE)
                            break; //exit while loop if blank row at end of data table
                    }
                }
            }
        }
    }
}
​

Python容易:

# Sample data in the "key: value" block format: each record starts with an
# "ID" line and records are separated by blank lines.
# NOTE: this name shadows the built-in input(); it is kept so that anything
# referring to the sample text by this name still works.
input='''\
ID: 37
Name: Josephy Murphy
Email: jmurphy@email.com
Description: bla, bla, bla, bla...

ID: 38
Name: Paul Newman
Email: pnewman@email.com
Description: bla, bla, bla, bla...'''

import re
fields=('ID', 'Name', 'Email', 'Description')


def parse_records(text, fields=fields):
    """Parse "key: value" record blocks into column lists.

    A record runs from a line starting with "ID" up to the next such line
    (or the end of the text). Returns a dict mapping each name in `fields`
    to a list with one entry per record; a field missing from a record is
    stored as '' so that all columns stay the same length.

    Raises KeyError for a key that is not in `fields` and ValueError for a
    non-blank line that contains no colon.
    """
    columns = {k: [] for k in fields}
    for m in re.finditer(r'(^ID.*?(?=^ID|\Z))', text, flags=re.S | re.M):
        record = {}
        for line in m.group(1).splitlines():
            if not line.strip():
                continue
            # Split on the FIRST colon only: values such as URLs or times
            # contain colons themselves.
            key, sep, value = line.partition(':')
            if not sep:
                raise ValueError('expected "key: value", got %r' % line)
            key = key.strip()
            if key not in columns:
                raise KeyError(key)
            record[key] = value.strip()
        for k in fields:
            columns[k].append(record.get(k, ''))
    return columns


out=parse_records(input)

# you now have all the data in a structure that could be used with SQL
# just print to show...
fmt='{:3}| {:20}| {:20}| {:20}'
# print(...) with a single argument behaves the same on Python 2 and 3
print(fmt.format(*fields))
for i in range(len(out['ID'])):
    print(fmt.format(*[out[k][i] for k in fields]))

打印:

ID | Name                | Email               | Description         
37 | Josephy Murphy      | jmurphy@email.com   | bla, bla, bla, bla...
38 | Paul Newman         | pnewman@email.com   | bla, bla, bla, bla...

我现在直接写入 SQL Server,而不是写入 DataTable。您需要填入连接字符串,并在 Insert SQL 语句中填入 SQL 表的名称。

如果您真的要添加那么多行,我会考虑使用 SQL 服务器附带的 SQLCMD.EXE。它接受数据的任何分隔符和字符串 SQL。我从来没有将它与 Insert 一起使用,我通常将它用于 Select SQL。有许多不同的命令行可执行文件可与 SQL 服务器一起使用。见下面的网页 https://msdn.microsoft.com/en-us/library/ms162816.aspx

using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Data;
using System.IO;
using System.Data.SqlClient;

namespace ConsoleApplication1
{
    // Reads the pipe-delimited table that follows the "OUTPUT (SQL Server Table)"
    // banner in FILENAME and inserts each data row into the SQL table "table1".
    class Program
    {
        const string FILENAME = @"c:\temp\test.txt";

        // Parser states, visited in declaration order.
        enum States
        {
            FIND_OUTPUT,      // scanning for the banner line
            GET_SEPERATOR,    // the next non-blank line is skipped
            GET_TABLE_HEADER, // the next non-blank line supplies the column names
            GET_DATA_TABLE,   // every non-blank line is inserted as a row
            END               // not used by the parser
        }

        // Entry point: opens the file and the connection, then runs one INSERT
        // per data row. Blank lines are ignored until the data rows start; the
        // first blank line after that ends the table.
        static void Main(string[] args)
        {
            States state = States.FIND_OUTPUT;
            string connStr = "Enter your connection string here";

            // 'using' closes the file, the connection and the command even when
            // an INSERT fails part-way through the file.
            using (StreamReader reader = new StreamReader(FILENAME))
            using (SqlConnection conn = new SqlConnection(connStr))
            using (SqlCommand cmd = new SqlCommand())
            {
                conn.Open();
                cmd.Connection = conn;

                string[] headers = null;
                string inputLine;
                while ((inputLine = reader.ReadLine()) != null)
                {
                    inputLine = inputLine.Trim();
                    if (inputLine.Length > 0)
                    {
                        switch (state)
                        {
                            case States.FIND_OUTPUT:
                                // Ordinal: this is a fixed marker, not linguistic text.
                                if (inputLine.StartsWith("OUTPUT (SQL Server Table)", StringComparison.Ordinal))
                                    state = States.GET_SEPERATOR;
                                break;
                            case States.GET_SEPERATOR:
                                state = States.GET_TABLE_HEADER;
                                break;
                            case States.GET_TABLE_HEADER:
                                headers = inputLine
                                    .Split(new char[] { '|' }, StringSplitOptions.RemoveEmptyEntries)
                                    .Select(x => x.Trim())
                                    .Where(x => x.Length > 0)
                                    .ToArray();
                                PrepareInsert(cmd, headers);
                                state = States.GET_DATA_TABLE;
                                break;
                            case States.GET_DATA_TABLE:
                                // Keep empty cells: dropping them would shift the
                                // remaining values into the wrong columns.
                                string[] dataArray = inputLine
                                    .Split(new char[] { '|' })
                                    .Select(x => x.Trim())
                                    .ToArray();
                                if (dataArray.Length != headers.Length)
                                {
                                    throw new InvalidDataException(string.Format(
                                        "Expected {0} fields but found {1} in row: {2}",
                                        headers.Length, dataArray.Length, inputLine));
                                }
                                for (int i = 0; i < dataArray.Length; i++)
                                {
                                    cmd.Parameters[i].Value = dataArray[i];
                                }
                                cmd.ExecuteNonQuery();
                                break;
                        }
                    }
                    else
                    {
                        if (state == States.GET_DATA_TABLE)
                            break; //exit while loop if blank row at end of data table
                    }
                }
            }
        }

        // Builds the parameterized INSERT statement for the given column names and
        // registers one string parameter (@p0, @p1, ...) per column on the command.
        // Values are bound as parameters, never concatenated into the SQL text, so
        // quotes or SQL fragments inside the file cannot alter the statement.
        static void PrepareInsert(SqlCommand cmd, string[] headers)
        {
            // Column names cannot be sent as parameters; bracket-quote them and
            // double any ']' so a header cannot break out of the identifier.
            string columnList = string.Join(",",
                headers.Select(h => "[" + h.Replace("]", "]]") + "]").ToArray());
            string parameterList = string.Join(",",
                headers.Select((h, i) => "@p" + i).ToArray());

            cmd.CommandText = string.Format("Insert into table1 ({0}) values ({1})", columnList, parameterList);

            cmd.Parameters.Clear();
            for (int i = 0; i < headers.Length; i++)
            {
                // Size -1 means nvarchar(max); SQL Server converts the text to the
                // target column type, as it did for the quoted literals before.
                cmd.Parameters.Add("@p" + i, SqlDbType.NVarChar, -1);
            }
        }
    }
}
​