尝试将文本文件中的数据解析为以 | 符号分隔的单行

Trying to parse data from text file to single line separated by | symbol

我有一个包含以下数据的文本文件:

#294448
ORDER_STATUS1098988 VALID
24.09.2021 05:17 AM
Customer_ID: 5524335312265537
MMYY: 08/23
Txn_ID: 74627
Name: Krystal Flowers
E-mail: abc@gmail.com
Phone: 9109153030
Address_original: 1656 W Alvarado dr, Pueblo West, Colorado, 81007, United States
ZIP_City_State_Country: -
Type: -
Subtype: -

#294448
ORDER_STATUS1097728 VALID
24.09.2021 05:17 AM
Customer_ID: 5524331591654699
MMYY: 11/23
Txn_ID: 45617
Name: Allen E Prieto
E-mail: xyz@gmail.com
Phone: 5056994899
Address_original: 655 Ives Dairy Rd, Miami, Florida, 33179, United States
ZIP_City_State_Country: -
Type: -
Subtype: -

#294445
ORDER_STATUS537099 VALID
24.09.2021 05:01 AM
Customer_ID: 4118230087730234
MMYY: 09/25
Txn_ID: 24430
Name: tera casey
Phone: 7405863997
Address_original: 13705 Neptune Lane, New Concord, Ohio State, 43762, PE
ZIP_City_State_Country: 43762, New Concord, Ohio State, UNITED STATES
Subtype: N/A

#294445
ORDER_STATUS489401 VALID
24.09.2021 05:01 AM
Customer_ID: 4118230054806983
MMYY: 07/24
Txn_ID: 13183
Name: Nancy Lambert
Address_original: 2600 loop drive, N, N, 44113, PE
ZIP_City_State_Country: 44113, N, N, UNITED STATES
Subtype: N/A

#294445
ORDER_STATUS437355 VALID
24.09.2021 05:01 AM
Customer_ID: 4118230061412668
MMYY: 05/24
Txn_ID: 55474
Name: Sheets Sherry
E-mail: tyd@gmail.com
Phone: (567) 241-5074
Address_original: 37 Martha Avenue, Mansfield, Ohio, 44905, US
ZIP_City_State_Country: 44905, Mansfield, Ohio, UNITED STATES
Subtype: N/A

数据需要按如下方式整理:每条记录只保留 Customer_ID、MMYY 和 Txn_ID,并显示在以 | 符号分隔的单行中。应忽略此文本文件中的所有其他内容。

示例:

5524335312265537 | 08/23 | 74627
5524331591654699 | 11/23 | 45617
4118230087730234 | 09/25 | 24430
4118230054806983 | 07/24 | 13183
4118230061412668 | 05/24 | 55474

这是我尝试过的方法,但打开文本文件后,我收到了“Invalid file!”(无效文件)消息。

// Click handler: lets the user pick a file, reads it line by line and tries to
// pull out the Customer_ID / MMYY / Txn_ID values. Any exception raised while
// reading or parsing is swallowed by the bare catch below and reported only as
// "Invalid file!".
//
// NOTE(review): with the sample data shown above this handler always ends in
// the catch block; the comments on the individual lines explain why.
private void openFile_Click(object sender, EventArgs e)
        {
            OpenFileDialog ofdtmp = new OpenFileDialog();
            if (ofdtmp.ShowDialog() == System.Windows.Forms.DialogResult.OK)
            {
                try
                {
                    using (StreamReader sr = File.OpenText(ofdtmp.FileName))
                    {
                        // Peek() returns -1 once there is nothing left to read.
                        while (sr.Peek() >= 0)
                        {
                            string line = sr.ReadLine();
                            line = line.Trim();
                            // Skip blank lines and every line that carries one of the unwanted labels.
                            // The "." test is meant for the date/time line, but it skips ANY line
                            // containing a dot.
                            if (line.ToString() == "" || line.Contains("#") || line.Contains("ORDER_STATUS") || /*Exclude Date & Time*/ line.Contains(".") || line.Contains("Name:") || line.Contains("E-mail:") || line.Contains("Phone:") || line.Contains("Address_original:") || line.Contains("ZIP_City_State_Country:") || line.Contains("Type:") || line.Contains("Subtype:"))
                                continue; //skip

                            // The label in the sample data is "Customer_ID:" (with an underscore),
                            // so this test never matches and the branch is never entered.
                            if (line.Contains("CustomerID: "))
                            {
                                // Substring(12, 29) needs a line of at least 41 characters and throws
                                // ArgumentOutOfRangeException otherwise. The value is stored in a
                                // local that is never read afterwards.
                                string customID = line.Substring(12, 29).Trim();
                                continue;
                            }

                            if (line.Contains("MMYY: "))
                            {
                                // A line such as "MMYY: 08/23" is 11 characters long, but
                                // Substring(6, 11) needs 17, so this throws
                                // ArgumentOutOfRangeException. That exception is what surfaces
                                // as "Invalid file!". The second argument is a length, not an end index.
                                string mmyy = line.Substring(6, 11).Trim();
                                continue;
                            }

                            if (line.Contains("Txn_ID: "))
                            {
                                // Same problem: "Txn_ID: 74627" is 13 characters, Substring(10, 16)
                                // needs 26. The label "Txn_ID: " is 8 characters, so a start index
                                // of 10 would also drop the first two digits of the value.
                                string txnID = line.Substring(10, 16).Trim();
                                continue;
                            }   
                        }
                        // ToString() on the reader does not return the file's text (it yields the
                        // object's type name), and none of the values extracted above are used here.
                        richTextBox.Text = sr.ToString();
                    }
                }
                // Bare catch: every exception type is caught and its details are discarded.
                catch
                {
                    MessageBox.Show("Invalid file!");
                }
            }
        }

我在类似的在线帖子中查找了替代解决方案,应用正则表达式似乎是正确的方法。困难在于弄清楚如何跳过文本文件中所有不需要的字符和符号,只提取所需的数据。这个问题的最佳解决方案是什么?

解决方案更新:

using System;
using System.IO;
using System.Security.Cryptography;
using System.Text;
using System.Text.RegularExpressions;
using System.Windows.Forms;

namespace RegExTool
{
    /// <summary>
    /// Form that loads a text file into <c>richTextBox1</c>, extracts the
    /// Customer_ID / MMYY / Txn_ID values with a regular expression and shows them
    /// in <c>richTextBox2</c> as one <c>id|mmyy|txn</c> line per record.
    /// </summary>
    public partial class Form1 : Form
    {
        // Three look-behind alternatives, one per wanted label; each match is the
        // rest of that label's line.
        //  - "Customer_?ID" accepts both "Customer_ID:" (the label used in the data
        //    file) and the "CustomerID:" spelling the original pattern looked for.
        //  - "[^\r\n]*" instead of ".*" keeps the trailing '\r' of CRLF files out of
        //    the captured value ('.' only excludes '\n').
        private const string FieldPattern =
            @"(?<=Customer_?ID:)[^\r\n]*|(?<=MMYY:)[^\r\n]*|(?<=Txn_ID:)[^\r\n]*";

        public Form1()
        {
            InitializeComponent();
        }

        private void Form1_Load(object sender, EventArgs e)
        {
        }

        /// <summary>
        /// Lets the user choose a file, displays its content and the extracted
        /// <c>id|mmyy|txn</c> lines. Any exception is reported in a message box.
        /// </summary>
        private void openFile_Click(object sender, EventArgs e)
        {
            // The dialog is IDisposable; dispose it as soon as the handler is done.
            using (OpenFileDialog ofdtmp = new OpenFileDialog())
            {
                if (ofdtmp.ShowDialog() != System.Windows.Forms.DialogResult.OK)
                {
                    return;
                }

                try
                {
                    string data = File.ReadAllText(ofdtmp.FileName);
                    richTextBox1.Clear();
                    richTextBox2.Clear();
                    richTextBox1.Text = data;

                    MatchCollection matches = Regex.Matches(data, FieldPattern, RegexOptions.IgnoreCase);

                    // Matches are consumed three at a time (customer id, expiry, txn id).
                    // This relies on every record listing the three labels in that order;
                    // a trailing incomplete group is dropped, as before.
                    StringBuilder output = new StringBuilder();
                    for (int i = 0; i + 2 < matches.Count; i += 3)
                    {
                        string ci = matches[i].Value.Replace(" ", string.Empty);
                        string di = matches[i + 1].Value.Replace(" ", string.Empty);
                        string ti = matches[i + 2].Value.Replace(" ", string.Empty);
                        output.Append(ci).Append('|').Append(di).Append('|').Append(ti)
                              .Append(System.Environment.NewLine);
                    }

                    // Assign once instead of appending to Text on every iteration.
                    richTextBox2.Text = output.ToString();
                }
                catch (Exception ex)
                {
                    MessageBox.Show(ex.Message);
                }
            }
        }

        /// <summary>
        /// Writes the content of <c>richTextBox2</c> to a file chosen by the user.
        /// </summary>
        private void saveFile_Click(object sender, EventArgs e)
        {
            string tmp = richTextBox2.Text;
            using (SaveFileDialog svdtmp = new SaveFileDialog())
            {
                if (svdtmp.ShowDialog() != System.Windows.Forms.DialogResult.OK)
                {
                    return;
                }

                try
                {
                    File.WriteAllText(svdtmp.FileName, tmp);
                    MessageBox.Show("File Saved!");
                }
                catch (Exception ex)
                {
                    // Keep the original message but tell the user why the save failed.
                    MessageBox.Show("Cannot save text to file." + System.Environment.NewLine + ex.Message);
                }
            }
        }
    }
}

最终解决方案:

using System;
using System.Collections.Generic;
using System.IO;
using System.Text.RegularExpressions;
using System.Windows.Forms;

namespace RegExTool
{
    /// <summary>
    /// Form that loads a text file into <c>richTextBox1</c>, extracts every
    /// Customer_ID / MMYY / Txn_ID triple with a regular expression and shows the
    /// result in <c>richTextBox2</c> as one <c>id | mm/yy | txn</c> line per record.
    /// </summary>
    public partial class Form1 : Form
    {
        // One match per record: the three labels must appear on consecutive lines
        // (separated by any run of '\r' / '\n'), each followed by a single space and
        // its value. Built once and reused; the one-second timeout bounds the time
        // spent on a single match attempt.
        private static readonly Regex RecordRegex = new Regex(
            @"Customer_ID: (?<CustomerId>\d+)[\r\n]+MMYY\: (?<ExpiryDate>\d{2}\/\d{2})[\r\n]+Txn_ID: (?<TxnId>\d+)",
            RegexOptions.Multiline,
            TimeSpan.FromSeconds(1));

        public Form1()
        {
            InitializeComponent();
        }

        private void Form1_Load(object sender, EventArgs e)
        {

        }

        /// <summary>
        /// Extracts one formatted line per record found in <paramref name="input"/>.
        /// </summary>
        /// <param name="input">Full text of the data file.</param>
        /// <returns>
        /// A list of <c>"CustomerId | ExpiryDate | TxnId"</c> strings in file order;
        /// empty when no record matches.
        /// </returns>
        static List<string> GetStrings(string input)
        {
            List<string> strings = new List<string>();
            foreach (Match match in RecordRegex.Matches(input))
            {
                strings.Add($"{match.Groups["CustomerId"].Value} | {match.Groups["ExpiryDate"].Value} | {match.Groups["TxnId"].Value}");
            }
            return strings;
        }

        /// <summary>
        /// Lets the user choose a file, displays its content and the extracted
        /// lines. Any exception is reported in a message box.
        /// </summary>
        private void openFile_Click(object sender, EventArgs e)
        {
            // The dialog is IDisposable; dispose it as soon as the handler is done.
            using (OpenFileDialog ofdtmp = new OpenFileDialog())
            {
                if (ofdtmp.ShowDialog() != System.Windows.Forms.DialogResult.OK)
                {
                    return;
                }

                try
                {
                    string input = File.ReadAllText(ofdtmp.FileName);
                    richTextBox1.Clear();
                    richTextBox2.Clear();
                    richTextBox1.Text = input;

                    // Build the whole output first and assign it once, instead of
                    // appending to Text for every record. Each line keeps its
                    // trailing newline, as before.
                    List<string> lines = GetStrings(input);
                    richTextBox2.Text = lines.Count == 0
                        ? string.Empty
                        : string.Join(System.Environment.NewLine, lines) + System.Environment.NewLine;
                }
                catch (Exception ex)
                {
                    MessageBox.Show(ex.Message);
                }
            }
        }

        /// <summary>
        /// Writes the content of <c>richTextBox2</c> to a file chosen by the user.
        /// </summary>
        private void saveFile_Click(object sender, EventArgs e)
        {
            string tmp = richTextBox2.Text;
            using (SaveFileDialog svdtmp = new SaveFileDialog())
            {
                if (svdtmp.ShowDialog() != System.Windows.Forms.DialogResult.OK)
                {
                    return;
                }

                try
                {
                    File.WriteAllText(svdtmp.FileName, tmp);
                    MessageBox.Show("File Saved!");
                }
                catch (Exception ex)
                {
                    // Keep the original message but tell the user why the save failed.
                    MessageBox.Show("Cannot save text to file." + System.Environment.NewLine + ex.Message);
                }
            }
        }
    }
}

我使用正则表达式的解决方案。它可以处理以下系统上的换行符:
• Windows \r\n
• Linux \n
• MacOS \r

您可以在 https://replit.com/@JomaCorpFX/SO70374465 测试/运行此代码

您可以在 https://regex101.com/r/R7Q5bq/4 上检查正则表达式

代码

using System.Text.RegularExpressions;
using System.Collections.Generic;
using System;
using System.Linq;

public class Program
{
    /// <summary>
    /// Finds every Customer_ID / MMYY / Txn_ID triple in <paramref name="input"/>
    /// and formats each one as <c>"CustomerId | ExpiryDate | TxnId"</c>.
    /// </summary>
    /// <param name="input">Text containing the order records.</param>
    /// <returns>The formatted lines, in the order the records appear.</returns>
    static List<string> GetStrings(string input)
    {
        const string recordPattern = @"Customer_ID: (?<CustomerId>\d+)[\r\n]+MMYY\: (?<ExpiryDate>\d{2}\/\d{2})[\r\n]+Txn_ID: (?<TxnId>\d+)";

        MatchCollection records = Regex.Matches(
            input,
            recordPattern,
            RegexOptions.Multiline,
            TimeSpan.FromSeconds(1));

        // Project each match onto its three named groups; ToList() forces the
        // matching to run here, inside this method.
        return records
            .Cast<Match>()
            .Select(record =>
            {
                GroupCollection fields = record.Groups;
                return fields["CustomerId"].Value + " | " + fields["ExpiryDate"].Value + " | " + fields["TxnId"].Value;
            })
            .ToList();
    }

    /// <summary>
    /// Runs the extraction on a built-in sample, prints one line per record and
    /// then waits for a line of input before exiting.
    /// </summary>
    public static void Main(string[] args)
    {
        string sampleText = @"#294448
ORDER_STATUS1098988 VALID
24.09.2021 05:17 AM
Customer_ID: 5524335312265537
MMYY: 08/23
Txn_ID: 74627
Name: Krystal Flowers
E-mail: abc@gmail.com
Phone: 9109153030
Address_original: 1656 W Alvarado dr, Pueblo West, Colorado, 81007, United States
ZIP_City_State_Country: -
Type: -
Subtype: -

#294448
ORDER_STATUS1097728 VALID
24.09.2021 05:17 PM
Customer_ID: 5524331591654699
MMYY: 11/23
Txn_ID: 45617
Name: Allen E Prieto
E-mail: xyz@gmail.com
Phone: 5056994899
Address_original: 655 Ives Dairy Rd, Miami, Florida, 33179, United States
ZIP_City_State_Country: -
Type: -
Subtype: -

#294445
ORDER_STATUS537099 VALID
24.09.2021 05:01 AM
Customer_ID: 4118230087730234
MMYY: 09/25
Txn_ID: 24430
Name: tera casey
Phone: 7405863997
Address_original: 13705 Neptune Lane, New Concord, Ohio State, 43762, PE
ZIP_City_State_Country: 43762, New Concord, Ohio State, UNITED STATES
Subtype: N/A

#294445
ORDER_STATUS489401 VALID
24.09.2021 05:01 AM
Customer_ID: 4118230054806983
MMYY: 07/24
Txn_ID: 13183
Name: Nancy Lambert
Address_original: 2600 loop drive, N, N, 44113, PE
ZIP_City_State_Country: 44113, N, N, UNITED STATES
Subtype: N/A

#294445
ORDER_STATUS437355 VALID
24.09.2021 05:01 AM
Customer_ID: 4118230061412668
MMYY: 05/24
Txn_ID: 55474
Name: Sheets Sherry
E-mail: tyd@gmail.com
Phone: (567) 241-5074
Address_original: 37 Martha Avenue, Mansfield, Ohio, 44905, US
ZIP_City_State_Country: 44905, Mansfield, Ohio, UNITED STATES
Subtype: N/A";

        List<string> rows = GetStrings(sampleText);
        rows.ForEach(row => Console.WriteLine(row));

        // Keeps the console window open until the user presses Enter.
        Console.ReadLine();
    }
}

输出

5524335312265537 | 08/23 | 74627
5524331591654699 | 11/23 | 45617
4118230087730234 | 09/25 | 24430
4118230054806983 | 07/24 | 13183
4118230061412668 | 05/24 | 55474

参考资料

Regex Match Method - Match(String, String, RegexOptions, TimeSpan)