尝试将文本文件中的数据解析为以 | 符号分隔的单行
Trying to parse data from text file to single line separated by | symbol
我有一个包含以下数据的文本文件:
#294448
ORDER_STATUS1098988 VALID
24.09.2021 05:17 AM
Customer_ID: 5524335312265537
MMYY: 08/23
Txn_ID: 74627
Name: Krystal Flowers
E-mail: abc@gmail.com
Phone: 9109153030
Address_original: 1656 W Alvarado dr, Pueblo West, Colorado, 81007, United States
ZIP_City_State_Country: -
Type: -
Subtype: -
#294448
ORDER_STATUS1097728 VALID
24.09.2021 05:17 AM
Customer_ID: 5524331591654699
MMYY: 11/23
Txn_ID: 45617
Name: Allen E Prieto
E-mail: xyz@gmail.com
Phone: 5056994899
Address_original: 655 Ives Dairy Rd, Miami, Florida, 33179, United States
ZIP_City_State_Country: -
Type: -
Subtype: -
#294445
ORDER_STATUS537099 VALID
24.09.2021 05:01 AM
Customer_ID: 4118230087730234
MMYY: 09/25
Txn_ID: 24430
Name: tera casey
Phone: 7405863997
Address_original: 13705 Neptune Lane, New Concord, Ohio State, 43762, PE
ZIP_City_State_Country: 43762, New Concord, Ohio State, UNITED STATES
Subtype: N/A
#294445
ORDER_STATUS489401 VALID
24.09.2021 05:01 AM
Customer_ID: 4118230054806983
MMYY: 07/24
Txn_ID: 13183
Name: Nancy Lambert
Address_original: 2600 loop drive, N, N, 44113, PE
ZIP_City_State_Country: 44113, N, N, UNITED STATES
Subtype: N/A
#294445
ORDER_STATUS437355 VALID
24.09.2021 05:01 AM
Customer_ID: 4118230061412668
MMYY: 05/24
Txn_ID: 55474
Name: Sheets Sherry
E-mail: tyd@gmail.com
Phone: (567) 241-5074
Address_original: 37 Martha Avenue, Mansfield, Ohio, 44905, US
ZIP_City_State_Country: 44905, Mansfield, Ohio, UNITED STATES
Subtype: N/A
数据需要重新组织,使每条记录只在一行中显示 Customer_ID、MMYY 和 Txn_ID,并以 | 符号分隔。应忽略此文本文件中的所有其他内容。
示例:
5524335312265537 | 08/23 | 74627
5524331591654699 | 11/23 | 45617
4118230087730234 | 09/25 | 24430
4118230054806983 | 07/24 | 13183
4118230061412668 | 05/24 | 55474
这是我尝试过的方法,但打开文本文件后,我收到了“Invalid file!”(无效文件!)消息。
/// <summary>
/// Click handler: asks the user for a file, reads it line by line, skips the
/// unwanted lines and tries to cut the Customer_ID, MMYY and Txn_ID values out
/// of the remaining ones.
/// </summary>
/// <remarks>
/// On the sample data this handler always ends in the catch block and shows
/// "Invalid file!"; the inline notes below explain why.
/// </remarks>
private void openFile_Click(object sender, EventArgs e)
{
OpenFileDialog ofdtmp = new OpenFileDialog();
if (ofdtmp.ShowDialog() == System.Windows.Forms.DialogResult.OK)
{
try
{
using (StreamReader sr = File.OpenText(ofdtmp.FileName))
{
// Keep reading while Peek() still reports an available character.
while (sr.Peek() >= 0)
{
string line = sr.ReadLine();
line = line.Trim();
// Skip blank lines and every line that carries a field we do not want.
// The "." test is what drops the date/time line.
if (line.ToString() == "" || line.Contains("#") || line.Contains("ORDER_STATUS") || /*Exclude Date & Time*/ line.Contains(".") || line.Contains("Name:") || line.Contains("E-mail:") || line.Contains("Phone:") || line.Contains("Address_original:") || line.Contains("ZIP_City_State_Country:") || line.Contains("Type:") || line.Contains("Subtype:"))
continue; //skip
// NOTE: the sample data spells this label "Customer_ID: " (with an underscore),
// so this test is never true for it and the branch is never entered.
if (line.Contains("CustomerID: "))
{
// Substring(start, length) throws ArgumentOutOfRangeException when
// start + length is larger than the string; 12 + 29 exceeds the 29
// characters of a line such as "Customer_ID: 5524335312265537".
// The value is stored in a local that is never used afterwards.
string customID = line.Substring(12, 29).Trim();
continue;
}
if (line.Contains("MMYY: "))
{
// "MMYY: 08/23" is 11 characters long, so Substring(6, 11) asks for
// characters up to index 17 and throws. This is the first exception hit
// on the sample data and is what leads to the "Invalid file!" message.
string mmyy = line.Substring(6, 11).Trim();
continue;
}
if (line.Contains("Txn_ID: "))
{
// The label "Txn_ID: " is 8 characters, yet extraction starts at index 10,
// and 10 + 16 exceeds the 13 characters of "Txn_ID: 74627".
string txnID = line.Substring(10, 16).Trim();
continue;
}
}
// StreamReader does not override ToString(), so this assigns the type name
// rather than the file content or any of the extracted values.
richTextBox.Text = sr.ToString();
}
}
// Bare catch: every exception type is reported with the same message, which
// hides the real cause (here an ArgumentOutOfRangeException from Substring).
catch
{
MessageBox.Show("Invalid file!");
}
}
}
我在类似的在线帖子中查找了替代解决方案,应用正则表达式似乎是正确的方法。困难在于弄清楚如何跳过文本文件中所有不需要的字符和符号,只提取所需的数据。这个问题的最佳解决方案是什么?
解决方案更新:
using System;
using System.IO;
using System.Security.Cryptography;
using System.Text;
using System.Text.RegularExpressions;
using System.Windows.Forms;
namespace RegExTool
{
    /// <summary>
    /// Form that loads a text file into <c>richTextBox1</c>, extracts the
    /// Customer_ID, MMYY and Txn_ID values with a regular expression and lists
    /// them in <c>richTextBox2</c> as one <c>id|mmyy|txn</c> line per record.
    /// </summary>
    public partial class Form1 : Form
    {
        public Form1()
        {
            InitializeComponent();
        }

        private void Form1_Load(object sender, EventArgs e)
        {
        }

        /// <summary>
        /// Converts one regex match to its display form: all spaces removed and
        /// surrounding whitespace trimmed.
        /// </summary>
        /// <param name="match">The current element of the match enumerator.</param>
        /// <returns>The cleaned value text.</returns>
        private static string CleanValue(object match)
        {
            // '.' also matches '\r', so on files with \r\n line endings the raw match
            // ends with a carriage return; Trim() removes it.
            return match.ToString().Replace(" ", string.Empty).Trim();
        }

        /// <summary>
        /// Lets the user choose a file, shows its content in <c>richTextBox1</c>
        /// and writes the extracted <c>id|mmyy|txn</c> lines to <c>richTextBox2</c>.
        /// Any exception is reported through a message box.
        /// </summary>
        private void openFile_Click(object sender, EventArgs e)
        {
            using (OpenFileDialog ofdtmp = new OpenFileDialog())
            {
                if (ofdtmp.ShowDialog() != System.Windows.Forms.DialogResult.OK)
                {
                    return;
                }

                try
                {
                    string data;
                    using (StreamReader sr = new StreamReader(ofdtmp.FileName))
                    {
                        data = sr.ReadToEnd();
                    }

                    richTextBox1.Clear();
                    richTextBox2.Clear();
                    richTextBox1.Text = data;

                    // Each alternative uses a look-behind, so a match is only the text that
                    // follows the label on its line. The labels must be spelled exactly as in
                    // the file: "Customer_ID:" (with the underscore), otherwise that field
                    // never matches and the grouping below pairs the wrong values.
                    string pattern = @"(?<=Customer_ID:).*|(?<=MMYY:).*|(?<=Txn_ID:).*";
                    StringBuilder output = new StringBuilder();

                    // Matches are consumed three at a time (id, expiry, transaction); this
                    // relies on every record containing all three fields in that order.
                    var en = Regex.Matches(data, pattern, RegexOptions.IgnoreCase).GetEnumerator();
                    while (en.MoveNext())
                    {
                        string ci = CleanValue(en.Current);
                        if (!en.MoveNext())
                        {
                            break;
                        }

                        string di = CleanValue(en.Current);
                        if (!en.MoveNext())
                        {
                            break;
                        }

                        string ti = CleanValue(en.Current);
                        output.Append(ci).Append('|').Append(di).Append('|').Append(ti)
                              .Append(System.Environment.NewLine);
                    }

                    // Assign once instead of re-reading and re-setting Text for every record.
                    richTextBox2.Text = output.ToString();
                }
                catch (Exception ex)
                {
                    MessageBox.Show(ex.Message);
                }
            }
        }

        /// <summary>
        /// Lets the user choose a target file and writes the content of
        /// <c>richTextBox2</c> to it, reporting success or failure in a message box.
        /// </summary>
        private void saveFile_Click(object sender, EventArgs e)
        {
            string tmp = richTextBox2.Text;
            using (SaveFileDialog svdtmp = new SaveFileDialog())
            {
                if (svdtmp.ShowDialog() != System.Windows.Forms.DialogResult.OK)
                {
                    return;
                }

                try
                {
                    File.WriteAllText(svdtmp.FileName, tmp);
                    MessageBox.Show("File Saved!");
                }
                catch (Exception)
                {
                    MessageBox.Show("Cannot save text to file.");
                }
            }
        }
    }
}
最终解决方案:
using System;
using System.Collections.Generic;
using System.IO;
using System.Text.RegularExpressions;
using System.Windows.Forms;
namespace RegExTool
{
    /// <summary>
    /// Form that loads a text file into <c>richTextBox1</c> and lists one
    /// <c>Customer_ID | MMYY | Txn_ID</c> line per record in <c>richTextBox2</c>.
    /// </summary>
    public partial class Form1 : Form
    {
        public Form1()
        {
            InitializeComponent();
        }

        private void Form1_Load(object sender, EventArgs e)
        {
        }

        /// <summary>
        /// Finds every run of three consecutive lines Customer_ID / MMYY / Txn_ID in
        /// <paramref name="input"/> and formats each as <c>id | mm/yy | txn</c>.
        /// </summary>
        /// <param name="input">Full text of the file to scan.</param>
        /// <returns>One formatted string per matching record, in file order.</returns>
        static List<string> GetStrings(string input)
        {
            // [\r\n]+ between the fields accepts \r\n, \n and \r line endings.
            string pattern = @"Customer_ID: (?<CustomerId>\d+)[\r\n]+MMYY\: (?<ExpiryDate>\d{2}\/\d{2})[\r\n]+Txn_ID: (?<TxnId>\d+)";
            List<string> strings = new List<string>();
            // The one-second timeout bounds the time the regex engine may spend on a match.
            foreach (Match match in Regex.Matches(input, pattern, RegexOptions.Multiline, TimeSpan.FromSeconds(1)))
            {
                strings.Add($"{match.Groups["CustomerId"].Value} | {match.Groups["ExpiryDate"].Value} | {match.Groups["TxnId"].Value}");
            }
            return strings;
        }

        /// <summary>
        /// Lets the user choose a file, shows its content in <c>richTextBox1</c>
        /// and the extracted records in <c>richTextBox2</c>. Any exception is
        /// reported through a message box.
        /// </summary>
        private void openFile_Click(object sender, EventArgs e)
        {
            using (OpenFileDialog ofdtmp = new OpenFileDialog())
            {
                if (ofdtmp.ShowDialog() != System.Windows.Forms.DialogResult.OK)
                {
                    return;
                }

                try
                {
                    string input;
                    using (StreamReader sr = new StreamReader(ofdtmp.FileName))
                    {
                        input = sr.ReadToEnd();
                    }

                    richTextBox1.Clear();
                    richTextBox2.Clear();
                    richTextBox1.Text = input;

                    // Build the whole output first and assign it once; appending to
                    // Text per record re-reads and re-sets the entire control content.
                    List<string> rows = GetStrings(input);
                    if (rows.Count > 0)
                    {
                        // Every row, including the last one, is followed by a line break.
                        richTextBox2.Text = string.Join(System.Environment.NewLine, rows) + System.Environment.NewLine;
                    }
                }
                catch (Exception ex)
                {
                    MessageBox.Show(ex.Message);
                }
            }
        }

        /// <summary>
        /// Lets the user choose a target file and writes the content of
        /// <c>richTextBox2</c> to it, reporting success or failure in a message box.
        /// </summary>
        private void saveFile_Click(object sender, EventArgs e)
        {
            string tmp = richTextBox2.Text;
            using (SaveFileDialog svdtmp = new SaveFileDialog())
            {
                if (svdtmp.ShowDialog() != System.Windows.Forms.DialogResult.OK)
                {
                    return;
                }

                try
                {
                    File.WriteAllText(svdtmp.FileName, tmp);
                    MessageBox.Show("File Saved!");
                }
                catch (Exception)
                {
                    MessageBox.Show("Cannot save text to file.");
                }
            }
        }
    }
}
我的解决方案使用正则表达式,并可处理以下各系统上的换行符:
• Windows \r\n
• Linux \n
• MacOS \r
您可以在 https://replit.com/@JomaCorpFX/SO70374465 上测试/运行此代码
您可以在 https://regex101.com/r/R7Q5bq/4 上检查正则表达式
代码
using System.Text.RegularExpressions;
using System.Collections.Generic;
using System;
using System.Linq;
public class Program
{
/// <summary>
/// Scans <paramref name="input"/> for three consecutive lines Customer_ID / MMYY /
/// Txn_ID and renders each hit as <c>id | mm/yy | txn</c>.
/// </summary>
/// <param name="input">Text to search.</param>
/// <returns>The formatted records in the order they occur in the text.</returns>
static List<string> GetStrings(string input)
{
    // [\r\n]+ between the fields accepts \r\n, \n and \r line endings.
    const string recordPattern = @"Customer_ID: (?<CustomerId>\d+)[\r\n]+MMYY\: (?<ExpiryDate>\d{2}\/\d{2})[\r\n]+Txn_ID: (?<TxnId>\d+)";
    MatchCollection records = Regex.Matches(input, recordPattern, RegexOptions.Multiline, TimeSpan.FromSeconds(1));

    return records
        .Cast<Match>()
        .Select(m => $"{m.Groups["CustomerId"]} | {m.Groups["ExpiryDate"]} | {m.Groups["TxnId"]}")
        .ToList();
}
/// <summary>
/// Demo entry point: runs GetStrings over an embedded sample file, prints one
/// line per extracted record and then waits for a line of console input.
/// </summary>
public static void Main(string[] args)
{
    // Embedded sample; the verbatim literal keeps its line breaks exactly as typed.
    string sample = @"#294448
ORDER_STATUS1098988 VALID
24.09.2021 05:17 AM
Customer_ID: 5524335312265537
MMYY: 08/23
Txn_ID: 74627
Name: Krystal Flowers
E-mail: abc@gmail.com
Phone: 9109153030
Address_original: 1656 W Alvarado dr, Pueblo West, Colorado, 81007, United States
ZIP_City_State_Country: -
Type: -
Subtype: -
#294448
ORDER_STATUS1097728 VALID
24.09.2021 05:17 PM
Customer_ID: 5524331591654699
MMYY: 11/23
Txn_ID: 45617
Name: Allen E Prieto
E-mail: xyz@gmail.com
Phone: 5056994899
Address_original: 655 Ives Dairy Rd, Miami, Florida, 33179, United States
ZIP_City_State_Country: -
Type: -
Subtype: -
#294445
ORDER_STATUS537099 VALID
24.09.2021 05:01 AM
Customer_ID: 4118230087730234
MMYY: 09/25
Txn_ID: 24430
Name: tera casey
Phone: 7405863997
Address_original: 13705 Neptune Lane, New Concord, Ohio State, 43762, PE
ZIP_City_State_Country: 43762, New Concord, Ohio State, UNITED STATES
Subtype: N/A
#294445
ORDER_STATUS489401 VALID
24.09.2021 05:01 AM
Customer_ID: 4118230054806983
MMYY: 07/24
Txn_ID: 13183
Name: Nancy Lambert
Address_original: 2600 loop drive, N, N, 44113, PE
ZIP_City_State_Country: 44113, N, N, UNITED STATES
Subtype: N/A
#294445
ORDER_STATUS437355 VALID
24.09.2021 05:01 AM
Customer_ID: 4118230061412668
MMYY: 05/24
Txn_ID: 55474
Name: Sheets Sherry
E-mail: tyd@gmail.com
Phone: (567) 241-5074
Address_original: 37 Martha Avenue, Mansfield, Ohio, 44905, US
ZIP_City_State_Country: 44905, Mansfield, Ohio, UNITED STATES
Subtype: N/A";

    GetStrings(sample).ForEach(Console.WriteLine);

    // Keeps the console window open until the user presses Enter.
    Console.ReadLine();
}
}
输出
5524335312265537 | 08/23 | 74627
5524331591654699 | 11/23 | 45617
4118230087730234 | 09/25 | 24430
4118230054806983 | 07/24 | 13183
4118230061412668 | 05/24 | 55474
参考资料
Regex Match Method - Match(String, String, RegexOptions, TimeSpan)
我有一个包含以下数据的文本文件:
#294448
ORDER_STATUS1098988 VALID
24.09.2021 05:17 AM
Customer_ID: 5524335312265537
MMYY: 08/23
Txn_ID: 74627
Name: Krystal Flowers
E-mail: abc@gmail.com
Phone: 9109153030
Address_original: 1656 W Alvarado dr, Pueblo West, Colorado, 81007, United States
ZIP_City_State_Country: -
Type: -
Subtype: -
#294448
ORDER_STATUS1097728 VALID
24.09.2021 05:17 AM
Customer_ID: 5524331591654699
MMYY: 11/23
Txn_ID: 45617
Name: Allen E Prieto
E-mail: xyz@gmail.com
Phone: 5056994899
Address_original: 655 Ives Dairy Rd, Miami, Florida, 33179, United States
ZIP_City_State_Country: -
Type: -
Subtype: -
#294445
ORDER_STATUS537099 VALID
24.09.2021 05:01 AM
Customer_ID: 4118230087730234
MMYY: 09/25
Txn_ID: 24430
Name: tera casey
Phone: 7405863997
Address_original: 13705 Neptune Lane, New Concord, Ohio State, 43762, PE
ZIP_City_State_Country: 43762, New Concord, Ohio State, UNITED STATES
Subtype: N/A
#294445
ORDER_STATUS489401 VALID
24.09.2021 05:01 AM
Customer_ID: 4118230054806983
MMYY: 07/24
Txn_ID: 13183
Name: Nancy Lambert
Address_original: 2600 loop drive, N, N, 44113, PE
ZIP_City_State_Country: 44113, N, N, UNITED STATES
Subtype: N/A
#294445
ORDER_STATUS437355 VALID
24.09.2021 05:01 AM
Customer_ID: 4118230061412668
MMYY: 05/24
Txn_ID: 55474
Name: Sheets Sherry
E-mail: tyd@gmail.com
Phone: (567) 241-5074
Address_original: 37 Martha Avenue, Mansfield, Ohio, 44905, US
ZIP_City_State_Country: 44905, Mansfield, Ohio, UNITED STATES
Subtype: N/A
数据需要重新组织,使每条记录只在一行中显示 Customer_ID、MMYY 和 Txn_ID,并以 | 符号分隔。应忽略此文本文件中的所有其他内容。
示例:
5524335312265537 | 08/23 | 74627
5524331591654699 | 11/23 | 45617
4118230087730234 | 09/25 | 24430
4118230054806983 | 07/24 | 13183
4118230061412668 | 05/24 | 55474
这是我尝试过的方法,但打开文本文件后,我收到了“Invalid file!”(无效文件!)消息。
/// <summary>
/// Click handler: asks the user for a file, reads it line by line, skips the
/// unwanted lines and tries to cut the Customer_ID, MMYY and Txn_ID values out
/// of the remaining ones.
/// </summary>
/// <remarks>
/// On the sample data this handler always ends in the catch block and shows
/// "Invalid file!"; the inline notes below explain why.
/// </remarks>
private void openFile_Click(object sender, EventArgs e)
{
OpenFileDialog ofdtmp = new OpenFileDialog();
if (ofdtmp.ShowDialog() == System.Windows.Forms.DialogResult.OK)
{
try
{
using (StreamReader sr = File.OpenText(ofdtmp.FileName))
{
// Keep reading while Peek() still reports an available character.
while (sr.Peek() >= 0)
{
string line = sr.ReadLine();
line = line.Trim();
// Skip blank lines and every line that carries a field we do not want.
// The "." test is what drops the date/time line.
if (line.ToString() == "" || line.Contains("#") || line.Contains("ORDER_STATUS") || /*Exclude Date & Time*/ line.Contains(".") || line.Contains("Name:") || line.Contains("E-mail:") || line.Contains("Phone:") || line.Contains("Address_original:") || line.Contains("ZIP_City_State_Country:") || line.Contains("Type:") || line.Contains("Subtype:"))
continue; //skip
// NOTE: the sample data spells this label "Customer_ID: " (with an underscore),
// so this test is never true for it and the branch is never entered.
if (line.Contains("CustomerID: "))
{
// Substring(start, length) throws ArgumentOutOfRangeException when
// start + length is larger than the string; 12 + 29 exceeds the 29
// characters of a line such as "Customer_ID: 5524335312265537".
// The value is stored in a local that is never used afterwards.
string customID = line.Substring(12, 29).Trim();
continue;
}
if (line.Contains("MMYY: "))
{
// "MMYY: 08/23" is 11 characters long, so Substring(6, 11) asks for
// characters up to index 17 and throws. This is the first exception hit
// on the sample data and is what leads to the "Invalid file!" message.
string mmyy = line.Substring(6, 11).Trim();
continue;
}
if (line.Contains("Txn_ID: "))
{
// The label "Txn_ID: " is 8 characters, yet extraction starts at index 10,
// and 10 + 16 exceeds the 13 characters of "Txn_ID: 74627".
string txnID = line.Substring(10, 16).Trim();
continue;
}
}
// StreamReader does not override ToString(), so this assigns the type name
// rather than the file content or any of the extracted values.
richTextBox.Text = sr.ToString();
}
}
// Bare catch: every exception type is reported with the same message, which
// hides the real cause (here an ArgumentOutOfRangeException from Substring).
catch
{
MessageBox.Show("Invalid file!");
}
}
}
我在类似的在线帖子中查找了替代解决方案,应用正则表达式似乎是正确的方法。困难在于弄清楚如何跳过文本文件中所有不需要的字符和符号,只提取所需的数据。这个问题的最佳解决方案是什么?
解决方案更新:
using System;
using System.IO;
using System.Security.Cryptography;
using System.Text;
using System.Text.RegularExpressions;
using System.Windows.Forms;
namespace RegExTool
{
    /// <summary>
    /// Form that loads a text file into <c>richTextBox1</c>, extracts the
    /// Customer_ID, MMYY and Txn_ID values with a regular expression and lists
    /// them in <c>richTextBox2</c> as one <c>id|mmyy|txn</c> line per record.
    /// </summary>
    public partial class Form1 : Form
    {
        public Form1()
        {
            InitializeComponent();
        }

        private void Form1_Load(object sender, EventArgs e)
        {
        }

        /// <summary>
        /// Converts one regex match to its display form: all spaces removed and
        /// surrounding whitespace trimmed.
        /// </summary>
        /// <param name="match">The current element of the match enumerator.</param>
        /// <returns>The cleaned value text.</returns>
        private static string CleanValue(object match)
        {
            // '.' also matches '\r', so on files with \r\n line endings the raw match
            // ends with a carriage return; Trim() removes it.
            return match.ToString().Replace(" ", string.Empty).Trim();
        }

        /// <summary>
        /// Lets the user choose a file, shows its content in <c>richTextBox1</c>
        /// and writes the extracted <c>id|mmyy|txn</c> lines to <c>richTextBox2</c>.
        /// Any exception is reported through a message box.
        /// </summary>
        private void openFile_Click(object sender, EventArgs e)
        {
            using (OpenFileDialog ofdtmp = new OpenFileDialog())
            {
                if (ofdtmp.ShowDialog() != System.Windows.Forms.DialogResult.OK)
                {
                    return;
                }

                try
                {
                    string data;
                    using (StreamReader sr = new StreamReader(ofdtmp.FileName))
                    {
                        data = sr.ReadToEnd();
                    }

                    richTextBox1.Clear();
                    richTextBox2.Clear();
                    richTextBox1.Text = data;

                    // Each alternative uses a look-behind, so a match is only the text that
                    // follows the label on its line. The labels must be spelled exactly as in
                    // the file: "Customer_ID:" (with the underscore), otherwise that field
                    // never matches and the grouping below pairs the wrong values.
                    string pattern = @"(?<=Customer_ID:).*|(?<=MMYY:).*|(?<=Txn_ID:).*";
                    StringBuilder output = new StringBuilder();

                    // Matches are consumed three at a time (id, expiry, transaction); this
                    // relies on every record containing all three fields in that order.
                    var en = Regex.Matches(data, pattern, RegexOptions.IgnoreCase).GetEnumerator();
                    while (en.MoveNext())
                    {
                        string ci = CleanValue(en.Current);
                        if (!en.MoveNext())
                        {
                            break;
                        }

                        string di = CleanValue(en.Current);
                        if (!en.MoveNext())
                        {
                            break;
                        }

                        string ti = CleanValue(en.Current);
                        output.Append(ci).Append('|').Append(di).Append('|').Append(ti)
                              .Append(System.Environment.NewLine);
                    }

                    // Assign once instead of re-reading and re-setting Text for every record.
                    richTextBox2.Text = output.ToString();
                }
                catch (Exception ex)
                {
                    MessageBox.Show(ex.Message);
                }
            }
        }

        /// <summary>
        /// Lets the user choose a target file and writes the content of
        /// <c>richTextBox2</c> to it, reporting success or failure in a message box.
        /// </summary>
        private void saveFile_Click(object sender, EventArgs e)
        {
            string tmp = richTextBox2.Text;
            using (SaveFileDialog svdtmp = new SaveFileDialog())
            {
                if (svdtmp.ShowDialog() != System.Windows.Forms.DialogResult.OK)
                {
                    return;
                }

                try
                {
                    File.WriteAllText(svdtmp.FileName, tmp);
                    MessageBox.Show("File Saved!");
                }
                catch (Exception)
                {
                    MessageBox.Show("Cannot save text to file.");
                }
            }
        }
    }
}
最终解决方案:
using System;
using System.Collections.Generic;
using System.IO;
using System.Text.RegularExpressions;
using System.Windows.Forms;
namespace RegExTool
{
    /// <summary>
    /// Form that loads a text file into <c>richTextBox1</c> and lists one
    /// <c>Customer_ID | MMYY | Txn_ID</c> line per record in <c>richTextBox2</c>.
    /// </summary>
    public partial class Form1 : Form
    {
        public Form1()
        {
            InitializeComponent();
        }

        private void Form1_Load(object sender, EventArgs e)
        {
        }

        /// <summary>
        /// Finds every run of three consecutive lines Customer_ID / MMYY / Txn_ID in
        /// <paramref name="input"/> and formats each as <c>id | mm/yy | txn</c>.
        /// </summary>
        /// <param name="input">Full text of the file to scan.</param>
        /// <returns>One formatted string per matching record, in file order.</returns>
        static List<string> GetStrings(string input)
        {
            // [\r\n]+ between the fields accepts \r\n, \n and \r line endings.
            string pattern = @"Customer_ID: (?<CustomerId>\d+)[\r\n]+MMYY\: (?<ExpiryDate>\d{2}\/\d{2})[\r\n]+Txn_ID: (?<TxnId>\d+)";
            List<string> strings = new List<string>();
            // The one-second timeout bounds the time the regex engine may spend on a match.
            foreach (Match match in Regex.Matches(input, pattern, RegexOptions.Multiline, TimeSpan.FromSeconds(1)))
            {
                strings.Add($"{match.Groups["CustomerId"].Value} | {match.Groups["ExpiryDate"].Value} | {match.Groups["TxnId"].Value}");
            }
            return strings;
        }

        /// <summary>
        /// Lets the user choose a file, shows its content in <c>richTextBox1</c>
        /// and the extracted records in <c>richTextBox2</c>. Any exception is
        /// reported through a message box.
        /// </summary>
        private void openFile_Click(object sender, EventArgs e)
        {
            using (OpenFileDialog ofdtmp = new OpenFileDialog())
            {
                if (ofdtmp.ShowDialog() != System.Windows.Forms.DialogResult.OK)
                {
                    return;
                }

                try
                {
                    string input;
                    using (StreamReader sr = new StreamReader(ofdtmp.FileName))
                    {
                        input = sr.ReadToEnd();
                    }

                    richTextBox1.Clear();
                    richTextBox2.Clear();
                    richTextBox1.Text = input;

                    // Build the whole output first and assign it once; appending to
                    // Text per record re-reads and re-sets the entire control content.
                    List<string> rows = GetStrings(input);
                    if (rows.Count > 0)
                    {
                        // Every row, including the last one, is followed by a line break.
                        richTextBox2.Text = string.Join(System.Environment.NewLine, rows) + System.Environment.NewLine;
                    }
                }
                catch (Exception ex)
                {
                    MessageBox.Show(ex.Message);
                }
            }
        }

        /// <summary>
        /// Lets the user choose a target file and writes the content of
        /// <c>richTextBox2</c> to it, reporting success or failure in a message box.
        /// </summary>
        private void saveFile_Click(object sender, EventArgs e)
        {
            string tmp = richTextBox2.Text;
            using (SaveFileDialog svdtmp = new SaveFileDialog())
            {
                if (svdtmp.ShowDialog() != System.Windows.Forms.DialogResult.OK)
                {
                    return;
                }

                try
                {
                    File.WriteAllText(svdtmp.FileName, tmp);
                    MessageBox.Show("File Saved!");
                }
                catch (Exception)
                {
                    MessageBox.Show("Cannot save text to file.");
                }
            }
        }
    }
}
我的解决方案使用正则表达式,并可处理以下各系统上的换行符:
• Windows \r\n
• Linux \n
• MacOS \r
您可以在 https://replit.com/@JomaCorpFX/SO70374465 上测试/运行此代码
您可以在 https://regex101.com/r/R7Q5bq/4 上检查正则表达式
代码
using System.Text.RegularExpressions;
using System.Collections.Generic;
using System;
using System.Linq;
public class Program
{
/// <summary>
/// Scans <paramref name="input"/> for three consecutive lines Customer_ID / MMYY /
/// Txn_ID and renders each hit as <c>id | mm/yy | txn</c>.
/// </summary>
/// <param name="input">Text to search.</param>
/// <returns>The formatted records in the order they occur in the text.</returns>
static List<string> GetStrings(string input)
{
    // [\r\n]+ between the fields accepts \r\n, \n and \r line endings.
    const string recordPattern = @"Customer_ID: (?<CustomerId>\d+)[\r\n]+MMYY\: (?<ExpiryDate>\d{2}\/\d{2})[\r\n]+Txn_ID: (?<TxnId>\d+)";
    MatchCollection records = Regex.Matches(input, recordPattern, RegexOptions.Multiline, TimeSpan.FromSeconds(1));

    return records
        .Cast<Match>()
        .Select(m => $"{m.Groups["CustomerId"]} | {m.Groups["ExpiryDate"]} | {m.Groups["TxnId"]}")
        .ToList();
}
/// <summary>
/// Demo entry point: runs GetStrings over an embedded sample file, prints one
/// line per extracted record and then waits for a line of console input.
/// </summary>
public static void Main(string[] args)
{
    // Embedded sample; the verbatim literal keeps its line breaks exactly as typed.
    string sample = @"#294448
ORDER_STATUS1098988 VALID
24.09.2021 05:17 AM
Customer_ID: 5524335312265537
MMYY: 08/23
Txn_ID: 74627
Name: Krystal Flowers
E-mail: abc@gmail.com
Phone: 9109153030
Address_original: 1656 W Alvarado dr, Pueblo West, Colorado, 81007, United States
ZIP_City_State_Country: -
Type: -
Subtype: -
#294448
ORDER_STATUS1097728 VALID
24.09.2021 05:17 PM
Customer_ID: 5524331591654699
MMYY: 11/23
Txn_ID: 45617
Name: Allen E Prieto
E-mail: xyz@gmail.com
Phone: 5056994899
Address_original: 655 Ives Dairy Rd, Miami, Florida, 33179, United States
ZIP_City_State_Country: -
Type: -
Subtype: -
#294445
ORDER_STATUS537099 VALID
24.09.2021 05:01 AM
Customer_ID: 4118230087730234
MMYY: 09/25
Txn_ID: 24430
Name: tera casey
Phone: 7405863997
Address_original: 13705 Neptune Lane, New Concord, Ohio State, 43762, PE
ZIP_City_State_Country: 43762, New Concord, Ohio State, UNITED STATES
Subtype: N/A
#294445
ORDER_STATUS489401 VALID
24.09.2021 05:01 AM
Customer_ID: 4118230054806983
MMYY: 07/24
Txn_ID: 13183
Name: Nancy Lambert
Address_original: 2600 loop drive, N, N, 44113, PE
ZIP_City_State_Country: 44113, N, N, UNITED STATES
Subtype: N/A
#294445
ORDER_STATUS437355 VALID
24.09.2021 05:01 AM
Customer_ID: 4118230061412668
MMYY: 05/24
Txn_ID: 55474
Name: Sheets Sherry
E-mail: tyd@gmail.com
Phone: (567) 241-5074
Address_original: 37 Martha Avenue, Mansfield, Ohio, 44905, US
ZIP_City_State_Country: 44905, Mansfield, Ohio, UNITED STATES
Subtype: N/A";

    GetStrings(sample).ForEach(Console.WriteLine);

    // Keeps the console window open until the user presses Enter.
    Console.ReadLine();
}
}
输出
5524335312265537 | 08/23 | 74627
5524331591654699 | 11/23 | 45617
4118230087730234 | 09/25 | 24430
4118230054806983 | 07/24 | 13183
4118230061412668 | 05/24 | 55474
参考资料
Regex Match Method - Match(String, String, RegexOptions, TimeSpan)