在 C# 中比较文本文件并删除重复行

Compare text files in C# and remove duplicate lines

1.txt:

始发地,目的地,日期时间,价格

YYZ,YTC,2016-04-01 12:30,0
YYZ,YTC,2016-04-01 12:30,0
LKC,LKP,2016-04-01 12:30,0

2.txt:

始发地|目的地|日期时间|价格

YYZ|YTC|2016-04-01 12:30|0
AMV|YRk|2016-06-01 12:30|0
LKC|LKP|2016-12-01 12:30|0

我有两个文本文件,分别以“,”和“|”作为分隔符。我想用 C# 创建一个控制台应用程序,当我从命令提示符传入始发地和目的地时,它会读取这两个文件并进行搜索。

搜索时,我想忽略重复行,我想按价格顺序显示结果。

输出应该是{ origination } -> { destination } -> datetime -> price

我需要一些关于如何实现的帮助。

我不能 100% 确定您的程序应该输出什么,所以这部分的实现留给您自己完成。我的思路是定义一个构造函数,它接受一行字符串(您从文件中读取的内容)和一个分隔符(因为两个文件的分隔符不同),并据此创建可供您操作的对象(例如添加到哈希集合中)。

PriceObject.cs

using System;
using System.Globalization;

namespace ConsoleApplication1
{
/// <summary>
/// One fare record (origination, destination, departure time, price) parsed
/// from a single delimited text line. Two records are equal when all four
/// fields are equal, which lets a HashSet drop duplicate lines.
/// </summary>
class PriceObject
{
    public string origination { get; set; }
    public string destination { get; set; }
    public DateTime time { get; set; }
    public decimal price { get; set; }

    /// <summary>
    /// Parses one data line of the form
    /// origination{delimiter}destination{delimiter}yyyy-MM-dd HH:mm{delimiter}price.
    /// </summary>
    /// <param name="inputLine">The raw line read from the file.</param>
    /// <param name="delimiter">The field separator used by that file.</param>
    /// <exception cref="ArgumentNullException"><paramref name="inputLine"/> is null.</exception>
    /// <exception cref="FormatException">
    /// The line does not have four fields, or the date or price cannot be parsed.
    /// </exception>
    public PriceObject(string inputLine, char delimiter)
    {
        if (inputLine == null)
        {
            throw new ArgumentNullException("inputLine");
        }

        // Limit the split to 4 parts so that everything after the third
        // delimiter stays in the price field (e.g. a thousands separator in a
        // comma-delimited file is still handed to the currency parser intact).
        string[] parsed = inputLine.Split(new char[] { delimiter }, 4);
        if (parsed.Length != 4)
        {
            throw new FormatException(
                string.Format("Expected 4 fields separated by '{0}' but found {1}.", delimiter, parsed.Length));
        }

        origination = parsed[0];
        destination = parsed[1];
        time = DateTime.ParseExact(parsed[2], "yyyy-MM-dd HH:mm", CultureInfo.InvariantCulture);
        price = Decimal.Parse(parsed[3], NumberStyles.Currency, new CultureInfo("en-US"));
    }

    /// <summary>
    /// Value equality over all four fields. Returns false for null or for an
    /// object that is not a PriceObject.
    /// </summary>
    public override bool Equals(object obj)
    {
        var item = obj as PriceObject;
        if (item == null)
        {
            return false;
        }

        // Static string.Equals is null-safe, unlike origination.Equals(...).
        return string.Equals(origination, item.origination) &&
            string.Equals(destination, item.destination) &&
            time.Equals(item.time) &&
            price.Equals(item.price);
    }

    /// <summary>
    /// Hash code combining the same four fields that Equals compares.
    /// Because the properties are settable, changing them after the object has
    /// been added to a hash-based collection will make it unfindable there.
    /// </summary>
    public override int GetHashCode()
    {
        // unchecked: overflow while mixing the hash is expected and harmless.
        unchecked
        {
            var result = 17;
            result = (result * 23) + (origination != null ? origination.GetHashCode() : 0);
            result = (result * 23) + (destination != null ? destination.GetHashCode() : 0);
            result = (result * 23) + time.GetHashCode();
            result = (result * 23) + price.GetHashCode();
            return result;
        }
    }

    /// <summary>
    /// Formats the record as "origination -> destination -> yyyy-MM-dd HH:mm -> price".
    /// </summary>
    public override string ToString()
    {
        return string.Format(
            CultureInfo.InvariantCulture,
            "{0} -> {1} -> {2:yyyy-MM-dd HH:mm} -> {3}",
            origination, destination, time, price);
    }
}
}

Program.cs

using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;

namespace ConsoleApplication1
{
/// <summary>
/// Console entry point: reads a comma-delimited file (args[0]) and a
/// pipe-delimited file (args[1]), drops duplicate records, and prints the
/// remaining records ordered by price.
/// </summary>
class Program
{
    static void Main(string[] args)
    {
        if (args.Length < 2)
        {
            Console.Error.WriteLine("Usage: <program> <comma-delimited file> <pipe-delimited file>");
            return;
        }

        // A single set is enough: adding records from both files to the same
        // HashSet removes duplicates within and across the files.
        HashSet<PriceObject> prices = new HashSet<PriceObject>();
        ReadPrices(args[0], ',', prices);
        ReadPrices(args[1], '|', prices);

        // OrderByDescending returns a new sequence and leaves its source
        // untouched, so the result has to be kept. Highest price first;
        // use OrderBy instead for cheapest-first.
        List<PriceObject> output = prices.OrderByDescending(x => x.price).ToList();

        // Prints whatever PriceObject.ToString() returns; without an override
        // on PriceObject this is just the type name.
        foreach (var item in output)
        {
            Console.WriteLine(item.ToString());
        }

        Console.ReadLine();
    }

    /// <summary>
    /// Reads every data line of <paramref name="path"/> into <paramref name="target"/>,
    /// skipping the first (header) line and any blank lines.
    /// </summary>
    private static void ReadPrices(string path, char delimiter, HashSet<PriceObject> target)
    {
        using (StreamReader reader = File.OpenText(path))
        {
            reader.ReadLine(); // discard the header row

            string line;
            while ((line = reader.ReadLine()) != null)
            {
                if (String.IsNullOrWhiteSpace(line))
                {
                    continue;
                }

                target.Add(new PriceObject(line, delimiter));
            }
        }
    }
}
}

这是一个适用于您的示例文件的简单解决方案。它不会检查文件格式是否错误。

using System;
using System.Collections.Generic;

/// <summary>
/// Console entry point: reads a comma-delimited file (args[0]) and a
/// pipe-delimited file (args[1]), sorts the records by price, removes
/// duplicates, and prints one "origin->destination->datetime->price" line
/// per remaining record.
/// </summary>
class Program
{
    // One parsed record from an input file.
    class entry
    {
        public string origin;
        public string destination;
        public DateTime time;
        public double price;
    }

    static void Main(string[] args)
    {
        if (args.Length < 2)
        {
            Console.Error.WriteLine("Usage: <program> <comma-delimited file> <pipe-delimited file>");
            return;
        }

        List<entry> data = new List<entry>();

        //parse the input files and add the data to a list
        ParseFile(data, args[0], ',');
        ParseFile(data, args[1], '|');

        //sort the list (by price first)
        data.Sort(CompareEntries);

        //remove duplicates (list must be sorted for this to work:
        //identical records end up next to each other)
        int i = 1;
        while (i < data.Count)
        {
            if (CompareEntries(data[i], data[i - 1]) == 0)
                data.RemoveAt(i);
            else
                i++;
        }

        //print the results; {3} is the price
        foreach (entry item in data)
        {
            Console.WriteLine("{0}->{1}->{2:yyyy-MM-dd HH:mm}->{3}",
                item.origin, item.destination, item.time, item.price);
        }

        Console.ReadLine();
    }

    /// <summary>
    /// Orders entries by price, then origin, then destination, then time.
    /// Returns 0 only when all four fields match, so it doubles as the
    /// duplicate test. Strings are compared ordinally so that "compares as 0"
    /// and "is the same string" always agree.
    /// </summary>
    private static int CompareEntries(entry a, entry b)
    {
        int result = a.price.CompareTo(b.price);
        if (result == 0)
            result = string.CompareOrdinal(a.origin, b.origin);
        if (result == 0)
            result = string.CompareOrdinal(a.destination, b.destination);
        if (result == 0)
            result = DateTime.Compare(a.time, b.time);
        return result;
    }

    /// <summary>
    /// Appends every parsable record of <paramref name="filename"/> to
    /// <paramref name="data"/>. Lines that do not have exactly four fields, or
    /// whose date or price cannot be parsed (such as a header row), are skipped.
    /// </summary>
    private static void ParseFile(List<entry> data, string filename, char separator)
    {
        // Parse with the invariant culture so the result does not depend on
        // the regional settings of the machine running the program.
        var culture = System.Globalization.CultureInfo.InvariantCulture;

        // File.OpenText opens the file for reading only.
        using (System.IO.StreamReader reader = System.IO.File.OpenText(filename))
        {
            string text;
            while ((text = reader.ReadLine()) != null)
            {
                string[] line = text.Split(separator);
                if (line.Length != 4)
                    continue;

                // Drop everything up to and including an optional '$'.
                string priceText = line[3].Substring(line[3].IndexOf('$') + 1);

                DateTime time;
                double price;
                if (!DateTime.TryParse(line[2], culture, System.Globalization.DateTimeStyles.None, out time)
                    || !double.TryParse(
                        priceText,
                        System.Globalization.NumberStyles.Float | System.Globalization.NumberStyles.AllowThousands,
                        culture,
                        out price))
                {
                    continue; // header row or malformed record
                }

                entry newitem = new entry();
                newitem.origin = line[0];
                newitem.destination = line[1];
                newitem.time = time;
                newitem.price = price;
                data.Add(newitem);
            }
        }
    }
}