C# - 从日志文件中提取部分

C# - Extract Parts from Log File

我有一个日志文件,其中包含错误,我想尝试将消息的某些部分提取到另一个文件中。

我在下面粘贴了一个日志示例,我希望在日志中搜索“LEVEL: Error”并提取此行以及作为此错误一部分的“MESSAGE :”。

“MESSAGE :”的难点在于消息内容会换行,延续到后面的若干行。

2020-07-15-05.26.22.685000+060 XXXXXXXXXXX          LEVEL: Error
PID     : XXXX                 TID : XXXXX          PROC : XXXXXXXXX
INSTANCE: XXX                  NODE : XXX           DB   : XXXXXXXXX
APPHDL  : XXXXXXX              APPID: XXXXXXXXXXXXXXXXXXXX
AUTHID  : XXXXXXXX             HOSTNAME: XXXXXXXXXXXXX
EDUID   : XXXXX                EDUNAME: XXXXXXXXXXXXXXXXXXXXXXXXXXXXX
FUNCTION: XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
MESSAGE : The error message is displayed here, however, it is not
          on the 1 line making it harder for me to work out how to
          extract it...
DATA #1 : XXXXXXXXXXXXXXXXXXXX


2020-07-15-05.26.22.685000+060 XXXXXXXXXXX          LEVEL: INFO
PID     : XXXX                 TID : XXXXX          PROC : XXXXXXXXX
INSTANCE: XXX                  NODE : XXX           DB   : XXXXXXXXX
APPHDL  : XXXXXXX              APPID: XXXXXXXXXXXXXXXXXXXX
AUTHID  : XXXXXXXX             HOSTNAME: XXXXXXXXXXXXX
EDUID   : XXXXX                EDUNAME: XXXXXXXXXXXXXXXXXXXXXXXXXXXXX
FUNCTION: XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
MESSAGE : No error message, this is just a
          test line for the question on
          Whosebug.
DATA #1 : XXXXXXXXXXXXXXXXXXXX

输出文件的示例:

2020-07-15-05.26.22.685000+060 XXXXXXXXXXX          LEVEL: Error
MESSAGE : The error message is displayed here, however, it is not
          on the 1 line making it harder for me to work out how to
          extract it...

尝试以下操作:

using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.IO ;
using System.Text.RegularExpressions;
using System.Globalization;
namespace ConsoleApplication11
{
    /// <summary>
    /// Reads a diagnostic log and turns each record (a "LEVEL:" header line followed by
    /// "NAME : value" field lines) into an <see cref="Entry"/>.
    /// </summary>
    class Program
    {
        const string FILENAME = @"c:\temp\test.txt";

        // One "NAME : value" pair; the value is the first run of non-whitespace after the colon.
        const string FieldPattern = @"(?'name'[^:]+):\s+(?'value'[^\s]+)";

        // Record header: a leading non-blank token (the timestamp), then "...LEVEL: <level>".
        const string HeaderPattern = @"^(?'date'[^\s]+)([^:]+:\s+)(?'level'\w+)";

        /// <summary>
        /// Opens <see cref="FILENAME"/> and parses it into entries.
        /// </summary>
        /// <param name="args">Command-line arguments (unused).</param>
        static void Main(string[] args)
        {
            // "using" guarantees the file handle is released even if parsing throws.
            using (StreamReader reader = new StreamReader(FILENAME))
            {
                List<Entry> entries = ParseEntries(reader);
            }
        }

        /// <summary>
        /// Parses every record available from <paramref name="reader"/>.
        /// </summary>
        /// <param name="reader">Source of log lines; read until it returns null.</param>
        /// <returns>The entries in the order their header lines were read.</returns>
        /// <exception cref="FormatException">
        /// A header line's timestamp does not match the expected format.
        /// </exception>
        static List<Entry> ParseEntries(TextReader reader)
        {
            List<Entry> entries = new List<Entry>();
            Entry entry = null;
            bool inMessage = false;
            string line;

            while ((line = reader.ReadLine()) != null)
            {
                // Blank lines carry no data.
                if (line.Trim().Length == 0)
                {
                    continue;
                }

                if (line.Contains("LEVEL:"))
                {
                    // Only start a new record when the line really has the header shape;
                    // otherwise fall through and treat it like any other line.
                    Match header = Regex.Match(line, HeaderPattern);
                    if (header.Success)
                    {
                        entry = new Entry();
                        entries.Add(entry);
                        // NOTE(review): this format consumes a trailing "+060" as a two-digit
                        // hour offset followed by a literal '0' - confirm that matches the
                        // log's timezone convention.
                        entry.date = DateTime.ParseExact(header.Groups["date"].Value, "yyyy-MM-dd-HH.mm.ss.ffffffzz0", CultureInfo.InvariantCulture);
                        entry.level = header.Groups["level"].Value.Trim();
                        inMessage = false;
                        continue;
                    }
                }

                // Field lines that appear before the first header have no record to attach to.
                if (entry == null)
                {
                    continue;
                }

                // Indented lines directly after "MESSAGE :" continue the message text.
                if (inMessage && line.StartsWith(" "))
                {
                    entry.message += " " + line.Trim();
                    continue;
                }

                inMessage = ApplyFields(entry, line);
            }

            return entries;
        }

        /// <summary>
        /// Copies every recognised "NAME : value" pair on <paramref name="line"/> into
        /// <paramref name="entry"/>.
        /// </summary>
        /// <param name="entry">Record being populated.</param>
        /// <param name="line">A non-blank, non-header log line.</param>
        /// <returns>
        /// true when the line started a MESSAGE field (so following indented lines are
        /// continuations); otherwise false.
        /// </returns>
        static bool ApplyFields(Entry entry, string line)
        {
            foreach (Match match in Regex.Matches(line, FieldPattern))
            {
                Group nameGroup = match.Groups["name"];
                string name = nameGroup.Value.Trim();
                string value = match.Groups["value"].Value.Trim();

                switch (name)
                {
                    case "PID":
                        entry.pid = value;
                        break;
                    case "TID":
                        entry.tid = value;
                        break;
                    case "PROC":
                        entry.proc = value;
                        break;
                    case "INSTANCE":
                        entry.instance = value;
                        break;
                    case "NODE":
                        entry.node = value;
                        break;
                    case "DB":
                        entry.db = value;
                        break;
                    case "APPHDL":
                        entry.apphdl = value;
                        break;
                    case "APPID":
                        entry.appid = value;
                        break;
                    case "AUTHID":
                        entry.authid = value;
                        break;
                    case "HOSTNAME":
                        entry.hostname = value;
                        break;
                    case "EDUID":
                        entry.eduid = value;
                        break;
                    case "EDUNAME":
                        entry.eduname = value;
                        break;
                    case "FUNCTION":
                        entry.function = value;
                        break;
                    case "MESSAGE":
                        // Keep everything after the colon that ends the name, so colons inside
                        // the message text do not truncate it. Returning here also stops any
                        // "word: word" inside the message from being parsed as a field.
                        entry.message = line.Substring(nameGroup.Index + nameGroup.Length + 1).Trim();
                        return true;
                    case "DATA #1":
                        entry.data = value;
                        break;
                    default:
                        break;
                }
            }

            return false;
        }
    }
    /// <summary>
    /// One parsed log record: the header's timestamp and level plus the named fields
    /// that follow it. String fields hold the text captured for the matching field name.
    /// </summary>
    public class Entry
    {
        // Timestamp parsed from the first token of the "LEVEL:" header line.
        public DateTime date { get; set; }
        // Word following "LEVEL:" on the header line (e.g. "Error", "INFO").
        public string level { get; set; }
        // Value of the "PID" field.
        public string pid { get; set; }
        // Value of the "TID" field.
        public string tid { get; set; }
        // Value of the "PROC" field.
        public string proc { get; set; }
        // Value of the "INSTANCE" field.
        public string instance { get; set; }
        // Value of the "NODE" field.
        public string node { get; set; }
        // Value of the "DB" field.
        public string db { get; set; }
        // Value of the "APPHDL" field.
        public string apphdl { get; set; }
        // Value of the "APPID" field.
        public string appid { get; set; }
        // Value of the "AUTHID" field.
        public string authid { get; set; }
        // Value of the "HOSTNAME" field.
        public string hostname { get; set; }
        // Value of the "EDUID" field.
        public string eduid { get; set; }
        // Value of the "EDUNAME" field.
        public string eduname { get; set; }
        // Value of the "FUNCTION" field.
        public string function { get; set; }
        // Text of the "MESSAGE" field; indented continuation lines are appended, separated by a space.
        public string message { get; set; }
        // Value of the "DATA #1" field.
        public string data { get; set; }
    }

}

如果文件具有相同的基本结构,可以先用正则表达式按日期模式拆分出每条日志,然后使用 Substring() 和 IndexOf() 提取所需的字符串。

/// <summary>
/// Splits the log text into records and prints, for every record that has a
/// "MESSAGE :" field, its first line followed by the message text.
/// </summary>
/// <param name="args">Command-line arguments (unused).</param>
static void Main(string[] args)
{
    // Split the log text on the "yyyy-MM-dd" that starts each record. Regex.Split discards
    // the matched text, so every chunk begins right after the date (e.g. "-05.26.22...").
    Regex reg = new Regex(@"\d{4}-\d{2}-\d{2}");
    var splitted = reg.Split(logfile);

    foreach (var log in splitted)
    {
        int indexOfMessage = log.IndexOf("MESSAGE :", StringComparison.Ordinal);
        if (indexOfMessage < 0)
        {
            continue;
        }

        // The summary is the first line of THIS chunk, so the newline must be located in
        // "log", not in the whole file. The char overload is an ordinal search, which also
        // finds the '\n' of a "\r\n" pair on runtimes where the culture-aware string
        // overload does not.
        int endOfFirstLine = log.IndexOf('\n');
        string summary = endOfFirstLine < 0 ? log : log.Substring(0, endOfFirstLine);
        summary = summary.TrimEnd('\r');

        // The message runs up to the next "DATA" marker. Search only after the message
        // start, and take the rest of the chunk when there is no marker, so the
        // Substring length can never be negative.
        int indexOfData = log.IndexOf("DATA", indexOfMessage, StringComparison.Ordinal);
        string message = indexOfData < 0
            ? log.Substring(indexOfMessage)
            : log.Substring(indexOfMessage, indexOfData - indexOfMessage);

        // Concatenate the header line (time and level) with the log message.
        Console.WriteLine(summary + Environment.NewLine + message);
    }

    Console.ReadLine();
}

输出:

-05.26.22.685000+060 XXXXXXXXXXX          LEVEL: Error

MESSAGE : The error message is displayed here, however, it is not
                                       on the 1 line making it harder for me to work out how to
                                       extract it...

-05.26.22.685000+060 XXXXXXXXXXX          LEVEL: INFO

MESSAGE : No error message, this is just a
                                       test line for the question on
                                       Whosebug.