使用 Zig 进行简单的日志分析

Simple log analysis with Zig

受到 https://benhoyt.com/writings/count-words/ 的启发,我尝试用多种语言重写一个内部日志分析脚本(不过我不会像那篇文章那样深入!)。

在完成 Go 版本(我自己写的)和 Rust 版本(借助了 SO 上的一些帮助)之后,我目前卡在了 Zig 上。我大致理解了 https://github.com/benhoyt/countwords/blob/master/simple.zig ,但仍然很难按照这个思路翻译我的原始脚本……尤其是:使用以元组为键的哈希表,以及在解析和打印时处理月份名称……

Python中的原始脚本:

import sys

# Month abbreviation -> 1-based month number, so keys sort chronologically.
months = {
    "Jan": 1, "Feb": 2, "Mar": 3, "Apr": 4,
    "May": 5, "Jun": 6, "Jul": 7, "Aug": 8,
    "Sep": 9, "Oct": 10, "Nov": 11, "Dec": 12,
}

# Reverse lookup (month number -> abbreviation), used when printing.
months_r = dict((number, name) for name, number in months.items())

# (month number, day of month) -> number of matching lines.
totals = {}

for line in sys.stdin:
    # Only lines mentioning both "redis" and "Partial" are counted.
    if "redis" not in line or "Partial" not in line:
        continue
    # The first two whitespace-separated fields are the month name and the day.
    month_name, day_text = line.split()[:2]
    key = (months[month_name], int(day_text))
    totals[key] = totals.get(key, 0) + 1

# Tuples compare by month first, then day, so sorting yields calendar order.
for month, day in sorted(totals):
    print(months_r[month], day, totals[(month, day)])

有没有熟悉较新版本 Zig 的人能帮忙?

非常感谢!

解决方案(来自 Zig 论坛)

const std = @import("std");

/// Counting key: a calendar day identified by month number and day of month.
const Key = struct {
    /// Month number, as produced by the `months` lookup table (1-12).
    month: u4,
    /// Day of month, parsed from the second field of a log line.
    day: u5,
};

/// Packs a `Key` into a single `u64`: the month occupies the bits from 32
/// upward and the day occupies the low bits. Distinct keys therefore map to
/// distinct values, and numeric order matches (month, day) order.
fn keyHash(key: Key) u64 {
    const month_bits: u64 = key.month;
    const day_bits: u64 = key.day;
    return (month_bits << 32) | day_bits;
}

/// Hash map from a `Key` to a `usize` count.
/// Hashing is done by `keyHash`, key equality is the comparison generated by
/// `std.hash_map.getAutoEqlFn` for `Key`, and the load factor is the standard
/// library's default maximum load percentage.
const Totals = std.HashMap(
    Key,
    usize,
    keyHash,
    std.hash_map.getAutoEqlFn(Key),
    std.hash_map.default_max_load_percentage,
);

/// One output row: a `Key` paired with the number of lines counted for it.
const Item = struct {
    /// The (month, day) this row refers to.
    key: Key,
    /// How many lines were counted for `key`.
    count: usize,
};

/// "Less than" predicate for sorting `Item`s: returns true when `lhs` comes
/// strictly before `rhs`, comparing the month first and then the day.
fn itemSort(context: void, lhs: Item, rhs: Item) bool {
    _ = context; // no sort context is needed
    if (lhs.key.month != rhs.key.month) {
        return lhs.key.month < rhs.key.month;
    }
    return lhs.key.day < rhs.key.day;
}

/// Month abbreviation -> 1-based month number.
const months = std.ComptimeStringMap(u4, .{
    .{ "Jan", 1 },
    .{ "Feb", 2 },
    .{ "Mar", 3 },
    .{ "Apr", 4 },
    .{ "May", 5 },
    .{ "Jun", 6 },
    .{ "Jul", 7 },
    .{ "Aug", 8 },
    .{ "Sep", 9 },
    .{ "Oct", 10 },
    .{ "Nov", 11 },
    .{ "Dec", 12 },
});

/// Month number -> abbreviation. Index 0 holds a placeholder so that the
/// 1-based numbers stored in `months` can index this table directly.
const months_r = [_][]const u8{
    "(padding)",
    "Jan",
    "Feb",
    "Mar",
    "Apr",
    "May",
    "Jun",
    "Jul",
    "Aug",
    "Sep",
    "Oct",
    "Nov",
    "Dec",
};

/// Reads log lines from stdin, counts the lines that contain both "redis" and
/// "Partial" per (month, day) taken from the first two whitespace-separated
/// fields, and prints one "<Mon> <day> <count>" row per day to stdout in
/// calendar order.
///
/// Errors: returns `error.MissingMonth` / `error.MissingDay` when a matching
/// line has fewer than two fields, `error.UnknownMonth` when the first field
/// is not a known month abbreviation, and propagates day-parsing, I/O and
/// allocation errors.
pub fn main() !void {
    var gpa = std.heap.GeneralPurposeAllocator(.{}){};
    defer if (gpa.deinit()) std.log.err("memory leak detected", .{});
    const allocator = &gpa.allocator;

    var totals = Totals.init(allocator);
    defer totals.deinit();

    // Keep the buffered reader in a named variable: the reader handed out by
    // `reader()` refers back to it, so it must outlive the read loop instead
    // of being a temporary.
    var buffered_stdin = std.io.bufferedReader(std.io.getStdIn().reader());
    const stdin = buffered_stdin.reader();

    // Each line is read into this fixed-size buffer.
    var buf: [4096]u8 = undefined;
    while (try stdin.readUntilDelimiterOrEof(&buf, '\n')) |line| {
        // Only lines mentioning both markers are counted.
        if (std.mem.indexOf(u8, line, "redis") == null or std.mem.indexOf(u8, line, "Partial") == null)
            continue;

        // The input is untrusted: report malformed lines as errors rather
        // than unwrapping with `.?`.
        var fields = std.mem.tokenize(line, &std.ascii.spaces);
        const month_name = fields.next() orelse return error.MissingMonth;
        const month = months.get(month_name) orelse return error.UnknownMonth;
        const day_text = fields.next() orelse return error.MissingDay;
        const day = try std.fmt.parseUnsigned(u5, day_text, 10);

        // Insert-or-increment: a fresh entry starts at zero before the bump.
        const res = try totals.getOrPut(.{ .month = month, .day = day });
        if (!res.found_existing) res.entry.value = 0;
        res.entry.value += 1;
    }

    var stdout = std.io.bufferedWriter(std.io.getStdOut().writer());
    defer stdout.flush() catch std.log.err("stdout flushing failed", .{});
    const out = stdout.writer();

    // Copy the map entries into a slice so they can be sorted.
    const items = try allocator.alloc(Item, totals.count());
    defer allocator.free(items);

    {
        var it = totals.iterator();
        var i: usize = 0;
        while (it.next()) |kv| : (i += 1) {
            items[i] = .{ .key = kv.key, .count = kv.value };
        }
    }

    std.sort.sort(Item, items, {}, itemSort);

    for (items) |item| {
        try out.print("{s} {d} {d}\n", .{ months_r[item.key.month], item.key.day, item.count });
    }
}