使用 Zig 进行简单的日志分析
Simple log analysis with Zig
受到 https://benhoyt.com/writings/count-words/ 的启发,我尝试用多种语言重写一个内部日志分析脚本(不过我不会像那篇文章那样深入!)。
在完成 Go 版本(我自己写的)和 Rust 版本(借助了 SO 上的一些帮助)之后,我目前卡在了 Zig 上。我大致理解了 https://github.com/benhoyt/countwords/blob/master/simple.zig ,但仍然很难照着这个思路翻译我的原始脚本……尤其是:使用以元组为键的哈希表,以及在解析和打印时处理月份名称……
Python中的原始脚本:
import sys
# Month abbreviation -> month number, so keys sort chronologically.
months = {"Jan": 1, "Feb": 2, "Mar": 3, "Apr": 4, "May": 5, "Jun": 6,
          "Jul": 7, "Aug": 8, "Sep": 9, "Oct": 10, "Nov": 11, "Dec": 12}
# Reverse lookup (month number -> abbreviation), used when printing.
months_r = {v: k for k, v in months.items()}

# (month number, day) -> number of matching lines seen.
totals = {}
for line in sys.stdin:
    # Only lines mentioning both "redis" and "Partial" are counted.
    if "redis" in line and "Partial" in line:
        # The first two whitespace-separated fields are read as the
        # month abbreviation and the day of month.
        f1, f2 = line.split()[:2]
        w = (months[f1], int(f2))
        totals[w] = totals.get(w, 0) + 1

# Tuple keys sort by month number first, then by day.
for k in sorted(totals.keys()):
    print(months_r[k[0]], k[1], totals[k])
有没有熟悉最新版 Zig 的朋友能帮忙?
非常感谢!
解决方案(来自 Zig 论坛)
const std = @import("std");
/// Map key identifying one calendar day of the log.
const Key = struct {
    /// Month number, as produced by the `months` lookup.
    month: u4,
    /// Day of month, as parsed from the log line.
    day: u5,
};

/// Packs a key into a u64: the month is shifted up by 32 bits and the day
/// occupies the low bits, so distinct keys always yield distinct values.
fn keyHash(key: Key) u64 {
    const month_bits: u64 = key.month;
    const day_bits: u64 = key.day;
    return (month_bits << 32) | day_bits;
}
/// Hash map from a (month, day) `Key` to the number of matching log lines.
/// Uses `keyHash` for hashing, the auto-generated equality function for
/// `Key`, and the standard library's default maximum load percentage.
const Totals = std.HashMap(Key, usize, keyHash, std.hash_map.getAutoEqlFn(Key), std.hash_map.default_max_load_percentage);
/// One (key, count) pair copied out of the map so the results can be sorted.
const Item = struct {
    /// The (month, day) the count belongs to.
    key: Key,
    /// Number of matching lines recorded for that key.
    count: usize,
};
/// "Less than" predicate for sorting items chronologically: orders by month
/// first and by day within the same month. `context` is unused.
fn itemSort(context: void, lhs: Item, rhs: Item) bool {
    if (lhs.key.month != rhs.key.month) {
        return lhs.key.month < rhs.key.month;
    }
    return lhs.key.day < rhs.key.day;
}
// zig fmt: off
/// Compile-time lookup from three-letter month abbreviation to month number.
const months = std.ComptimeStringMap(u4, .{
    .{ "Jan", 1 },
    .{ "Feb", 2 },
    .{ "Mar", 3 },
    .{ "Apr", 4 },
    .{ "May", 5 },
    .{ "Jun", 6 },
    .{ "Jul", 7 },
    .{ "Aug", 8 },
    .{ "Sep", 9 },
    .{ "Oct", 10 },
    .{ "Nov", 11 },
    .{ "Dec", 12 },
});
/// Reverse lookup: month number -> abbreviation. Index 0 holds a placeholder
/// so that month numbers 1..12 can be used directly as indices.
const months_r = [_][]const u8{
    "(padding)",
    "Jan",
    "Feb",
    "Mar",
    "Apr",
    "May",
    "Jun",
    "Jul",
    "Aug",
    "Sep",
    "Oct",
    "Nov",
    "Dec",
};
// zig fmt: on
/// Reads log lines from stdin, counts per (month, day) the lines containing
/// both "redis" and "Partial", and prints "<Mon> <day> <count>" to stdout in
/// chronological order.
///
/// Returns an error when a matching line does not start with a known month
/// abbreviation followed by a day number, when a line exceeds the 4096-byte
/// buffer, or when I/O or allocation fails.
pub fn main() !void {
    var gpa = std.heap.GeneralPurposeAllocator(.{}){};
    defer if (gpa.deinit()) std.log.err("memory leak detected", .{});
    const allocator = &gpa.allocator;

    var totals = Totals.init(allocator);
    defer totals.deinit();

    // Keep the buffered reader in a named variable, the same way stdout is
    // set up below, instead of calling `.reader()` on a temporary value.
    var buffered_stdin = std.io.bufferedReader(std.io.getStdIn().reader());
    const stdin = buffered_stdin.reader();
    var buf: [4096]u8 = undefined;
    while (try stdin.readUntilDelimiterOrEof(&buf, '\n')) |line| {
        // Only lines mentioning both markers are counted.
        if (std.mem.indexOf(u8, line, "redis") == null or std.mem.indexOf(u8, line, "Partial") == null)
            continue;

        // The first two whitespace-separated fields are the month
        // abbreviation and the day. Malformed lines produce an error
        // instead of unwrapping a null optional.
        var fields = std.mem.tokenize(line, &std.ascii.spaces);
        const month_name = fields.next() orelse return error.MissingMonth;
        const month = months.get(month_name) orelse return error.UnknownMonth;
        const day_text = fields.next() orelse return error.MissingDay;
        const day = try std.fmt.parseUnsigned(u5, day_text, 10);

        const res = try totals.getOrPut(.{ .month = month, .day = day });
        if (res.found_existing) {
            res.entry.value += 1;
        } else {
            res.entry.value = 1;
        }
    }

    var stdout = std.io.bufferedWriter(std.io.getStdOut().writer());
    defer stdout.flush() catch std.log.err("stdout flushing failed", .{});
    const out = stdout.writer();

    // Copy the map entries into a slice so they can be sorted.
    var items = try allocator.alloc(Item, totals.count());
    defer allocator.free(items);
    {
        var it = totals.iterator();
        var i: usize = 0;
        while (it.next()) |kv| : (i += 1) {
            items[i] = .{ .key = kv.key, .count = kv.value };
        }
    }
    std.sort.sort(Item, items, {}, itemSort);

    for (items) |item| {
        try out.print("{s} {d} {d}\n", .{ months_r[item.key.month], item.key.day, item.count });
    }
}
受到 https://benhoyt.com/writings/count-words/ 的启发,我尝试用多种语言重写一个内部日志分析脚本(不过我不会像那篇文章那样深入!)。
在完成 Go 版本(我自己写的)和 Rust 版本(借助了 SO 上的一些帮助)之后,我目前卡在了 Zig 上。我大致理解了 https://github.com/benhoyt/countwords/blob/master/simple.zig ,但仍然很难照着这个思路翻译我的原始脚本……尤其是:使用以元组为键的哈希表,以及在解析和打印时处理月份名称……
Python中的原始脚本:
import sys
# Month abbreviation -> month number, so keys sort chronologically.
months = {"Jan": 1, "Feb": 2, "Mar": 3, "Apr": 4, "May": 5, "Jun": 6,
          "Jul": 7, "Aug": 8, "Sep": 9, "Oct": 10, "Nov": 11, "Dec": 12}
# Reverse lookup (month number -> abbreviation), used when printing.
months_r = {v: k for k, v in months.items()}

# (month number, day) -> number of matching lines seen.
totals = {}
for line in sys.stdin:
    # Only lines mentioning both "redis" and "Partial" are counted.
    if "redis" in line and "Partial" in line:
        # The first two whitespace-separated fields are read as the
        # month abbreviation and the day of month.
        f1, f2 = line.split()[:2]
        w = (months[f1], int(f2))
        totals[w] = totals.get(w, 0) + 1

# Tuple keys sort by month number first, then by day.
for k in sorted(totals.keys()):
    print(months_r[k[0]], k[1], totals[k])
有没有熟悉最新版 Zig 的朋友能帮忙?
非常感谢!
解决方案(来自 Zig 论坛)
const std = @import("std");
/// Map key identifying one calendar day of the log.
const Key = struct {
    /// Month number, as produced by the `months` lookup.
    month: u4,
    /// Day of month, as parsed from the log line.
    day: u5,
};

/// Packs a key into a u64: the month is shifted up by 32 bits and the day
/// occupies the low bits, so distinct keys always yield distinct values.
fn keyHash(key: Key) u64 {
    const month_bits: u64 = key.month;
    const day_bits: u64 = key.day;
    return (month_bits << 32) | day_bits;
}
/// Hash map from a (month, day) `Key` to the number of matching log lines.
/// Uses `keyHash` for hashing, the auto-generated equality function for
/// `Key`, and the standard library's default maximum load percentage.
const Totals = std.HashMap(Key, usize, keyHash, std.hash_map.getAutoEqlFn(Key), std.hash_map.default_max_load_percentage);
/// One (key, count) pair copied out of the map so the results can be sorted.
const Item = struct {
    /// The (month, day) the count belongs to.
    key: Key,
    /// Number of matching lines recorded for that key.
    count: usize,
};
/// "Less than" predicate for sorting items chronologically: orders by month
/// first and by day within the same month. `context` is unused.
fn itemSort(context: void, lhs: Item, rhs: Item) bool {
    if (lhs.key.month != rhs.key.month) {
        return lhs.key.month < rhs.key.month;
    }
    return lhs.key.day < rhs.key.day;
}
// zig fmt: off
/// Compile-time lookup from three-letter month abbreviation to month number.
const months = std.ComptimeStringMap(u4, .{
    .{ "Jan", 1 },
    .{ "Feb", 2 },
    .{ "Mar", 3 },
    .{ "Apr", 4 },
    .{ "May", 5 },
    .{ "Jun", 6 },
    .{ "Jul", 7 },
    .{ "Aug", 8 },
    .{ "Sep", 9 },
    .{ "Oct", 10 },
    .{ "Nov", 11 },
    .{ "Dec", 12 },
});
/// Reverse lookup: month number -> abbreviation. Index 0 holds a placeholder
/// so that month numbers 1..12 can be used directly as indices.
const months_r = [_][]const u8{
    "(padding)",
    "Jan",
    "Feb",
    "Mar",
    "Apr",
    "May",
    "Jun",
    "Jul",
    "Aug",
    "Sep",
    "Oct",
    "Nov",
    "Dec",
};
// zig fmt: on
/// Reads log lines from stdin, counts per (month, day) the lines containing
/// both "redis" and "Partial", and prints "<Mon> <day> <count>" to stdout in
/// chronological order.
///
/// Returns an error when a matching line does not start with a known month
/// abbreviation followed by a day number, when a line exceeds the 4096-byte
/// buffer, or when I/O or allocation fails.
pub fn main() !void {
    var gpa = std.heap.GeneralPurposeAllocator(.{}){};
    defer if (gpa.deinit()) std.log.err("memory leak detected", .{});
    const allocator = &gpa.allocator;

    var totals = Totals.init(allocator);
    defer totals.deinit();

    // Keep the buffered reader in a named variable, the same way stdout is
    // set up below, instead of calling `.reader()` on a temporary value.
    var buffered_stdin = std.io.bufferedReader(std.io.getStdIn().reader());
    const stdin = buffered_stdin.reader();
    var buf: [4096]u8 = undefined;
    while (try stdin.readUntilDelimiterOrEof(&buf, '\n')) |line| {
        // Only lines mentioning both markers are counted.
        if (std.mem.indexOf(u8, line, "redis") == null or std.mem.indexOf(u8, line, "Partial") == null)
            continue;

        // The first two whitespace-separated fields are the month
        // abbreviation and the day. Malformed lines produce an error
        // instead of unwrapping a null optional.
        var fields = std.mem.tokenize(line, &std.ascii.spaces);
        const month_name = fields.next() orelse return error.MissingMonth;
        const month = months.get(month_name) orelse return error.UnknownMonth;
        const day_text = fields.next() orelse return error.MissingDay;
        const day = try std.fmt.parseUnsigned(u5, day_text, 10);

        const res = try totals.getOrPut(.{ .month = month, .day = day });
        if (res.found_existing) {
            res.entry.value += 1;
        } else {
            res.entry.value = 1;
        }
    }

    var stdout = std.io.bufferedWriter(std.io.getStdOut().writer());
    defer stdout.flush() catch std.log.err("stdout flushing failed", .{});
    const out = stdout.writer();

    // Copy the map entries into a slice so they can be sorted.
    var items = try allocator.alloc(Item, totals.count());
    defer allocator.free(items);
    {
        var it = totals.iterator();
        var i: usize = 0;
        while (it.next()) |kv| : (i += 1) {
            items[i] = .{ .key = kv.key, .count = kv.value };
        }
    }
    std.sort.sort(Item, items, {}, itemSort);

    for (items) |item| {
        try out.print("{s} {d} {d}\n", .{ months_r[item.key.month], item.key.day, item.count });
    }
}