使用 awk 检查其他文件中的特定组合
Use awk to check a specific combination in other files
我有 3 个文件
base.txt
12345 6 78
13579 2 46
24680 1 35
123451 266 78
135792 6572 46
246803 12587 35
1stcheck.txt
Some odded stuffs
AB 12345/6/78 Fx00
BC 13579/2/47 0xFF
CD 24680/1/35 5x88
AB 123451/266_10/78 Fx00 #10 is mod(266,256)
BC 135792/6572_172/46 0xFF #172 is mod(6572,256)
CD 246803/12587_43/35 5x88 #43 is mod(12587,256)
There may be some other odded stuffs
2ndcheck.txt
12345u_6_78.dat
13579u_2_46.dat
24680u_0_35.dat
123451u_10_78.dat #10 is mod(266,256)
135792u_172_46.dat #172 is mod(6572,256)
246803u_43_35.dat #43 is mod(12587,256)
1stcheck.txt 和 2ndcheck.txt 中的信息,只是把 base.txt 中的组合套用各自的模板/格式后得到的结果。
我想要
report.txt
12345 6 78 passed passed
| |
(12345/6/78) (12345u_6_78)
13579 2 46 failed passed
24680 1 35 passed failed
123451 266 78 passed passed
135792 6572 46 passed passed
246803 12587 35 passed passed
请帮忙考虑性能,因为文件比较大:
- base.txt,2ndcheck.txt ~ 8MB-12MB
- 1stcheck.txt~70MB
非常感谢
您需要自行判断这种方法是否足够节省内存:在打印表格之前,它必须先把所有文件中的数据存入数组。
需要 GNU awk
# Report, for every line of base.txt, whether its "A B C" triple also appears
# in 1stcheck.txt (as A/B/C in field 2) and in 2ndcheck.txt (as A?_B_C.dat,
# where ? is any single character).
# Requires GNU awk: uses the 3-argument match() and PROCINFO["sorted_in"].
# Matching is exact: a check entry written in a modulo form such as 266_10
# is not equal to the base value 266, so that row is reported as failed.
gawk '
    # base file: remember each whole line as a key; the stored value is the
    # line number so that END can print in the original order
    FILENAME == ARGV[1] { key[$0] = FNR; next }

    # 1st check: field 2 looks like A/B/C; rewrite it as "A B C" and record
    # a pass when that triple is a base key
    FILENAME == ARGV[2] {
        k = $2
        gsub(/\//, " ", k)
        if (k in key) pass1[k] = 1
        next
    }

    # 2nd check: pull the three numbers out of A?_B_C.dat and record a pass
    # when "A B C" is a base key
    FILENAME == ARGV[3] {
        if (match($0, /([0-9]+)._([0-9]+)_([0-9]+)\.dat/, m)) {
            k = m[1] " " m[2] " " m[3]
            if (k in key) pass2[k] = 1
        }
        next
    }

    # print the result table, one row per base line
    END {
        PROCINFO["sorted_in"] = "@val_num_asc"  # traverse by stored line number
        for (k in key) {
            printf("%s\t%s\t%s\n", k,
                   (k in pass1 ? "passed" : "failed"),
                   (k in pass2 ? "passed" : "failed"))
        }
    }
' base.txt 1stcheck.txt 2ndcheck.txt
12345 6 78 passed passed
13579 2 46 failed passed
24680 1 35 passed failed
根据 @glenn jackman 的建议,我解决了自己的问题:
# Variant that loads both check files first and then streams base.txt,
# building for each base line the exact strings the check files would hold.
# Requires GNU awk: uses the 3-argument match() and PROCINFO["sorted_in"].
# Note the argument order: check files first, base file last.
gawk '
    # 1st check file: field 2 looks like A/B/C or A/B_m/C; store it with the
    # slashes turned into spaces
    FILENAME == ARGV[1] {
        k = $2
        gsub(/\//, " ", k)
        key_first[k]    # a bare reference is enough to create the key
        next
    }

    # 2nd check file: store "A m C" taken from A?_m_C.dat
    FILENAME == ARGV[2] {
        if (match($0, /([0-9]+)._([0-9]+)_([0-9]+)\.dat/, m)) {
            key_second[m[1] " " m[2] " " m[3]]
        }
        next
    }

    # base file: fields are A B C; look both derived forms up in the stored
    # check keys
    FILENAME == ARGV[3] {
        # 1st check carries B as "B_m" (m = B mod 256) for large B.
        # NOTE(review): B equal to 256 takes the plain branch here; confirm
        # whether the threshold should be >= 256.
        if ($2 > 256) {
            first = $1 " " $2 "_" ($2 % 256) " " $3
        } else {
            first = $1 " " $2 " " $3
        }
        # 2nd check always carries B mod 256
        second = $1 " " ($2 % 256) " " $3

        if (first in key_first) pass1[$0] = 1
        if (second in key_second) pass2[$0] = 1
        key[$0] = FNR   # line number, used for output ordering
        next
    }

    # print the result table, one row per base line
    END {
        PROCINFO["sorted_in"] = "@val_num_asc"  # traverse by stored line number
        for (k in key) {
            printf("%s\t%s\t%s\n", k,
                   (k in pass1 ? "sic_passed" : "sic_failed"),
                   (k in pass2 ? "gd_passed" : "gd_failed"))
        }
    }
' 1stcheck.txt 2ndcheck.txt base.txt
我有 3 个文件
base.txt
12345 6 78
13579 2 46
24680 1 35
123451 266 78
135792 6572 46
246803 12587 35
1stcheck.txt
Some odded stuffs
AB 12345/6/78 Fx00
BC 13579/2/47 0xFF
CD 24680/1/35 5x88
AB 123451/266_10/78 Fx00 #10 is mod(266,256)
BC 135792/6572_172/46 0xFF #172 is mod(6572,256)
CD 246803/12587_43/35 5x88 #43 is mod(12587,256)
There may be some other odded stuffs
2ndcheck.txt
12345u_6_78.dat
13579u_2_46.dat
24680u_0_35.dat
123451u_10_78.dat #10 is mod(266,256)
135792u_172_46.dat #172 is mod(6572,256)
246803u_43_35.dat #43 is mod(12587,256)
1stcheck.txt 和 2ndcheck.txt 中的信息,只是把 base.txt 中的组合套用各自的模板/格式后得到的结果。
我想要
report.txt
12345 6 78 passed passed
| |
(12345/6/78) (12345u_6_78)
13579 2 46 failed passed
24680 1 35 passed failed
123451 266 78 passed passed
135792 6572 46 passed passed
246803 12587 35 passed passed
请帮忙考虑性能,因为文件比较大:
- base.txt,2ndcheck.txt ~ 8MB-12MB
- 1stcheck.txt~70MB
非常感谢
您需要自行判断这种方法是否足够节省内存:在打印表格之前,它必须先把所有文件中的数据存入数组。
需要 GNU awk
# Report, for every line of base.txt, whether its "A B C" triple also appears
# in 1stcheck.txt (as A/B/C in field 2) and in 2ndcheck.txt (as A?_B_C.dat,
# where ? is any single character).
# Requires GNU awk: uses the 3-argument match() and PROCINFO["sorted_in"].
# Matching is exact: a check entry written in a modulo form such as 266_10
# is not equal to the base value 266, so that row is reported as failed.
gawk '
    # base file: remember each whole line as a key; the stored value is the
    # line number so that END can print in the original order
    FILENAME == ARGV[1] { key[$0] = FNR; next }

    # 1st check: field 2 looks like A/B/C; rewrite it as "A B C" and record
    # a pass when that triple is a base key
    FILENAME == ARGV[2] {
        k = $2
        gsub(/\//, " ", k)
        if (k in key) pass1[k] = 1
        next
    }

    # 2nd check: pull the three numbers out of A?_B_C.dat and record a pass
    # when "A B C" is a base key
    FILENAME == ARGV[3] {
        if (match($0, /([0-9]+)._([0-9]+)_([0-9]+)\.dat/, m)) {
            k = m[1] " " m[2] " " m[3]
            if (k in key) pass2[k] = 1
        }
        next
    }

    # print the result table, one row per base line
    END {
        PROCINFO["sorted_in"] = "@val_num_asc"  # traverse by stored line number
        for (k in key) {
            printf("%s\t%s\t%s\n", k,
                   (k in pass1 ? "passed" : "failed"),
                   (k in pass2 ? "passed" : "failed"))
        }
    }
' base.txt 1stcheck.txt 2ndcheck.txt
12345 6 78 passed passed
13579 2 46 failed passed
24680 1 35 passed failed
根据 @glenn jackman 的建议,我解决了自己的问题:
# Variant that loads both check files first and then streams base.txt,
# building for each base line the exact strings the check files would hold.
# Requires GNU awk: uses the 3-argument match() and PROCINFO["sorted_in"].
# Note the argument order: check files first, base file last.
gawk '
    # 1st check file: field 2 looks like A/B/C or A/B_m/C; store it with the
    # slashes turned into spaces
    FILENAME == ARGV[1] {
        k = $2
        gsub(/\//, " ", k)
        key_first[k]    # a bare reference is enough to create the key
        next
    }

    # 2nd check file: store "A m C" taken from A?_m_C.dat
    FILENAME == ARGV[2] {
        if (match($0, /([0-9]+)._([0-9]+)_([0-9]+)\.dat/, m)) {
            key_second[m[1] " " m[2] " " m[3]]
        }
        next
    }

    # base file: fields are A B C; look both derived forms up in the stored
    # check keys
    FILENAME == ARGV[3] {
        # 1st check carries B as "B_m" (m = B mod 256) for large B.
        # NOTE(review): B equal to 256 takes the plain branch here; confirm
        # whether the threshold should be >= 256.
        if ($2 > 256) {
            first = $1 " " $2 "_" ($2 % 256) " " $3
        } else {
            first = $1 " " $2 " " $3
        }
        # 2nd check always carries B mod 256
        second = $1 " " ($2 % 256) " " $3

        if (first in key_first) pass1[$0] = 1
        if (second in key_second) pass2[$0] = 1
        key[$0] = FNR   # line number, used for output ordering
        next
    }

    # print the result table, one row per base line
    END {
        PROCINFO["sorted_in"] = "@val_num_asc"  # traverse by stored line number
        for (k in key) {
            printf("%s\t%s\t%s\n", k,
                   (k in pass1 ? "sic_passed" : "sic_failed"),
                   (k in pass2 ? "gd_passed" : "gd_failed"))
        }
    }
' 1stcheck.txt 2ndcheck.txt base.txt