用 (g)awk 注释 (c) 源代码中变量的最后使用
Annotate last use of variables in (c) source code with (g)awk
我有一个带有 c 源代码的文件,例如
func2(&x5, &x6, x4, (arg1[3]));
func2(&x7, &x8, x4, (arg1[2]));
func(&x13, &x14, 0x0, x12, x9);
func(&x17, &x18, x16, x8, x5);
uint64_t x19 = (x18 + x6);
func2(&x20, &x21, x11, 0xff);
func2(&x24, &x25, x11, 0xff11));
func(&x26, &x27, 0x0, x25, x22);
uint64_t x28 = (x27 + x23);
func(&x29, &x30, 0x0, x11, x24);
func(&x31, &x32, x30, x13, x26);
func(&x33, &x34, x32, x15, x28);
我想注释一个变量的最后一次使用。如:
func2(&x5, &x6, x4, (arg1[3]));
func2(&x7, &x8, x4, (arg1[2])); // 4,7
func(&x17, &x18, x16, x8, x5); // 5,8,16,17
uint64_t x19 = (x18 + x6); // 6,19
func2(&x20, &x21, x11, 0xff); // 21,20
func2(&x24, &x25, x11, 0xff11));
func(&x26, &x27, 0x0, x25, x22); // 25,22
uint64_t x28 = (x27 + x23); // 23,27,28
func(&x29, &x30, 0x0, x11, x24); // 24,11,29
func(&x31, &x32, x30, x13, x26); // 26,13,30,31
注释列出了在该行之后不再被使用的所有变量。(背后的语义:这些变量之后可以被重用或释放。)
变量名遵循正则表达式 /x([0-9]){1,3}/。
我尝试用下面的命令,先通过 tac 把文件逐行倒序,再交给 gawk 处理:
tac file.c | gawk ' match($0,/x([0-9]){1,3}/,a) && ! seen[a[0]] {printf "%s// %s\n",$0,a[0];seen[a[0]]=1;}{print}' |tac
产生
func2(&x5, &x6, x4, (arg1[3]));
func2(&x5, &x6, x4, (arg1[3]));// x5
func2(&x7, &x8, x4, (arg1[2]));
func2(&x7, &x8, x4, (arg1[2])); // x7
func(&x13, &x14, 0x0, x12, x9);
func(&x13, &x14, 0x0, x12, x9); // x13
func(&x17, &x18, x16, x8, x5);
func(&x17, &x18, x16, x8, x5);// x17
uint64_t x19 = (x18 + x6);
uint64_t x19 = (x18 + x6); // x19
func2(&x20, &x21, x11, 0xff);
func2(&x20, &x21, x11, 0xff);// x20
func2(&x24, &x25, x11, 0xff11));
func2(&x24, &x25, x11, 0xff11));// x24
func(&x26, &x27, 0x0, x25, x22);
func(&x26, &x27, 0x0, x25, x22);// x26
uint64_t x28 = (x27 + x23);
uint64_t x28 = (x27 + x23);// x28
func(&x29, &x30, 0x0, x11, x24);
func(&x29, &x30, 0x0, x11, x24);// x29
func(&x31, &x32, x30, x13, x26);
func(&x31, &x32, x30, x13, x26);// x31
func(&x33, &x34, x32, x15, x28);
func(&x33, &x34, x32, x15, x28);// x33
我已经很接近了,但显然这不是我想要的。
如何让 match 进行全局匹配,使其识别函数调用中的所有匹配项?又该如何避免把同一行打印两次?
最大的问题是 match() 只能找到正则表达式的第一个匹配项。您必须在每一行上反复调用它,才能找到该行中的所有变量。
如果先读取一遍文件,记录每个变量最后一次出现的行号,然后再读取第二遍,根据第一遍收集的数据打印出在当前行最后一次使用的变量,就可以用 gawk 完成。下面的 shell 脚本为了方便对其做了封装,这样您就不必手动指定两次源文件:
#!/bin/sh
# lastvars: annotate each line of a C source file with the variables
# (named x<digits>) whose last use in the file is on that line.
#
# Usage: ./lastvars file.c
#
# The same file is handed to gawk twice. The first pass records, for every
# variable, the number of the last line that mentions it. The second pass
# prints each line, followed by a tab and a "// n,m,..." comment listing the
# numeric suffixes of the variables whose recorded last line is this one.
#
# Requires gawk: the three-argument match() and the \< \> word-boundary
# operators are gawk extensions.
gawk '
  # Pass 1: NR == FNR holds only while the first file argument is read.
  NR == FNR {
    s = $0
    while (match(s, /\<x([0-9]+)\>/, a)) {
      # Later lines overwrite earlier ones, so the final value is the
      # last line on which the variable occurs.
      seen[a[1]] = FNR
      # match() reports only the leftmost match; drop the text up to and
      # including it, then search the remainder.
      s = substr(s, RSTART + RLENGTH)
    }
    next
  }
  # Pass 2: second read of the file; FNR restarts at 1 here.
  {
    s = $0
    lasts = ""
    # Per-line set of variables already listed, so a variable mentioned
    # several times on its last line is reported only once.
    delete listed
    while (match(s, /\<x([0-9]+)\>/, a)) {
      if (seen[a[1]] == FNR && !(a[1] in listed)) {
        listed[a[1]] = 1
        lasts = (lasts == "") ? a[1] : (lasts "," a[1])
      }
      s = substr(s, RSTART + RLENGTH)
    }
    if (lasts == "")
      print $0
    else
      printf "%s\t// %s\n", $0, lasts
  }
' "$1" "$1"
用法示例:
$ ./lastvars foo.c
func2(&x5, &x6, x4, (arg1[3]));
func2(&x7, &x8, x4, (arg1[2])); // 7,4
func(&x13, &x14, 0x0, x12, x9); // 14,12,9
func(&x17, &x18, x16, x8, x5); // 17,16,8,5
uint64_t x19 = (x18 + x6); // 19,18,6
func2(&x20, &x21, x11, 0xff); // 20,21
func2(&x24, &x25, x11, 0xff11));
func(&x26, &x27, 0x0, x25, x22); // 25,22
uint64_t x28 = (x27 + x23); // 27,23
func(&x29, &x30, 0x0, x11, x24); // 29,11,24
func(&x31, &x32, x30, x13, x26); // 31,30,13,26
func(&x33, &x34, x32, x15, x28); // 33,34,32,15,28
我有一个带有 c 源代码的文件,例如
func2(&x5, &x6, x4, (arg1[3]));
func2(&x7, &x8, x4, (arg1[2]));
func(&x13, &x14, 0x0, x12, x9);
func(&x17, &x18, x16, x8, x5);
uint64_t x19 = (x18 + x6);
func2(&x20, &x21, x11, 0xff);
func2(&x24, &x25, x11, 0xff11));
func(&x26, &x27, 0x0, x25, x22);
uint64_t x28 = (x27 + x23);
func(&x29, &x30, 0x0, x11, x24);
func(&x31, &x32, x30, x13, x26);
func(&x33, &x34, x32, x15, x28);
我想注释一个变量的最后一次使用。如:
func2(&x5, &x6, x4, (arg1[3]));
func2(&x7, &x8, x4, (arg1[2])); // 4,7
func(&x17, &x18, x16, x8, x5); // 5,8,16,17
uint64_t x19 = (x18 + x6); // 6,19
func2(&x20, &x21, x11, 0xff); // 21,20
func2(&x24, &x25, x11, 0xff11));
func(&x26, &x27, 0x0, x25, x22); // 25,22
uint64_t x28 = (x27 + x23); // 23,27,28
func(&x29, &x30, 0x0, x11, x24); // 24,11,29
func(&x31, &x32, x30, x13, x26); // 26,13,30,31
注释列出了在该行之后不再被使用的所有变量。(背后的语义:这些变量之后可以被重用或释放。)
变量名遵循正则表达式 /x([0-9]){1,3}/。
我尝试用下面的命令,先通过 tac 把文件逐行倒序,再交给 gawk 处理:
tac file.c | gawk ' match($0,/x([0-9]){1,3}/,a) && ! seen[a[0]] {printf "%s// %s\n",$0,a[0];seen[a[0]]=1;}{print}' |tac
产生
func2(&x5, &x6, x4, (arg1[3]));
func2(&x5, &x6, x4, (arg1[3]));// x5
func2(&x7, &x8, x4, (arg1[2]));
func2(&x7, &x8, x4, (arg1[2])); // x7
func(&x13, &x14, 0x0, x12, x9);
func(&x13, &x14, 0x0, x12, x9); // x13
func(&x17, &x18, x16, x8, x5);
func(&x17, &x18, x16, x8, x5);// x17
uint64_t x19 = (x18 + x6);
uint64_t x19 = (x18 + x6); // x19
func2(&x20, &x21, x11, 0xff);
func2(&x20, &x21, x11, 0xff);// x20
func2(&x24, &x25, x11, 0xff11));
func2(&x24, &x25, x11, 0xff11));// x24
func(&x26, &x27, 0x0, x25, x22);
func(&x26, &x27, 0x0, x25, x22);// x26
uint64_t x28 = (x27 + x23);
uint64_t x28 = (x27 + x23);// x28
func(&x29, &x30, 0x0, x11, x24);
func(&x29, &x30, 0x0, x11, x24);// x29
func(&x31, &x32, x30, x13, x26);
func(&x31, &x32, x30, x13, x26);// x31
func(&x33, &x34, x32, x15, x28);
func(&x33, &x34, x32, x15, x28);// x33
我已经很接近了,但显然这不是我想要的。
如何让 match 进行全局匹配,使其识别函数调用中的所有匹配项?又该如何避免把同一行打印两次?
最大的问题是 match() 只能找到正则表达式的第一个匹配项。您必须在每一行上反复调用它,才能找到该行中的所有变量。
如果先读取一遍文件,记录每个变量最后一次出现的行号,然后再读取第二遍,根据第一遍收集的数据打印出在当前行最后一次使用的变量,就可以用 gawk 完成。下面的 shell 脚本为了方便对其做了封装,这样您就不必手动指定两次源文件:
#!/bin/sh
# lastvars: annotate each line of a C source file with the variables
# (named x<digits>) whose last use in the file is on that line.
#
# Usage: ./lastvars file.c
#
# The same file is handed to gawk twice. The first pass records, for every
# variable, the number of the last line that mentions it. The second pass
# prints each line, followed by a tab and a "// n,m,..." comment listing the
# numeric suffixes of the variables whose recorded last line is this one.
#
# Requires gawk: the three-argument match() and the \< \> word-boundary
# operators are gawk extensions.
gawk '
  # Pass 1: NR == FNR holds only while the first file argument is read.
  NR == FNR {
    s = $0
    while (match(s, /\<x([0-9]+)\>/, a)) {
      # Later lines overwrite earlier ones, so the final value is the
      # last line on which the variable occurs.
      seen[a[1]] = FNR
      # match() reports only the leftmost match; drop the text up to and
      # including it, then search the remainder.
      s = substr(s, RSTART + RLENGTH)
    }
    next
  }
  # Pass 2: second read of the file; FNR restarts at 1 here.
  {
    s = $0
    lasts = ""
    # Per-line set of variables already listed, so a variable mentioned
    # several times on its last line is reported only once.
    delete listed
    while (match(s, /\<x([0-9]+)\>/, a)) {
      if (seen[a[1]] == FNR && !(a[1] in listed)) {
        listed[a[1]] = 1
        lasts = (lasts == "") ? a[1] : (lasts "," a[1])
      }
      s = substr(s, RSTART + RLENGTH)
    }
    if (lasts == "")
      print $0
    else
      printf "%s\t// %s\n", $0, lasts
  }
' "$1" "$1"
用法示例:
$ ./lastvars foo.c
func2(&x5, &x6, x4, (arg1[3]));
func2(&x7, &x8, x4, (arg1[2])); // 7,4
func(&x13, &x14, 0x0, x12, x9); // 14,12,9
func(&x17, &x18, x16, x8, x5); // 17,16,8,5
uint64_t x19 = (x18 + x6); // 19,18,6
func2(&x20, &x21, x11, 0xff); // 20,21
func2(&x24, &x25, x11, 0xff11));
func(&x26, &x27, 0x0, x25, x22); // 25,22
uint64_t x28 = (x27 + x23); // 27,23
func(&x29, &x30, 0x0, x11, x24); // 29,11,24
func(&x31, &x32, x30, x13, x26); // 31,30,13,26
func(&x33, &x34, x32, x15, x28); // 33,34,32,15,28