用 (g)awk 注释 (c) 源代码中变量的最后使用

Annotate last use of variables in (c) source code with (g)awk

我有一个带有 c 源代码的文件,例如

func2(&x5, &x6, x4, (arg1[3]));
func2(&x7, &x8, x4, (arg1[2]));  
func(&x13, &x14, 0x0, x12, x9);     
func(&x17, &x18, x16, x8, x5);
uint64_t x19 = (x18 + x6); 
func2(&x20, &x21, x11, 0xff);
func2(&x24, &x25, x11, 0xff11));
func(&x26, &x27, 0x0, x25, x22);
uint64_t x28 = (x27 + x23);
func(&x29, &x30, 0x0, x11, x24);
func(&x31, &x32, x30, x13, x26);
func(&x33, &x34, x32, x15, x28);

我想注释一个变量的最后一次使用。如:

func2(&x5, &x6, x4, (arg1[3]));
func2(&x7, &x8, x4, (arg1[2]));   // 4,7
func(&x17, &x18, x16, x8, x5);    // 5,8,16,17
uint64_t x19 = (x18 + x6);        // 6,19
func2(&x20, &x21, x11, 0xff);     // 21,20
func2(&x24, &x25, x11, 0xff11));   
func(&x26, &x27, 0x0, x25, x22);  // 25,22
uint64_t x28 = (x27 + x23);       // 23,27,28
func(&x29, &x30, 0x0, x11, x24);  // 24,11,29
func(&x31, &x32, x30, x13, x26);  // 26,13,30,31

注释列出了在该行之后不再被使用的所有变量。(背后的语义:这些变量之后可以被重用或释放。)

变量遵循正则表达式 /x([0-9]){1,3}/。我尝试用下面的命令,通过 tac 把该文件传给 gawk:`tac file.c | gawk 'match($0,/x([0-9]){1,3}/,a) && ! seen[a[0]] {printf "%s// %s\n",$0,a[0];seen[a[0]]=1;}{print}' | tac`,结果产生

func2(&x5, &x6, x4, (arg1[3]));
func2(&x5, &x6, x4, (arg1[3]));// x5
func2(&x7, &x8, x4, (arg1[2]));  
func2(&x7, &x8, x4, (arg1[2]));  // x7
func(&x13, &x14, 0x0, x12, x9);     
func(&x13, &x14, 0x0, x12, x9);     // x13
func(&x17, &x18, x16, x8, x5);
func(&x17, &x18, x16, x8, x5);// x17
uint64_t x19 = (x18 + x6); 
uint64_t x19 = (x18 + x6); // x19
func2(&x20, &x21, x11, 0xff);
func2(&x20, &x21, x11, 0xff);// x20
func2(&x24, &x25, x11, 0xff11));
func2(&x24, &x25, x11, 0xff11));// x24
func(&x26, &x27, 0x0, x25, x22);
func(&x26, &x27, 0x0, x25, x22);// x26
uint64_t x28 = (x27 + x23);
uint64_t x28 = (x27 + x23);// x28
func(&x29, &x30, 0x0, x11, x24);
func(&x29, &x30, 0x0, x11, x24);// x29
func(&x31, &x32, x30, x13, x26);
func(&x31, &x32, x30, x13, x26);// x31
func(&x33, &x34, x32, x15, x28);
func(&x33, &x34, x32, x15, x28);// x33

我已经很接近了,但显然这不是我想要的。

最大的问题是 match() 只能找到正则表达式的第一个匹配项。您必须在每一行上反复循环,才能找到其中的所有变量。

如果您读取文件一次以查找变量的用途,然后再读取一次以根据第一遍数据收集打印出最后使用的条目,则可以在 gawk 中完成。此 shell 脚本为方便起见将其包装,因此您不必手动指定源文件两次:

#!/bin/sh
# lastvars: print a C source file, appending "// n1,n2,..." to every line
# that holds the last occurrence of one or more variables named x<digits>.
#
# Usage: lastvars FILE
#
# The file is handed to gawk twice so that the first pass can record where
# each variable occurs last and the second pass can print the annotations.
# Requires gawk: the three-argument match() and \< \> are gawk extensions.

if [ $# -lt 1 ]; then
    echo "usage: $0 file" >&2
    exit 2
fi

gawk '
# Pass 1: NR == FNR holds only while the first file argument is being read.
# Remember, per variable number, the last line number it was seen on.
NR == FNR {
    s = $0
    # match() reports only the leftmost match, so consume the line piecewise.
    while (match(s, /\<x([0-9]+)\>/, a)) {
        seen[a[1]] = FNR
        s = substr(s, RSTART + RLENGTH)
    }
    next
}
# Pass 2: re-scan each line and collect the variables whose recorded last
# line is the current one, in order of appearance.
{
    s = $0
    lasts = ""
    while (match(s, /\<x([0-9]+)\>/, a)) {
        if (seen[a[1]] == FNR) {
            if (lasts == "")
                lasts = a[1]
            else
                lasts = lasts "," a[1]
            # Drop the entry so a variable that occurs twice on its last
            # line is listed only once.
            delete seen[a[1]]
        }
        s = substr(s, RSTART + RLENGTH)
    }
    if (lasts == "")
        print $0
    else
        printf "%s\t// %s\n", $0, lasts
}
' "$1" "$1"

用法示例:

$ ./lastvars foo.c
func2(&x5, &x6, x4, (arg1[3]));
func2(&x7, &x8, x4, (arg1[2]));     // 7,4
func(&x13, &x14, 0x0, x12, x9);         // 14,12,9
func(&x17, &x18, x16, x8, x5);  // 17,16,8,5
uint64_t x19 = (x18 + x6);  // 19,18,6
func2(&x20, &x21, x11, 0xff);   // 20,21
func2(&x24, &x25, x11, 0xff11));
func(&x26, &x27, 0x0, x25, x22);    // 25,22
uint64_t x28 = (x27 + x23); // 27,23
func(&x29, &x30, 0x0, x11, x24);    // 29,11,24
func(&x31, &x32, x30, x13, x26);    // 31,30,13,26
func(&x33, &x34, x32, x15, x28);    // 33,34,32,15,28