越界访问字符串不会触发任何 valgrind/ASAN/UBSAN 警告
Accessing a string out of bounds does not trigger any valgrind/ASAN/UBSAN warnings
我有这个代码:
// Entry point: runs escape_latex() on the first command-line argument and
// logs the result through info().
// NOTE(review): args[1] is read without checking args.length first.
// NOTE(review): the escaped text is handed to info() as its first argument,
// which looks like a printf-style format string — a '%' in the text would
// then be interpreted as a conversion; confirm.
static int main(string[] args) {
info(escape_latex(args[1]));
return 0;
}
// Builds a copy of `input` in which every character that has an entry in
// `map` is replaced by the mapped string; every other character is appended
// unchanged. Logs the character count and each visited character via info().
//
// NOTE(review): the loop bound is a character count (char_count()), but the
// counter is passed straight to get_char(). If get_char() takes a byte
// offset rather than a character index (as the Vala GLib binding docs
// suggest), multi-byte characters are read from their middle and the last
// bytes of the string are never reached — that would match the skipped
// indices and the truncated output shown in the log further down. Confirm.
string escape_latex(string input) {
// Output buffer, pre-sized from input.length plus 20 extra.
var builder = new StringBuilder.sized(input.length + 20);
// Replacement table: key is a single character as a string, value is its replacement.
var map = new Gee.HashMap<string, string>();
// ...<Snip>...
// Fix for some weird unicode bugs
// NOTE(review): presumably this swallows the garbage value produced when
// get_char() lands inside a multi-byte sequence — confirm.
map["\xff\xbf\xbf\xbf\xbf\xbf"] = "";
info("Len: %d", input.char_count());
// char_count() is re-evaluated on every iteration of the loop condition.
for(var i = 0; i < input.char_count(); i++) {
var ic = input.get_char(i);
var as_string = ic.to_string();
info("%d %s", i, as_string);
if(map.has_key(as_string)) {
// Mapped character: emit its replacement.
builder.append(map[as_string]);
} else {
// Unmapped character: copy it through as-is.
builder.append_unichar(ic);
}
}
return builder.str;
}
如果我传入“foo123”，会得到预期的输出“foo123”。但是如果我传入“Geldbeutel+Schlüsselanhänger”，得到的输出却是“Geldbeutel+Schl?sselanh?ng”（最后两个字符丢失）。
现在我将 for 循环更改为 for(var i = 0; i <= input.char_count(); i++) {
对于“foo123”，我得到了预期的输出；对于“Geldbeutel+Schlüsselanhänger”，我得到了“Geldbeutel+Schl?sselanh?nge”（Valgrind、ASAN 和 UBSAN 都没有报告任何问题）。
现在我将 for 循环更改为 for(var i = 0; i <= input.char_count() + 1; i++) {
“foo123”现在变成了“foo123G”，因为我读到了字符串之外的其他内存；但是“Geldbeutel+Schlüsselanhänger”却给出了正确的输出“Geldbeutel+Schl?sselanh?nger”。
对于最后一个示例输入,一个示例输出:
** INFO: 19:41:57.903: a.vala:23: Len: 28
** INFO: 19:41:57.903: a.vala:29: 0 G
** INFO: 19:41:57.903: a.vala:29: 1 e
** INFO: 19:41:57.903: a.vala:29: 2 l
** INFO: 19:41:57.903: a.vala:29: 3 d
** INFO: 19:41:57.903: a.vala:29: 4 b
** INFO: 19:41:57.903: a.vala:29: 5 e
** INFO: 19:41:57.903: a.vala:29: 6 u
** INFO: 19:41:57.903: a.vala:29: 7 t
** INFO: 19:41:57.903: a.vala:29: 8 e
** INFO: 19:41:57.903: a.vala:29: 9 l
** INFO: 19:41:57.903: a.vala:29: 10 +
** INFO: 19:41:57.903: a.vala:29: 11 S
** INFO: 19:41:57.903: a.vala:29: 12 c
** INFO: 19:41:57.903: a.vala:29: 13 h
** INFO: 19:41:57.903: a.vala:29: 14 l
** INFO: 19:41:57.903: a.vala:29: 15 ?
** INFO: 19:41:57.903: a.vala:29: 17 s
** INFO: 19:41:57.903: a.vala:29: 18 s
** INFO: 19:41:57.903: a.vala:29: 19 e
** INFO: 19:41:57.903: a.vala:29: 20 l
** INFO: 19:41:57.903: a.vala:29: 21 a
** INFO: 19:41:57.903: a.vala:29: 22 n
** INFO: 19:41:57.903: a.vala:29: 23 h
** INFO: 19:41:57.903: a.vala:29: 24 ?
** INFO: 19:41:57.903: a.vala:29: 26 n
** INFO: 19:41:57.903: a.vala:29: 27 g
** INFO: 19:41:57.903: a.vala:29: 28 e
** INFO: 19:41:57.903: a.vala:29: 29 r // <- Here, I access an invalid index, but it works
** INFO: 19:41:57.903: a.vala:2: Geldbeutel+Schl?sselanh?nger
这似乎和 Unicode 有关，但是我找不到让这个函数正常工作的办法。
这与语言环境（locale）有关：C 运行时环境的默认语言环境是 US-ASCII。您可以为 LocaleCategory.ALL 向 Intl.setlocale() 传入空字符串，将其设置为运行时环境中用户首选的语言环境；这两个值恰好也是该函数的默认参数值，因此直接调用 Intl.setlocale(); 即可：
// Entry point: switches to the user's preferred locale, escapes the first
// command-line argument with escape_latex() and prints the result followed
// by a newline.
//
// Returns 0 on success, 1 when no argument was supplied.
static int main(string[] args) {
    // With no arguments this selects the user's locale for all categories,
    // so non-ASCII characters are printed instead of being replaced by '?'.
    Intl.setlocale();

    // args[0] is the program name; the text to escape must be args[1].
    if(args.length < 2) {
        printerr("error: expected the text to escape as the first argument\n");
        return 1;
    }

    // print() takes a printf-style format string. Passing the escaped text
    // as data keeps any '%' in it from being read as a conversion.
    print("%s\n", escape_latex(args[1]));
    return 0;
}
// Returns a copy of `input` in which every character that has an entry in
// the replacement table is replaced by the mapped string; every other
// character is copied through unchanged.
//
// The string is walked with get_next_char(), which advances a byte offset
// by the encoded width of each character. Indexing get_char() with a
// character counter is wrong for UTF-8 text: get_char() takes a byte
// offset, so multi-byte characters were read from their middle and the
// last bytes of the string were never reached.
string escape_latex(string input) {
    // Output buffer, pre-sized from the input's byte length plus some headroom.
    var builder = new StringBuilder.sized(input.length + 20);
    // Replacement table: key is a single character as a string, value is its replacement.
    var map = new Gee.HashMap<string, string>();
    // ...<Snip>...
    // Fix for some weird unicode bugs
    // NOTE(review): presumably only needed while characters were read from
    // the middle of a multi-byte sequence; kept so existing behaviour for
    // that value is unchanged — confirm whether it can be dropped.
    map["\xff\xbf\xbf\xbf\xbf\xbf"] = "";
    info("Len: %d", input.char_count());

    int byte_offset = 0;   // position in the UTF-8 byte sequence
    int char_index = 0;    // position in characters, used for logging only
    unichar ic;
    // get_next_char() returns false once the terminating NUL is reached.
    while(input.get_next_char(ref byte_offset, out ic)) {
        var as_string = ic.to_string();
        info("%d %s", char_index, as_string);
        if(map.has_key(as_string)) {
            // Mapped character: emit its replacement.
            builder.append(map[as_string]);
        } else {
            // Unmapped character: copy it through as-is.
            builder.append_unichar(ic);
        }
        char_index++;
    }
    return builder.str;
}
我有这个代码:
// Entry point: runs escape_latex() on the first command-line argument and
// logs the result through info().
// NOTE(review): args[1] is read without checking args.length first.
// NOTE(review): the escaped text is handed to info() as its first argument,
// which looks like a printf-style format string — a '%' in the text would
// then be interpreted as a conversion; confirm.
static int main(string[] args) {
info(escape_latex(args[1]));
return 0;
}
// Builds a copy of `input` in which every character that has an entry in
// `map` is replaced by the mapped string; every other character is appended
// unchanged. Logs the character count and each visited character via info().
//
// NOTE(review): the loop bound is a character count (char_count()), but the
// counter is passed straight to get_char(). If get_char() takes a byte
// offset rather than a character index (as the Vala GLib binding docs
// suggest), multi-byte characters are read from their middle and the last
// bytes of the string are never reached — that would match the skipped
// indices and the truncated output shown in the log further down. Confirm.
string escape_latex(string input) {
// Output buffer, pre-sized from input.length plus 20 extra.
var builder = new StringBuilder.sized(input.length + 20);
// Replacement table: key is a single character as a string, value is its replacement.
var map = new Gee.HashMap<string, string>();
// ...<Snip>...
// Fix for some weird unicode bugs
// NOTE(review): presumably this swallows the garbage value produced when
// get_char() lands inside a multi-byte sequence — confirm.
map["\xff\xbf\xbf\xbf\xbf\xbf"] = "";
info("Len: %d", input.char_count());
// char_count() is re-evaluated on every iteration of the loop condition.
for(var i = 0; i < input.char_count(); i++) {
var ic = input.get_char(i);
var as_string = ic.to_string();
info("%d %s", i, as_string);
if(map.has_key(as_string)) {
// Mapped character: emit its replacement.
builder.append(map[as_string]);
} else {
// Unmapped character: copy it through as-is.
builder.append_unichar(ic);
}
}
return builder.str;
}
如果我传入“foo123”，会得到预期的输出“foo123”。但是如果我传入“Geldbeutel+Schlüsselanhänger”，得到的输出却是“Geldbeutel+Schl?sselanh?ng”（最后两个字符丢失）。
现在我将 for 循环更改为 for(var i = 0; i <= input.char_count(); i++) {
对于“foo123”，我得到了预期的输出；对于“Geldbeutel+Schlüsselanhänger”，我得到了“Geldbeutel+Schl?sselanh?nge”（Valgrind、ASAN 和 UBSAN 都没有报告任何问题）。
现在我将 for 循环更改为 for(var i = 0; i <= input.char_count() + 1; i++) {
“foo123”现在变成了“foo123G”，因为我读到了字符串之外的其他内存；但是“Geldbeutel+Schlüsselanhänger”却给出了正确的输出“Geldbeutel+Schl?sselanh?nger”。
对于最后一个示例输入,一个示例输出:
** INFO: 19:41:57.903: a.vala:23: Len: 28
** INFO: 19:41:57.903: a.vala:29: 0 G
** INFO: 19:41:57.903: a.vala:29: 1 e
** INFO: 19:41:57.903: a.vala:29: 2 l
** INFO: 19:41:57.903: a.vala:29: 3 d
** INFO: 19:41:57.903: a.vala:29: 4 b
** INFO: 19:41:57.903: a.vala:29: 5 e
** INFO: 19:41:57.903: a.vala:29: 6 u
** INFO: 19:41:57.903: a.vala:29: 7 t
** INFO: 19:41:57.903: a.vala:29: 8 e
** INFO: 19:41:57.903: a.vala:29: 9 l
** INFO: 19:41:57.903: a.vala:29: 10 +
** INFO: 19:41:57.903: a.vala:29: 11 S
** INFO: 19:41:57.903: a.vala:29: 12 c
** INFO: 19:41:57.903: a.vala:29: 13 h
** INFO: 19:41:57.903: a.vala:29: 14 l
** INFO: 19:41:57.903: a.vala:29: 15 ?
** INFO: 19:41:57.903: a.vala:29: 17 s
** INFO: 19:41:57.903: a.vala:29: 18 s
** INFO: 19:41:57.903: a.vala:29: 19 e
** INFO: 19:41:57.903: a.vala:29: 20 l
** INFO: 19:41:57.903: a.vala:29: 21 a
** INFO: 19:41:57.903: a.vala:29: 22 n
** INFO: 19:41:57.903: a.vala:29: 23 h
** INFO: 19:41:57.903: a.vala:29: 24 ?
** INFO: 19:41:57.903: a.vala:29: 26 n
** INFO: 19:41:57.903: a.vala:29: 27 g
** INFO: 19:41:57.903: a.vala:29: 28 e
** INFO: 19:41:57.903: a.vala:29: 29 r // <- Here, I access an invalid index, but it works
** INFO: 19:41:57.903: a.vala:2: Geldbeutel+Schl?sselanh?nger
这似乎和 Unicode 有关，但是我找不到让这个函数正常工作的办法。
这与语言环境（locale）有关：C 运行时环境的默认语言环境是 US-ASCII。您可以为 LocaleCategory.ALL 向 Intl.setlocale() 传入空字符串，将其设置为运行时环境中用户首选的语言环境；这两个值恰好也是该函数的默认参数值，因此直接调用 Intl.setlocale(); 即可：
// Entry point: switches to the user's preferred locale, escapes the first
// command-line argument with escape_latex() and prints the result followed
// by a newline.
//
// Returns 0 on success, 1 when no argument was supplied.
static int main(string[] args) {
    // With no arguments this selects the user's locale for all categories,
    // so non-ASCII characters are printed instead of being replaced by '?'.
    Intl.setlocale();

    // args[0] is the program name; the text to escape must be args[1].
    if(args.length < 2) {
        printerr("error: expected the text to escape as the first argument\n");
        return 1;
    }

    // print() takes a printf-style format string. Passing the escaped text
    // as data keeps any '%' in it from being read as a conversion.
    print("%s\n", escape_latex(args[1]));
    return 0;
}
// Returns a copy of `input` in which every character that has an entry in
// the replacement table is replaced by the mapped string; every other
// character is copied through unchanged.
//
// The string is walked with get_next_char(), which advances a byte offset
// by the encoded width of each character. Indexing get_char() with a
// character counter is wrong for UTF-8 text: get_char() takes a byte
// offset, so multi-byte characters were read from their middle and the
// last bytes of the string were never reached.
string escape_latex(string input) {
    // Output buffer, pre-sized from the input's byte length plus some headroom.
    var builder = new StringBuilder.sized(input.length + 20);
    // Replacement table: key is a single character as a string, value is its replacement.
    var map = new Gee.HashMap<string, string>();
    // ...<Snip>...
    // Fix for some weird unicode bugs
    // NOTE(review): presumably only needed while characters were read from
    // the middle of a multi-byte sequence; kept so existing behaviour for
    // that value is unchanged — confirm whether it can be dropped.
    map["\xff\xbf\xbf\xbf\xbf\xbf"] = "";
    info("Len: %d", input.char_count());

    int byte_offset = 0;   // position in the UTF-8 byte sequence
    int char_index = 0;    // position in characters, used for logging only
    unichar ic;
    // get_next_char() returns false once the terminating NUL is reached.
    while(input.get_next_char(ref byte_offset, out ic)) {
        var as_string = ic.to_string();
        info("%d %s", char_index, as_string);
        if(map.has_key(as_string)) {
            // Mapped character: emit its replacement.
            builder.append(map[as_string]);
        } else {
            // Unmapped character: copy it through as-is.
            builder.append_unichar(ic);
        }
        char_index++;
    }
    return builder.str;
}