将 shell 引用的字符串解析为与 execv 兼容的参数向量
Parse shell quoted string into execv compatible argument vector
假设我有如下字符串
echo "foo" "bar\"blub""baz" "'" "\"" foo\ bar "\" '\'' """" Lots" "of\ whitespace
现在我想通过调用 Unix.execv 来执行上面的字符串,就好像它是直接输入到 shell 中的命令一样。
如果我没有弄错,shell 会将上面的内容解析为以下 OCaml 列表:
["echo"; "foo"; "bar\"blubbaz"; "'"; "\""; "foo bar"; "\\"; "'"; ""; "Lots of whitespace"]
有哪个库可以帮我把原始字符串转换为解析后的列表?
最终我想把结果列表交给Unix.execvpe
。还有 Unix.open_process_full
可以使用 /bin/sh
处理我的原始字符串,但我发现在不使用 /bin/sh
的情况下直接调用外部程序时,我的应用程序快了 16%。现在我希望能够接受更多的输入字符串,包括引号和转义符。
我必须自己编写解析器吗?
存在 POSIX 函数 wordexp
但包装该函数并不能解决我的问题,因为 wordexp
做的事情比我需要的多(命令替换、glob 通配符展开、波浪号 (tilde) 展开以及环境变量替换)。
我只想解决引号和转义问题。
我使用 ocamllex 制定了解决此问题的方法。将它张贴在这里以防其他人想要做类似的事情。它应该很容易扩展,以包含超出当前支持的转义字符和其他 shell 功能范围的功能。
{
(* Raised by the lexer rules when a backslash is followed by a character
   that is not one of the supported escapes; carries the offending lexeme. *)
exception UnknownShellEscape of string

(* Raised by the lexer rules when a character cannot be matched in the
   current quoting state; carries that character. *)
exception UnmatchedChar of char

(* [buf_from_str str] returns a fresh buffer whose contents are [str]. *)
let buf_from_str str =
  let fresh = Buffer.create 16 in
  let () = Buffer.add_string fresh str in
  fresh
}
(* Characters allowed in an unquoted word without escaping: anything except
   the two quote characters, the backslash and whitespace. *)
let safechars = [^ '"' '\'' '\\' ' ' '\t' '\n']+
(* Word separators.  Newline counts as whitespace so that input piped in
   from another command does not leave a trailing newline glued to the
   last argument. *)
let space = [ ' ' '\t' '\n' ]+
(* Entry rule: scans between words.
   argv holds the words collected so far in reverse order; it is reversed
   and returned when end of input is reached.
   Raises UnknownShellEscape for a backslash followed by an unsupported
   character and UnmatchedChar for any other character that starts no word
   (for example a lone backslash at end of input). *)
rule shell_command argv = parse
  | space       { shell_command argv lexbuf }
  | safechars   { uquote argv (buf_from_str (Lexing.lexeme lexbuf)) lexbuf }
  (* Escaped characters start an unquoted word with the literal character. *)
  | '\\' '"'    { uquote argv (buf_from_str "\"") lexbuf }
  | '\\' '\''   { uquote argv (buf_from_str "'") lexbuf }
  | '\\' '\\'   { uquote argv (buf_from_str "\\") lexbuf }
  | '\\' ' '    { uquote argv (buf_from_str " ") lexbuf }
  (* as binds the whole two-character lexeme, hence a string payload. *)
  | '\\' _ as c { raise (UnknownShellEscape c) }
  | '"'         { dquote argv (Buffer.create 16) lexbuf }
  | '\''        { squote argv (Buffer.create 16) lexbuf }
  | _ as c      { raise (UnmatchedChar c) }
  | eof         { List.rev argv }
(* Continues an unquoted word whose text so far is accumulated in buf.
   Whitespace or end of input finishes the word and pushes it onto argv;
   a quote character switches to the matching quoted state while keeping
   the same buffer, so adjacent quoted and unquoted pieces form one word.
   Raises UnknownShellEscape for an unsupported backslash escape and
   UnmatchedChar for a lone backslash at end of input. *)
and uquote argv buf = parse
  | (space|eof) { shell_command (Buffer.contents buf :: argv) lexbuf }
  | '\\' '"'    { Buffer.add_string buf "\""; uquote argv buf lexbuf }
  | '\\' '\''   { Buffer.add_string buf "'"; uquote argv buf lexbuf }
  | '\\' '\\'   { Buffer.add_string buf "\\"; uquote argv buf lexbuf }
  | '\\' ' '    { Buffer.add_string buf " "; uquote argv buf lexbuf }
  | '\\' _ as c { raise (UnknownShellEscape c) }
  | '"'         { dquote argv buf lexbuf }
  | '\''        { squote argv buf lexbuf }
  | safechars   { Buffer.add_string buf (Lexing.lexeme lexbuf); uquote argv buf lexbuf }
  | _ as c      { raise (UnmatchedChar c) }
(* Scans the inside of a double-quoted section, appending to buf.
   A closing quote followed by whitespace or end of input finishes the
   word; a closing quote followed by anything else continues the same word
   in the appropriate state.  Only backslash-doublequote and
   backslash-backslash are accepted as escapes here.
   Raises UnknownShellEscape for any other escape and UnmatchedChar for a
   lone trailing backslash or for a quote that is never closed. *)
and dquote argv buf = parse
  | '"' (space|eof) { shell_command (Buffer.contents buf :: argv) lexbuf }
  | '"' '"'         { dquote argv buf lexbuf }
  | '"' '\''        { squote argv buf lexbuf }
  | '"'             { uquote argv buf lexbuf }
  | '\\' '"'        { Buffer.add_string buf "\""; dquote argv buf lexbuf }
  | '\\' '\\'       { Buffer.add_string buf "\\"; dquote argv buf lexbuf }
  | '\\' _ as c     { raise (UnknownShellEscape c) }
  | [^ '"' '\\' ]+  { Buffer.add_string buf (Lexing.lexeme lexbuf); dquote argv buf lexbuf }
  (* Unterminated quote: report it explicitly instead of letting the lexer
     fail with a generic empty-token error. *)
  | eof             { raise (UnmatchedChar '"') }
  | _ as c          { raise (UnmatchedChar c) }
(* Scans the inside of a single-quoted section, appending to buf.
   No escapes are recognised: every character up to the closing quote is
   taken literally.  A closing quote followed by whitespace or end of input
   finishes the word; otherwise the same word continues in the appropriate
   state.
   Raises UnmatchedChar if the input ends before the quote is closed. *)
and squote argv buf = parse
  | '\'' (space|eof) { shell_command (Buffer.contents buf :: argv) lexbuf }
  | '\'' '\''        { squote argv buf lexbuf }
  | '\'' '"'         { dquote argv buf lexbuf }
  | '\''             { uquote argv buf lexbuf }
  | [^ '\'' ]+       { Buffer.add_string buf (Lexing.lexeme lexbuf); squote argv buf lexbuf }
  (* The clauses above already cover every character, so the only remaining
     case is end of input inside an open quote. *)
  | eof              { raise (UnmatchedChar '\'') }
{
(* Reads a command line from the file named by the first program argument,
   or from standard input when no argument is given, splits it with
   shell_command and prints one resulting word per line. *)
let main () =
  let input =
    if Array.length Sys.argv <= 1 then stdin else open_in Sys.argv.(1)
  in
  let words = shell_command [] (Lexing.from_channel input) in
  List.iter (fun word -> Printf.printf "%s\n" word) words

(* Run main, printing any uncaught exception before it propagates. *)
let _ = Printexc.print main ()
}
试一试运行:
$ ocamllex test.mll
$ echo 'echo "foo" "bar\"blub""baz" "'\''" "\"" foo\ bar '\
> '"\\" """"'\'''\'''\'''\''"" Lots" "of\ whitespace' \
> | ocaml test.ml
echo
foo
bar"blubbaz
'
"
foo bar
\
Lots of whitespace
成功! \o/
假设我有如下字符串
echo "foo" "bar\"blub""baz" "'" "\"" foo\ bar "\" '\'' """" Lots" "of\ whitespace
现在我想通过调用 Unix.execv 来执行上面的字符串,就好像它是直接输入到 shell 中的命令一样。
如果我没有弄错,shell 会将上面的内容解析为以下 OCaml 列表:
["echo"; "foo"; "bar\"blubbaz"; "'"; "\""; "foo bar"; "\\"; "'"; ""; "Lots of whitespace"]
有哪个库可以帮我把原始字符串转换为解析后的列表?
最终我想把结果列表交给Unix.execvpe
。还有 Unix.open_process_full
可以使用 /bin/sh
处理我的原始字符串,但我发现在不使用 /bin/sh
的情况下直接调用外部程序时,我的应用程序快了 16%。现在我希望能够接受更多的输入字符串,包括引号和转义符。
我必须自己编写解析器吗?
存在 POSIX 函数 wordexp
但包装该函数并不能解决我的问题,因为 wordexp
做的事情比我需要的多(命令替换、glob 通配符展开、波浪号 (tilde) 展开以及环境变量替换)。
我只想解决引号和转义问题。
我使用 ocamllex 制定了解决此问题的方法。将它张贴在这里以防其他人想要做类似的事情。它应该很容易扩展,以包含超出当前支持的转义字符和其他 shell 功能范围的功能。
{
(* Raised by the lexer rules when a backslash is followed by a character
   that is not one of the supported escapes; carries the offending lexeme. *)
exception UnknownShellEscape of string

(* Raised by the lexer rules when a character cannot be matched in the
   current quoting state; carries that character. *)
exception UnmatchedChar of char

(* [buf_from_str str] returns a fresh buffer whose contents are [str]. *)
let buf_from_str str =
  let fresh = Buffer.create 16 in
  let () = Buffer.add_string fresh str in
  fresh
}
(* Characters allowed in an unquoted word without escaping: anything except
   the two quote characters, the backslash and whitespace. *)
let safechars = [^ '"' '\'' '\\' ' ' '\t' '\n']+
(* Word separators.  Newline counts as whitespace so that input piped in
   from another command does not leave a trailing newline glued to the
   last argument. *)
let space = [ ' ' '\t' '\n' ]+
(* Entry rule: scans between words.
   argv holds the words collected so far in reverse order; it is reversed
   and returned when end of input is reached.
   Raises UnknownShellEscape for a backslash followed by an unsupported
   character and UnmatchedChar for any other character that starts no word
   (for example a lone backslash at end of input). *)
rule shell_command argv = parse
  | space       { shell_command argv lexbuf }
  | safechars   { uquote argv (buf_from_str (Lexing.lexeme lexbuf)) lexbuf }
  (* Escaped characters start an unquoted word with the literal character. *)
  | '\\' '"'    { uquote argv (buf_from_str "\"") lexbuf }
  | '\\' '\''   { uquote argv (buf_from_str "'") lexbuf }
  | '\\' '\\'   { uquote argv (buf_from_str "\\") lexbuf }
  | '\\' ' '    { uquote argv (buf_from_str " ") lexbuf }
  (* as binds the whole two-character lexeme, hence a string payload. *)
  | '\\' _ as c { raise (UnknownShellEscape c) }
  | '"'         { dquote argv (Buffer.create 16) lexbuf }
  | '\''        { squote argv (Buffer.create 16) lexbuf }
  | _ as c      { raise (UnmatchedChar c) }
  | eof         { List.rev argv }
(* Continues an unquoted word whose text so far is accumulated in buf.
   Whitespace or end of input finishes the word and pushes it onto argv;
   a quote character switches to the matching quoted state while keeping
   the same buffer, so adjacent quoted and unquoted pieces form one word.
   Raises UnknownShellEscape for an unsupported backslash escape and
   UnmatchedChar for a lone backslash at end of input. *)
and uquote argv buf = parse
  | (space|eof) { shell_command (Buffer.contents buf :: argv) lexbuf }
  | '\\' '"'    { Buffer.add_string buf "\""; uquote argv buf lexbuf }
  | '\\' '\''   { Buffer.add_string buf "'"; uquote argv buf lexbuf }
  | '\\' '\\'   { Buffer.add_string buf "\\"; uquote argv buf lexbuf }
  | '\\' ' '    { Buffer.add_string buf " "; uquote argv buf lexbuf }
  | '\\' _ as c { raise (UnknownShellEscape c) }
  | '"'         { dquote argv buf lexbuf }
  | '\''        { squote argv buf lexbuf }
  | safechars   { Buffer.add_string buf (Lexing.lexeme lexbuf); uquote argv buf lexbuf }
  | _ as c      { raise (UnmatchedChar c) }
(* Scans the inside of a double-quoted section, appending to buf.
   A closing quote followed by whitespace or end of input finishes the
   word; a closing quote followed by anything else continues the same word
   in the appropriate state.  Only backslash-doublequote and
   backslash-backslash are accepted as escapes here.
   Raises UnknownShellEscape for any other escape and UnmatchedChar for a
   lone trailing backslash or for a quote that is never closed. *)
and dquote argv buf = parse
  | '"' (space|eof) { shell_command (Buffer.contents buf :: argv) lexbuf }
  | '"' '"'         { dquote argv buf lexbuf }
  | '"' '\''        { squote argv buf lexbuf }
  | '"'             { uquote argv buf lexbuf }
  | '\\' '"'        { Buffer.add_string buf "\""; dquote argv buf lexbuf }
  | '\\' '\\'       { Buffer.add_string buf "\\"; dquote argv buf lexbuf }
  | '\\' _ as c     { raise (UnknownShellEscape c) }
  | [^ '"' '\\' ]+  { Buffer.add_string buf (Lexing.lexeme lexbuf); dquote argv buf lexbuf }
  (* Unterminated quote: report it explicitly instead of letting the lexer
     fail with a generic empty-token error. *)
  | eof             { raise (UnmatchedChar '"') }
  | _ as c          { raise (UnmatchedChar c) }
(* Scans the inside of a single-quoted section, appending to buf.
   No escapes are recognised: every character up to the closing quote is
   taken literally.  A closing quote followed by whitespace or end of input
   finishes the word; otherwise the same word continues in the appropriate
   state.
   Raises UnmatchedChar if the input ends before the quote is closed. *)
and squote argv buf = parse
  | '\'' (space|eof) { shell_command (Buffer.contents buf :: argv) lexbuf }
  | '\'' '\''        { squote argv buf lexbuf }
  | '\'' '"'         { dquote argv buf lexbuf }
  | '\''             { uquote argv buf lexbuf }
  | [^ '\'' ]+       { Buffer.add_string buf (Lexing.lexeme lexbuf); squote argv buf lexbuf }
  (* The clauses above already cover every character, so the only remaining
     case is end of input inside an open quote. *)
  | eof              { raise (UnmatchedChar '\'') }
{
(* Reads a command line from the file named by the first program argument,
   or from standard input when no argument is given, splits it with
   shell_command and prints one resulting word per line. *)
let main () =
  let input =
    if Array.length Sys.argv <= 1 then stdin else open_in Sys.argv.(1)
  in
  let words = shell_command [] (Lexing.from_channel input) in
  List.iter (fun word -> Printf.printf "%s\n" word) words

(* Run main, printing any uncaught exception before it propagates. *)
let _ = Printexc.print main ()
}
试一试运行:
$ ocamllex test.mll
$ echo 'echo "foo" "bar\"blub""baz" "'\''" "\"" foo\ bar '\
> '"\\" """"'\'''\'''\'''\''"" Lots" "of\ whitespace' \
> | ocaml test.ml
echo
foo
bar"blubbaz
'
"
foo bar
\
Lots of whitespace
成功! \o/