将命令行字符串解析为 Golang 中的标志和参数
Parse a command line string into flags and arguments in Golang
我正在寻找一个包,它可以将 -v --format "some example" -i test
之类的字符串解析为字符串切片(slice),正确处理引号、空格等:
-v
--format
some example
-i
test
我已经检查了内置的 flag
包以及 Github 上的其他标志处理包,但似乎没有一个能处理这种把原始字符串解析成标记(token)的特殊情况。在尝试自己实现之前,我更愿意先找一个现成的包,因为我确信有很多特殊情况需要处理。
有什么建议吗?
有关信息,这是我最终创建的函数。
它将命令拆分为其参数。例如,cat -v "some file.txt"
,将返回 ["cat", "-v", "some file.txt"]
.
它还能正确处理转义字符,尤其是空格。所以 cat -v some\ file.txt
也将正确地拆分为 ["cat", "-v", "some file.txt"]
// parseCommandLine splits command into its arguments.
//
// Arguments are separated by spaces or tabs. Text between a pair of matching
// single or double quotes is copied literally (backslashes included), and the
// closing quote ends the current argument, so an empty pair of quotes yields
// an empty argument. Outside quotes a backslash makes the following byte
// literal, so `some\ file.txt` is one argument; a lone trailing backslash is
// dropped.
//
// It returns an error when a quote is opened but never closed.
func parseCommandLine(command string) ([]string, error) {
	const (
		stateStart  = iota // between arguments
		stateArg           // inside an unquoted argument
		stateQuotes        // inside a quoted section
	)

	var (
		args []string
		// current holds raw bytes. Every delimiter is ASCII, so copying the
		// other bytes through unchanged keeps multi-byte UTF-8 text intact.
		current []byte
		quote   byte
	)
	state := stateStart
	// Nothing is escaped until a backslash has actually been seen.
	escapeNext := false

	for i := 0; i < len(command); i++ {
		c := command[i]
		switch {
		case state == stateQuotes:
			if c == quote {
				args = append(args, string(current))
				current = current[:0]
				state = stateStart
			} else {
				current = append(current, c)
			}
		case escapeNext:
			current = append(current, c)
			escapeNext = false
			// An escaped byte always belongs to an argument, even when it is
			// whitespace, so the next unescaped space must terminate it.
			state = stateArg
		case c == '\\':
			escapeNext = true
		case c == '"' || c == '\'':
			state = stateQuotes
			quote = c
		case c == ' ' || c == '\t':
			if state == stateArg {
				args = append(args, string(current))
				current = current[:0]
				state = stateStart
			}
		default:
			state = stateArg
			current = append(current, c)
		}
	}

	if state == stateQuotes {
		return []string{}, fmt.Errorf("Unclosed quote in command line: %s", command)
	}
	if len(current) > 0 {
		args = append(args, string(current))
	}
	return args, nil
}
如果参数是在命令行上传递给您的程序的,那么 shell 应该处理这个并且 os.Args
将被正确填充。例如,在您的情况下 os.Args[1:]
将等于
[]string{"-v", "--format", "some example", "-i", "test"}
如果你只是有一个字符串,并且出于某种原因想模仿 shell 对它的处理方式,那么我推荐使用像 https://github.com/kballard/go-shellquote 这样的包。
hedzr/cmdr 可能不错。它是一个类似 getopt 的命令行解析器,轻量级,支持流式(fluent)API 或经典风格。
看起来类似于 shlex:
import "github.com/google/shlex"
shlex.Split("one \"two three\" four") -> []string{"one", "two three", "four"}
@laurent 的回答很棒,但是当 command
包含 utf-8 字符时它不起作用。
第三个测试用例失败:
func TestParseCommandLine(t *testing.T){
tests := []struct{
name string
input string
want []string
}{
{
"normal",
"hello world",
[]string{"hello", "world"},
},
{
"quote",
"hello \"world hello\"",
[]string{"hello", "world hello"},
},
{
"utf-8",
"hello 世界",
[]string{"hello", "世界"},
},
{
"space",
"hello\ world",
[]string{"hello world"},
},
}
for _, tt := range tests{
t.Run(tt.name, func(t *testing.T) {
got, _ := parseCommandLine(tt.input)
if !reflect.DeepEqual(got, tt.want){
t.Errorf("expect %v, got %v", tt.want, got)
}
})
}
}
基于他/她的回答,我写了这个适用于 utf-8 的函数,只需将 for i := 0; i < len(command); i++ {c := command[i]
替换为 for _, c := range command
这是我的答案:
// parseCommandLine splits command into its arguments, walking the string
// rune by rune so that non-ASCII text is preserved.
//
// Arguments are separated by spaces or tabs. Text between a pair of matching
// single or double quotes is copied literally (backslashes included), and the
// closing quote ends the current argument, so an empty pair of quotes yields
// an empty argument. Outside quotes a backslash makes the following rune
// literal, so `some\ file.txt` is one argument; a lone trailing backslash is
// dropped.
//
// It returns an error when a quote is opened but never closed.
func parseCommandLine(command string) ([]string, error) {
	const (
		stateStart  = iota // between arguments
		stateArg           // inside an unquoted argument
		stateQuotes        // inside a quoted section
	)

	var (
		args    []string
		current []rune // runes of the argument being built
		quote   rune   // the quote character that opened the current section
	)
	state := stateStart
	// Nothing is escaped until a backslash has actually been seen.
	escapeNext := false

	for _, c := range command {
		switch {
		case state == stateQuotes:
			if c == quote {
				args = append(args, string(current))
				current = current[:0]
				state = stateStart
			} else {
				current = append(current, c)
			}
		case escapeNext:
			current = append(current, c)
			escapeNext = false
			// An escaped rune always belongs to an argument, even when it is
			// whitespace, so the next unescaped space must terminate it.
			state = stateArg
		case c == '\\':
			escapeNext = true
		case c == '"' || c == '\'':
			state = stateQuotes
			quote = c
		case c == ' ' || c == '\t':
			if state == stateArg {
				args = append(args, string(current))
				current = current[:0]
				state = stateStart
			}
		default:
			state = stateArg
			current = append(current, c)
		}
	}

	if state == stateQuotes {
		return []string{}, fmt.Errorf("Unclosed quote in command line: %s", command)
	}
	if len(current) > 0 {
		args = append(args, string(current))
	}
	return args, nil
}
我知道这是一个老问题,但可能仍然相关。使用正则表达式怎么样?它非常简单,对于大多数情况可能就足够了:
r := regexp.MustCompile(`\"[^\"]+\"|\S+`)
m := r.FindAllString(`-v --format "some example" -i test`, -1)
fmt.Printf("%q", m)
// Prints out ["-v" "--format" "\"some example\"" "-i" "test"]
你可以试试https://go.dev/play/p/1K0MlsOUzQI
编辑:
要将 test\ abc
也处理为 1 个条目,请使用此正则表达式:\"[^\"]+\"|\S+\\\s\S+|\S+
我正在寻找一个包,它可以将 -v --format "some example" -i test
之类的字符串解析为字符串切片(slice),正确处理引号、空格等:
-v
--format
some example
-i
test
我已经检查了内置的 flag
包以及 Github 上的其他标志处理包,但似乎没有一个能处理这种把原始字符串解析成标记(token)的特殊情况。在尝试自己实现之前,我更愿意先找一个现成的包,因为我确信有很多特殊情况需要处理。
有什么建议吗?
有关信息,这是我最终创建的函数。
它将命令拆分为其参数。例如,cat -v "some file.txt"
,将返回 ["cat", "-v", "some file.txt"]
.
它还能正确处理转义字符,尤其是空格。所以 cat -v some\ file.txt
也将正确地拆分为 ["cat", "-v", "some file.txt"]
// parseCommandLine splits command into its arguments.
//
// Arguments are separated by spaces or tabs. Text between a pair of matching
// single or double quotes is copied literally (backslashes included), and the
// closing quote ends the current argument, so an empty pair of quotes yields
// an empty argument. Outside quotes a backslash makes the following byte
// literal, so `some\ file.txt` is one argument; a lone trailing backslash is
// dropped.
//
// It returns an error when a quote is opened but never closed.
func parseCommandLine(command string) ([]string, error) {
	const (
		stateStart  = iota // between arguments
		stateArg           // inside an unquoted argument
		stateQuotes        // inside a quoted section
	)

	var (
		args []string
		// current holds raw bytes. Every delimiter is ASCII, so copying the
		// other bytes through unchanged keeps multi-byte UTF-8 text intact.
		current []byte
		quote   byte
	)
	state := stateStart
	// Nothing is escaped until a backslash has actually been seen.
	escapeNext := false

	for i := 0; i < len(command); i++ {
		c := command[i]
		switch {
		case state == stateQuotes:
			if c == quote {
				args = append(args, string(current))
				current = current[:0]
				state = stateStart
			} else {
				current = append(current, c)
			}
		case escapeNext:
			current = append(current, c)
			escapeNext = false
			// An escaped byte always belongs to an argument, even when it is
			// whitespace, so the next unescaped space must terminate it.
			state = stateArg
		case c == '\\':
			escapeNext = true
		case c == '"' || c == '\'':
			state = stateQuotes
			quote = c
		case c == ' ' || c == '\t':
			if state == stateArg {
				args = append(args, string(current))
				current = current[:0]
				state = stateStart
			}
		default:
			state = stateArg
			current = append(current, c)
		}
	}

	if state == stateQuotes {
		return []string{}, fmt.Errorf("Unclosed quote in command line: %s", command)
	}
	if len(current) > 0 {
		args = append(args, string(current))
	}
	return args, nil
}
如果参数是在命令行上传递给您的程序的,那么 shell 应该处理这个并且 os.Args
将被正确填充。例如,在您的情况下 os.Args[1:]
将等于
[]string{"-v", "--format", "some example", "-i", "test"}
如果你只是有一个字符串,并且出于某种原因想模仿 shell 对它的处理方式,那么我推荐使用像 https://github.com/kballard/go-shellquote 这样的包。
hedzr/cmdr 可能不错。它是一个类似 getopt 的命令行解析器,轻量级,支持流式(fluent)API 或经典风格。
看起来类似于 shlex:
import "github.com/google/shlex"
shlex.Split("one \"two three\" four") -> []string{"one", "two three", "four"}
@laurent 的回答很棒,但是当 command
包含 utf-8 字符时它不起作用。
第三个测试用例失败:
func TestParseCommandLine(t *testing.T){
tests := []struct{
name string
input string
want []string
}{
{
"normal",
"hello world",
[]string{"hello", "world"},
},
{
"quote",
"hello \"world hello\"",
[]string{"hello", "world hello"},
},
{
"utf-8",
"hello 世界",
[]string{"hello", "世界"},
},
{
"space",
"hello\ world",
[]string{"hello world"},
},
}
for _, tt := range tests{
t.Run(tt.name, func(t *testing.T) {
got, _ := parseCommandLine(tt.input)
if !reflect.DeepEqual(got, tt.want){
t.Errorf("expect %v, got %v", tt.want, got)
}
})
}
}
基于他/她的回答,我写了这个适用于 utf-8 的函数,只需将 for i := 0; i < len(command); i++ {c := command[i]
替换为 for _, c := range command
这是我的答案:
// parseCommandLine splits command into its arguments, walking the string
// rune by rune so that non-ASCII text is preserved.
//
// Arguments are separated by spaces or tabs. Text between a pair of matching
// single or double quotes is copied literally (backslashes included), and the
// closing quote ends the current argument, so an empty pair of quotes yields
// an empty argument. Outside quotes a backslash makes the following rune
// literal, so `some\ file.txt` is one argument; a lone trailing backslash is
// dropped.
//
// It returns an error when a quote is opened but never closed.
func parseCommandLine(command string) ([]string, error) {
	const (
		stateStart  = iota // between arguments
		stateArg           // inside an unquoted argument
		stateQuotes        // inside a quoted section
	)

	var (
		args    []string
		current []rune // runes of the argument being built
		quote   rune   // the quote character that opened the current section
	)
	state := stateStart
	// Nothing is escaped until a backslash has actually been seen.
	escapeNext := false

	for _, c := range command {
		switch {
		case state == stateQuotes:
			if c == quote {
				args = append(args, string(current))
				current = current[:0]
				state = stateStart
			} else {
				current = append(current, c)
			}
		case escapeNext:
			current = append(current, c)
			escapeNext = false
			// An escaped rune always belongs to an argument, even when it is
			// whitespace, so the next unescaped space must terminate it.
			state = stateArg
		case c == '\\':
			escapeNext = true
		case c == '"' || c == '\'':
			state = stateQuotes
			quote = c
		case c == ' ' || c == '\t':
			if state == stateArg {
				args = append(args, string(current))
				current = current[:0]
				state = stateStart
			}
		default:
			state = stateArg
			current = append(current, c)
		}
	}

	if state == stateQuotes {
		return []string{}, fmt.Errorf("Unclosed quote in command line: %s", command)
	}
	if len(current) > 0 {
		args = append(args, string(current))
	}
	return args, nil
}
我知道这是一个老问题,但可能仍然相关。使用正则表达式怎么样?它非常简单,对于大多数情况可能就足够了:
r := regexp.MustCompile(`\"[^\"]+\"|\S+`)
m := r.FindAllString(`-v --format "some example" -i test`, -1)
fmt.Printf("%q", m)
// Prints out ["-v" "--format" "\"some example\"" "-i" "test"]
你可以试试https://go.dev/play/p/1K0MlsOUzQI
编辑:
要将 test\ abc
也处理为 1 个条目,请使用此正则表达式:\"[^\"]+\"|\S+\\\s\S+|\S+