逐行对齐列
Align columns against one another by row
假设一个文件包含三列字符串。
> cat file
foo foo bar
bar baz
baz qux
qux
bash 中的什么命令(如果有)可用于将这些列逐行对齐?正确的输出如下所示:
> sought_command file
foo foo
bar     bar
baz baz
qux qux
使用 awk:
$ awk '
# Re-align tokens: print one row per distinct token, showing the token in
# every column where it appeared in the input and blanks elsewhere.
{
    if (max < NF) max = NF            # widest record = number of output columns
    for (i = 1; i <= NF; i++) {
        b[$i]++                       # set of all distinct tokens (foo, bar, ...)
        a[i, $i]++                    # (column, token) pairs; the SUBSEP key keeps
                                      # column 1 + "1x" distinct from column 11 + "x"
    }
}
END {
    for (tok in b)                    # longest token determines the cell width
        if (width < length(tok)) width = length(tok)
    fmt = "%-" (width + 5) "s"        # left-justified cell plus 5 columns of padding
    for (tok in b) {                  # one row per token; for-in order is unspecified
        # Membership is tested with "in" so that lookups never create
        # empty entries in the array.
        for (col = 1; col <= max; col++)
            printf fmt, (((col, tok) in a) ? tok : "")
        print ""                      # end the row
    }
}' test.input
foo     foo
baz     baz
qux     qux
bar             bar
关于输入与输出数据的顺序:我假设输入中的标记没有固定顺序,因为下面这份输入数据也应该给出类似的输出。
foo foo
bar
baz baz bar
qux qux
也可以让输出保持各标记首次出现的顺序。例如,对上面(重新排序后)的输入,输出顺序将是 foo、bar、baz、qux。
$ awk '
# Re-align tokens: print one row per distinct token, in order of first
# appearance, showing the token in every column where it occurred.
{
    if (max < NF) max = NF            # widest record = number of output columns
    for (i = 1; i <= NF; i++) {
        if (!b[$i]++) {               # first time this token is seen
            token[n++] = $i           # remember tokens in first-appearance order
            if (max_len < length($i)) max_len = length($i)
        }
        a[i, $i]++                    # (column, token) pairs; the SUBSEP key keeps
                                      # column 1 + "1x" distinct from column 11 + "x"
    }
}
END {
    fmt = "%-" (max_len + 5) "s"      # left-justified cell plus 5 columns of padding
    # A counted loop walks token[0..n-1] in insertion order in any awk;
    # it does not depend on the gawk-only PROCINFO["sorted_in"] setting.
    for (k = 0; k < n; k++) {
        for (col = 1; col <= max; col++)
            printf fmt, (((col, token[k]) in a) ? token[k] : "")
        print ""                      # end the row
    }
}' test.input.reordered
foo     foo
bar             bar
baz     baz
qux     qux
假设一个文件包含三列字符串。
> cat file
foo foo bar
bar baz
baz qux
qux
bash 中的什么命令(如果有)可用于将这些列逐行对齐?正确的输出如下所示:
> sought_command file
foo foo
bar     bar
baz baz
qux qux
使用 awk:
$ awk '
# Re-align tokens: print one row per distinct token, showing the token in
# every column where it appeared in the input and blanks elsewhere.
{
    if (max < NF) max = NF            # widest record = number of output columns
    for (i = 1; i <= NF; i++) {
        b[$i]++                       # set of all distinct tokens (foo, bar, ...)
        a[i, $i]++                    # (column, token) pairs; the SUBSEP key keeps
                                      # column 1 + "1x" distinct from column 11 + "x"
    }
}
END {
    for (tok in b)                    # longest token determines the cell width
        if (width < length(tok)) width = length(tok)
    fmt = "%-" (width + 5) "s"        # left-justified cell plus 5 columns of padding
    for (tok in b) {                  # one row per token; for-in order is unspecified
        # Membership is tested with "in" so that lookups never create
        # empty entries in the array.
        for (col = 1; col <= max; col++)
            printf fmt, (((col, tok) in a) ? tok : "")
        print ""                      # end the row
    }
}' test.input
foo     foo
baz     baz
qux     qux
bar             bar
关于输入与输出数据的顺序:我假设输入中的标记没有固定顺序,因为下面这份输入数据也应该给出类似的输出。
foo foo
bar
baz baz bar
qux qux
也可以让输出保持各标记首次出现的顺序。例如,对上面(重新排序后)的输入,输出顺序将是 foo、bar、baz、qux。
$ awk '
# Re-align tokens: print one row per distinct token, in order of first
# appearance, showing the token in every column where it occurred.
{
    if (max < NF) max = NF            # widest record = number of output columns
    for (i = 1; i <= NF; i++) {
        if (!b[$i]++) {               # first time this token is seen
            token[n++] = $i           # remember tokens in first-appearance order
            if (max_len < length($i)) max_len = length($i)
        }
        a[i, $i]++                    # (column, token) pairs; the SUBSEP key keeps
                                      # column 1 + "1x" distinct from column 11 + "x"
    }
}
END {
    fmt = "%-" (max_len + 5) "s"      # left-justified cell plus 5 columns of padding
    # A counted loop walks token[0..n-1] in insertion order in any awk;
    # it does not depend on the gawk-only PROCINFO["sorted_in"] setting.
    for (k = 0; k < n; k++) {
        for (col = 1; col <= max; col++)
            printf fmt, (((col, token[k]) in a) ? token[k] : "")
        print ""                      # end the row
    }
}' test.input.reordered
foo     foo
bar             bar
baz     baz
qux     qux