在 bash 中排序文本文件
Sort text file in bash
我有这个文本文件:
0_0_0_0_1_1_1_1_1 [
0.01155712 0.5775286 0.01599521 0.383362 0.01155712 ]
0_1_1_0_0_1_1_1_232 [
4.980576e-09 1.21296e-06 0.0001519765 0.9998468 4.980576e-09 ]
0_1_1_0_0_1_1_1_226 [
0.009718912 0.5821248 0.013627 0.3848103 0.009718912 ]
0_1_1_0_0_1_1_1_227 [
0.009751211 0.5818524 0.01370924 0.3849359 0.009751211 ]
0_0_0_0_1_1_1_1_0 [
0.0101164 0.580949 0.01410898 0.3847092 0.0101164 ]
0_0_0_0_1_1_1_1_10 [
2.829467e-13 0.9999987 1.257838e-06 1.43308e-11 2.829467e-13 ]
0_1_1_0_0_1_1_1_228 [
1.050669e-09 3.34911e-06 0.0006787472 0.9993179 1.050669e-09 ]
0_0_0_0_1_1_1_1_5 [
8.811786e-08 2.491895e-05 0.0001959623 0.9997789 8.811786e-08 ]
0_1_1_0_0_1_1_1_229 [
4.031684e-08 0.0009897997 0.9982967 0.0007134909 4.031684e-08 ]
0_0_0_0_1_1_1_1_6 [
0.00122948 0.1168619 0.005731953 0.8749472 0.001229479 ]
0_1_1_0_0_1_1_1_230 [
1.156299e-06 0.9997224 4.28468e-05 0.0002323914 1.1563e-06 ]
0_1_1_0_0_1_1_1_231 [
1.581783e-08 0.05209186 0.947908 5.398444e-08 1.581783e-08 ]
0_0_0_0_1_1_1_1_7 [
0.001118228 0.8515728 0.005113816 0.141077 0.001118228 ]
0_1_1_0_0_1_1_1_233 [
4.050781e-05 0.01894026 0.001323607 0.9796551 4.050781e-05 ]
0_0_0_0_1_1_1_1_8 [
2.476718e-13 1.084459e-05 0.9999892 7.285134e-09 2.476718e-13 ]
0_1_1_0_0_1_1_1_234 [
0.0002922548 0.8787904 0.002183357 0.1184417 0.0002922548 ]
0_1_1_0_0_1_1_1_235 [
8.223566e-12 0.9999944 5.548976e-06 4.713996e-10 8.223566e-12 ]
0_1_1_0_0_1_1_1_236 [
9.655272e-18 2.265286e-08 1 4.498827e-11 9.655272e-18 ]
0_1_1_0_0_1_1_1_237 [
1.503673e-06 0.9467446 0.05282303 0.0004293863 1.503673e-06 ]
0_1_1_0_0_1_1_1_238 [
4.519509e-12 1 1.623816e-08 3.29057e-08 4.519509e-12 ]
0_1_1_0_0_1_1_1_239 [
0.01033381 0.580116 0.01444763 0.3847687 0.01033381 ]
其格式为 {name}_{part} [{data points}]
举个例子:
0_0_1_0_0_0_1_0
: 就是名字
_51
:将是部件号
[1.651947e-12 5.555453e-05 0.9999444 3.284811e-09 1.651947e-12 ]
: 将是数据点。
需要把所有名称相同的数据点按部件号递增的顺序排序并追加到一起……怎样才能用简单的方式做到?我觉得用 bash 来做比较合理,但具体如何按部件号递增的顺序进行追加,我还没想明白。
期望的输出:
0_1_1_0_0_1_1_1 [
0.009718912 0.5821248 0.013627 0.3848103 0.009718912
0.009751211 0.5818524 0.01370924 0.3849359 0.009751211
1.050669e-09 3.34911e-06 0.0006787472 0.9993179 1.050669e-09
4.031684e-08 0.0009897997 0.9982967 0.0007134909 4.031684e-08
1.156299e-06 0.9997224 4.28468e-05 0.0002323914 1.1563e-06
1.581783e-08 0.05209186 0.947908 5.398444e-08 1.581783e-08
4.980576e-09 1.21296e-06 0.0001519765 0.9998468 4.980576e-09
4.050781e-05 0.01894026 0.001323607 0.9796551 4.050781e-05
0.0002922548 0.8787904 0.002183357 0.1184417 0.0002922548
8.223566e-12 0.9999944 5.548976e-06 4.713996e-10 8.223566e-12
9.655272e-18 2.265286e-08 1 4.498827e-11 9.655272e-18
1.503673e-06 0.9467446 0.05282303 0.0004293863 1.503673e-06
4.519509e-12 1 1.623816e-08 3.29057e-08 4.519509e-12
0.01033381 0.580116 0.01444763 0.3847687 0.01033381 ]
0_0_0_0_1_1_1_1 [
0.0101164 0.580949 0.01410898 0.3847092 0.0101164
0.01155712 0.5775286 0.01599521 0.383362 0.01155712
8.811786e-08 2.491895e-05 0.0001959623 0.9997789 8.811786e-08
0.00122948 0.1168619 0.005731953 0.8749472 0.001229479
0.001118228 0.8515728 0.005113816 0.141077 0.001118228
2.476718e-13 1.084459e-05 0.9999892 7.285134e-09 2.476718e-13
2.829467e-13 0.9999987 1.257838e-06 1.43308e-11 2.829467e-13 ]
perl -ne '
# Input comes in pairs of lines: a header "name_part [" on every odd line,
# followed by its row of data points, closed by "]", on the next even line.
if ($. % 2) {
    # Header line: the greedy (.*) keeps everything up to the last "_<digits>"
    # as the name, and the digits become the part number.
    ($name, $part) = /(.*)_(\d+)/;
    # debug: print "name:$name part:$part\n";
}
else {
    # Data line: keep the text in front of the closing bracket and file it
    # under the name/part taken from the preceding header line.
    ($datapoints) = /(.*)\]/;
    $hash{$name}{$part} = $datapoints;
}
END {
    # One group per name; rows inside a group are ordered by numeric part.
    for $key (keys %hash) {
        $v = $hash{$key};
        print "$key [\n";
        # debug variant: print ":$_:$v->{$_}\n" to show the part numbers too
        print "$v->{$_}\n" for sort { $a <=> $b } keys %$v;
        print "]\n";
    }
}
' input.txt
如果输入不完全符合给定格式,另一个版本更安全
perl -n00e '
# Paragraph mode (-00): a record is a blank-line-separated chunk of the file,
# so a header and its data row may be wrapped across lines in any way.
# Each "name_part [ data ]" group found in the record yields three captures:
#   $1 = name, $2 = part number, $3 = data points without surrounding blanks.
while ( /([\d_]*)_(\d*) \s* \[ \s* (.*?) \s* \]/gmsx ) {
    ($name,$part,$datapoints) = ($1,$2,$3);
    $hash{$name}{$part}=$datapoints;
}
# Print once, after the last record has been read. Printing in the body of the
# -n loop would repeat the accumulated groups for every paragraph of input.
END {
    while (($key,$v)=each %hash) {
        # Rows of one name are emitted in ascending numeric part order.
        print "$key [\n", (
            map "${$v}{$_}\n", sort {$a<=>$b} keys %{$v}
        ), "]\n";
    }
}
' input.txt
awk 解决办法:
awk '
# Header record ("name_part [", two fields): the key is the leading run of
# eight underscore-joined numbers in the first field, i.e. the name without
# the part number. {7} is an interval expression; some older awk
# implementations do not support it.
NF == 2 { match($1, /^[0-9]+(_[0-9]+){7}/); k = substr($1, RSTART, RLENGTH); next }
# Data record: blank the trailing "]" field, then append the row to the text
# collected so far for the current name.
{ $NF = ""; a[k] = a[k] "\n " $0 }
# Emit every group. Rows stay in input order (no sorting by part number), and
# the order in which "for (i in a)" visits the names is unspecified.
END { for (i in a) printf "%s [%s ]\n\n", i, a[i] }' yourfile
NF == 2
- 处理具有 2 个字段的记录(即具有“name”值)
match($1,/^[0-9]+(_[0-9]+){7}/)
- 捕获“name”序列
k = substr($1,RSTART,RLENGTH)
- 将 name 视为数组键
{ $NF=""; a[k]=a[k]"\n "$0 }
- 将每一行数据点(去掉尾随的 ])追加到对应的 name 元素
printf "%s [%s ]\n\n",i,a[i]
- 打印格式化输出
输出:
0_0_0_0_1_1_1_1 [
0.01155712 0.5775286 0.01599521 0.383362 0.01155712
0.0101164 0.580949 0.01410898 0.3847092 0.0101164
2.829467e-13 0.9999987 1.257838e-06 1.43308e-11 2.829467e-13
8.811786e-08 2.491895e-05 0.0001959623 0.9997789 8.811786e-08
0.00122948 0.1168619 0.005731953 0.8749472 0.001229479
0.001118228 0.8515728 0.005113816 0.141077 0.001118228
2.476718e-13 1.084459e-05 0.9999892 7.285134e-09 2.476718e-13 ]
0_1_1_0_0_1_1_1 [
4.980576e-09 1.21296e-06 0.0001519765 0.9998468 4.980576e-09
0.009718912 0.5821248 0.013627 0.3848103 0.009718912
0.009751211 0.5818524 0.01370924 0.3849359 0.009751211
1.050669e-09 3.34911e-06 0.0006787472 0.9993179 1.050669e-09
4.031684e-08 0.0009897997 0.9982967 0.0007134909 4.031684e-08
1.156299e-06 0.9997224 4.28468e-05 0.0002323914 1.1563e-06
1.581783e-08 0.05209186 0.947908 5.398444e-08 1.581783e-08
4.050781e-05 0.01894026 0.001323607 0.9796551 4.050781e-05
0.0002922548 0.8787904 0.002183357 0.1184417 0.0002922548
8.223566e-12 0.9999944 5.548976e-06 4.713996e-10 8.223566e-12
9.655272e-18 2.265286e-08 1 4.498827e-11 9.655272e-18
1.503673e-06 0.9467446 0.05282303 0.0004293863 1.503673e-06
4.519509e-12 1 1.623816e-08 3.29057e-08 4.519509e-12
0.01033381 0.580116 0.01444763 0.3847687 0.01033381 ]
我有这个文本文件:
0_0_0_0_1_1_1_1_1 [
0.01155712 0.5775286 0.01599521 0.383362 0.01155712 ]
0_1_1_0_0_1_1_1_232 [
4.980576e-09 1.21296e-06 0.0001519765 0.9998468 4.980576e-09 ]
0_1_1_0_0_1_1_1_226 [
0.009718912 0.5821248 0.013627 0.3848103 0.009718912 ]
0_1_1_0_0_1_1_1_227 [
0.009751211 0.5818524 0.01370924 0.3849359 0.009751211 ]
0_0_0_0_1_1_1_1_0 [
0.0101164 0.580949 0.01410898 0.3847092 0.0101164 ]
0_0_0_0_1_1_1_1_10 [
2.829467e-13 0.9999987 1.257838e-06 1.43308e-11 2.829467e-13 ]
0_1_1_0_0_1_1_1_228 [
1.050669e-09 3.34911e-06 0.0006787472 0.9993179 1.050669e-09 ]
0_0_0_0_1_1_1_1_5 [
8.811786e-08 2.491895e-05 0.0001959623 0.9997789 8.811786e-08 ]
0_1_1_0_0_1_1_1_229 [
4.031684e-08 0.0009897997 0.9982967 0.0007134909 4.031684e-08 ]
0_0_0_0_1_1_1_1_6 [
0.00122948 0.1168619 0.005731953 0.8749472 0.001229479 ]
0_1_1_0_0_1_1_1_230 [
1.156299e-06 0.9997224 4.28468e-05 0.0002323914 1.1563e-06 ]
0_1_1_0_0_1_1_1_231 [
1.581783e-08 0.05209186 0.947908 5.398444e-08 1.581783e-08 ]
0_0_0_0_1_1_1_1_7 [
0.001118228 0.8515728 0.005113816 0.141077 0.001118228 ]
0_1_1_0_0_1_1_1_233 [
4.050781e-05 0.01894026 0.001323607 0.9796551 4.050781e-05 ]
0_0_0_0_1_1_1_1_8 [
2.476718e-13 1.084459e-05 0.9999892 7.285134e-09 2.476718e-13 ]
0_1_1_0_0_1_1_1_234 [
0.0002922548 0.8787904 0.002183357 0.1184417 0.0002922548 ]
0_1_1_0_0_1_1_1_235 [
8.223566e-12 0.9999944 5.548976e-06 4.713996e-10 8.223566e-12 ]
0_1_1_0_0_1_1_1_236 [
9.655272e-18 2.265286e-08 1 4.498827e-11 9.655272e-18 ]
0_1_1_0_0_1_1_1_237 [
1.503673e-06 0.9467446 0.05282303 0.0004293863 1.503673e-06 ]
0_1_1_0_0_1_1_1_238 [
4.519509e-12 1 1.623816e-08 3.29057e-08 4.519509e-12 ]
0_1_1_0_0_1_1_1_239 [
0.01033381 0.580116 0.01444763 0.3847687 0.01033381 ]
其格式为 {name}_{part} [{data points}]
举个例子:
0_0_1_0_0_0_1_0
: 就是名字
_51
:将是部件号
[1.651947e-12 5.555453e-05 0.9999444 3.284811e-09 1.651947e-12 ]
: 将是数据点。
需要把所有名称相同的数据点按部件号递增的顺序排序并追加到一起……怎样才能用简单的方式做到?我觉得用 bash 来做比较合理,但具体如何按部件号递增的顺序进行追加,我还没想明白。
期望的输出:
0_1_1_0_0_1_1_1 [
0.009718912 0.5821248 0.013627 0.3848103 0.009718912
0.009751211 0.5818524 0.01370924 0.3849359 0.009751211
1.050669e-09 3.34911e-06 0.0006787472 0.9993179 1.050669e-09
4.031684e-08 0.0009897997 0.9982967 0.0007134909 4.031684e-08
1.156299e-06 0.9997224 4.28468e-05 0.0002323914 1.1563e-06
1.581783e-08 0.05209186 0.947908 5.398444e-08 1.581783e-08
4.980576e-09 1.21296e-06 0.0001519765 0.9998468 4.980576e-09
4.050781e-05 0.01894026 0.001323607 0.9796551 4.050781e-05
0.0002922548 0.8787904 0.002183357 0.1184417 0.0002922548
8.223566e-12 0.9999944 5.548976e-06 4.713996e-10 8.223566e-12
9.655272e-18 2.265286e-08 1 4.498827e-11 9.655272e-18
1.503673e-06 0.9467446 0.05282303 0.0004293863 1.503673e-06
4.519509e-12 1 1.623816e-08 3.29057e-08 4.519509e-12
0.01033381 0.580116 0.01444763 0.3847687 0.01033381 ]
0_0_0_0_1_1_1_1 [
0.0101164 0.580949 0.01410898 0.3847092 0.0101164
0.01155712 0.5775286 0.01599521 0.383362 0.01155712
8.811786e-08 2.491895e-05 0.0001959623 0.9997789 8.811786e-08
0.00122948 0.1168619 0.005731953 0.8749472 0.001229479
0.001118228 0.8515728 0.005113816 0.141077 0.001118228
2.476718e-13 1.084459e-05 0.9999892 7.285134e-09 2.476718e-13
2.829467e-13 0.9999987 1.257838e-06 1.43308e-11 2.829467e-13 ]
perl -ne '
# Input comes in pairs of lines: a header "name_part [" on every odd line,
# followed by its row of data points, closed by "]", on the next even line.
if ($. % 2) {
    # Header line: the greedy (.*) keeps everything up to the last "_<digits>"
    # as the name, and the digits become the part number.
    ($name, $part) = /(.*)_(\d+)/;
    # debug: print "name:$name part:$part\n";
}
else {
    # Data line: keep the text in front of the closing bracket and file it
    # under the name/part taken from the preceding header line.
    ($datapoints) = /(.*)\]/;
    $hash{$name}{$part} = $datapoints;
}
END {
    # One group per name; rows inside a group are ordered by numeric part.
    for $key (keys %hash) {
        $v = $hash{$key};
        print "$key [\n";
        # debug variant: print ":$_:$v->{$_}\n" to show the part numbers too
        print "$v->{$_}\n" for sort { $a <=> $b } keys %$v;
        print "]\n";
    }
}
' input.txt
如果输入不完全符合给定格式,另一个版本更安全
perl -n00e '
# Paragraph mode (-00): a record is a blank-line-separated chunk of the file,
# so a header and its data row may be wrapped across lines in any way.
# Each "name_part [ data ]" group found in the record yields three captures:
#   $1 = name, $2 = part number, $3 = data points without surrounding blanks.
while ( /([\d_]*)_(\d*) \s* \[ \s* (.*?) \s* \]/gmsx ) {
    ($name,$part,$datapoints) = ($1,$2,$3);
    $hash{$name}{$part}=$datapoints;
}
# Print once, after the last record has been read. Printing in the body of the
# -n loop would repeat the accumulated groups for every paragraph of input.
END {
    while (($key,$v)=each %hash) {
        # Rows of one name are emitted in ascending numeric part order.
        print "$key [\n", (
            map "${$v}{$_}\n", sort {$a<=>$b} keys %{$v}
        ), "]\n";
    }
}
' input.txt
awk 解决办法:
awk '
# Header record ("name_part [", two fields): the key is the leading run of
# eight underscore-joined numbers in the first field, i.e. the name without
# the part number. {7} is an interval expression; some older awk
# implementations do not support it.
NF == 2 { match($1, /^[0-9]+(_[0-9]+){7}/); k = substr($1, RSTART, RLENGTH); next }
# Data record: blank the trailing "]" field, then append the row to the text
# collected so far for the current name.
{ $NF = ""; a[k] = a[k] "\n " $0 }
# Emit every group. Rows stay in input order (no sorting by part number), and
# the order in which "for (i in a)" visits the names is unspecified.
END { for (i in a) printf "%s [%s ]\n\n", i, a[i] }' yourfile
NF == 2
- 处理具有 2 个字段的记录(即具有“name”值)
match($1,/^[0-9]+(_[0-9]+){7}/)
- 捕获“name”序列
k = substr($1,RSTART,RLENGTH)
- 将 name 视为数组键
{ $NF=""; a[k]=a[k]"\n "$0 }
- 将每一行数据点(去掉尾随的 ])追加到对应的 name 元素
printf "%s [%s ]\n\n",i,a[i]
- 打印格式化输出
输出:
0_0_0_0_1_1_1_1 [
0.01155712 0.5775286 0.01599521 0.383362 0.01155712
0.0101164 0.580949 0.01410898 0.3847092 0.0101164
2.829467e-13 0.9999987 1.257838e-06 1.43308e-11 2.829467e-13
8.811786e-08 2.491895e-05 0.0001959623 0.9997789 8.811786e-08
0.00122948 0.1168619 0.005731953 0.8749472 0.001229479
0.001118228 0.8515728 0.005113816 0.141077 0.001118228
2.476718e-13 1.084459e-05 0.9999892 7.285134e-09 2.476718e-13 ]
0_1_1_0_0_1_1_1 [
4.980576e-09 1.21296e-06 0.0001519765 0.9998468 4.980576e-09
0.009718912 0.5821248 0.013627 0.3848103 0.009718912
0.009751211 0.5818524 0.01370924 0.3849359 0.009751211
1.050669e-09 3.34911e-06 0.0006787472 0.9993179 1.050669e-09
4.031684e-08 0.0009897997 0.9982967 0.0007134909 4.031684e-08
1.156299e-06 0.9997224 4.28468e-05 0.0002323914 1.1563e-06
1.581783e-08 0.05209186 0.947908 5.398444e-08 1.581783e-08
4.050781e-05 0.01894026 0.001323607 0.9796551 4.050781e-05
0.0002922548 0.8787904 0.002183357 0.1184417 0.0002922548
8.223566e-12 0.9999944 5.548976e-06 4.713996e-10 8.223566e-12
9.655272e-18 2.265286e-08 1 4.498827e-11 9.655272e-18
1.503673e-06 0.9467446 0.05282303 0.0004293863 1.503673e-06
4.519509e-12 1 1.623816e-08 3.29057e-08 4.519509e-12
0.01033381 0.580116 0.01444763 0.3847687 0.01033381 ]