使用来自另一个文件的行范围从单独文件中的字符串替换每 2 nth 出现
Replace each 2 nth occurrence of a string in separate files using a line range from another file
我有三个文件:
0.txt
和 0-1.txt
以下内容相同:
"#sun\t",
"car_snif = house.group_tree(home_cool)\t",
"machine(shoes_shirt.shop)\t",
"car_snif = house.group_tree(home_cool)\t",
"machine(shoes_shirt.shop)\t",
"car_snif = house.group_tree(home_cool)\t",
"machine(shoes_shirt.shop)\t",
"car_snif = house.group_tree(home_cool)\t",
"machine(shoes_shirt.shop)\t",
"#sun\t",
"car_snif = house.group_tree(home_cool)\t",
"machine(shoes_shirt.shop)\t",
"car_snif = house.group_tree(home_cool)\t",
"machine(shoes_shirt.shop)\t",
"car_snif = house.group_tree(home_cool)\t",
"machine(shoes_shirt.shop)\t",
"car_snif = house.group_tree(home_cool)\t",
和下面的源文件 1.txt
:
(food, apple,)(bag, tortoise,)
(sky, cat,)(sun, sea,)
(car, shape)(milk, market,)
(man, shirt)(hair, life)
(dog, big)(bal, pink)
对于 0.txt
,我想把 home_cool
的每 2 次出现依次(循环)替换为 1.txt
的一行,但只使用 1.txt
的前两行(即 sed -n '1,2p'
),这样我的 2.txt
输出如下:
"#sun\t",
"car_snif = house.group_tree((food, apple,)(bag, tortoise,))\t",
"machine(shoes_shirt.shop)\t",
"car_snif = house.group_tree((food, apple,)(bag, tortoise,))\t",
"machine(shoes_shirt.shop)\t",
"car_snif = house.group_tree((sky, cat,)(sun, sea,))\t",
"machine(shoes_shirt.shop)\t",
"car_snif = house.group_tree((sky, cat,)(sun, sea,))\t",
"machine(shoes_shirt.shop)\t",
"#sun\t",
"car_snif = house.group_tree((food, apple,)(bag, tortoise,))\t",
"machine(shoes_shirt.shop)\t",
"car_snif = house.group_tree((food, apple,)(bag, tortoise,))\t",
"machine(shoes_shirt.shop)\t",
"car_snif = house.group_tree((sky, cat,)(sun, sea,))\t",
"machine(shoes_shirt.shop)\t",
"car_snif = house.group_tree((sky, cat,)(sun, sea,))\t",
在 2.txt
处理完成后,我想对 0-1.txt
做同样的处理:把 home_cool
的每 2 次出现依次(循环)替换为 1.txt
的一行,但从 1.txt
的第三行开始(即 sed -n '3,5p'
),这样我的 3.txt
输出如下:
"#sun\t",
"car_snif = house.group_tree((car, shape)(milk, market,))\t",
"machine(shoes_shirt.shop)\t",
"car_snif = house.group_tree((car, shape)(milk, market,))\t",
"machine(shoes_shirt.shop)\t",
"car_snif = house.group_tree((man, shirt)(hair, life))\t",
"machine(shoes_shirt.shop)\t",
"car_snif = house.group_tree((man, shirt)(hair, life))\t",
"machine(shoes_shirt.shop)\t",
"#sun\t",
"car_snif = house.group_tree((dog, big)(bal, pink))\t",
"machine(shoes_shirt.shop)\t",
"car_snif = house.group_tree((dog, big)(bal, pink))\t",
"machine(shoes_shirt.shop)\t",
"car_snif = house.group_tree((car, shape)(milk, market,))\t",
"machine(shoes_shirt.shop)\t",
"car_snif = house.group_tree((car, shape)(milk, market,))\t",
通过下面的行,我可以将 0.txt
处 home_cool
的替换分为两个步骤(第一步 sed -n '1,2p'
和第二步 sed -n '3,5p'
)。但是我想在2.txt
中保存第一步,在3.txt
中保存第二步:
awk 'NR==FNR {a[NR]=$0; n=NR; next}/home_cool/ { gsub("home_cool", a[int((++i-1)%(n*2)/2)+1])}1' <(cat 1.txt | tee >(sed -n '1,2p') >(sed -n '3,5p')) 0.txt >> 2.txt
所以我真正想要的是(下面的伪代码):
awk 'NR==FNR {a[NR]=$0; n=NR; next}/home_cool/ { gsub("home_cool", a[int((++i-1)%(n*2)/2)+1])}1' <(cat 1.txt | tee >(sed -n '1,2p') >(sed -n '3,5p')) | "to sed -n '1,2p' make" 0.txt >> 2.txt | "to sed -n '3,5p' make" 0-1.txt >> 3.txt
我怎样才能在保持一条命令行的前提下做到这一点,而不必拆成几个彼此独立的 awk 片段?
注意:题名或许应该是“多输入,相同过程,不同输出”
指的是 OP ...
虽然我们当然可以修改(已接受的答案)之前的问答以跨各种输入执行这些拆分操作,但我会通过将这个新问题分成两个单独的操作来投票支持简单而不是复杂,例如:
awk '... from previous Q&A ...' <(head -2 1.txt) 0.txt > 2.txt
awk '... from previous Q&A ...' <(tail +3 1.txt) 0-1.txt > 3.txt
删除不必要的 /house_cool01/{...}
代码行,这变成:
awk 'NR==FNR {a[NR]=$0; n=NR; next} /home_cool/ {gsub("home_cool", a[int((i++)%(n*2)/2)+1] )} 1' <(head -2 1.txt) 0.txt > 2.txt
awk 'NR==FNR {a[NR]=$0; n=NR; next} /home_cool/ {gsub("home_cool", a[int((i++)%(n*2)/2)+1] )} 1' <(tail +3 1.txt) 0-1.txt > 3.txt
这些生成:
$ cat 2.txt
"#sun\t",
"car_snif = house.group_tree((food, apple,)(bag, tortoise,))\t",
"machine(shoes_shirt.shop)\t",
"car_snif = house.group_tree((food, apple,)(bag, tortoise,))\t",
"machine(shoes_shirt.shop)\t",
"car_snif = house.group_tree((sky, cat,)(sun, sea,))\t",
"machine(shoes_shirt.shop)\t",
"car_snif = house.group_tree((sky, cat,)(sun, sea,))\t",
"machine(shoes_shirt.shop)\t",
"#sun\t",
"car_snif = house.group_tree((food, apple,)(bag, tortoise,))\t",
"machine(shoes_shirt.shop)\t",
"car_snif = house.group_tree((food, apple,)(bag, tortoise,))\t",
"machine(shoes_shirt.shop)\t",
"car_snif = house.group_tree((sky, cat,)(sun, sea,))\t",
"machine(shoes_shirt.shop)\t",
"car_snif = house.group_tree((sky, cat,)(sun, sea,))\t",
$ cat 3.txt
"#sun\t",
"car_snif = house.group_tree((car, shape)(milk, market,))\t",
"machine(shoes_shirt.shop)\t",
"car_snif = house.group_tree((car, shape)(milk, market,))\t",
"machine(shoes_shirt.shop)\t",
"car_snif = house.group_tree((man, shirt)(hair, life))\t",
"machine(shoes_shirt.shop)\t",
"car_snif = house.group_tree((man, shirt)(hair, life))\t",
"machine(shoes_shirt.shop)\t",
"#sun\t",
"car_snif = house.group_tree((dog, big)(bal, pink))\t",
"machine(shoes_shirt.shop)\t",
"car_snif = house.group_tree((dog, big)(bal, pink))\t",
"machine(shoes_shirt.shop)\t",
"car_snif = house.group_tree((car, shape)(milk, market,))\t",
"machine(shoes_shirt.shop)\t",
"car_snif = house.group_tree((car, shape)(milk, market,))\t",
这个有效:
awk \
'FNR==1 {++f}
f==1 {a[i++]=$0}
f==2 {if ($0~/home_cool/) {gsub(/home_cool/, a[int(j++/2)%2]) }; print > "2.txt"}
f==3 {if ($0~/home_cool/) {gsub(/home_cool/, a[int(k++/2)%3 + 2]) }; print > "3.txt"}' \
1.txt 0.txt 0-1.txt
硬编码 "2.txt"
和 "3.txt"
的替代方案包括:
- 使用分配给
-v outfile1=2.txt -v outfile2=3.txt
的变量
- 将它们替换为
outfile
,并使用此 arg 列表:1.txt outfile=2.txt 0.txt outfile=3.txt 0-1.txt
- 将它们替换为
ARGV[4]
和 ARGV[5]
,添加行 f==4 {exit}
,并使用此 arg 列表:1.txt 0.txt 0-1.txt 2.txt 3.txt
注意事项:
- 如果给定的某个文件为空,则
f
不会递增,后续的处理也会随之出错。在 gawk 中,可以使用 ENDFILE
代替。参见这个答案:
我有三个文件:
0.txt
和 0-1.txt
以下内容相同:
"#sun\t",
"car_snif = house.group_tree(home_cool)\t",
"machine(shoes_shirt.shop)\t",
"car_snif = house.group_tree(home_cool)\t",
"machine(shoes_shirt.shop)\t",
"car_snif = house.group_tree(home_cool)\t",
"machine(shoes_shirt.shop)\t",
"car_snif = house.group_tree(home_cool)\t",
"machine(shoes_shirt.shop)\t",
"#sun\t",
"car_snif = house.group_tree(home_cool)\t",
"machine(shoes_shirt.shop)\t",
"car_snif = house.group_tree(home_cool)\t",
"machine(shoes_shirt.shop)\t",
"car_snif = house.group_tree(home_cool)\t",
"machine(shoes_shirt.shop)\t",
"car_snif = house.group_tree(home_cool)\t",
和下面的源文件 1.txt
:
(food, apple,)(bag, tortoise,)
(sky, cat,)(sun, sea,)
(car, shape)(milk, market,)
(man, shirt)(hair, life)
(dog, big)(bal, pink)
对于 0.txt
,我想把 home_cool
的每 2 次出现依次(循环)替换为 1.txt
的一行,但只使用 1.txt
的前两行(即 sed -n '1,2p'
),这样我的 2.txt
输出如下:
"#sun\t",
"car_snif = house.group_tree((food, apple,)(bag, tortoise,))\t",
"machine(shoes_shirt.shop)\t",
"car_snif = house.group_tree((food, apple,)(bag, tortoise,))\t",
"machine(shoes_shirt.shop)\t",
"car_snif = house.group_tree((sky, cat,)(sun, sea,))\t",
"machine(shoes_shirt.shop)\t",
"car_snif = house.group_tree((sky, cat,)(sun, sea,))\t",
"machine(shoes_shirt.shop)\t",
"#sun\t",
"car_snif = house.group_tree((food, apple,)(bag, tortoise,))\t",
"machine(shoes_shirt.shop)\t",
"car_snif = house.group_tree((food, apple,)(bag, tortoise,))\t",
"machine(shoes_shirt.shop)\t",
"car_snif = house.group_tree((sky, cat,)(sun, sea,))\t",
"machine(shoes_shirt.shop)\t",
"car_snif = house.group_tree((sky, cat,)(sun, sea,))\t",
在 2.txt
处理完成后,我想对 0-1.txt
做同样的处理:把 home_cool
的每 2 次出现依次(循环)替换为 1.txt
的一行,但从 1.txt
的第三行开始(即 sed -n '3,5p'
),这样我的 3.txt
输出如下:
"#sun\t",
"car_snif = house.group_tree((car, shape)(milk, market,))\t",
"machine(shoes_shirt.shop)\t",
"car_snif = house.group_tree((car, shape)(milk, market,))\t",
"machine(shoes_shirt.shop)\t",
"car_snif = house.group_tree((man, shirt)(hair, life))\t",
"machine(shoes_shirt.shop)\t",
"car_snif = house.group_tree((man, shirt)(hair, life))\t",
"machine(shoes_shirt.shop)\t",
"#sun\t",
"car_snif = house.group_tree((dog, big)(bal, pink))\t",
"machine(shoes_shirt.shop)\t",
"car_snif = house.group_tree((dog, big)(bal, pink))\t",
"machine(shoes_shirt.shop)\t",
"car_snif = house.group_tree((car, shape)(milk, market,))\t",
"machine(shoes_shirt.shop)\t",
"car_snif = house.group_tree((car, shape)(milk, market,))\t",
通过下面的行,我可以将 0.txt
处 home_cool
的替换分为两个步骤(第一步 sed -n '1,2p'
和第二步 sed -n '3,5p'
)。但是我想在2.txt
中保存第一步,在3.txt
中保存第二步:
awk 'NR==FNR {a[NR]=$0; n=NR; next}/home_cool/ { gsub("home_cool", a[int((++i-1)%(n*2)/2)+1])}1' <(cat 1.txt | tee >(sed -n '1,2p') >(sed -n '3,5p')) 0.txt >> 2.txt
所以我真正想要的是(下面的伪代码):
awk 'NR==FNR {a[NR]=$0; n=NR; next}/home_cool/ { gsub("home_cool", a[int((++i-1)%(n*2)/2)+1])}1' <(cat 1.txt | tee >(sed -n '1,2p') >(sed -n '3,5p')) | "to sed -n '1,2p' make" 0.txt >> 2.txt | "to sed -n '3,5p' make" 0-1.txt >> 3.txt
我怎样才能在保持一条命令行的前提下做到这一点,而不必拆成几个彼此独立的 awk 片段?
注意:题名或许应该是“多输入,相同过程,不同输出”
指的是 OP
虽然我们当然可以修改(已接受的答案)之前的问答以跨各种输入执行这些拆分操作,但我会通过将这个新问题分成两个单独的操作来投票支持简单而不是复杂,例如:
awk '... from previous Q&A ...' <(head -2 1.txt) 0.txt > 2.txt
awk '... from previous Q&A ...' <(tail +3 1.txt) 0-1.txt > 3.txt
删除不必要的 /house_cool01/{...}
代码行,这变成:
awk 'NR==FNR {a[NR]=$0; n=NR; next} /home_cool/ {gsub("home_cool", a[int((i++)%(n*2)/2)+1] )} 1' <(head -2 1.txt) 0.txt > 2.txt
awk 'NR==FNR {a[NR]=$0; n=NR; next} /home_cool/ {gsub("home_cool", a[int((i++)%(n*2)/2)+1] )} 1' <(tail +3 1.txt) 0-1.txt > 3.txt
这些生成:
$ cat 2.txt
"#sun\t",
"car_snif = house.group_tree((food, apple,)(bag, tortoise,))\t",
"machine(shoes_shirt.shop)\t",
"car_snif = house.group_tree((food, apple,)(bag, tortoise,))\t",
"machine(shoes_shirt.shop)\t",
"car_snif = house.group_tree((sky, cat,)(sun, sea,))\t",
"machine(shoes_shirt.shop)\t",
"car_snif = house.group_tree((sky, cat,)(sun, sea,))\t",
"machine(shoes_shirt.shop)\t",
"#sun\t",
"car_snif = house.group_tree((food, apple,)(bag, tortoise,))\t",
"machine(shoes_shirt.shop)\t",
"car_snif = house.group_tree((food, apple,)(bag, tortoise,))\t",
"machine(shoes_shirt.shop)\t",
"car_snif = house.group_tree((sky, cat,)(sun, sea,))\t",
"machine(shoes_shirt.shop)\t",
"car_snif = house.group_tree((sky, cat,)(sun, sea,))\t",
$ cat 3.txt
"#sun\t",
"car_snif = house.group_tree((car, shape)(milk, market,))\t",
"machine(shoes_shirt.shop)\t",
"car_snif = house.group_tree((car, shape)(milk, market,))\t",
"machine(shoes_shirt.shop)\t",
"car_snif = house.group_tree((man, shirt)(hair, life))\t",
"machine(shoes_shirt.shop)\t",
"car_snif = house.group_tree((man, shirt)(hair, life))\t",
"machine(shoes_shirt.shop)\t",
"#sun\t",
"car_snif = house.group_tree((dog, big)(bal, pink))\t",
"machine(shoes_shirt.shop)\t",
"car_snif = house.group_tree((dog, big)(bal, pink))\t",
"machine(shoes_shirt.shop)\t",
"car_snif = house.group_tree((car, shape)(milk, market,))\t",
"machine(shoes_shirt.shop)\t",
"car_snif = house.group_tree((car, shape)(milk, market,))\t",
这个有效:
awk \
'FNR==1 {++f}
f==1 {a[i++]=$0}
f==2 {if ($0~/home_cool/) {gsub(/home_cool/, a[int(j++/2)%2]) }; print > "2.txt"}
f==3 {if ($0~/home_cool/) {gsub(/home_cool/, a[int(k++/2)%3 + 2]) }; print > "3.txt"}' \
1.txt 0.txt 0-1.txt
硬编码 "2.txt"
和 "3.txt"
的替代方案包括:
- 使用分配给
-v outfile1=2.txt -v outfile2=3.txt
的变量
- 将它们替换为
outfile
,并使用此 arg 列表:1.txt outfile=2.txt 0.txt outfile=3.txt 0-1.txt
- 将它们替换为
ARGV[4]
和ARGV[5]
,添加行f==4 {exit}
,并使用此 arg 列表:1.txt 0.txt 0-1.txt 2.txt 3.txt
注意事项:
- 如果给定的某个文件为空,则
f
不会递增,后续的处理也会随之出错。在 gawk 中,可以使用 ENDFILE
代替。参见这个答案: