使用另一个文件中的行范围,替换不同文件中某个字符串的每 2 次出现

Replace each 2 nth occurs from a string in separate files using line range from another file

我有三个文件:

0.txt 和 0-1.txt 的内容相同,如下:

"#sun\t",
"car_snif = house.group_tree(home_cool)\t",
"machine(shoes_shirt.shop)\t",
"car_snif = house.group_tree(home_cool)\t",
"machine(shoes_shirt.shop)\t",
"car_snif = house.group_tree(home_cool)\t",
"machine(shoes_shirt.shop)\t",
"car_snif = house.group_tree(home_cool)\t",
"machine(shoes_shirt.shop)\t",
"#sun\t",
"car_snif = house.group_tree(home_cool)\t",
"machine(shoes_shirt.shop)\t",
"car_snif = house.group_tree(home_cool)\t",
"machine(shoes_shirt.shop)\t",
"car_snif = house.group_tree(home_cool)\t",
"machine(shoes_shirt.shop)\t",
"car_snif = house.group_tree(home_cool)\t",

和下面的源文件 1.txt

(food, apple,)(bag, tortoise,)
(sky, cat,)(sun, sea,)
(car, shape)(milk, market,)
(man, shirt)(hair, life)
(dog, big)(bal, pink)

对于 0.txt,我想把 home_cool 的每 2 次出现依次替换为 1.txt 中的下一行(即 1.txt 的每一行连续使用两次),但只使用 1.txt 的前两行(即 sed -n '1,2p'),这样我的 2.txt 输出如下:

"#sun\t",
"car_snif = house.group_tree((food, apple,)(bag, tortoise,))\t",
"machine(shoes_shirt.shop)\t",
"car_snif = house.group_tree((food, apple,)(bag, tortoise,))\t",
"machine(shoes_shirt.shop)\t",
"car_snif = house.group_tree((sky, cat,)(sun, sea,))\t",
"machine(shoes_shirt.shop)\t",
"car_snif = house.group_tree((sky, cat,)(sun, sea,))\t",
"machine(shoes_shirt.shop)\t",
"#sun\t",
"car_snif = house.group_tree((food, apple,)(bag, tortoise,))\t",
"machine(shoes_shirt.shop)\t",
"car_snif = house.group_tree((food, apple,)(bag, tortoise,))\t",
"machine(shoes_shirt.shop)\t",
"car_snif = house.group_tree((sky, cat,)(sun, sea,))\t",
"machine(shoes_shirt.shop)\t",
"car_snif = house.group_tree((sky, cat,)(sun, sea,))\t",

2.txt 的处理完成后,我想对 0-1.txt 做同样的替换:把 home_cool 的每 2 次出现依次替换为 1.txt 中的下一行,但从 1.txt 的第三行开始使用(即 sed -n '3,5p'),这样我的 3.txt 输出如下:

"#sun\t",
"car_snif = house.group_tree((car, shape)(milk, market,))\t",
"machine(shoes_shirt.shop)\t",
"car_snif = house.group_tree((car, shape)(milk, market,))\t",
"machine(shoes_shirt.shop)\t",
"car_snif = house.group_tree((man, shirt)(hair, life))\t",
"machine(shoes_shirt.shop)\t",
"car_snif = house.group_tree((man, shirt)(hair, life))\t",
"machine(shoes_shirt.shop)\t",
"#sun\t",
"car_snif = house.group_tree((dog, big)(bal, pink))\t",
"machine(shoes_shirt.shop)\t",
"car_snif = house.group_tree((dog, big)(bal, pink))\t",
"machine(shoes_shirt.shop)\t",
"car_snif = house.group_tree((car, shape)(milk, market,))\t",
"machine(shoes_shirt.shop)\t",
"car_snif = house.group_tree((car, shape)(milk, market,))\t",

通过下面的命令,我可以把 0.txt 中 home_cool 的替换分成两个步骤(第一步 sed -n '1,2p',第二步 sed -n '3,5p')。但是我想把第一步的结果保存到 2.txt,把第二步的结果保存到 3.txt:

awk 'NR==FNR {a[NR]=$0; n=NR; next}/home_cool/ { gsub("home_cool", a[int((++i-1)%(n*2)/2)+1])}1' <(cat 1.txt | tee >(sed -n '1,2p') >(sed -n '3,5p')) 0.txt >> 2.txt

所以我真正想要的是(下面的伪代码):

awk 'NR==FNR {a[NR]=$0; n=NR; next}/home_cool/ { gsub("home_cool", a[int((++i-1)%(n*2)/2)+1])}1' <(cat 1.txt | tee >(sed -n '1,2p') >(sed -n '3,5p')) | "to sed -n '1,2p' make" 0.txt >> 2.txt | "to sed -n '3,5p' make" 0-1.txt >> 3.txt

我怎样才能保持为一条命令行、而不把它拆成几段孤立的 awk 代码来做到这一点?

注意:题名或许应该是“多输入,相同过程,不同输出”

指的是 OP ...

虽然我们当然可以修改之前问答中(已被接受的)答案,使其针对不同的输入执行这些拆分操作,但我更倾向于简单而非复杂:把这个新问题拆成两个独立的操作,例如:

awk '... from previous Q&A ...' <(head -2 1.txt) 0.txt   > 2.txt
awk '... from previous Q&A ...' <(tail +3 1.txt) 0-1.txt > 3.txt

删除不必要的 /house_cool01/{...} 代码行,这变成:

awk 'NR==FNR {a[NR]=$0; n=NR; next} /home_cool/ {gsub("home_cool", a[int((i++)%(n*2)/2)+1] )} 1' <(head -2 1.txt) 0.txt   > 2.txt
awk 'NR==FNR {a[NR]=$0; n=NR; next} /home_cool/ {gsub("home_cool", a[int((i++)%(n*2)/2)+1] )} 1' <(tail +3 1.txt) 0-1.txt > 3.txt

这些生成:

$ cat 2.txt
"#sun\t",
"car_snif = house.group_tree((food, apple,)(bag, tortoise,))\t",
"machine(shoes_shirt.shop)\t",
"car_snif = house.group_tree((food, apple,)(bag, tortoise,))\t",
"machine(shoes_shirt.shop)\t",
"car_snif = house.group_tree((sky, cat,)(sun, sea,))\t",
"machine(shoes_shirt.shop)\t",
"car_snif = house.group_tree((sky, cat,)(sun, sea,))\t",
"machine(shoes_shirt.shop)\t",
"#sun\t",
"car_snif = house.group_tree((food, apple,)(bag, tortoise,))\t",
"machine(shoes_shirt.shop)\t",
"car_snif = house.group_tree((food, apple,)(bag, tortoise,))\t",
"machine(shoes_shirt.shop)\t",
"car_snif = house.group_tree((sky, cat,)(sun, sea,))\t",
"machine(shoes_shirt.shop)\t",
"car_snif = house.group_tree((sky, cat,)(sun, sea,))\t",

$ cat 3.txt
"#sun\t",
"car_snif = house.group_tree((car, shape)(milk, market,))\t",
"machine(shoes_shirt.shop)\t",
"car_snif = house.group_tree((car, shape)(milk, market,))\t",
"machine(shoes_shirt.shop)\t",
"car_snif = house.group_tree((man, shirt)(hair, life))\t",
"machine(shoes_shirt.shop)\t",
"car_snif = house.group_tree((man, shirt)(hair, life))\t",
"machine(shoes_shirt.shop)\t",
"#sun\t",
"car_snif = house.group_tree((dog, big)(bal, pink))\t",
"machine(shoes_shirt.shop)\t",
"car_snif = house.group_tree((dog, big)(bal, pink))\t",
"machine(shoes_shirt.shop)\t",
"car_snif = house.group_tree((car, shape)(milk, market,))\t",
"machine(shoes_shirt.shop)\t",
"car_snif = house.group_tree((car, shape)(milk, market,))\t",

这个有效:

awk \
'FNR==1 {++f}
f==1 {a[i++]=$0}
f==2 {if ($0~/home_cool/) {gsub(/home_cool/, a[int(j++/2)%2]) }; print > "2.txt"}
f==3 {if ($0~/home_cool/) {gsub(/home_cool/, a[int(k++/2)%3 + 2]) }; print > "3.txt"}' \
    1.txt 0.txt 0-1.txt

除了硬编码 "2.txt" 和 "3.txt" 之外,替代方案包括:

  • 使用通过 -v outfile1=2.txt -v outfile2=3.txt 赋值的变量
  • 将它们替换为 outfile,并使用此 arg 列表:1.txt outfile=2.txt 0.txt outfile=3.txt 0-1.txt
  • 将它们替换为 ARGV[4] 和 ARGV[5],添加一行 f==4 {exit},并使用此 arg 列表:1.txt 0.txt 0-1.txt 2.txt 3.txt

注意事项:

  • 如果某个给定的文件为空,f 就不会递增(因为 FNR==1 永远不会对该文件成立),后续文件的处理也会随之出错。在 gawk 中,可以改用 ENDFILE。参见这个答案: