使用 R 中的另一个特定模式重命名多个文件的特定模式

Rename specific pattern of multiple files with another specific pattern in R

我是 R 的新手,在重命名文件方面需要一些帮助。

这些是文件:

 [1] "MYD11C3.A2003001.006.2015182092934_LST_Day_CMG_subregion.tif"
 [2] "MYD11C3.A2004001.006.2015213013933_LST_Day_CMG_subregion.tif"
 [3] "MYD11C3.A2005001.006.2015243211529_LST_Day_CMG_subregion.tif"
 [4] "MYD11C3.A2006001.006.2015274114332_LST_Day_CMG_subregion.tif"
 [5] "MYD11C3.A2007001.006.2015309201228_LST_Day_CMG_subregion.tif"
 [6] "MYD11C3.A2008001.006.2015338170025_LST_Day_CMG_subregion.tif"
 [7] "MYD11C3.A2009001.006.2016001145426_LST_Day_CMG_subregion.tif"
 [8] "MYD11C3.A2010001.006.2016035025512_LST_Day_CMG_subregion.tif"
 [9] "MYD11C3.A2011001.006.2016053231728_LST_Day_CMG_subregion.tif"
[10] "MYD11C3.A2012001.006.2016106151313_LST_Day_CMG_subregion.tif"
[11] "MYD11C3.A2013001.006.2016189231222_LST_Day_CMG_subregion.tif"
[12] "MYD11C3.A2014001.006.2016198015925_LST_Day_CMG_subregion.tif"
[13] "MYD11C3.A2015001.006.2016223172712_LST_Day_CMG_subregion.tif"
[14] "MYD11C3.A2016001.006.2016242200237_LST_Day_CMG_subregion.tif"
[15] "MYD11C3.A2017001.006.2017032230414_LST_Day_CMG_subregion.tif"
[16] "MYD11C3.A2018001.006.2018032175447_LST_Day_CMG_subregion.tif"
[17] "MYD11C3.A2019001.006.2019035162351_LST_Day_CMG_subregion.tif"

模式“A2003”...“A2019”应重命名为“A2002”...“A2018”。

谢谢!

我会建议这种 tidyverse 方法将字符串分成列,格式化所需的值,然后再次连接所有内容。这里的代码:

library(tidyverse)
#Code
# Shift the year embedded in the second period-separated field of each file
# name back by one. Expects `df` to have a character column `V1`.
df %>%
  # Split on literal periods. The backslash is doubled because '\.' is not a
  # valid escape inside an R string literal.
  separate(V1, into = paste0('V', 1:5), sep = '\\.') %>%
  # Decrement only the four digits that follow the leading "A" and keep the
  # trailing digits as they are. Stripping "001" with gsub() would also eat
  # part of a year such as 2001.
  mutate(V2 = paste0('A', as.numeric(substr(V2, 2, 5)) - 1, substring(V2, 6))) %>%
  # paste() is already vectorised; rowwise() is kept so the result stays a
  # rowwise tibble.
  rowwise() %>%
  # Glue the five pieces back together with periods
  mutate(V = paste(V1, V2, V3, V4, V5, sep = '.')) %>%
  select(V)

输出:

# A tibble: 17 x 1
# Rowwise: 
   V                                                           
   <chr>                                                       
 1 MYD11C3.A2002001.006.2015182092934_LST_Day_CMG_subregion.tif
 2 MYD11C3.A2003001.006.2015213013933_LST_Day_CMG_subregion.tif
 3 MYD11C3.A2004001.006.2015243211529_LST_Day_CMG_subregion.tif
 4 MYD11C3.A2005001.006.2015274114332_LST_Day_CMG_subregion.tif
 5 MYD11C3.A2006001.006.2015309201228_LST_Day_CMG_subregion.tif
 6 MYD11C3.A2007001.006.2015338170025_LST_Day_CMG_subregion.tif
 7 MYD11C3.A2008001.006.2016001145426_LST_Day_CMG_subregion.tif
 8 MYD11C3.A2009001.006.2016035025512_LST_Day_CMG_subregion.tif
 9 MYD11C3.A2010001.006.2016053231728_LST_Day_CMG_subregion.tif
10 MYD11C3.A2011001.006.2016106151313_LST_Day_CMG_subregion.tif
11 MYD11C3.A2012001.006.2016189231222_LST_Day_CMG_subregion.tif
12 MYD11C3.A2013001.006.2016198015925_LST_Day_CMG_subregion.tif
13 MYD11C3.A2014001.006.2016223172712_LST_Day_CMG_subregion.tif
14 MYD11C3.A2015001.006.2016242200237_LST_Day_CMG_subregion.tif
15 MYD11C3.A2016001.006.2017032230414_LST_Day_CMG_subregion.tif
16 MYD11C3.A2017001.006.2018032175447_LST_Day_CMG_subregion.tif
17 MYD11C3.A2018001.006.2019035162351_LST_Day_CMG_subregion.tif

使用了一些数据:

#Data
# Example input: a one-column data frame (`V1`) holding the 17 file names.
df <- data.frame(
  V1 = c(
    "MYD11C3.A2003001.006.2015182092934_LST_Day_CMG_subregion.tif",
    "MYD11C3.A2004001.006.2015213013933_LST_Day_CMG_subregion.tif",
    "MYD11C3.A2005001.006.2015243211529_LST_Day_CMG_subregion.tif",
    "MYD11C3.A2006001.006.2015274114332_LST_Day_CMG_subregion.tif",
    "MYD11C3.A2007001.006.2015309201228_LST_Day_CMG_subregion.tif",
    "MYD11C3.A2008001.006.2015338170025_LST_Day_CMG_subregion.tif",
    "MYD11C3.A2009001.006.2016001145426_LST_Day_CMG_subregion.tif",
    "MYD11C3.A2010001.006.2016035025512_LST_Day_CMG_subregion.tif",
    "MYD11C3.A2011001.006.2016053231728_LST_Day_CMG_subregion.tif",
    "MYD11C3.A2012001.006.2016106151313_LST_Day_CMG_subregion.tif",
    "MYD11C3.A2013001.006.2016189231222_LST_Day_CMG_subregion.tif",
    "MYD11C3.A2014001.006.2016198015925_LST_Day_CMG_subregion.tif",
    "MYD11C3.A2015001.006.2016223172712_LST_Day_CMG_subregion.tif",
    "MYD11C3.A2016001.006.2016242200237_LST_Day_CMG_subregion.tif",
    "MYD11C3.A2017001.006.2017032230414_LST_Day_CMG_subregion.tif",
    "MYD11C3.A2018001.006.2018032175447_LST_Day_CMG_subregion.tif",
    "MYD11C3.A2019001.006.2019035162351_LST_Day_CMG_subregion.tif"
  ),
  stringsAsFactors = FALSE
)

欢迎来到 regex 的世界。

# Example input: a one-column data frame (`V1`) holding the 17 file names.
df <- data.frame(
  V1 = c(
    "MYD11C3.A2003001.006.2015182092934_LST_Day_CMG_subregion.tif",
    "MYD11C3.A2004001.006.2015213013933_LST_Day_CMG_subregion.tif",
    "MYD11C3.A2005001.006.2015243211529_LST_Day_CMG_subregion.tif",
    "MYD11C3.A2006001.006.2015274114332_LST_Day_CMG_subregion.tif",
    "MYD11C3.A2007001.006.2015309201228_LST_Day_CMG_subregion.tif",
    "MYD11C3.A2008001.006.2015338170025_LST_Day_CMG_subregion.tif",
    "MYD11C3.A2009001.006.2016001145426_LST_Day_CMG_subregion.tif",
    "MYD11C3.A2010001.006.2016035025512_LST_Day_CMG_subregion.tif",
    "MYD11C3.A2011001.006.2016053231728_LST_Day_CMG_subregion.tif",
    "MYD11C3.A2012001.006.2016106151313_LST_Day_CMG_subregion.tif",
    "MYD11C3.A2013001.006.2016189231222_LST_Day_CMG_subregion.tif",
    "MYD11C3.A2014001.006.2016198015925_LST_Day_CMG_subregion.tif",
    "MYD11C3.A2015001.006.2016223172712_LST_Day_CMG_subregion.tif",
    "MYD11C3.A2016001.006.2016242200237_LST_Day_CMG_subregion.tif",
    "MYD11C3.A2017001.006.2017032230414_LST_Day_CMG_subregion.tif",
    "MYD11C3.A2018001.006.2018032175447_LST_Day_CMG_subregion.tif",
    "MYD11C3.A2019001.006.2019035162351_LST_Day_CMG_subregion.tif"
  ),
  stringsAsFactors = FALSE
)


# Add two columns next to the original names: the extracted year minus one,
# and the file name with that value written back in place of the year.
df %>%
  as_tibble() %>%
  mutate(
    # Four digits right after ".A", minus one. Backslashes are doubled
    # because "\." and "\d" are not valid escapes in an R string literal.
    current_year = str_extract(V1, pattern = "(?<=\\.A)\\d{4}") %>% as.numeric() - 1,
    # Same pattern; only the first match in each name is replaced.
    new_file_name = str_replace(V1, pattern = "(?<=\\.A)\\d{4}", replacement = as.character(current_year))
  )
  

输出

V1                                       current_year new_file_name                             
   <chr>                                           <dbl> <chr>                                     
 1 MYD11C3.A2003001.006.2015182092934_LST_~         2002 MYD11C3.A2002001.006.2015182092934_LST_Da~
 2 MYD11C3.A2004001.006.2015213013933_LST_~         2003 MYD11C3.A2003001.006.2015213013933_LST_Da~
 3 MYD11C3.A2005001.006.2015243211529_LST_~         2004 MYD11C3.A2004001.006.2015243211529_LST_Da~
 4 MYD11C3.A2006001.006.2015274114332_LST_~         2005 MYD11C3.A2005001.006.2015274114332_LST_Da~
 5 MYD11C3.A2007001.006.2015309201228_LST_~         2006 MYD11C3.A2006001.006.2015309201228_LST_Da~
 6 MYD11C3.A2008001.006.2015338170025_LST_~         2007 MYD11C3.A2007001.006.2015338170025_LST_Da~
 7 MYD11C3.A2009001.006.2016001145426_LST_~         2008 MYD11C3.A2008001.006.2016001145426_LST_Da~
 8 MYD11C3.A2010001.006.2016035025512_LST_~         2009 MYD11C3.A2009001.006.2016035025512_LST_Da~
 9 MYD11C3.A2011001.006.2016053231728_LST_~         2010 MYD11C3.A2010001.006.2016053231728_LST_Da~
10 MYD11C3.A2012001.006.2016106151313_LST_~         2011 MYD11C3.A2011001.006.2016106151313_LST_Da~
11 MYD11C3.A2013001.006.2016189231222_LST_~         2012 MYD11C3.A2012001.006.2016189231222_LST_Da~
12 MYD11C3.A2014001.006.2016198015925_LST_~         2013 MYD11C3.A2013001.006.2016198015925_LST_Da~
13 MYD11C3.A2015001.006.2016223172712_LST_~         2014 MYD11C3.A2014001.006.2016223172712_LST_Da~
14 MYD11C3.A2016001.006.2016242200237_LST_~         2015 MYD11C3.A2015001.006.2016242200237_LST_Da~
15 MYD11C3.A2017001.006.2017032230414_LST_~         2016 MYD11C3.A2016001.006.2017032230414_LST_Da~
16 MYD11C3.A2018001.006.2018032175447_LST_~         2017 MYD11C3.A2017001.006.2018032175447_LST_Da~
17 MYD11C3.A2019001.006.2019035162351_LST_~         2018 MYD11C3.A2018001.006.2019035162351_LST_Da~

对于基础 R 解决方案,您可以结合使用 sapply() 和 sub()。先提取文件名中 ".A" 之后的 4 位数字,再用减 1 后的值替换它们(替换时用 paste0() 把 ".A" 补回去)。

# For each file name, read the four-digit year that follows ".A" and write
# back that year minus one.
# Backslashes are doubled: in an R string literal "\." and "\d" are invalid
# escapes, and "\1" would be the control character U+0001 rather than a
# back-reference to the captured group.
sapply(vec, function(x) {
  num <- as.integer(sub(".*?\\.A(\\d{4}).*", "\\1", x))
  # The match includes ".A", so it is put back in front of the new year
  sub("(\\.A\\d{4})", paste0(".A", num - 1), x)
}, USE.NAMES = FALSE)  # USE.NAMES = FALSE just for cleaner output on SO, no functional reason

 [1] "MYD11C3.A2002001.006.2015182092934_LST_Day_CMG_subregion.tif"
 [2] "MYD11C3.A2003001.006.2015213013933_LST_Day_CMG_subregion.tif"
 [3] "MYD11C3.A2004001.006.2015243211529_LST_Day_CMG_subregion.tif"
 [4] "MYD11C3.A2005001.006.2015274114332_LST_Day_CMG_subregion.tif"
 [5] "MYD11C3.A2006001.006.2015309201228_LST_Day_CMG_subregion.tif"
 [6] "MYD11C3.A2007001.006.2015338170025_LST_Day_CMG_subregion.tif"
 [7] "MYD11C3.A2008001.006.2016001145426_LST_Day_CMG_subregion.tif"
 [8] "MYD11C3.A2009001.006.2016035025512_LST_Day_CMG_subregion.tif"
 [9] "MYD11C3.A2010001.006.2016053231728_LST_Day_CMG_subregion.tif"
[10] "MYD11C3.A2011001.006.2016106151313_LST_Day_CMG_subregion.tif"
[11] "MYD11C3.A2012001.006.2016189231222_LST_Day_CMG_subregion.tif"
[12] "MYD11C3.A2013001.006.2016198015925_LST_Day_CMG_subregion.tif"
[13] "MYD11C3.A2014001.006.2016223172712_LST_Day_CMG_subregion.tif"
[14] "MYD11C3.A2015001.006.2016242200237_LST_Day_CMG_subregion.tif"
[15] "MYD11C3.A2016001.006.2017032230414_LST_Day_CMG_subregion.tif"
[16] "MYD11C3.A2017001.006.2018032175447_LST_Day_CMG_subregion.tif"
[17] "MYD11C3.A2018001.006.2019035162351_LST_Day_CMG_subregion.tif"

数据:

# Example input: the 17 file names as a plain character vector.
vec <- c(
  "MYD11C3.A2003001.006.2015182092934_LST_Day_CMG_subregion.tif",
  "MYD11C3.A2004001.006.2015213013933_LST_Day_CMG_subregion.tif",
  "MYD11C3.A2005001.006.2015243211529_LST_Day_CMG_subregion.tif",
  "MYD11C3.A2006001.006.2015274114332_LST_Day_CMG_subregion.tif",
  "MYD11C3.A2007001.006.2015309201228_LST_Day_CMG_subregion.tif",
  "MYD11C3.A2008001.006.2015338170025_LST_Day_CMG_subregion.tif",
  "MYD11C3.A2009001.006.2016001145426_LST_Day_CMG_subregion.tif",
  "MYD11C3.A2010001.006.2016035025512_LST_Day_CMG_subregion.tif",
  "MYD11C3.A2011001.006.2016053231728_LST_Day_CMG_subregion.tif",
  "MYD11C3.A2012001.006.2016106151313_LST_Day_CMG_subregion.tif",
  "MYD11C3.A2013001.006.2016189231222_LST_Day_CMG_subregion.tif",
  "MYD11C3.A2014001.006.2016198015925_LST_Day_CMG_subregion.tif",
  "MYD11C3.A2015001.006.2016223172712_LST_Day_CMG_subregion.tif",
  "MYD11C3.A2016001.006.2016242200237_LST_Day_CMG_subregion.tif",
  "MYD11C3.A2017001.006.2017032230414_LST_Day_CMG_subregion.tif",
  "MYD11C3.A2018001.006.2018032175447_LST_Day_CMG_subregion.tif",
  "MYD11C3.A2019001.006.2019035162351_LST_Day_CMG_subregion.tif"
)

您可以使用 stringr 中的 str_replace(),它允许把一个函数作为 replacement 参数传入。每个匹配都会调用一次该函数,其返回值将用于替换该匹配。

# Replace the four digits that follow "A" with that number minus one.
# "\\d" is the R-string spelling of the regex \d (a single backslash is an
# invalid escape). The replacement function returns character explicitly
# rather than relying on implicit coercion of a numeric result.
stringr::str_replace(vec, "(?<=A)\\d{4}", function(x) as.character(as.numeric(x) - 1))
#  [1] "MYD11C3.A2002001.006.2015182092934_LST_Day_CMG_subregion.tif"
#  [2] "MYD11C3.A2003001.006.2015213013933_LST_Day_CMG_subregion.tif"
#  [3] "MYD11C3.A2004001.006.2015243211529_LST_Day_CMG_subregion.tif"

数据

# Example input: the first three file names as a character vector.
vec <- c(
  "MYD11C3.A2003001.006.2015182092934_LST_Day_CMG_subregion.tif",
  "MYD11C3.A2004001.006.2015213013933_LST_Day_CMG_subregion.tif",
  "MYD11C3.A2005001.006.2015243211529_LST_Day_CMG_subregion.tif"
)