更改 df 中具有相同名称的列的名称

Change name of columns that have the same name in a df

我有以下 df,其中有两列的名称相同:

# Reproducible dump of the question's data frame. The columns still carry the
# default names X, X.1, ..., X.17 and the real header text sits in the first
# element of every column. X.9 and X.10 hold the same header text
# ("Nº pb cob. ? 15X"), which is the duplicate the question is about.
dput(df_test)
structure(list(X = c("Gen", "ABCB1", "ABCG2", "CES1"), X.1 = c("Prioridad del gen", 
"Candidato", "Candidato", "Candidato"), X.2 = c("Región codificante", 
"2110", "1526", "3533"), X.3 = c("Categoría Reg. Codif.", "intron", 
"intron", "intron"), X.4 = c("Alineamiento múltiple", "No", "No", 
"No"), X.5 = c("Cromosoma", "7", "4", "16"), X.6 = c("Posición inicial", 
"87153584", "89096060", "55855151"), X.7 = c("Posición final", 
"87153585", "89096061", "55855151"), X.8 = c("Tamaño (pb)", "2", 
"2", "1"), X.9 = c("Nº pb cob. ? 15X", "0", "1", "0"), X.10 = c("Nº pb cob. ? 15X", 
"2", "1", "1"), X.11 = c("% pb cob. ? 15X", "0%", "50%", "0%"
), X.12 = c("Cobertura media", "3", "14,50", "0"), X.13 = c("Nº pb sin cubrir", 
"0", "0", "1"), X.14 = c("Nº pb cob. [1-5]", "2", "0", "0"), 
    X.15 = c("Nº pb cob. [6-14]", "0", "1", "0"), X.16 = c("Nº pb cob. [15-29]", 
    "0", "1", "0"), X.17 = c("Nº pb cob. ? 30X", "0", "0", "0"
    )), class = "data.frame", row.names = c(NA, -4L))

因为原文件的第一行(row)是空的,所以真正的表头被读成了 df 的第一行数据,而没有被用作列名。因此,我使用 row_to_names 把包含列名的那一行提升为表头:

# Read the tab-delimited file, then turn the first data row into the column
# names and drop it from the data via row_to_names() (presumably
# janitor::row_to_names -- confirm which package is attached).
# `TRUE` is spelled out because `T` is an ordinary variable that can be
# reassigned.
df1 <- read.delim("file", header = TRUE) %>% row_to_names(row_number = 1)

现在我需要将列“Nº pb cob. ? 15X”分别重命名为“Nº pb cob. ≥ 15X”和“Nº pb cob. ≤ 15X”。我试过:

有人能给我一些建议吗?

谢谢!!

您可以使用反引号手动完成:

library(tidyverse)
# Minimal example: a one-column tibble whose name is not a syntactic R name,
# so it has to be wrapped in backticks wherever it is referenced.
df <- tibble(`Nº pb cob. ? 15X` = 1:2)
df
#> # A tibble: 2 x 1
#>   `Nº pb cob. ? 15X`
#>                <int>
#> 1                  1
#> 2                  2
# rename() takes `new name` = `old name`; both sides are backtick-quoted.
df %>% rename(`Nº pb cob. ≤ 15X` = `Nº pb cob. ? 15X`)
#> # A tibble: 2 x 1
#>   `Nº pb cob. ≤ 15X`
#>                <int>
#> 1                  1
#> 2                  2

reprex package (v2.0.0)

创建于 2022-02-22

我没有找到将这些特殊字符放在数据框变量名称中的方法,所以我使用了一个小的变体。

我们的想法是创建一个清理数据的函数,这样您就可以将此函数应用于所有文件。

library(stringr)
library(purrr)

# Example input: every column is character, the columns carry the default
# names X, X.1, ..., X.17, and the first row holds the real header text.
# X.9 and X.10 share the same header text.
test <- data.frame(
  X    = c("Gen", "ABCB1", "ABCG2", "CES1"),
  X.1  = c("Prioridad del gen", "Candidato", "Candidato", "Candidato"),
  X.2  = c("Región codificante", "2110", "1526", "3533"),
  X.3  = c("Categoría Reg. Codif.", "intron", "intron", "intron"),
  X.4  = c("Alineamiento múltiple", "No", "No", "No"),
  X.5  = c("Cromosoma", "7", "4", "16"),
  X.6  = c("Posición inicial", "87153584", "89096060", "55855151"),
  X.7  = c("Posición final", "87153585", "89096061", "55855151"),
  X.8  = c("Tamaño (pb)", "2", "2", "1"),
  X.9  = c("Nº pb cob. ? 15X", "0", "1", "0"),
  X.10 = c("Nº pb cob. ? 15X", "2", "1", "1"),
  X.11 = c("% pb cob. ? 15X", "0%", "50%", "0%"),
  X.12 = c("Cobertura media", "3", "14,50", "0"),
  X.13 = c("Nº pb sin cubrir", "0", "0", "1"),
  X.14 = c("Nº pb cob. [1-5]", "2", "0", "0"),
  X.15 = c("Nº pb cob. [6-14]", "0", "1", "0"),
  X.16 = c("Nº pb cob. [15-29]", "0", "1", "0"),
  X.17 = c("Nº pb cob. ? 30X", "0", "0", "0"),
  stringsAsFactors = FALSE
)

# Promote the first row of `df` to column names and drop that row from the
# data, disambiguating the columns whose header reads "Nº pb cob. ? 15X".
#
# The "?" in the k-th such column (left to right) is replaced by the k-th
# entry of c(">=", "<="), so two duplicated headers become
# "Nº pb cob. >= 15X" and "Nº pb cob. <= 15X". Headers that merely contain
# that text (e.g. "% pb cob. ? 15X") are left untouched because the match is
# on the whole header.
#
# Args:
#   df: a data frame with at least one row; its first row holds the header.
#
# Returns:
#   `df` without its first row, with the (cleaned) first-row values as column
#   names. Always a data frame, even when `df` has a single column.
#
# Errors if `df` is not a data frame, has no rows, or contains more matching
# columns than there are replacement symbols.
clean_df_names <- function(df) {
  stopifnot(is.data.frame(df), nrow(df) >= 1L)

  # The real header was read in as the first data row; flatten it to a plain
  # character vector.
  df_names <- as.character(unlist(df[1, , drop = FALSE], use.names = FALSE))

  repeated_names <- which(df_names == "Nº pb cob. ? 15X")

  # ASCII stand-ins for the comparison signs. "\u2265" / "\u2264" are the
  # Unicode equivalents; whether they survive as column names depends on the
  # session encoding, so the ASCII forms are kept here.
  name_symbols <- c(">=", "<=")

  if (length(repeated_names) > length(name_symbols)) {
    stop(
      "Found ", length(repeated_names), " columns named 'Nº pb cob. ? 15X' ",
      "but only ", length(name_symbols), " replacement symbols are defined.",
      call. = FALSE
    )
  }

  # fixed = TRUE matches the "?" literally, so no regex escaping is needed
  # (the string literal '\?' is not a valid escape in R and fails to parse).
  new_names <- vapply(
    seq_along(repeated_names),
    function(k) {
      sub("?", name_symbols[k], df_names[repeated_names[k]], fixed = TRUE)
    },
    character(1)
  )

  df_names[repeated_names] <- new_names

  # drop = FALSE keeps a one-column input as a data frame.
  new_df <- df[-1, , drop = FALSE]

  setNames(new_df, df_names)
}

# Apply the cleaning function to the example data, overwriting `test` with the
# result, then inspect the resulting column names and types with str().
test <- clean_df_names(test)

str(test)
#> 'data.frame':    3 obs. of  18 variables:
#>  $ Gen                  : chr  "ABCB1" "ABCG2" "CES1"
#>  $ Prioridad del gen    : chr  "Candidato" "Candidato" "Candidato"
#>  $ Región codificante   : chr  "2110" "1526" "3533"
#>  $ Categoría Reg. Codif.: chr  "intron" "intron" "intron"
#>  $ Alineamiento múltiple: chr  "No" "No" "No"
#>  $ Cromosoma            : chr  "7" "4" "16"
#>  $ Posición inicial     : chr  "87153584" "89096060" "55855151"
#>  $ Posición final       : chr  "87153585" "89096061" "55855151"
#>  $ Tamaño (pb)          : chr  "2" "2" "1"
#>  $ Nº pb cob. >= 15X    : chr  "0" "1" "0"
#>  $ Nº pb cob. <= 15X    : chr  "2" "1" "1"
#>  $ % pb cob. ? 15X      : chr  "0%" "50%" "0%"
#>  $ Cobertura media      : chr  "3" "14,50" "0"
#>  $ Nº pb sin cubrir     : chr  "0" "0" "1"
#>  $ Nº pb cob. [1-5]     : chr  "2" "0" "0"
#>  $ Nº pb cob. [6-14]    : chr  "0" "1" "0"
#>  $ Nº pb cob. [15-29]   : chr  "0" "1" "0"
#>  $ Nº pb cob. ? 30X     : chr  "0" "0" "0"

reprex package (v2.0.1)

于 2022-02-22 创建