更改 df 中具有相同名称的列的名称
Change name of columns that have the same name in a df
我有以下 df,其中有两列被标记为相同的名称:
dput(df_test)
structure(list(X = c("Gen", "ABCB1", "ABCG2", "CES1"), X.1 = c("Prioridad del gen",
"Candidato", "Candidato", "Candidato"), X.2 = c("Región codificante",
"2110", "1526", "3533"), X.3 = c("Categoría Reg. Codif.", "intron",
"intron", "intron"), X.4 = c("Alineamiento múltiple", "No", "No",
"No"), X.5 = c("Cromosoma", "7", "4", "16"), X.6 = c("Posición inicial",
"87153584", "89096060", "55855151"), X.7 = c("Posición final",
"87153585", "89096061", "55855151"), X.8 = c("Tamaño (pb)", "2",
"2", "1"), X.9 = c("Nº pb cob. ? 15X", "0", "1", "0"), X.10 = c("Nº pb cob. ? 15X",
"2", "1", "1"), X.11 = c("% pb cob. ? 15X", "0%", "50%", "0%"
), X.12 = c("Cobertura media", "3", "14,50", "0"), X.13 = c("Nº pb sin cubrir",
"0", "0", "1"), X.14 = c("Nº pb cob. [1-5]", "2", "0", "0"),
X.15 = c("Nº pb cob. [6-14]", "0", "1", "0"), X.16 = c("Nº pb cob. [15-29]",
"0", "1", "0"), X.17 = c("Nº pb cob. ? 30X", "0", "0", "0"
)), class = "data.frame", row.names = c(NA, -4L))
因为原文件中第一行是空的,所以真正的表头变成了 df 的一部分,而不是作为表头使用。因此,我使用 row_to_names 把包含名称的那一行提升为列名:
# Read the file, then promote the first data row (which holds the real
# header) to column names. `TRUE` is spelled out because `T` is an ordinary
# variable that can be reassigned.
df1 <- read.delim("file", header = TRUE) %>%
  row_to_names(row_number = 1)
现在我需要将列“Nº pb cob. ? 15X”分别重命名为“Nº pb cob. ≥ 15X”和“Nº pb cob. ≤ 15X”。我试过:
在 row_to_names() 之后使用 clean_names(),但没有改变任何东西。
rename_vars 和 rename_at 也没有成功。
df1 <- rename_at(df1, 10, ~"Num pb cob. ≥ 15X")
`combine_names()` 中的错误:
! 无法将重复变量重命名为 `{name}`。
运行 `rlang::last_error()` 查看错误发生的位置。
有人能给我一些建议吗?
谢谢!!
您可以使用反引号手动完成:
library(tidyverse)
df <-tibble(`Nº pb cob. ? 15X` = seq(2))
df
#> # A tibble: 2 x 1
#> `Nº pb cob. ? 15X`
#> <int>
#> 1 1
#> 2 2
rename(df, `Nº pb cob. ≤ 15X` = `Nº pb cob. ? 15X`)
#> # A tibble: 2 x 1
#> `Nº pb cob. ≤ 15X`
#> <int>
#> 1 1
#> 2 2
由 reprex package (v2.0.0) 创建于 2022-02-22
我没有找到将这些特殊字符放在数据框变量名称中的方法,所以我使用了一个小的变体。
我们的想法是创建一个清理数据的函数,这样您就可以将此函数应用于所有文件。
library(stringr)
library(purrr)
test <- structure(
list(
X = c("Gen", "ABCB1", "ABCG2", "CES1"),
X.1 = c("Prioridad del gen","Candidato", "Candidato", "Candidato"),
X.2 = c("Región codificante","2110", "1526", "3533"),
X.3 = c("Categoría Reg. Codif.", "intron", "intron", "intron"),
X.4 = c("Alineamiento múltiple", "No", "No", "No"),
X.5 = c("Cromosoma", "7", "4", "16"),
X.6 = c("Posición inicial", "87153584", "89096060", "55855151"),
X.7 = c("Posición final", "87153585", "89096061", "55855151"),
X.8 = c("Tamaño (pb)", "2", "2", "1"),
X.9 = c("Nº pb cob. ? 15X", "0", "1", "0"),
X.10 = c("Nº pb cob. ? 15X", "2", "1", "1"),
X.11 = c("% pb cob. ? 15X", "0%", "50%", "0%"),
X.12 = c("Cobertura media", "3", "14,50", "0"),
X.13 = c("Nº pb sin cubrir", "0", "0", "1"),
X.14 = c("Nº pb cob. [1-5]", "2", "0", "0"),
X.15 = c("Nº pb cob. [6-14]", "0", "1", "0"),
X.16 = c("Nº pb cob. [15-29]", "0", "1", "0"),
X.17 = c("Nº pb cob. ? 30X", "0", "0", "0")),
class = "data.frame", row.names = c(NA, -4L))
# Promote the first row of a data frame to column names, disambiguating a
# header that occurs more than once.
#
# Arguments:
#   df      Data frame whose first row holds the real column names.
#   target  Header text that is duplicated and contains a literal "?".
#   symbols Replacements for the "?" in successive occurrences of `target`,
#           in column order. Any strings work, e.g. c("\u2265", "\u2264").
#
# Returns `df` without its first row, with the (de-duplicated) first-row
# values as column names. Headers that merely contain `target` as a
# substring, or differ from it in any way, are left unchanged.
clean_df_names <- function(df,
                           target = "Nº pb cob. ? 15X",
                           symbols = c(">=", "<=")) {
  stopifnot(is.data.frame(df), nrow(df) >= 1L)

  # Convert per column so factor columns yield their labels, not integer
  # codes; drop = FALSE keeps one-column inputs as data frames.
  df_names <- vapply(
    df[1, , drop = FALSE], as.character, character(1),
    USE.NAMES = FALSE
  )

  repeated_names <- which(df_names == target)
  if (length(repeated_names) > length(symbols)) {
    stop(
      "Found ", length(repeated_names), " columns named '", target,
      "' but only ", length(symbols), " replacement symbols were supplied.",
      call. = FALSE
    )
  }

  # fixed = TRUE matches "?" literally, so no regex escaping is needed.
  # vapply() returns character(0) when there is nothing to rename.
  df_names[repeated_names] <- vapply(
    symbols[seq_along(repeated_names)],
    function(symbol) sub("?", symbol, target, fixed = TRUE),
    character(1),
    USE.NAMES = FALSE
  )

  setNames(df[-1, , drop = FALSE], df_names)
}
test <- clean_df_names(test)
str(test)
#> 'data.frame': 3 obs. of 18 variables:
#> $ Gen : chr "ABCB1" "ABCG2" "CES1"
#> $ Prioridad del gen : chr "Candidato" "Candidato" "Candidato"
#> $ Región codificante : chr "2110" "1526" "3533"
#> $ Categoría Reg. Codif.: chr "intron" "intron" "intron"
#> $ Alineamiento múltiple: chr "No" "No" "No"
#> $ Cromosoma : chr "7" "4" "16"
#> $ Posición inicial : chr "87153584" "89096060" "55855151"
#> $ Posición final : chr "87153585" "89096061" "55855151"
#> $ Tamaño (pb) : chr "2" "2" "1"
#> $ Nº pb cob. >= 15X : chr "0" "1" "0"
#> $ Nº pb cob. <= 15X : chr "2" "1" "1"
#> $ % pb cob. ? 15X : chr "0%" "50%" "0%"
#> $ Cobertura media : chr "3" "14,50" "0"
#> $ Nº pb sin cubrir : chr "0" "0" "1"
#> $ Nº pb cob. [1-5] : chr "2" "0" "0"
#> $ Nº pb cob. [6-14] : chr "0" "1" "0"
#> $ Nº pb cob. [15-29] : chr "0" "1" "0"
#> $ Nº pb cob. ? 30X : chr "0" "0" "0"
由 reprex package (v2.0.1) 创建于 2022-02-22
我有以下 df,其中有两列被标记为相同的名称:
dput(df_test)
structure(list(X = c("Gen", "ABCB1", "ABCG2", "CES1"), X.1 = c("Prioridad del gen",
"Candidato", "Candidato", "Candidato"), X.2 = c("Región codificante",
"2110", "1526", "3533"), X.3 = c("Categoría Reg. Codif.", "intron",
"intron", "intron"), X.4 = c("Alineamiento múltiple", "No", "No",
"No"), X.5 = c("Cromosoma", "7", "4", "16"), X.6 = c("Posición inicial",
"87153584", "89096060", "55855151"), X.7 = c("Posición final",
"87153585", "89096061", "55855151"), X.8 = c("Tamaño (pb)", "2",
"2", "1"), X.9 = c("Nº pb cob. ? 15X", "0", "1", "0"), X.10 = c("Nº pb cob. ? 15X",
"2", "1", "1"), X.11 = c("% pb cob. ? 15X", "0%", "50%", "0%"
), X.12 = c("Cobertura media", "3", "14,50", "0"), X.13 = c("Nº pb sin cubrir",
"0", "0", "1"), X.14 = c("Nº pb cob. [1-5]", "2", "0", "0"),
X.15 = c("Nº pb cob. [6-14]", "0", "1", "0"), X.16 = c("Nº pb cob. [15-29]",
"0", "1", "0"), X.17 = c("Nº pb cob. ? 30X", "0", "0", "0"
)), class = "data.frame", row.names = c(NA, -4L))
因为原文件中第一行是空的,所以真正的表头变成了 df 的一部分,而不是作为表头使用。因此,我使用 row_to_names 把包含名称的那一行提升为列名:
# Read the file, then promote the first data row (which holds the real
# header) to column names. `TRUE` is spelled out because `T` is an ordinary
# variable that can be reassigned.
df1 <- read.delim("file", header = TRUE) %>%
  row_to_names(row_number = 1)
现在我需要将列“Nº pb cob. ? 15X”分别重命名为“Nº pb cob. ≥ 15X”和“Nº pb cob. ≤ 15X”。我试过:
在 row_to_names() 之后使用 clean_names(),但没有改变任何东西。
rename_vars 和 rename_at 也没有成功。
df1 <- rename_at(df1, 10, ~"Num pb cob. ≥ 15X")
`combine_names()` 中的错误:
! 无法将重复变量重命名为 `{name}`。
运行 `rlang::last_error()` 查看错误发生的位置。
有人能给我一些建议吗?
谢谢!!
您可以使用反引号手动完成:
library(tidyverse)
df <-tibble(`Nº pb cob. ? 15X` = seq(2))
df
#> # A tibble: 2 x 1
#> `Nº pb cob. ? 15X`
#> <int>
#> 1 1
#> 2 2
rename(df, `Nº pb cob. ≤ 15X` = `Nº pb cob. ? 15X`)
#> # A tibble: 2 x 1
#> `Nº pb cob. ≤ 15X`
#> <int>
#> 1 1
#> 2 2
由 reprex package (v2.0.0) 创建于 2022-02-22
我没有找到将这些特殊字符放在数据框变量名称中的方法,所以我使用了一个小的变体。
我们的想法是创建一个清理数据的函数,这样您就可以将此函数应用于所有文件。
library(stringr)
library(purrr)
test <- structure(
list(
X = c("Gen", "ABCB1", "ABCG2", "CES1"),
X.1 = c("Prioridad del gen","Candidato", "Candidato", "Candidato"),
X.2 = c("Región codificante","2110", "1526", "3533"),
X.3 = c("Categoría Reg. Codif.", "intron", "intron", "intron"),
X.4 = c("Alineamiento múltiple", "No", "No", "No"),
X.5 = c("Cromosoma", "7", "4", "16"),
X.6 = c("Posición inicial", "87153584", "89096060", "55855151"),
X.7 = c("Posición final", "87153585", "89096061", "55855151"),
X.8 = c("Tamaño (pb)", "2", "2", "1"),
X.9 = c("Nº pb cob. ? 15X", "0", "1", "0"),
X.10 = c("Nº pb cob. ? 15X", "2", "1", "1"),
X.11 = c("% pb cob. ? 15X", "0%", "50%", "0%"),
X.12 = c("Cobertura media", "3", "14,50", "0"),
X.13 = c("Nº pb sin cubrir", "0", "0", "1"),
X.14 = c("Nº pb cob. [1-5]", "2", "0", "0"),
X.15 = c("Nº pb cob. [6-14]", "0", "1", "0"),
X.16 = c("Nº pb cob. [15-29]", "0", "1", "0"),
X.17 = c("Nº pb cob. ? 30X", "0", "0", "0")),
class = "data.frame", row.names = c(NA, -4L))
# Promote the first row of a data frame to column names, disambiguating a
# header that occurs more than once.
#
# Arguments:
#   df      Data frame whose first row holds the real column names.
#   target  Header text that is duplicated and contains a literal "?".
#   symbols Replacements for the "?" in successive occurrences of `target`,
#           in column order. Any strings work, e.g. c("\u2265", "\u2264").
#
# Returns `df` without its first row, with the (de-duplicated) first-row
# values as column names. Headers that merely contain `target` as a
# substring, or differ from it in any way, are left unchanged.
clean_df_names <- function(df,
                           target = "Nº pb cob. ? 15X",
                           symbols = c(">=", "<=")) {
  stopifnot(is.data.frame(df), nrow(df) >= 1L)

  # Convert per column so factor columns yield their labels, not integer
  # codes; drop = FALSE keeps one-column inputs as data frames.
  df_names <- vapply(
    df[1, , drop = FALSE], as.character, character(1),
    USE.NAMES = FALSE
  )

  repeated_names <- which(df_names == target)
  if (length(repeated_names) > length(symbols)) {
    stop(
      "Found ", length(repeated_names), " columns named '", target,
      "' but only ", length(symbols), " replacement symbols were supplied.",
      call. = FALSE
    )
  }

  # fixed = TRUE matches "?" literally, so no regex escaping is needed.
  # vapply() returns character(0) when there is nothing to rename.
  df_names[repeated_names] <- vapply(
    symbols[seq_along(repeated_names)],
    function(symbol) sub("?", symbol, target, fixed = TRUE),
    character(1),
    USE.NAMES = FALSE
  )

  setNames(df[-1, , drop = FALSE], df_names)
}
test <- clean_df_names(test)
str(test)
#> 'data.frame': 3 obs. of 18 variables:
#> $ Gen : chr "ABCB1" "ABCG2" "CES1"
#> $ Prioridad del gen : chr "Candidato" "Candidato" "Candidato"
#> $ Región codificante : chr "2110" "1526" "3533"
#> $ Categoría Reg. Codif.: chr "intron" "intron" "intron"
#> $ Alineamiento múltiple: chr "No" "No" "No"
#> $ Cromosoma : chr "7" "4" "16"
#> $ Posición inicial : chr "87153584" "89096060" "55855151"
#> $ Posición final : chr "87153585" "89096061" "55855151"
#> $ Tamaño (pb) : chr "2" "2" "1"
#> $ Nº pb cob. >= 15X : chr "0" "1" "0"
#> $ Nº pb cob. <= 15X : chr "2" "1" "1"
#> $ % pb cob. ? 15X : chr "0%" "50%" "0%"
#> $ Cobertura media : chr "3" "14,50" "0"
#> $ Nº pb sin cubrir : chr "0" "0" "1"
#> $ Nº pb cob. [1-5] : chr "2" "0" "0"
#> $ Nº pb cob. [6-14] : chr "0" "1" "0"
#> $ Nº pb cob. [15-29] : chr "0" "1" "0"
#> $ Nº pb cob. ? 30X : chr "0" "0" "0"
由 reprex package (v2.0.1) 创建于 2022-02-22