在 R 中使用 gsub 重命名列中的值
Using gsub in R to rename values in column
我有以下数据框:
structure(list(ID = c("D:/Desktop/BAS1/FA/FPT_LEFT_stats_FA_33714.txt",
"D:/Desktop/BAS1/FA/FPT_LEFT_stats_FA_35377.txt", "D:/Desktop/BAS1/FA/FPT_LEFT_stats_FA_38623.txt",
"D:/Desktop/BAS1/FA/FPT_LEFT_stats_FA_38806.txt", "D:/Desktop/BAS1/FA/FPT_LEFT_stats_FA_39593.txt",
"D:/Desktop/BAS1/FA/FPT_LEFT_stats_FA_39820.txt"), mean = c(0.374389,
0.368736, 0.397192, 0.483207, 0.372074, 0.367745), median = c(0.374017,
0.353222, 0.390063, 0.49433, 0.323613, 0.344656), std = c("0.110854",
"0.144632", "0.128366", "0.106284", "0.145565", "0.178729"),
min = c(0.149913, 0.108011, 0.10917, 0.235377, 0.157571,
0.0690003), max = c(0.650852, 0.692352, 0.672184, 0.658852,
0.723939, 0.707643), count = c(75L, 75L, 64L, 62L, 108L,
42L)), row.names = c(NA, 6L), class = "data.frame")
看起来像:
ID mean median std min max count
1 D:/Desktop/BAS1/FA/FPT_LEFT_stats_FA_33714.txt 0.374389 0.374017 0.110854 0.1499130 0.650852 75
2 D:/Desktop/BAS1/FA/FPT_LEFT_stats_FA_35377.txt 0.368736 0.353222 0.144632 0.1080110 0.692352 75
3 D:/Desktop/BAS1/FA/FPT_LEFT_stats_FA_38623.txt 0.397192 0.390063 0.128366 0.1091700 0.672184 64
4 D:/Desktop/BAS1/FA/FPT_LEFT_stats_FA_38806.txt 0.483207 0.494330 0.106284 0.2353770 0.658852 62
5 D:/Desktop/BAS1/FA/FPT_LEFT_stats_FA_39593.txt 0.372074 0.323613 0.145565 0.1575710 0.723939 108
6 D:/Desktop/BAS1/FA/FPT_LEFT_stats_FA_39820.txt 0.367745 0.344656 0.178729 0.0690003 0.707643 42
我想从 ID 列中删除所有非数字字符。
当我使用 gsub("[^0-9]", "", DF[1,1]) 时,输出中会多出一个前导的 1。我不知道为什么。
我最终想把此函数应用(apply)到 ID 列的所有行,只保留数字部分作为文件 ID。
所需的输出:
ID mean median std min max count
1 33714 0.374389 0.374017 0.110854 0.1499130 0.650852 75
2 35377 0.368736 0.353222 0.144632 0.1080110 0.692352 75
.
.
.
.
告诉我这是否是您要找的东西
df$ID <- gsub("[^0-9]", "", df$ID)
ID mean median std min max count
1 1234567890 0.374389 0.374017 0.110854 0.1499130 0.650852 75
2 1234567890 0.368736 0.353222 0.144632 0.1080110 0.692352 75
3 1234567890 0.397192 0.390063 0.128366 0.1091700 0.672184 64
4 1234567890 0.483207 0.494330 0.106284 0.2353770 0.658852 62
5 1234567890 0.372074 0.323613 0.145565 0.1575710 0.723939 108
6 1234567890 0.367745 0.344656 0.178729 0.0690003 0.707643 42
使用 stringr 包的 str_extract:
df$ID <- str_extract(df$ID, "(\\d)+")
df
ID mean median std min max count
1 1234567890 0.374389 0.374017 0.110854 0.1499130 0.650852 75
2 1234567890 0.368736 0.353222 0.144632 0.1080110 0.692352 75
3 1234567890 0.397192 0.390063 0.128366 0.1091700 0.672184 64
4 1234567890 0.483207 0.494330 0.106284 0.2353770 0.658852 62
5 1234567890 0.372074 0.323613 0.145565 0.1575710 0.723939 108
6 1234567890 0.367745 0.344656 0.178729 0.0690003 0.707643 42
或者根据您的数据使用更具体的正则表达式模式。这是使用 sub 的基础 R 写法。由于路径中的目录名 BAS1 本身含有数字 1,仅删除非数字字符会留下一个前导的 1;下面的模式只捕获最后一个下划线之后、.txt 之前的那段数字:
df$ID <- sub('.*_(\\d+)\\.txt', '\\1', df$ID)
df
# ID mean median std min max count
#1 33714 0.374389 0.374017 0.110854 0.1499130 0.650852 75
#2 35377 0.368736 0.353222 0.144632 0.1080110 0.692352 75
#3 38623 0.397192 0.390063 0.128366 0.1091700 0.672184 64
#4 38806 0.483207 0.494330 0.106284 0.2353770 0.658852 62
#5 39593 0.372074 0.323613 0.145565 0.1575710 0.723939 108
#6 39820 0.367745 0.344656 0.178729 0.0690003 0.707643 42
我们可以使用 readr 包的 parse_number:
library(dplyr)
df %>%
mutate(ID = readr::parse_number(basename(ID)))
ID mean median std min max count
1 33714 0.374389 0.374017 0.110854 0.1499130 0.650852 75
2 35377 0.368736 0.353222 0.144632 0.1080110 0.692352 75
3 38623 0.397192 0.390063 0.128366 0.1091700 0.672184 64
4 38806 0.483207 0.494330 0.106284 0.2353770 0.658852 62
5 39593 0.372074 0.323613 0.145565 0.1575710 0.723939 108
6 39820 0.367745 0.344656 0.178729 0.0690003 0.707643 42
我有以下数据框:
structure(list(ID = c("D:/Desktop/BAS1/FA/FPT_LEFT_stats_FA_33714.txt",
"D:/Desktop/BAS1/FA/FPT_LEFT_stats_FA_35377.txt", "D:/Desktop/BAS1/FA/FPT_LEFT_stats_FA_38623.txt",
"D:/Desktop/BAS1/FA/FPT_LEFT_stats_FA_38806.txt", "D:/Desktop/BAS1/FA/FPT_LEFT_stats_FA_39593.txt",
"D:/Desktop/BAS1/FA/FPT_LEFT_stats_FA_39820.txt"), mean = c(0.374389,
0.368736, 0.397192, 0.483207, 0.372074, 0.367745), median = c(0.374017,
0.353222, 0.390063, 0.49433, 0.323613, 0.344656), std = c("0.110854",
"0.144632", "0.128366", "0.106284", "0.145565", "0.178729"),
min = c(0.149913, 0.108011, 0.10917, 0.235377, 0.157571,
0.0690003), max = c(0.650852, 0.692352, 0.672184, 0.658852,
0.723939, 0.707643), count = c(75L, 75L, 64L, 62L, 108L,
42L)), row.names = c(NA, 6L), class = "data.frame")
看起来像:
ID mean median std min max count
1 D:/Desktop/BAS1/FA/FPT_LEFT_stats_FA_33714.txt 0.374389 0.374017 0.110854 0.1499130 0.650852 75
2 D:/Desktop/BAS1/FA/FPT_LEFT_stats_FA_35377.txt 0.368736 0.353222 0.144632 0.1080110 0.692352 75
3 D:/Desktop/BAS1/FA/FPT_LEFT_stats_FA_38623.txt 0.397192 0.390063 0.128366 0.1091700 0.672184 64
4 D:/Desktop/BAS1/FA/FPT_LEFT_stats_FA_38806.txt 0.483207 0.494330 0.106284 0.2353770 0.658852 62
5 D:/Desktop/BAS1/FA/FPT_LEFT_stats_FA_39593.txt 0.372074 0.323613 0.145565 0.1575710 0.723939 108
6 D:/Desktop/BAS1/FA/FPT_LEFT_stats_FA_39820.txt 0.367745 0.344656 0.178729 0.0690003 0.707643 42
我想从 ID 列中删除所有非数字字符。
当我使用 gsub("[^0-9]", "", DF[1,1]) 时,输出中会多出一个前导的 1。我不知道为什么。
我最终想把此函数应用(apply)到 ID 列的所有行,只保留数字部分作为文件 ID。
所需的输出:
ID mean median std min max count
1 33714 0.374389 0.374017 0.110854 0.1499130 0.650852 75
2 35377 0.368736 0.353222 0.144632 0.1080110 0.692352 75
.
.
.
.
告诉我这是否是您要找的东西
df$ID <- gsub("[^0-9]", "", df$ID)
ID mean median std min max count
1 1234567890 0.374389 0.374017 0.110854 0.1499130 0.650852 75
2 1234567890 0.368736 0.353222 0.144632 0.1080110 0.692352 75
3 1234567890 0.397192 0.390063 0.128366 0.1091700 0.672184 64
4 1234567890 0.483207 0.494330 0.106284 0.2353770 0.658852 62
5 1234567890 0.372074 0.323613 0.145565 0.1575710 0.723939 108
6 1234567890 0.367745 0.344656 0.178729 0.0690003 0.707643 42
使用 stringr 包的 str_extract:
df$ID <- str_extract(df$ID, "(\\d)+")
df
ID mean median std min max count
1 1234567890 0.374389 0.374017 0.110854 0.1499130 0.650852 75
2 1234567890 0.368736 0.353222 0.144632 0.1080110 0.692352 75
3 1234567890 0.397192 0.390063 0.128366 0.1091700 0.672184 64
4 1234567890 0.483207 0.494330 0.106284 0.2353770 0.658852 62
5 1234567890 0.372074 0.323613 0.145565 0.1575710 0.723939 108
6 1234567890 0.367745 0.344656 0.178729 0.0690003 0.707643 42
或者根据您的数据使用更具体的正则表达式模式。这是使用基础 R 函数 sub 的写法。由于路径中的目录名 BAS1 本身含有数字 1,仅删除非数字字符会留下一个前导的 1;下面的模式只捕获最后一个下划线之后、.txt 之前的那段数字:
df$ID <- sub('.*_(\\d+)\\.txt', '\\1', df$ID)
df
# ID mean median std min max count
#1 33714 0.374389 0.374017 0.110854 0.1499130 0.650852 75
#2 35377 0.368736 0.353222 0.144632 0.1080110 0.692352 75
#3 38623 0.397192 0.390063 0.128366 0.1091700 0.672184 64
#4 38806 0.483207 0.494330 0.106284 0.2353770 0.658852 62
#5 39593 0.372074 0.323613 0.145565 0.1575710 0.723939 108
#6 39820 0.367745 0.344656 0.178729 0.0690003 0.707643 42
我们可以使用 readr 包的 parse_number:
library(dplyr)
df %>%
mutate(ID = readr::parse_number(basename(ID)))
ID mean median std min max count
1 33714 0.374389 0.374017 0.110854 0.1499130 0.650852 75
2 35377 0.368736 0.353222 0.144632 0.1080110 0.692352 75
3 38623 0.397192 0.390063 0.128366 0.1091700 0.672184 64
4 38806 0.483207 0.494330 0.106284 0.2353770 0.658852 62
5 39593 0.372074 0.323613 0.145565 0.1575710 0.723939 108
6 39820 0.367745 0.344656 0.178729 0.0690003 0.707643 42