str_replace_all 错误地赋值
str_replace_all mistakenly assigning values
我正在尝试替换以下数据框(VariantsGenomeQuails,下面是它的 dput() 输出)中 day 列的值。
structure(list(Segment = c(5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L,
5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L,
5L, 5L, 5L, 5L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), Position = c(1550L,
1550L, 1550L, 1550L, 1550L, 1550L, 1550L, 1550L, 1550L, 1550L,
1550L, 1550L, 1550L, 1550L, 1550L, 1550L, 1550L, 1550L, 1550L,
1550L, 1550L, 1550L, 1550L, 1550L, 1550L, 1550L, 1550L, 1550L,
1550L, 100L, 100L, 100L, 100L, 100L, 100L, 100L, 100L, 100L,
100L, 100L, 100L, 100L, 100L, 100L, 100L, 100L, 100L, 100L, 100L,
100L, 100L, 100L, 100L, 100L, 2327L, 2327L, 2327L, 2327L, 2327L,
2327L, 2327L, 2327L, 2327L, 2327L, 2327L, 2327L, 2327L, 2327L
), Quail = c(52L, 53L, 54L, 12L, 36L, 48L, 59L, 11L, 12L, 36L,
48L, 59L, 52L, 53L, 54L, 52L, 53L, 54L, 11L, 12L, 48L, 59L, 59L,
11L, 36L, 59L, 52L, 53L, 54L, 52L, 53L, 54L, 36L, 59L, 36L, 48L,
59L, 52L, 53L, 54L, 52L, 53L, 54L, 36L, 48L, 59L, 36L, 59L, 36L,
48L, 59L, 52L, 53L, 54L, 52L, 53L, 54L, 36L, 48L, 59L, 11L, 11L,
12L, 36L, 48L, 59L, 36L, 59L), Freq = c(0.443883, 0.440835, 0.477273,
0.761589, 0.186821, 0.072325, 0.748305, 0.986968, 0.99361, 0.664921,
0.188847, 0.858921, 0.960804, 0.102041, 0.323194, 0.2, 0.449976,
0.630868, 0.958506, 0.743932, 0.257758, 0.886377, 0.038241, 0.992894,
0.633987, 0.564021, 0.054054, 0.068994, 0.200188, 0.091693, 0.256094,
0.165732, 0.988798, 0.46675, 0.997898, 0.954168, 0.993462, 0.996931,
0.932008, 0.998634, 0.957213, 0.858198, 0.22418, 0.910005, 0.045072,
0.731313, 0.995946, 0.877519, 0.998066, 0.999401, 0.953812, 0.02749,
0.043711, 0.065646, 0.032982, 0.025522, 0.023756, 0.02199, 0.020975,
0.021915, 0.026906, 0.029056, 0.025562, 0.031411, 0.021782, 0.024584,
0.033382, 0.026406), Group = structure(c(4L, 4L, 4L, 1L, 4L,
2L, 3L, 1L, 1L, 4L, 2L, 3L, 4L, 4L, 4L, 4L, 4L, 4L, 1L, 1L, 2L,
3L, 3L, 1L, 4L, 3L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 3L, 4L, 2L, 3L,
4L, 4L, 4L, 4L, 4L, 4L, 4L, 2L, 3L, 4L, 3L, 4L, 2L, 3L, 4L, 4L,
4L, 4L, 4L, 4L, 4L, 2L, 3L, 1L, 1L, 1L, 4L, 2L, 3L, 4L, 3L), .Label = c("var",
"varL", "varLQ", "varQ"), class = "factor"), Expo = structure(c(2L,
2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L,
2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 1L,
1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L), .Label = c("DC", "DI"), class = "factor"), day = c("3",
"3", "3", "3", "3", "3", "3", "7", "7", "7", "7", "7", "7", "7",
"7", "5", "5", "5", "5", "5", "5", "5", "1", "8", "8", "8", "1",
"1", "1", "3", "3", "3", "3", "3", "7", "7", "7", "7", "7", "7",
"5", "5", "5", "5", "5", "5", "1", "1", "8", "8", "8", "1", "1",
"1", "3", "3", "3", "3", "3", "3", "7", "5", "5", "5", "5", "5",
"1", "1")), row.names = c(NA, -68L), class = "data.frame")
为此,我制作了以下列表:
# Lookup table for the day labels: each name is an old label, each value is
# the label that should replace it.
p <- c(
  "1" = "3",
  "3" = "5",
  "5" = "7",
  "7" = "9",
  "8" = "10"
)
并尝试使用以下内容替换:
library(dplyr)    # mutate() and case_when() come from dplyr, not stringr
library(stringr)  # str_replace_all()

# Substitute the day labels of the "DC" rows; every other row keeps its label.
# NOTE: when str_replace_all() is given a named vector, the name/value pairs of
# `p` are applied one after another to the same strings, so the output of
# "1" -> "3" is matched again by "3" -> "5", and so on. The call is kept exactly
# as posted because it reproduces the behaviour described in the question.
VariantsGenomeQuails.sub <-
  VariantsGenomeQuails %>%
  mutate(day = case_when(Expo == "DC" ~ str_replace_all(day, p),
                         TRUE ~ as.character(day)))
这样做之后,替换得到的值只有 9 和 10,其他替换值都没有出现。
如果我尝试用字母而不是数字替换数值,它会按预期工作。
我过去曾多次使用这种方法,从未遇到过问题。
你能检查一下我在这里遗漏了什么吗?
非常感谢。
一个简单的解决方法如下,它基于这样的观察:“旧”数字与替换后的数字之间的差值始终为 2:
# Shift the day label of the "DC" rows by 2 (1 -> 3, 3 -> 5, 5 -> 7, 7 -> 9,
# 8 -> 10). The row mask is computed once with %in%, which never yields NA, so
# rows with a missing Expo are skipped instead of breaking the subassignment.
# The sum is converted back to character explicitly so the column type of
# `day` does not depend on implicit coercion.
dc_rows <- df$Expo %in% "DC"
df$day[dc_rows] <- as.character(as.numeric(df$day[dc_rows]) + 2)
我测试了以下两种情况。在测试用例 1 中,Expo 为“DC”时的数字变成了字母 a 到 e;而在测试用例 2 中,Expo 为“DC”时的所有结果都变成了“e”。这意味着,当新替换进去的字符在您提供的命名向量中又能匹配到另一个模式时,代码会继续替换该字符,直到最后一个可以匹配的模式为止。所以在您原来的情况下,所有结果都变成了“9”和“10”(例如 1→3→5→7→9),而在我的测试用例 2 中,所有结果都是“e”。我相信根本原因是:向 str_replace_all 传入命名向量时,各条替换规则会按顺序依次作用于上一步的结果;此外 str_replace_all 是向量化的,在 case_when 中它总是先对整列完成替换,然后才按条件取值。
library(dplyr)
library(stringr)
# Test case 1: every replacement is a letter, so no replaced value can be
# matched by a later pattern and each "DC" day is rewritten exactly once.
p2 <- c(
  "1" = "a",
  "3" = "b",
  "5" = "c",
  "7" = "d",
  "8" = "e"
)
VariantsGenomeQuails.sub2 <- mutate(
  VariantsGenomeQuails,
  day = case_when(
    Expo == "DC" ~ str_replace_all(day, p2),
    TRUE ~ as.character(day)
  )
)
# Test case 2: each replacement is itself the next pattern in the vector, so
# the substitutions chain (1 -> a -> 3 -> b -> ... -> e) and every "DC" day
# ends up as "e".
p3 <- c(
  "1" = "a",
  "a" = "3",
  "3" = "b",
  "b" = "5",
  "5" = "c",
  "c" = "7",
  "7" = "d",
  "d" = "8",
  "8" = "e"
)
VariantsGenomeQuails.sub3 <- mutate(
  VariantsGenomeQuails,
  day = case_when(
    Expo == "DC" ~ str_replace_all(day, p3),
    TRUE ~ as.character(day)
  )
)
下面是对您代码的修复:我们不使用 str_replace_all,而是在 case_when 中用 %in% 对 day 的取值逐一做精确匹配。这样每个值只会被替换一次,效果不错。
# Relabel the day of the "DC" rows by exact matching. Each row is compared
# against its original value and takes the first branch that matches, so a
# replaced value is never fed into another rule.
VariantsGenomeQuails.sub4 <- mutate(
  VariantsGenomeQuails,
  day = case_when(
    !(Expo %in% "DC") ~ day,  # rows outside "DC" keep their label
    day %in% "1" ~ "3",
    day %in% "3" ~ "5",
    day %in% "5" ~ "7",
    day %in% "7" ~ "9",
    day %in% "8" ~ "10",
    TRUE ~ day                # "DC" rows with any other label are unchanged
  )
)
我正在尝试替换以下数据框(VariantsGenomeQuails,下面是它的 dput() 输出)中 day 列的值。
structure(list(Segment = c(5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L,
5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L,
5L, 5L, 5L, 5L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), Position = c(1550L,
1550L, 1550L, 1550L, 1550L, 1550L, 1550L, 1550L, 1550L, 1550L,
1550L, 1550L, 1550L, 1550L, 1550L, 1550L, 1550L, 1550L, 1550L,
1550L, 1550L, 1550L, 1550L, 1550L, 1550L, 1550L, 1550L, 1550L,
1550L, 100L, 100L, 100L, 100L, 100L, 100L, 100L, 100L, 100L,
100L, 100L, 100L, 100L, 100L, 100L, 100L, 100L, 100L, 100L, 100L,
100L, 100L, 100L, 100L, 100L, 2327L, 2327L, 2327L, 2327L, 2327L,
2327L, 2327L, 2327L, 2327L, 2327L, 2327L, 2327L, 2327L, 2327L
), Quail = c(52L, 53L, 54L, 12L, 36L, 48L, 59L, 11L, 12L, 36L,
48L, 59L, 52L, 53L, 54L, 52L, 53L, 54L, 11L, 12L, 48L, 59L, 59L,
11L, 36L, 59L, 52L, 53L, 54L, 52L, 53L, 54L, 36L, 59L, 36L, 48L,
59L, 52L, 53L, 54L, 52L, 53L, 54L, 36L, 48L, 59L, 36L, 59L, 36L,
48L, 59L, 52L, 53L, 54L, 52L, 53L, 54L, 36L, 48L, 59L, 11L, 11L,
12L, 36L, 48L, 59L, 36L, 59L), Freq = c(0.443883, 0.440835, 0.477273,
0.761589, 0.186821, 0.072325, 0.748305, 0.986968, 0.99361, 0.664921,
0.188847, 0.858921, 0.960804, 0.102041, 0.323194, 0.2, 0.449976,
0.630868, 0.958506, 0.743932, 0.257758, 0.886377, 0.038241, 0.992894,
0.633987, 0.564021, 0.054054, 0.068994, 0.200188, 0.091693, 0.256094,
0.165732, 0.988798, 0.46675, 0.997898, 0.954168, 0.993462, 0.996931,
0.932008, 0.998634, 0.957213, 0.858198, 0.22418, 0.910005, 0.045072,
0.731313, 0.995946, 0.877519, 0.998066, 0.999401, 0.953812, 0.02749,
0.043711, 0.065646, 0.032982, 0.025522, 0.023756, 0.02199, 0.020975,
0.021915, 0.026906, 0.029056, 0.025562, 0.031411, 0.021782, 0.024584,
0.033382, 0.026406), Group = structure(c(4L, 4L, 4L, 1L, 4L,
2L, 3L, 1L, 1L, 4L, 2L, 3L, 4L, 4L, 4L, 4L, 4L, 4L, 1L, 1L, 2L,
3L, 3L, 1L, 4L, 3L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 3L, 4L, 2L, 3L,
4L, 4L, 4L, 4L, 4L, 4L, 4L, 2L, 3L, 4L, 3L, 4L, 2L, 3L, 4L, 4L,
4L, 4L, 4L, 4L, 4L, 2L, 3L, 1L, 1L, 1L, 4L, 2L, 3L, 4L, 3L), .Label = c("var",
"varL", "varLQ", "varQ"), class = "factor"), Expo = structure(c(2L,
2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L,
2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 1L,
1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L), .Label = c("DC", "DI"), class = "factor"), day = c("3",
"3", "3", "3", "3", "3", "3", "7", "7", "7", "7", "7", "7", "7",
"7", "5", "5", "5", "5", "5", "5", "5", "1", "8", "8", "8", "1",
"1", "1", "3", "3", "3", "3", "3", "7", "7", "7", "7", "7", "7",
"5", "5", "5", "5", "5", "5", "1", "1", "8", "8", "8", "1", "1",
"1", "3", "3", "3", "3", "3", "3", "7", "5", "5", "5", "5", "5",
"1", "1")), row.names = c(NA, -68L), class = "data.frame")
为此,我制作了以下列表:
# Lookup table for the day labels: each name is an old label, each value is
# the label that should replace it.
p <- c(
  "1" = "3",
  "3" = "5",
  "5" = "7",
  "7" = "9",
  "8" = "10"
)
并尝试使用以下内容替换:
library(dplyr)    # mutate() and case_when() come from dplyr, not stringr
library(stringr)  # str_replace_all()

# Substitute the day labels of the "DC" rows; every other row keeps its label.
# NOTE: when str_replace_all() is given a named vector, the name/value pairs of
# `p` are applied one after another to the same strings, so the output of
# "1" -> "3" is matched again by "3" -> "5", and so on. The call is kept exactly
# as posted because it reproduces the behaviour described in the question.
VariantsGenomeQuails.sub <-
  VariantsGenomeQuails %>%
  mutate(day = case_when(Expo == "DC" ~ str_replace_all(day, p),
                         TRUE ~ as.character(day)))
这样做之后,替换得到的值只有 9 和 10,其他替换值都没有出现。
如果我尝试用字母而不是数字替换数值,它会按预期工作。
我过去曾多次使用这种方法,从未遇到过问题。
你能检查一下我在这里遗漏了什么吗?
非常感谢。
一个简单的解决方法如下,它基于这样的观察:“旧”数字与替换后的数字之间的差值始终为 2:
# Shift the day label of the "DC" rows by 2 (1 -> 3, 3 -> 5, 5 -> 7, 7 -> 9,
# 8 -> 10). The row mask is computed once with %in%, which never yields NA, so
# rows with a missing Expo are skipped instead of breaking the subassignment.
# The sum is converted back to character explicitly so the column type of
# `day` does not depend on implicit coercion.
dc_rows <- df$Expo %in% "DC"
df$day[dc_rows] <- as.character(as.numeric(df$day[dc_rows]) + 2)
我测试了以下两种情况。在测试用例 1 中,Expo 为“DC”时的数字变成了字母 a 到 e;而在测试用例 2 中,Expo 为“DC”时的所有结果都变成了“e”。这意味着,当新替换进去的字符在您提供的命名向量中又能匹配到另一个模式时,代码会继续替换该字符,直到最后一个可以匹配的模式为止。所以在您原来的情况下,所有结果都变成了“9”和“10”(例如 1→3→5→7→9),而在我的测试用例 2 中,所有结果都是“e”。我相信根本原因是:向 str_replace_all 传入命名向量时,各条替换规则会按顺序依次作用于上一步的结果;此外 str_replace_all 是向量化的,在 case_when 中它总是先对整列完成替换,然后才按条件取值。
library(dplyr)
library(stringr)
# Test case 1: every replacement is a letter, so no replaced value can be
# matched by a later pattern and each "DC" day is rewritten exactly once.
p2 <- c(
  "1" = "a",
  "3" = "b",
  "5" = "c",
  "7" = "d",
  "8" = "e"
)
VariantsGenomeQuails.sub2 <- mutate(
  VariantsGenomeQuails,
  day = case_when(
    Expo == "DC" ~ str_replace_all(day, p2),
    TRUE ~ as.character(day)
  )
)
# Test case 2: each replacement is itself the next pattern in the vector, so
# the substitutions chain (1 -> a -> 3 -> b -> ... -> e) and every "DC" day
# ends up as "e".
p3 <- c(
  "1" = "a",
  "a" = "3",
  "3" = "b",
  "b" = "5",
  "5" = "c",
  "c" = "7",
  "7" = "d",
  "d" = "8",
  "8" = "e"
)
VariantsGenomeQuails.sub3 <- mutate(
  VariantsGenomeQuails,
  day = case_when(
    Expo == "DC" ~ str_replace_all(day, p3),
    TRUE ~ as.character(day)
  )
)
下面是对您代码的修复:我们不使用 str_replace_all,而是在 case_when 中用 %in% 对 day 的取值逐一做精确匹配。这样每个值只会被替换一次,效果不错。
# Relabel the day of the "DC" rows by exact matching. Each row is compared
# against its original value and takes the first branch that matches, so a
# replaced value is never fed into another rule.
VariantsGenomeQuails.sub4 <- mutate(
  VariantsGenomeQuails,
  day = case_when(
    !(Expo %in% "DC") ~ day,  # rows outside "DC" keep their label
    day %in% "1" ~ "3",
    day %in% "3" ~ "5",
    day %in% "5" ~ "7",
    day %in% "7" ~ "9",
    day %in% "8" ~ "10",
    TRUE ~ day                # "DC" rows with any other label are unchanged
  )
)