R: creating new variables in terms of other variables (error: NAs introduced by coercion )
R: creating new variables in terms of other variables (error: NAs introduced by coercion )
我正在使用 R 编程语言。假设我有以下数据集:
# Build the example data set. Both points columns hold their numbers as
# character strings rather than numeric values.
my_data <- data.frame(
  name = rep(c("john", "jason", "jack", "jim"), times = 2),
  points_1 = c("150", "165", "183", "191", "151", "166", "184", "192"),
  points_2 = c("250", "265", "283", "291", "251", "266", "284", "292")
)

# Show the data frame
my_data
name points_1 points_2
1 john 150 250
2 jason 165 265
3 jack 183 283
4 jim 191 291
5 john 151 251
6 jason 166 266
7 jack 184 284
8 jim 192 292
我正在尝试创建如下所示的最终数据集:
# Hand-written target: the original columns plus three label columns.
# NOTE(review): the labels are spaced inconsistently (some var1 entries start
# with a blank), var2 rows 5-8 repeat the values of rows 1-4 instead of using
# 151/251 etc., and the "jim" var2 entries have no ">" before 291. These look
# like typos in the hand-typed example; confirm the intended format.
my_data_final <- data.frame(
  name = rep(c("john", "jason", "jack", "jim"), times = 2),
  points_1 = c("150", "165", "183", "191", "151", "166", "184", "192"),
  points_2 = c("250", "265", "283", "291", "251", "266", "284", "292"),
  var1 = c(
    "< 150", "< 165", "< 183", " < 191",
    " < 151", " < 166", " < 184", "< 192"
  ),
  var2 = rep(
    c("<150 and > 250", "< 165 and > 265", "<183 and > 283", "< 191 and 291"),
    times = 2
  ),
  var3 = c(">250", ">265", ">283", ">291", ">251", ">266", ">284", ">292")
)

# Show the desired result
my_data_final
name points_1 points_2 var1 var2 var3
1 john 150 250 < 150 <150 and > 250 >250
2 jason 165 265 < 165 < 165 and > 265 >265
3 jack 183 283 < 183 <183 and > 283 >283
4 jim 191 291 < 191 < 191 and 291 >291
5 john 151 251 < 151 <150 and > 250 >251
6 jason 166 266 < 166 < 165 and > 265 >266
7 jack 184 284 < 184 <183 and > 283 >284
8 jim 192 292 < 192 < 191 and 291 >292
我尝试手动创建“迷你变量”并以所需格式手动将它们组合在一起:
# Attempt (adapted from an example elsewhere; the source link is missing):
# keep the comparison sign in a helper column, then attach it to points_1.
my_data$var_1a <- ">"
# The pasted strings look like "150>", which are not valid numbers, so
# as.numeric() turns every element into NA and warns
# "NAs introduced by coercion". A label like this has to stay a character
# string.
my_data$var_1 <- as.numeric(paste0(my_data$points_1, my_data$var_1a))
但这会导致以下“警告”和 NA:
Warning message:
NAs introduced by coercion
my_data
name points_1 points_2 var1 var2 points_2.1 var_1a var_1
1 john 150 250 < 150 <150 and > 250 >250 > NA
2 jason 165 265 < 165 < 165 and > 265 >265 > NA
3 jack 183 283 < 183 <183 and > 283 >283 > NA
4 jim 191 291 < 191 < 191 and 291 >291 > NA
5 john 151 251 < 151 <150 and > 250 >251 > NA
6 jason 166 266 < 166 < 165 and > 265 >266 > NA
7 jack 184 284 < 184 <183 and > 283 >284 > NA
8 jim 192 292 < 192 < 191 and 291 >292 > NA
有人可以告诉我如何修复此错误并创建所需的变量“var1”、“var2”和“var3”吗?
谢谢
这是您正在寻找的解决方案吗?
library(tidyverse)

# Rebuild the example data (points are stored as character strings)
my_data <- data.frame(
  name = rep(c("john", "jason", "jack", "jim"), times = 2),
  points_1 = c("150", "165", "183", "191", "151", "166", "184", "192"),
  points_2 = c("250", "265", "283", "291", "251", "266", "284", "292")
)

# Add the three label columns by prefixing the comparison signs.
# paste0() converts its arguments to character itself, so no explicit
# as.character() call is needed.
# NOTE(review): the question asks for var2 as "<points_1 and > points_2";
# this answer produces ">points_1 and <points_2". Confirm which is wanted.
my_data %>%
  mutate(
    var1 = paste0("<", points_1),
    var2 = paste0(">", points_1, " and <", points_2),
    var3 = paste0(">", points_2)
  )
#> name points_1 points_2 var1 var2 var3
#> 1 john 150 250 <150 >150 and <250 >250
#> 2 jason 165 265 <165 >165 and <265 >265
#> 3 jack 183 283 <183 >183 and <283 >283
#> 4 jim 191 291 <191 >191 and <291 >291
#> 5 john 151 251 <151 >151 and <251 >251
#> 6 jason 166 266 <166 >166 and <266 >266
#> 7 jack 184 284 <184 >184 and <284 >284
#> 8 jim 192 292 <192 >192 and <292 >292
使用 base R(不依赖额外的包):
# Example data with numeric points columns
my_data <- data.frame(
  name = rep(c("john", "jason", "jack", "jim"), times = 2),
  points_1 = c(150, 165, 183, 191, 151, 166, 184, 192),
  points_2 = c(250, 265, 283, 291, 251, 266, 284, 292)
)

# Build each label column; paste0() joins the pieces without a separator
# and converts the numbers to text.
my_data$var1 <- with(my_data, paste0("<", points_1))
my_data$var2 <- with(my_data, paste0(">", points_1, " and <", points_2))
my_data$var3 <- with(my_data, paste0(">", points_2))

# Show the result
my_data
#> name points_1 points_2 var1 var2 var3
#> 1 john 150 250 <150 >150 and <250 >250
#> 2 jason 165 265 <165 >165 and <265 >265
#> 3 jack 183 283 <183 >183 and <283 >283
#> 4 jim 191 291 <191 >191 and <291 >291
#> 5 john 151 251 <151 >151 and <251 >251
#> 6 jason 166 266 <166 >166 and <266 >266
#> 7 jack 184 284 <184 >184 and <284 >284
#> 8 jim 192 292 <192 >192 and <292 >292
我们可以在 base R 中轻松做到这一点:先用 sprintf 为每一行生成一个以逗号分隔的字符串,再用 read.csv 把它读成三列,最后用 cbind 将这三列与原始数据合并。
# sprintf() builds one comma-separated string per row; read.csv() splits each
# string at the commas into three columns, and cbind() appends those columns
# to the existing data frame.
my_data <- cbind(
  my_data,
  read.csv(
    text = sprintf(
      "< %s,<%s and > %s,>%s",
      my_data$points_1, my_data$points_1, my_data$points_2, my_data$points_2
    ),
    header = FALSE,
    col.names = c("var1", "var2", "var3")
  )
)
输出:
my_data
name points_1 points_2 var1 var2 var3
1 john 150 250 < 150 <150 and > 250 >250
2 jason 165 265 < 165 <165 and > 265 >265
3 jack 183 283 < 183 <183 and > 283 >283
4 jim 191 291 < 191 <191 and > 291 >291
5 john 151 251 < 151 <151 and > 251 >251
6 jason 166 266 < 166 <166 and > 266 >266
7 jack 184 284 < 184 <184 and > 284 >284
8 jim 192 292 < 192 <192 and > 292 >292
或者如 @thelatemail 所建议的那样,我们可以在 sprintf 中使用位置占位符 %1$s、%2$s,从而避免重复传入相同的参数:
# Positional specifiers: %1$s refers to points_1 and %2$s to points_2, so each
# column is passed to sprintf() only once. The commas delimit the three fields
# that read.csv() turns into var1, var2 and var3. No blanks are placed next to
# the commas, so the fields carry no stray leading or trailing whitespace
# (read.csv() does not strip it by default).
my_data <- cbind(
  my_data,
  read.csv(
    text = with(
      my_data,
      sprintf("< %1$s,<%1$s and > %2$s,>%2$s", points_1, points_2)
    ),
    header = FALSE,
    col.names = paste0("var", 1:3)
  )
)
或者使用 glue 和 separate 的方案:先用 glue 拼出以逗号分隔的字符串,再用 separate 拆分成三列。
library(dplyr)
library(tidyr)

# glue() concatenates its unnamed pieces and fills in the {column} values,
# giving one comma-separated string per row; separate() then splits that
# string at the commas into var1, var2 and var3.
my_data %>%
  mutate(
    vars = glue::glue(
      "<{points_1},",
      "<{points_1} and >{points_2},",
      ">{points_2}"
    )
  ) %>%
  separate(vars, into = paste0("var", 1:3), sep = ",")
name points_1 points_2 var1 var2 var3
1 john 150 250 <150 <150 and >250 >250
2 jason 165 265 <165 <165 and >265 >265
3 jack 183 283 <183 <183 and >283 >283
4 jim 191 291 <191 <191 and >291 >291
5 john 151 251 <151 <151 and >251 >251
6 jason 166 266 <166 <166 and >266 >266
7 jack 184 284 <184 <184 and >284 >284
8 jim 192 292 <192 <192 and >292 >292
您还可以使用 purrr 包中的 map2,解决方案如下:
library(dplyr) # mutate() comes from dplyr
library(purrr)
library(tidyr)

# For each row, map2() receives points_1 as .x and points_2 as .y and returns
# a one-row tibble holding the three labels; unnest() spreads those tibbles
# into regular columns.
# The columns are referenced by bare name inside mutate() rather than through
# my_data$, so the pipeline uses whatever data is piped in.
# paste() already inserts a single space between its arguments, so the pieces
# carry no extra blanks of their own (otherwise the labels get doubled
# spaces, e.g. "<  150").
my_data %>%
  mutate(output = map2(points_1, points_2, ~ {
    tibble(
      var1 = paste("<", .x),
      var2 = paste("<", .x, "and >", .y),
      var3 = paste(">", .y)
    )
  })) %>%
  unnest(output)
# A tibble: 8 x 6
name points_1 points_2 var1 var2 var3
<chr> <chr> <chr> <chr> <chr> <chr>
1 john 150 250 < 150 < 150 and > 250 > 250
2 jason 165 265 < 165 < 165 and > 265 > 265
3 jack 183 283 < 183 < 183 and > 283 > 283
4 jim 191 291 < 191 < 191 and > 291 > 291
5 john 151 251 < 151 < 151 and > 251 > 251
6 jason 166 266 < 166 < 166 and > 266 > 266
7 jack 184 284 < 184 < 184 and > 284 > 284
8 jim 192 292 < 192 < 192 and > 292 > 292
我正在使用 R 编程语言。假设我有以下数据集:
# Build the example data set. Both points columns hold their numbers as
# character strings rather than numeric values.
my_data <- data.frame(
  name = rep(c("john", "jason", "jack", "jim"), times = 2),
  points_1 = c("150", "165", "183", "191", "151", "166", "184", "192"),
  points_2 = c("250", "265", "283", "291", "251", "266", "284", "292")
)

# Show the data frame
my_data
name points_1 points_2
1 john 150 250
2 jason 165 265
3 jack 183 283
4 jim 191 291
5 john 151 251
6 jason 166 266
7 jack 184 284
8 jim 192 292
我正在尝试创建如下所示的最终数据集:
# Hand-written target: the original columns plus three label columns.
# NOTE(review): the labels are spaced inconsistently (some var1 entries start
# with a blank), var2 rows 5-8 repeat the values of rows 1-4 instead of using
# 151/251 etc., and the "jim" var2 entries have no ">" before 291. These look
# like typos in the hand-typed example; confirm the intended format.
my_data_final <- data.frame(
  name = rep(c("john", "jason", "jack", "jim"), times = 2),
  points_1 = c("150", "165", "183", "191", "151", "166", "184", "192"),
  points_2 = c("250", "265", "283", "291", "251", "266", "284", "292"),
  var1 = c(
    "< 150", "< 165", "< 183", " < 191",
    " < 151", " < 166", " < 184", "< 192"
  ),
  var2 = rep(
    c("<150 and > 250", "< 165 and > 265", "<183 and > 283", "< 191 and 291"),
    times = 2
  ),
  var3 = c(">250", ">265", ">283", ">291", ">251", ">266", ">284", ">292")
)

# Show the desired result
my_data_final
name points_1 points_2 var1 var2 var3
1 john 150 250 < 150 <150 and > 250 >250
2 jason 165 265 < 165 < 165 and > 265 >265
3 jack 183 283 < 183 <183 and > 283 >283
4 jim 191 291 < 191 < 191 and 291 >291
5 john 151 251 < 151 <150 and > 250 >251
6 jason 166 266 < 166 < 165 and > 265 >266
7 jack 184 284 < 184 <183 and > 283 >284
8 jim 192 292 < 192 < 191 and 291 >292
我尝试手动创建“迷你变量”并以所需格式手动将它们组合在一起:
# Attempt (adapted from an example elsewhere; the source link is missing):
# keep the comparison sign in a helper column, then attach it to points_1.
my_data$var_1a <- ">"
# The pasted strings look like "150>", which are not valid numbers, so
# as.numeric() turns every element into NA and warns
# "NAs introduced by coercion". A label like this has to stay a character
# string.
my_data$var_1 <- as.numeric(paste0(my_data$points_1, my_data$var_1a))
但这会导致以下“警告”和 NA:
Warning message:
NAs introduced by coercion
my_data
name points_1 points_2 var1 var2 points_2.1 var_1a var_1
1 john 150 250 < 150 <150 and > 250 >250 > NA
2 jason 165 265 < 165 < 165 and > 265 >265 > NA
3 jack 183 283 < 183 <183 and > 283 >283 > NA
4 jim 191 291 < 191 < 191 and 291 >291 > NA
5 john 151 251 < 151 <150 and > 250 >251 > NA
6 jason 166 266 < 166 < 165 and > 265 >266 > NA
7 jack 184 284 < 184 <183 and > 283 >284 > NA
8 jim 192 292 < 192 < 191 and 291 >292 > NA
有人可以告诉我如何修复此错误并创建所需的变量“var1”、“var2”和“var3”吗?
谢谢
这是您正在寻找的解决方案吗?
library(tidyverse)

# Rebuild the example data (points are stored as character strings)
my_data <- data.frame(
  name = rep(c("john", "jason", "jack", "jim"), times = 2),
  points_1 = c("150", "165", "183", "191", "151", "166", "184", "192"),
  points_2 = c("250", "265", "283", "291", "251", "266", "284", "292")
)

# Add the three label columns by prefixing the comparison signs.
# paste0() converts its arguments to character itself, so no explicit
# as.character() call is needed.
# NOTE(review): the question asks for var2 as "<points_1 and > points_2";
# this answer produces ">points_1 and <points_2". Confirm which is wanted.
my_data %>%
  mutate(
    var1 = paste0("<", points_1),
    var2 = paste0(">", points_1, " and <", points_2),
    var3 = paste0(">", points_2)
  )
#> name points_1 points_2 var1 var2 var3
#> 1 john 150 250 <150 >150 and <250 >250
#> 2 jason 165 265 <165 >165 and <265 >265
#> 3 jack 183 283 <183 >183 and <283 >283
#> 4 jim 191 291 <191 >191 and <291 >291
#> 5 john 151 251 <151 >151 and <251 >251
#> 6 jason 166 266 <166 >166 and <266 >266
#> 7 jack 184 284 <184 >184 and <284 >284
#> 8 jim 192 292 <192 >192 and <292 >292
使用 base R(不依赖额外的包):
# Example data with numeric points columns
my_data <- data.frame(
  name = rep(c("john", "jason", "jack", "jim"), times = 2),
  points_1 = c(150, 165, 183, 191, 151, 166, 184, 192),
  points_2 = c(250, 265, 283, 291, 251, 266, 284, 292)
)

# Build each label column; paste0() joins the pieces without a separator
# and converts the numbers to text.
my_data$var1 <- with(my_data, paste0("<", points_1))
my_data$var2 <- with(my_data, paste0(">", points_1, " and <", points_2))
my_data$var3 <- with(my_data, paste0(">", points_2))

# Show the result
my_data
#> name points_1 points_2 var1 var2 var3
#> 1 john 150 250 <150 >150 and <250 >250
#> 2 jason 165 265 <165 >165 and <265 >265
#> 3 jack 183 283 <183 >183 and <283 >283
#> 4 jim 191 291 <191 >191 and <291 >291
#> 5 john 151 251 <151 >151 and <251 >251
#> 6 jason 166 266 <166 >166 and <266 >266
#> 7 jack 184 284 <184 >184 and <284 >284
#> 8 jim 192 292 <192 >192 and <292 >292
我们可以在 base R 中轻松做到这一点:先用 sprintf 为每一行生成一个以逗号分隔的字符串,再用 read.csv 把它读成三列,最后用 cbind 将这三列与原始数据合并。
# sprintf() builds one comma-separated string per row; read.csv() splits each
# string at the commas into three columns, and cbind() appends those columns
# to the existing data frame.
my_data <- cbind(
  my_data,
  read.csv(
    text = sprintf(
      "< %s,<%s and > %s,>%s",
      my_data$points_1, my_data$points_1, my_data$points_2, my_data$points_2
    ),
    header = FALSE,
    col.names = c("var1", "var2", "var3")
  )
)
输出:
my_data
name points_1 points_2 var1 var2 var3
1 john 150 250 < 150 <150 and > 250 >250
2 jason 165 265 < 165 <165 and > 265 >265
3 jack 183 283 < 183 <183 and > 283 >283
4 jim 191 291 < 191 <191 and > 291 >291
5 john 151 251 < 151 <151 and > 251 >251
6 jason 166 266 < 166 <166 and > 266 >266
7 jack 184 284 < 184 <184 and > 284 >284
8 jim 192 292 < 192 <192 and > 292 >292
或者如 @thelatemail 所建议的那样,我们可以在 sprintf 中使用位置占位符 %1$s、%2$s,从而避免重复传入相同的参数:
# Positional specifiers: %1$s refers to points_1 and %2$s to points_2, so each
# column is passed to sprintf() only once. The commas delimit the three fields
# that read.csv() turns into var1, var2 and var3. No blanks are placed next to
# the commas, so the fields carry no stray leading or trailing whitespace
# (read.csv() does not strip it by default).
my_data <- cbind(
  my_data,
  read.csv(
    text = with(
      my_data,
      sprintf("< %1$s,<%1$s and > %2$s,>%2$s", points_1, points_2)
    ),
    header = FALSE,
    col.names = paste0("var", 1:3)
  )
)
或者使用 glue 和 separate 的方案:先用 glue 拼出以逗号分隔的字符串,再用 separate 拆分成三列。
library(dplyr)
library(tidyr)

# glue() concatenates its unnamed pieces and fills in the {column} values,
# giving one comma-separated string per row; separate() then splits that
# string at the commas into var1, var2 and var3.
my_data %>%
  mutate(
    vars = glue::glue(
      "<{points_1},",
      "<{points_1} and >{points_2},",
      ">{points_2}"
    )
  ) %>%
  separate(vars, into = paste0("var", 1:3), sep = ",")
name points_1 points_2 var1 var2 var3
1 john 150 250 <150 <150 and >250 >250
2 jason 165 265 <165 <165 and >265 >265
3 jack 183 283 <183 <183 and >283 >283
4 jim 191 291 <191 <191 and >291 >291
5 john 151 251 <151 <151 and >251 >251
6 jason 166 266 <166 <166 and >266 >266
7 jack 184 284 <184 <184 and >284 >284
8 jim 192 292 <192 <192 and >292 >292
您还可以使用 purrr 包中的 map2,解决方案如下:
library(dplyr) # mutate() comes from dplyr
library(purrr)
library(tidyr)

# For each row, map2() receives points_1 as .x and points_2 as .y and returns
# a one-row tibble holding the three labels; unnest() spreads those tibbles
# into regular columns.
# The columns are referenced by bare name inside mutate() rather than through
# my_data$, so the pipeline uses whatever data is piped in.
# paste() already inserts a single space between its arguments, so the pieces
# carry no extra blanks of their own (otherwise the labels get doubled
# spaces, e.g. "<  150").
my_data %>%
  mutate(output = map2(points_1, points_2, ~ {
    tibble(
      var1 = paste("<", .x),
      var2 = paste("<", .x, "and >", .y),
      var3 = paste(">", .y)
    )
  })) %>%
  unnest(output)
# A tibble: 8 x 6
name points_1 points_2 var1 var2 var3
<chr> <chr> <chr> <chr> <chr> <chr>
1 john 150 250 < 150 < 150 and > 250 > 250
2 jason 165 265 < 165 < 165 and > 265 > 265
3 jack 183 283 < 183 < 183 and > 283 > 283
4 jim 191 291 < 191 < 191 and > 291 > 291
5 john 151 251 < 151 < 151 and > 251 > 251
6 jason 166 266 < 166 < 166 and > 266 > 266
7 jack 184 284 < 184 < 184 and > 284 > 284
8 jim 192 292 < 192 < 192 and > 292 > 292