rowSums(., na.rm = TRUE) 中的错误:'x' 必须是数字 - 尽管验证变量是数字
Error in rowSums(., na.rm = TRUE) : 'x' must be numeric - despite verifying variables are numeric
当我尝试对数据框中 24 个特定列按行求和时,R 报错:
Error in rowSums(., na.rm = TRUE) : 'x' must be numeric
我尝试了各种方法来确定感兴趣的列是否为数字。
x_isnum <- select_if(x2009, is.numeric)
names(x_isnum)
# Check data type of every variable in data frame
str(x2009)
所有感兴趣的列都显示为数值型。然后我甚至打开数据框,把鼠标悬停在每一列上以验证它们是否为数值型;确实都是。
我承认由于 df 太大,我可能忽略了一些东西。因此,我对数据取子集,只保留相关的列。
p = x2009[,c(48,49, 70:91)]
is.numeric(p)
FALSE
由于返回 FALSE,我又运行了
str(p)
'data.frame': 17090 obs. of 24 variables:
$ poss_cannabis_female_over_64 : num 0 0 0 0 0 0 0 0 0 0 ...
$ poss_cannabis_female_under_10: num 0 0 0 0 0 0 0 0 0 0 ...
$ poss_cannabis_male_over_64 : num 0 0 0 0 0 0 0 0 0 0 ...
$ poss_cannabis_male_under_10 : num 0 0 0 0 0 0 0 0 0 0 ...
$ poss_cannabis_tot_10_12 : num 0 0 0 0 0 0 0 0 0 0 ...
$ poss_cannabis_tot_13_14 : num 0 1 0 0 0 0 1 0 0 0 ...
$ poss_cannabis_tot_15 : num 0 1 0 3 0 0 0 1 0 0 ...
$ poss_cannabis_tot_16 : num 1 0 3 2 1 0 2 2 2 1 ...
$ poss_cannabis_tot_17 : num 1 0 1 3 1 2 0 3 2 1 ...
$ poss_cannabis_tot_18 : num 0 0 1 2 2 1 1 1 0 0 ...
$ poss_cannabis_tot_19 : num 0 2 0 4 1 0 3 0 0 0 ...
$ poss_cannabis_tot_20 : num 0 1 0 2 0 0 2 1 1 3 ...
$ poss_cannabis_tot_21 : num 0 0 0 1 1 0 0 0 1 0 ...
$ poss_cannabis_tot_22 : num 0 2 0 1 0 0 2 0 1 0 ...
$ poss_cannabis_tot_23 : num 1 0 0 3 2 0 1 1 0 0 ...
$ poss_cannabis_tot_24 : num 1 0 0 0 1 0 0 0 0 0 ...
$ poss_cannabis_tot_25_29 : num 0 0 2 3 2 1 0 0 1 2 ...
$ poss_cannabis_tot_30_34 : num 0 0 0 1 0 1 0 1 0 0 ...
$ poss_cannabis_tot_35_39 : num 1 0 0 1 1 0 0 1 0 0 ...
$ poss_cannabis_tot_40_44 : num 0 1 0 0 0 0 0 1 0 0 ...
$ poss_cannabis_tot_45_49 : num 0 0 0 0 0 0 0 0 0 0 ...
$ poss_cannabis_tot_50_54 : num 0 0 0 0 0 0 0 0 0 0 ...
$ poss_cannabis_tot_55_59 : num 0 0 0 0 0 0 0 0 0 0 ...
$ poss_cannabis_tot_60_64 : num 0 0 0 0 1 0 0 0 0 0 ...
我还运行了
sapply(p, is.numeric)
poss_cannabis_female_over_64
TRUE
poss_cannabis_female_under_10
TRUE
poss_cannabis_male_over_64
TRUE
poss_cannabis_male_under_10
TRUE
poss_cannabis_tot_10_12
TRUE
poss_cannabis_tot_13_14
TRUE
poss_cannabis_tot_15
TRUE
poss_cannabis_tot_16
TRUE
poss_cannabis_tot_17
TRUE
poss_cannabis_tot_18
TRUE
poss_cannabis_tot_19
TRUE
poss_cannabis_tot_20
TRUE
poss_cannabis_tot_21
TRUE
poss_cannabis_tot_22
TRUE
poss_cannabis_tot_23
TRUE
poss_cannabis_tot_24
TRUE
poss_cannabis_tot_25_29
TRUE
poss_cannabis_tot_30_34
TRUE
poss_cannabis_tot_35_39
TRUE
poss_cannabis_tot_40_44
TRUE
poss_cannabis_tot_45_49
TRUE
poss_cannabis_tot_50_54
TRUE
poss_cannabis_tot_55_59
TRUE
poss_cannabis_tot_60_64
TRUE
最后,我运行了 sapply(p, class),结果再次显示每个变量的类都是 numeric。我再次将鼠标悬停在子集数据框中的每一列上,每一列都显示为数值型。
既然 R 告诉我它不是数值型,那一定是我遗漏了什么。我认为问题不在代码上,因为我在一个较小的 df 上运行它没有问题;但以防万一,下面是我用来对特定列按行求和的代码。
x2009 = x2009 %>%
mutate(poss_cannabis_juv_tot = select(., c(49,71:76))) %>%
rowSums(na.rm = TRUE) %>%
mutate(poss_cannabis_adult_tot = select(., c(48,70,77:91))) %>%
rowSums(na.rm = TRUE) %>%
relocate(poss_cannabis_juv_tot, .after = poss_cannabis_male_17) %>%
relocate(poss_cannabis_adult_tot, .after = poss_cannabis_male_over_64)
这是怎么回事??
问题在于用 select 来创建列。应当改为在 across 中选择这些列,再对其求 rowSums:
library(dplyr)
x2009 %>%
mutate(poss_cannabis_juv_tot = rowSums(across(where(is.numeric)),
na.rm = TRUE))
或者,如果需要按列索引来选择:
x2009 %>%
mutate(poss_cannabis_juv_tot = rowSums(across(c(49,71:76)), na.rm = TRUE),
poss_cannabis_adult_tot = rowSums(across(c(48,70,77:91)), na.rm = TRUE)) %>%
relocate(poss_cannabis_juv_tot, .after = poss_cannabis_male_17) %>%
relocate(poss_cannabis_adult_tot, .after = poss_cannabis_male_over_64)
在 OP 的代码中,rowSums 这一步作用于数据框的所有列;而用 select 创建的那一列本身是一个 data.frame(此外数据框中还有其他非数值列),因此 rowSums 会报错 'x' must be numeric。
> head(iris) %>%
mutate(new = select(., 2:4)) %>%
str
'data.frame': 6 obs. of 6 variables:
$ Sepal.Length: num 5.1 4.9 4.7 4.6 5 5.4
$ Sepal.Width : num 3.5 3 3.2 3.1 3.6 3.9
$ Petal.Length: num 1.4 1.4 1.3 1.5 1.4 1.7
$ Petal.Width : num 0.2 0.2 0.2 0.2 0.2 0.4
$ Species : Factor w/ 3 levels "setosa","versicolor",..: 1 1 1 1 1 1
$ new :'data.frame': 6 obs. of 3 variables:
..$ Sepal.Width : num 3.5 3 3.2 3.1 3.6 3.9
..$ Petal.Length: num 1.4 1.4 1.3 1.5 1.4 1.7
..$ Petal.Width : num 0.2 0.2 0.2 0.2 0.2 0.4
head(iris) %>%
mutate(new = select(., 2:4)) %>%
rowSums(na.rm = TRUE)
Error in rowSums(., na.rm = TRUE) : 'x' must be numeric
相反,使用 across 则可以正常工作:
head(iris) %>%
mutate(new = rowSums(across(2:4), na.rm = TRUE))
Sepal.Length Sepal.Width Petal.Length Petal.Width Species new
1 5.1 3.5 1.4 0.2 setosa 5.1
2 4.9 3.0 1.4 0.2 setosa 4.6
3 4.7 3.2 1.3 0.2 setosa 4.7
4 4.6 3.1 1.5 0.2 setosa 4.8
5 5.0 3.6 1.4 0.2 setosa 5.2
6 5.4 3.9 1.7 0.4 setosa 6.0
当我尝试对数据框中 24 个特定列按行求和时,R 报错:
Error in rowSums(., na.rm = TRUE) : 'x' must be numeric
我尝试了各种方法来确定感兴趣的列是否为数字。
x_isnum <- select_if(x2009, is.numeric)
names(x_isnum)
# Check data type of every variable in data frame
str(x2009)
所有感兴趣的列都显示为数值型。然后我甚至打开数据框,把鼠标悬停在每一列上以验证它们是否为数值型;确实都是。 我承认由于 df 太大,我可能忽略了一些东西。因此,我对数据取子集,只保留相关的列。
p = x2009[,c(48,49, 70:91)]
is.numeric(p)
FALSE
由于返回 FALSE,我又运行了
str(p)
'data.frame': 17090 obs. of 24 variables:
$ poss_cannabis_female_over_64 : num 0 0 0 0 0 0 0 0 0 0 ...
$ poss_cannabis_female_under_10: num 0 0 0 0 0 0 0 0 0 0 ...
$ poss_cannabis_male_over_64 : num 0 0 0 0 0 0 0 0 0 0 ...
$ poss_cannabis_male_under_10 : num 0 0 0 0 0 0 0 0 0 0 ...
$ poss_cannabis_tot_10_12 : num 0 0 0 0 0 0 0 0 0 0 ...
$ poss_cannabis_tot_13_14 : num 0 1 0 0 0 0 1 0 0 0 ...
$ poss_cannabis_tot_15 : num 0 1 0 3 0 0 0 1 0 0 ...
$ poss_cannabis_tot_16 : num 1 0 3 2 1 0 2 2 2 1 ...
$ poss_cannabis_tot_17 : num 1 0 1 3 1 2 0 3 2 1 ...
$ poss_cannabis_tot_18 : num 0 0 1 2 2 1 1 1 0 0 ...
$ poss_cannabis_tot_19 : num 0 2 0 4 1 0 3 0 0 0 ...
$ poss_cannabis_tot_20 : num 0 1 0 2 0 0 2 1 1 3 ...
$ poss_cannabis_tot_21 : num 0 0 0 1 1 0 0 0 1 0 ...
$ poss_cannabis_tot_22 : num 0 2 0 1 0 0 2 0 1 0 ...
$ poss_cannabis_tot_23 : num 1 0 0 3 2 0 1 1 0 0 ...
$ poss_cannabis_tot_24 : num 1 0 0 0 1 0 0 0 0 0 ...
$ poss_cannabis_tot_25_29 : num 0 0 2 3 2 1 0 0 1 2 ...
$ poss_cannabis_tot_30_34 : num 0 0 0 1 0 1 0 1 0 0 ...
$ poss_cannabis_tot_35_39 : num 1 0 0 1 1 0 0 1 0 0 ...
$ poss_cannabis_tot_40_44 : num 0 1 0 0 0 0 0 1 0 0 ...
$ poss_cannabis_tot_45_49 : num 0 0 0 0 0 0 0 0 0 0 ...
$ poss_cannabis_tot_50_54 : num 0 0 0 0 0 0 0 0 0 0 ...
$ poss_cannabis_tot_55_59 : num 0 0 0 0 0 0 0 0 0 0 ...
$ poss_cannabis_tot_60_64 : num 0 0 0 0 1 0 0 0 0 0 ...
我还运行了
sapply(p, is.numeric)
poss_cannabis_female_over_64
TRUE
poss_cannabis_female_under_10
TRUE
poss_cannabis_male_over_64
TRUE
poss_cannabis_male_under_10
TRUE
poss_cannabis_tot_10_12
TRUE
poss_cannabis_tot_13_14
TRUE
poss_cannabis_tot_15
TRUE
poss_cannabis_tot_16
TRUE
poss_cannabis_tot_17
TRUE
poss_cannabis_tot_18
TRUE
poss_cannabis_tot_19
TRUE
poss_cannabis_tot_20
TRUE
poss_cannabis_tot_21
TRUE
poss_cannabis_tot_22
TRUE
poss_cannabis_tot_23
TRUE
poss_cannabis_tot_24
TRUE
poss_cannabis_tot_25_29
TRUE
poss_cannabis_tot_30_34
TRUE
poss_cannabis_tot_35_39
TRUE
poss_cannabis_tot_40_44
TRUE
poss_cannabis_tot_45_49
TRUE
poss_cannabis_tot_50_54
TRUE
poss_cannabis_tot_55_59
TRUE
poss_cannabis_tot_60_64
TRUE
最后,我运行了 sapply(p, class),结果再次显示每个变量的类都是 numeric。我再次将鼠标悬停在子集数据框中的每一列上,每一列都显示为数值型。
既然 R 告诉我它不是数值型,那一定是我遗漏了什么。我认为问题不在代码上,因为我在一个较小的 df 上运行它没有问题;但以防万一,下面是我用来对特定列按行求和的代码。
x2009 = x2009 %>%
mutate(poss_cannabis_juv_tot = select(., c(49,71:76))) %>%
rowSums(na.rm = TRUE) %>%
mutate(poss_cannabis_adult_tot = select(., c(48,70,77:91))) %>%
rowSums(na.rm = TRUE) %>%
relocate(poss_cannabis_juv_tot, .after = poss_cannabis_male_17) %>%
relocate(poss_cannabis_adult_tot, .after = poss_cannabis_male_over_64)
这是怎么回事??
问题在于用 select 来创建列。应当改为在 across 中选择这些列,再对其求 rowSums:
library(dplyr)
x2009 %>%
mutate(poss_cannabis_juv_tot = rowSums(across(where(is.numeric)),
na.rm = TRUE))
或者,如果需要按列索引来选择:
x2009 %>%
mutate(poss_cannabis_juv_tot = rowSums(across(c(49,71:76)), na.rm = TRUE),
poss_cannabis_adult_tot = rowSums(across(c(48,70,77:91)), na.rm = TRUE)) %>%
relocate(poss_cannabis_juv_tot, .after = poss_cannabis_male_17) %>%
relocate(poss_cannabis_adult_tot, .after = poss_cannabis_male_over_64)
在 OP 的代码中,rowSums 这一步作用于数据框的所有列;而用 select 创建的那一列本身是一个 data.frame(此外数据框中还有其他非数值列),因此 rowSums 会报错 'x' must be numeric。
> head(iris) %>%
mutate(new = select(., 2:4)) %>%
str
'data.frame': 6 obs. of 6 variables:
$ Sepal.Length: num 5.1 4.9 4.7 4.6 5 5.4
$ Sepal.Width : num 3.5 3 3.2 3.1 3.6 3.9
$ Petal.Length: num 1.4 1.4 1.3 1.5 1.4 1.7
$ Petal.Width : num 0.2 0.2 0.2 0.2 0.2 0.4
$ Species : Factor w/ 3 levels "setosa","versicolor",..: 1 1 1 1 1 1
$ new :'data.frame': 6 obs. of 3 variables:
..$ Sepal.Width : num 3.5 3 3.2 3.1 3.6 3.9
..$ Petal.Length: num 1.4 1.4 1.3 1.5 1.4 1.7
..$ Petal.Width : num 0.2 0.2 0.2 0.2 0.2 0.4
head(iris) %>%
mutate(new = select(., 2:4)) %>%
rowSums(na.rm = TRUE)
Error in rowSums(., na.rm = TRUE) : 'x' must be numeric
相反,使用 across 则可以正常工作:
head(iris) %>%
mutate(new = rowSums(across(2:4), na.rm = TRUE))
Sepal.Length Sepal.Width Petal.Length Petal.Width Species new
1 5.1 3.5 1.4 0.2 setosa 5.1
2 4.9 3.0 1.4 0.2 setosa 4.6
3 4.7 3.2 1.3 0.2 setosa 4.7
4 4.6 3.1 1.5 0.2 setosa 4.8
5 5.0 3.6 1.4 0.2 setosa 5.2
6 5.4 3.9 1.7 0.4 setosa 6.0