按字符和缺失条件计算每行的列数
count the number of columns for each row by condition on character and missing
我想根据字符和缺失条件计算每行的列数。
例如,我有这个数据集,test
。
我想创建 num
列,统计每行中既不是缺失值(NA)也不是空字符串的列数。
# Sample data: four character columns mixing real values, "" and NA.
a <- c("aa", "bb", "cc", "dd", "", NA)
b <- c("", NA, "aa", "", "", "dd")
c <- c("aa", "", NA, NA, "cc", "dd")
d <- c("aa", "bb", "", NA, "cc", "dd")
# Column names are taken from the variable names.
test <- data.frame(a, b, c, d)
a b c d
1 aa aa aa
2 bb <NA> bb
3 cc aa <NA>
4 dd <NA> <NA>
5 cc cc
6 <NA> dd dd dd
我想计算既不是 NA
也不是空值
的列数,期望的结果如下:
a b c d num
1 aa aa aa 3
2 bb <NA> bb 2
3 cc aa <NA> 2
4 dd <NA> <NA> 1
5 cc cc 2
6 <NA> dd dd dd 3
我在其他帖子中尝试了一些方法,比如rowSums
Count number of columns by a condition (>) for each row
> test$num<-rowSums(test!=c("",NA),na.rm=T)
> test
a b c d num
1 aa aa aa 3
2 bb <NA> bb 0
3 cc aa <NA> 2
4 dd <NA> <NA> 0
5 cc cc 2
6 <NA> dd dd dd 0
但是返回的数字不对,我也找不到原因。
你能告诉我如何解决这个问题吗?
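您可以使用 rowSums
计算每行中 NA
或空值的数量,然后从数据框中的列数中减去它。
(原来的写法出错,是因为 test != c("", NA) 会把 c("", NA) 循环补齐后逐元素比较:第 1、3、5 行与 "" 比较,第 2、4、6 行与 NA 比较;与 NA 比较的结果都是 NA,被 na.rm 去掉后这些行就得到 0。)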
# A cell is blank when it is "" or NA; count the blanks in each row and
# subtract that from the total number of columns.
test$num <- ncol(test) - rowSums(test == "" | is.na(test))
# Print the data frame to check the new column.
test
# a b c d num
#1 aa aa aa 3
#2 bb <NA> bb 2
#3 cc aa <NA> 2
#4 dd <NA> <NA> 1
#5 cc cc 2
#6 <NA> dd dd dd 3
另一个使用 rowSums
的想法是用 NA 替换空,即
# Recode "" as NA first, then count the non-NA cells of every row.
rowSums(
  !is.na(replace(test, test == "", NA))
)
#[1] 3 2 2 1 2 3
您可以使用 nchar
+ rowSums
# Count the non-empty, non-NA cells of each row. nchar() gives NA for NA
# strings (dropped by na.rm = TRUE) and 0 for "", so the test must be `> 0`:
# `> 1` would also discard genuine one-character values such as "a".
# An existing `num` column is excluded so the line can be re-run safely.
test$num <- rowSums(
  nchar(as.matrix(test[setdiff(names(test), "num")])) > 0,
  na.rm = TRUE
)
或%in%
+ rowSums
# %in% never returns NA, so "" and NA can be tested in a single step. It
# drops the matrix shape, which is rebuilt (column-major) before summing.
test$num <- rowSums(
  matrix(!(as.matrix(test) %in% c("", NA)), nrow = nrow(test))
)
这样
> test
a b c d num
1 aa aa aa 3
2 bb <NA> bb 2
3 cc aa <NA> 2
4 dd <NA> <NA> 1
5 cc cc 2
6 <NA> dd dd dd 3
tidyverse 中的这种方法怎么样,它还告诉您有多少列包含 NA 或空字符串?
# Sample data: four character columns mixing real values, "" and NA.
a <- c("aa", "bb", "cc", "dd", "", NA)
b <- c("", NA, "aa", "", "", "dd")
c <- c("aa", "", NA, NA, "cc", "dd")
d <- c("aa", "bb", "", NA, "cc", "dd")
test <- data.frame(a, b, c, d)

library(magrittr) # import the pipe operator

# For every row, count the NA cells, the empty-string cells, and the
# remaining cells that hold a real value.
num_cols <- test %>%
  tibble::rowid_to_column("row_id") %>% # first add a row id column
  dplyr::group_by(row_id) %>% # split the data into single-row groups
  tidyr::nest() %>% # turn each row into an entry of list column `data`
  dplyr::mutate(
    # count the NAs in the row
    num_NAs = purrr::map_dbl(data, ~ sum(is.na(.))),
    # count the empty strings; NA == "" is NA, hence na.rm = TRUE
    num_empty = purrr::map_dbl(data, ~ sum(. == "", na.rm = TRUE)),
    # count columns that are neither NA nor empty (what was asked for)
    num_values = purrr::map_dbl(
      data,
      ~ length(.) - sum(num_NAs, num_empty)
    )
  ) %>%
  dplyr::ungroup() %>% # remove the grouping structure
  dplyr::select(num_NAs, num_empty, num_values) # keep only the counts

# Attach the three count columns to the original data and print the result.
test_v2 <- cbind(test, num_cols)
test_v2
a b c d num_NAs num_empty num_values
1 aa aa aa 0 1 3
2 bb <NA> bb 1 1 2
3 cc aa <NA> 1 1 2
4 dd <NA> <NA> 2 1 1
5 cc cc 0 2 2
6 <NA> dd dd dd 1 0 3
我想根据字符和缺失条件计算每行的列数。
例如,我有这个数据集,test
。
我想创建 num
列,统计每行中既不是缺失值(NA)也不是空字符串的列数。
# Sample data: four character columns mixing real values, "" and NA.
a <- c("aa", "bb", "cc", "dd", "", NA)
b <- c("", NA, "aa", "", "", "dd")
c <- c("aa", "", NA, NA, "cc", "dd")
d <- c("aa", "bb", "", NA, "cc", "dd")
# Column names are taken from the variable names.
test <- data.frame(a, b, c, d)
a b c d
1 aa aa aa
2 bb <NA> bb
3 cc aa <NA>
4 dd <NA> <NA>
5 cc cc
6 <NA> dd dd dd
我想计算既不是 NA
也不是空值的列数,期望的结果如下:
a b c d num
1 aa aa aa 3
2 bb <NA> bb 2
3 cc aa <NA> 2
4 dd <NA> <NA> 1
5 cc cc 2
6 <NA> dd dd dd 3
我在其他帖子中尝试了一些方法,比如rowSums
Count number of columns by a condition (>) for each row
> test$num<-rowSums(test!=c("",NA),na.rm=T)
> test
a b c d num
1 aa aa aa 3
2 bb <NA> bb 0
3 cc aa <NA> 2
4 dd <NA> <NA> 0
5 cc cc 2
6 <NA> dd dd dd 0
但是返回的数字不对,我也找不到原因。
你能告诉我如何解决这个问题吗?
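您可以使用 rowSums
计算每行中 NA
或空值的数量,然后从数据框中的列数中减去它。
(原来的写法出错,是因为 test != c("", NA) 会把 c("", NA) 循环补齐后逐元素比较:第 1、3、5 行与 "" 比较,第 2、4、6 行与 NA 比较;与 NA 比较的结果都是 NA,被 na.rm 去掉后这些行就得到 0。)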
# A cell is blank when it is "" or NA; count the blanks in each row and
# subtract that from the total number of columns.
test$num <- ncol(test) - rowSums(test == "" | is.na(test))
# Print the data frame to check the new column.
test
# a b c d num
#1 aa aa aa 3
#2 bb <NA> bb 2
#3 cc aa <NA> 2
#4 dd <NA> <NA> 1
#5 cc cc 2
#6 <NA> dd dd dd 3
另一个使用 rowSums
的想法是用 NA 替换空,即
# Recode "" as NA first, then count the non-NA cells of every row.
rowSums(
  !is.na(replace(test, test == "", NA))
)
#[1] 3 2 2 1 2 3
您可以使用 nchar
+ rowSums
# Count the non-empty, non-NA cells of each row. nchar() gives NA for NA
# strings (dropped by na.rm = TRUE) and 0 for "", so the test must be `> 0`:
# `> 1` would also discard genuine one-character values such as "a".
# An existing `num` column is excluded so the line can be re-run safely.
test$num <- rowSums(
  nchar(as.matrix(test[setdiff(names(test), "num")])) > 0,
  na.rm = TRUE
)
或%in%
+ rowSums
# %in% never returns NA, so "" and NA can be tested in a single step. It
# drops the matrix shape, which is rebuilt (column-major) before summing.
test$num <- rowSums(
  matrix(!(as.matrix(test) %in% c("", NA)), nrow = nrow(test))
)
这样
> test
a b c d num
1 aa aa aa 3
2 bb <NA> bb 2
3 cc aa <NA> 2
4 dd <NA> <NA> 1
5 cc cc 2
6 <NA> dd dd dd 3
tidyverse 中的这种方法怎么样,它还告诉您有多少列包含 NA 或空字符串?
# Sample data: four character columns mixing real values, "" and NA.
a <- c("aa", "bb", "cc", "dd", "", NA)
b <- c("", NA, "aa", "", "", "dd")
c <- c("aa", "", NA, NA, "cc", "dd")
d <- c("aa", "bb", "", NA, "cc", "dd")
test <- data.frame(a, b, c, d)

library(magrittr) # import the pipe operator

# For every row, count the NA cells, the empty-string cells, and the
# remaining cells that hold a real value.
num_cols <- test %>%
  tibble::rowid_to_column("row_id") %>% # first add a row id column
  dplyr::group_by(row_id) %>% # split the data into single-row groups
  tidyr::nest() %>% # turn each row into an entry of list column `data`
  dplyr::mutate(
    # count the NAs in the row
    num_NAs = purrr::map_dbl(data, ~ sum(is.na(.))),
    # count the empty strings; NA == "" is NA, hence na.rm = TRUE
    num_empty = purrr::map_dbl(data, ~ sum(. == "", na.rm = TRUE)),
    # count columns that are neither NA nor empty (what was asked for)
    num_values = purrr::map_dbl(
      data,
      ~ length(.) - sum(num_NAs, num_empty)
    )
  ) %>%
  dplyr::ungroup() %>% # remove the grouping structure
  dplyr::select(num_NAs, num_empty, num_values) # keep only the counts

# Attach the three count columns to the original data and print the result.
test_v2 <- cbind(test, num_cols)
test_v2
a b c d num_NAs num_empty num_values
1 aa aa aa 0 1 3
2 bb <NA> bb 1 1 2
3 cc aa <NA> 1 1 2
4 dd <NA> <NA> 2 1 1
5 cc cc 0 2 2
6 <NA> dd dd dd 1 0 3