在 R 中无法对 tibble 取消嵌套(附 dput 数据、代码和报错信息)
Struggling to unnest tibble (dput, code, and error included) in r
我正在尝试取消嵌套物种计数 tibble,以便将其转换为数据框。最后四列是物种计数,目前为 'list' 形式(似乎是嵌套的?)。我希望这最后四个物种列的每一列和每一行都包含一个计数值 'int'(不包含 NULL,但在样带上未发现物种的情况下为 0)
我运行了:
'df %>% unnest(c(cols))'
并收到此错误:
Error in `fn()`:
! In row 1, can't recycle input of size 4 to size 9.
下面是以 dput() 形式给出的数据集的大幅精简版本!感谢任何能提供帮助的人!
structure(list(Year = c(2019L, 2019L, 2019L, 2019L), Location = c("Tela",
"Tela", "Tela", "Tela"), Site = c("AD", "AD", "AD", "AD"), Depth = c(10L,
10L, 10L, 10L), Transect = 1:4, ID = c("2019_Tela_AD_1_10", "2019_Tela_AD_2_10",
"2019_Tela_AD_3_10", "2019_Tela_AD_4_10"), `Stegastes planifrons` = list(
"1", NULL, NULL, c("10", "10", "10", "10", "10", "10", "10",
"10", "10", "10")), `Anisotremus virginicus` = list(c("4",
"4", "4", "4"), "1", NULL, NULL), `Stegastes adustus` = list(
c("9", "9", "9", "9", "9", "9", "9", "9", "9"), c("10", "10",
"10", "10", "10", "10", "10", "10", "10", "10"), c("15",
"15", "15", "15", "15", "15", "15", "15", "15", "15", "15",
"15", "15", "15", "15"), c("14", "14", "14", "14", "14",
"14", "14", "14", "14", "14", "14", "14", "14", "14")), `Stegastes partitus` = list(
c("9", "9", "9", "9", "9", "9", "9", "9", "9"), "1", c("14",
"14", "14", "14", "14", "14", "14", "14", "14", "14", "14",
"14", "14", "14"), c("10", "10", "10", "10", "10", "10",
"10", "10", "10", "10"))), row.names = c(NA, -4L), class = c("tbl_df",
"tbl", "data.frame"))
可能有更好的方法,但也许这对您有帮助:
library(purrr)
library(dplyr)
# The four species list-columns that hold the per-transect records.
species_cols <- c(
  "Stegastes planifrons", "Anisotremus virginicus",
  "Stegastes adustus", "Stegastes partitus"
)

# Reduce one list-column cell to a single integer count.
# An empty cell (NULL or zero-length) means the species was not recorded on
# that transect, so it becomes 0L. Otherwise the first element is converted:
# in the sample data every element of a cell repeats the same count as a
# string (e.g. c("4", "4", "4", "4")), so the first element is the count.
cell_to_count <- function(cell) {
  if (length(cell) == 0L) {
    return(0L)
  }
  as.integer(cell[[1]])
}

# Species columns only, each flattened from a list to an integer vector.
# A plain `if` inside map_int() replaces ifelse() on a scalar, and the result
# is integer as requested rather than double.
list_df <- df %>%
  select(all_of(species_cols)) %>%
  mutate(across(everything(), ~ map_int(.x, cell_to_count)))

# Re-attach the identifying columns; bind_cols() keeps the result a tibble,
# whereas cbind() would return a plain data.frame.
df %>%
  select(Year:ID) %>%
  bind_cols(list_df)
您可以为此使用 apply 系列函数:
# Names of the species list-columns (positions 7 to 10 of the data).
cols_count <- colnames(tibble)[7:10]

# Overwrite each list-column with the number of entries in each of its cells.
# lengths() returns 0L for a NULL cell, so transects where a species was not
# recorded become 0. Working column by column with lapply() avoids apply(),
# which would first coerce the data frame to a matrix.
tibble[cols_count] <- lapply(tibble[cols_count], lengths)
结果如下:
> tibble
# A tibble: 4 x 10
Year Location Site Depth Transect ID `Stegastes planifrons` `Anisotremus virginicus` `Stegastes adustus` `Stegastes partitus`
<int> <chr> <chr> <int> <int> <chr> <int> <int> <int> <int>
1 2019 Tela AD 10 1 2019_Tela_AD_1_10 1 4 9 9
2 2019 Tela AD 10 2 2019_Tela_AD_2_10 0 1 10 1
3 2019 Tela AD 10 3 2019_Tela_AD_3_10 0 0 15 14
4 2019 Tela AD 10 4 2019_Tela_AD_4_10 10 0 14 10
我正在尝试取消嵌套物种计数 tibble,以便将其转换为数据框。最后四列是物种计数,目前为 'list' 形式(似乎是嵌套的?)。我希望这最后四个物种列的每一列和每一行都包含一个计数值 'int'(不包含 NULL,但在样带上未发现物种的情况下为 0)
我运行了:
'df %>% unnest(c(cols))'
并收到此错误:
Error in `fn()`:
! In row 1, can't recycle input of size 4 to size 9.
下面是以 dput() 形式给出的数据集的大幅精简版本!感谢任何能提供帮助的人!
structure(list(Year = c(2019L, 2019L, 2019L, 2019L), Location = c("Tela",
"Tela", "Tela", "Tela"), Site = c("AD", "AD", "AD", "AD"), Depth = c(10L,
10L, 10L, 10L), Transect = 1:4, ID = c("2019_Tela_AD_1_10", "2019_Tela_AD_2_10",
"2019_Tela_AD_3_10", "2019_Tela_AD_4_10"), `Stegastes planifrons` = list(
"1", NULL, NULL, c("10", "10", "10", "10", "10", "10", "10",
"10", "10", "10")), `Anisotremus virginicus` = list(c("4",
"4", "4", "4"), "1", NULL, NULL), `Stegastes adustus` = list(
c("9", "9", "9", "9", "9", "9", "9", "9", "9"), c("10", "10",
"10", "10", "10", "10", "10", "10", "10", "10"), c("15",
"15", "15", "15", "15", "15", "15", "15", "15", "15", "15",
"15", "15", "15", "15"), c("14", "14", "14", "14", "14",
"14", "14", "14", "14", "14", "14", "14", "14", "14")), `Stegastes partitus` = list(
c("9", "9", "9", "9", "9", "9", "9", "9", "9"), "1", c("14",
"14", "14", "14", "14", "14", "14", "14", "14", "14", "14",
"14", "14", "14"), c("10", "10", "10", "10", "10", "10",
"10", "10", "10", "10"))), row.names = c(NA, -4L), class = c("tbl_df",
"tbl", "data.frame"))
可能有更好的方法,但也许这对您有帮助:
library(purrr)
library(dplyr)
# The four species list-columns that hold the per-transect records.
species_cols <- c(
  "Stegastes planifrons", "Anisotremus virginicus",
  "Stegastes adustus", "Stegastes partitus"
)

# Reduce one list-column cell to a single integer count.
# An empty cell (NULL or zero-length) means the species was not recorded on
# that transect, so it becomes 0L. Otherwise the first element is converted:
# in the sample data every element of a cell repeats the same count as a
# string (e.g. c("4", "4", "4", "4")), so the first element is the count.
cell_to_count <- function(cell) {
  if (length(cell) == 0L) {
    return(0L)
  }
  as.integer(cell[[1]])
}

# Species columns only, each flattened from a list to an integer vector.
# A plain `if` inside map_int() replaces ifelse() on a scalar, and the result
# is integer as requested rather than double.
list_df <- df %>%
  select(all_of(species_cols)) %>%
  mutate(across(everything(), ~ map_int(.x, cell_to_count)))

# Re-attach the identifying columns; bind_cols() keeps the result a tibble,
# whereas cbind() would return a plain data.frame.
df %>%
  select(Year:ID) %>%
  bind_cols(list_df)
您可以为此使用 apply 系列函数:
# Names of the species list-columns (positions 7 to 10 of the data).
cols_count <- colnames(tibble)[7:10]

# Overwrite each list-column with the number of entries in each of its cells.
# lengths() returns 0L for a NULL cell, so transects where a species was not
# recorded become 0. Working column by column with lapply() avoids apply(),
# which would first coerce the data frame to a matrix.
tibble[cols_count] <- lapply(tibble[cols_count], lengths)
结果如下:
> tibble
# A tibble: 4 x 10
Year Location Site Depth Transect ID `Stegastes planifrons` `Anisotremus virginicus` `Stegastes adustus` `Stegastes partitus`
<int> <chr> <chr> <int> <int> <chr> <int> <int> <int> <int>
1 2019 Tela AD 10 1 2019_Tela_AD_1_10 1 4 9 9
2 2019 Tela AD 10 2 2019_Tela_AD_2_10 0 1 10 1
3 2019 Tela AD 10 3 2019_Tela_AD_3_10 0 0 15 14
4 2019 Tela AD 10 4 2019_Tela_AD_4_10 10 0 14 10