用组数据填充数据

Fill the data with group data

我有一个 data.table 如下:-

k <- data.table(name = rep(letters[1:4], each = 5),
            year = rep(2015:2019, times = 4),
            values = c(19, 19, 18, NA, 19, 44, NA, NA, NA, NA, 33, 32, NA, NA, NA, NA, NA, NA, NA, NA))

k

#     name year values
#  1:    a 2015     19
#  2:    a 2016     19
#  3:    a 2017     18
#  4:    a 2018     NA
#  5:    a 2019     19
#  6:    b 2015     44
#  7:    b 2016     NA
#  8:    b 2017     NA
#  9:    b 2018     NA
# 10:    b 2019     NA
# 11:    c 2015     33
# 12:    c 2016     32
# 13:    c 2017     NA
# 14:    c 2018     NA
# 15:    c 2019     NA
# 16:    d 2015     NA
# 17:    d 2016     NA
# 18:    d 2017     NA
# 19:    d 2018     NA
# 20:    d 2019     NA

每个组由 name 列定义,每个名称有 5 个 year 值。

我想用组中的第一个唯一值填充组中的每个 NA 值。因此,我的结果 data.table 将如下所示:-

#     name year values
#  1:    a 2015     19
#  2:    a 2016     19
#  3:    a 2017     18
#  4:    a 2018     19
#  5:    a 2019     19
#  6:    b 2015     44
#  7:    b 2016     44
#  8:    b 2017     44
#  9:    b 2018     44
# 10:    b 2019     44
# 11:    c 2015     33
# 12:    c 2016     32
# 13:    c 2017     33
# 14:    c 2018     33
# 15:    c 2019     33
# 16:    d 2015     NA
# 17:    d 2016     NA
# 18:    d 2017     NA
# 19:    d 2018     NA
# 20:    d 2019     NA

您可以使用 replace -

library(data.table)
k[, values := replace(values, is.na(values), first(values)), name]
k

#    name year values
# 1:    a 2015     19
# 2:    a 2016     19
# 3:    a 2017     18
# 4:    a 2018     19
# 5:    a 2019     19
# 6:    b 2015     44
# 7:    b 2016     44
# 8:    b 2017     44
# 9:    b 2018     44
#10:    b 2019     44
#11:    c 2015     33
#12:    c 2016     32
#13:    c 2017     33
#14:    c 2018     33
#15:    c 2019     33
#16:    d 2015     NA
#17:    d 2016     NA
#18:    d 2017     NA
#19:    d 2018     NA
#20:    d 2019     NA 

一个可能的解决方案,使用tidyverse

library(data.table)
library(tidyverse)

k %>% 
  group_by(name) %>% 
  mutate(values = if_else(is.na(values), first(values), values)) %>% 
  ungroup() %>% as.data.table

#>     name year values
#>  1:    a 2015     19
#>  2:    a 2016     19
#>  3:    a 2017     18
#>  4:    a 2018     19
#>  5:    a 2019     19
#>  6:    b 2015     44
#>  7:    b 2016     44
#>  8:    b 2017     44
#>  9:    b 2018     44
#> 10:    b 2019     44
#> 11:    c 2015     33
#> 12:    c 2016     32
#> 13:    c 2017     33
#> 14:    c 2018     33
#> 15:    c 2019     33
#> 16:    d 2015     NA
#> 17:    d 2016     NA
#> 18:    d 2017     NA
#> 19:    d 2018     NA
#> 20:    d 2019     NA

或仅使用 data.table:

library(data.table)

k[, values := fifelse(is.na(values), first(values), values), by = name][]

#>     name year values
#>  1:    a 2015     19
#>  2:    a 2016     19
#>  3:    a 2017     18
#>  4:    a 2018     19
#>  5:    a 2019     19
#>  6:    b 2015     44
#>  7:    b 2016     44
#>  8:    b 2017     44
#>  9:    b 2018     44
#> 10:    b 2019     44
#> 11:    c 2015     33
#> 12:    c 2016     32
#> 13:    c 2017     33
#> 14:    c 2018     33
#> 15:    c 2019     33
#> 16:    d 2015     NA
#> 17:    d 2016     NA
#> 18:    d 2017     NA
#> 19:    d 2018     NA
#> 20:    d 2019     NA

nafill:

k[,values:=nafill(values,fill=first(values,na.rm=T)),by=name][]

      name  year values
    <char> <int>  <num>
 1:      a  2015     19
 2:      a  2016     19
 3:      a  2017     18
 4:      a  2018     18
 5:      a  2019     19
 6:      b  2015     44
 7:      b  2016     44
 8:      b  2017     44
k %>% 
  group_by(name) %>% 
  mutate(values = replace_na(values, first(values))) %>% 
  ungroup



  name   year values
   <chr> <int>  <dbl>
 1 a      2015     19
 2 a      2016     19
 3 a      2017     18
 4 a      2018     19
 5 a      2019     19
 6 b      2015     44
 7 b      2016     44
 8 b      2017     44
 9 b      2018     44
10 b      2019     44
11 c      2015     33
12 c      2016     32
13 c      2017     33
14 c      2018     33
15 c      2019     33
16 d      2015     NA
17 d      2016     NA
18 d      2017     NA
19 d      2018     NA
20 d      2019     NA