在 R 中填充数据框中的行

Fill up rows in a dataframe in R

我有一个数据框,其中包含客户从 5 月 (202105) 到 2021 年 10 月 (202110) 的购买信息。客户不是每个月都购买,但我想用零填充该信息。我的数据如下所示:

# Example input: one row per (client, month) in which a purchase happened.
# Months are "YYYYMM" strings; first_buy is constant within each client.
rows_per_client <- c(1, 2, 2, 1, 3)  # aaa, bbb, ccc, ddd, eee

soex <- data.frame(
  client_id = rep(c("aaa", "bbb", "ccc", "ddd", "eee"),
                  times = rows_per_client),
  v1 = c("xxx",
         "xxx", "xxx",
         "yyy", "yyy",
         "xxx",
         "yyy", "xxx", "yyy"),
  first_buy = rep(c("202105", "202107", "202106", "202110", "202107"),
                  times = rows_per_client),
  sales_date = c("202105",
                 "202107", "202109",
                 "202106", "202110",
                 "202110",
                 "202107", "202108", "202109"),
  qt_prod1 = c(10, 60, 30, 2, 45, 11, 14, 167, 145),
  qt_prod2 = c(12, 324, 433, 221, 312, 312, 312, 123, 121)
)
  client_id  v1 first_buy sales_date qt_prod1 qt_prod2
1       aaa xxx    202105     202105       10       12
2       bbb xxx    202107     202107       60      324
3       bbb xxx    202107     202109       30      433
4       ccc yyy    202106     202106        2      221
5       ccc yyy    202106     202110       45      312
6       ddd xxx    202110     202110       11      312
7       eee yyy    202107     202107       14      312
8       eee xxx    202107     202108      167      123
9       eee yyy    202107     202109      145      121

我需要的是一个如下所示的数据框:

# Expected result: for every client, one row per month from first_buy through
# 202110. Months without a purchase get qt_prod1 = qt_prod2 = 0, and v1 is
# carried forward from the most recent earlier month.
#
# Client "bbb" bought 30 / 433 in 202109 (see the sales_date column of soex),
# so that purchase sits on the 202109 row and 202110 is the zero-filled row.
ideal <- data.frame(
  client_id = c("aaa", "aaa", "aaa", "aaa", "aaa", "aaa",
                "bbb", "bbb", "bbb", "bbb",
                "ccc", "ccc", "ccc", "ccc", "ccc",
                "ddd",
                "eee", "eee", "eee", "eee"),
  v1 = c("xxx", "xxx", "xxx", "xxx", "xxx", "xxx",
         "xxx", "xxx", "xxx", "xxx",
         "yyy", "yyy", "yyy", "yyy", "yyy",
         "xxx",
         "yyy", "xxx", "yyy", "yyy"),
  first_buy = c("202105", "202105", "202105", "202105", "202105", "202105",
                "202107", "202107", "202107", "202107",
                "202106", "202106", "202106", "202106", "202106",
                "202110",
                "202107", "202107", "202107", "202107"),
  sales_date = c("202105", "202106", "202107", "202108", "202109", "202110",
                 "202107", "202108", "202109", "202110",
                 "202106", "202107", "202108", "202109", "202110",
                 "202110",
                 "202107", "202108", "202109", "202110"),
  qt_prod1 = c(10, 0, 0, 0, 0, 0,
               60, 0, 30, 0,
               2, 0, 0, 0, 45,
               11,
               14, 167, 145, 0),
  qt_prod2 = c(12, 0, 0, 0, 0, 0,
               324, 0, 433, 0,
               221, 0, 0, 0, 312,
               312,
               312, 123, 121, 0)
)
#    client_id  v1 first_buy sales_date qt_prod1 qt_prod2
# 1        aaa xxx    202105     202105       10       12
# 2        aaa xxx    202105     202106        0        0
# 3        aaa xxx    202105     202107        0        0
# 4        aaa xxx    202105     202108        0        0
# 5        aaa xxx    202105     202109        0        0
# 6        aaa xxx    202105     202110        0        0
# 7        bbb xxx    202107     202107       60      324
# 8        bbb xxx    202107     202108        0        0
# 9        bbb xxx    202107     202109       30      433
# 10       bbb xxx    202107     202110        0        0
# 11       ccc yyy    202106     202106        2      221
# 12       ccc yyy    202106     202107        0        0
# 13       ccc yyy    202106     202108        0        0
# 14       ccc yyy    202106     202109        0        0
# 15       ccc yyy    202106     202110       45      312
# 16       ddd xxx    202110     202110       11      312
# 17       eee yyy    202107     202107       14      312
# 18       eee xxx    202107     202108      167      123
# 19       eee yyy    202107     202109      145      121
# 20       eee yyy    202107     202110        0        0

当我必须考虑 first_buy 变量时,问题就来了。正如您在“bbb”客户的数据中看到的那样,我不希望数据从 2021 年 5 月开始……我希望它从该客户的 first_buy 月份开始,一直到 202110。

我的另一个问题是如何填充 v1 变量:新增的行应沿用该客户上一个 sales_date 的 v1 值。例如“eee”客户,她在 202110 没有购买,但该行的 v1 沿用了 202109 的值。

谢谢,

一个tidyverse选项-

  • 将 sales_date 转换为日期对象
  • 为每个 client_id 创建一个每月的日期序列
  • 用 fill 填充 first_buy 和 v1 列。
library(dplyr)
library(tidyr)
library(lubridate)

# Expand every client's history to one row per month, from that client's
# earliest sales_date through October 2021, zero-filling the quantities and
# carrying first_buy / v1 forward into the newly created rows.
soex %>%
  # "YYYYMM" -> Date on the first of the month, so seq() can step by month.
  mutate(sales_date = ymd(paste0(sales_date, "01"))) %>%
  group_by(client_id) %>%
  complete(
    sales_date = seq(
      from = min(sales_date),
      to = ymd("20211001"),
      by = "month"
    ),
    fill = list(qt_prod1 = 0, qt_prod2 = 0)
  ) %>%
  # Rows added by complete() have NA here; take the previous row's value
  # within the same client.
  fill(v1, first_buy, .direction = "down") %>%
  # Back to the original "YYYYMM" string representation.
  mutate(sales_date = format(sales_date, "%Y%m")) %>%
  ungroup()

#  client_id sales_date v1    first_buy qt_prod1 qt_prod2
#   <chr>     <chr>      <chr> <chr>        <dbl>    <dbl>
# 1 aaa       202105     xxx   202105          10       12
# 2 aaa       202106     xxx   202105           0        0
# 3 aaa       202107     xxx   202105           0        0
# 4 aaa       202108     xxx   202105           0        0
# 5 aaa       202109     xxx   202105           0        0
# 6 aaa       202110     xxx   202105           0        0
# 7 bbb       202107     xxx   202107          60      324
# 8 bbb       202108     xxx   202107           0        0
# 9 bbb       202109     xxx   202107          30      433
#10 bbb       202110     xxx   202107           0        0
#11 ccc       202106     yyy   202106           2      221
#12 ccc       202107     yyy   202106           0        0
#13 ccc       202108     yyy   202106           0        0
#14 ccc       202109     yyy   202106           0        0
#15 ccc       202110     yyy   202106          45      312
#16 ddd       202110     xxx   202110          11      312
#17 eee       202107     yyy   202107          14      312
#18 eee       202108     xxx   202107         167      123
#19 eee       202109     yyy   202107         145      121
#20 eee       202110     yyy   202107           0        0