有条件地插入新行并将值添加到 R 中的新行

insert new rows conditionally and add values to the new rows in R

我有一个数据框和一个只有两个数字的向量:201 和 200。

# Sample data: three parallel columns of equal length (12 rows).
# Rows where `type == 199` are the ones that need a new row inserted below.
type <- c(222, 222, 199, 251, 106, 88, 88, 88, 88, 61, 199, 251)
latency <- c(4167, 4433, 5109, 5635, 6618, 6980, 7246, 7512, 7778, 8045, 8311, 8577)
urevent <- c(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12)

# Values to place in `type` of the inserted rows, in order of appearance;
# there is one value per occurrence of 199 in `type`.
acc <- c(201, 200)

# Use `<-` (not `=`) for top-level assignment, consistent with the lines above.
df1 <- data.frame(type, latency, urevent)

我需要在每个 df1$type == 199 的行下方插入一个新行,并依次把 acc 中的值填入新行的 type 列。type 中 199 出现的次数与 acc 中数值的个数相等。

新行的 latency 应取其上一行的值再加上 50;新行的 urevent 应为 0。

这是所需的输出:

----------------------------
type | latency | urevent |
----------------------------
222  | 4167    | 1       |
222  | 4433    | 2       |
199  | 5109    | 3       |
201  | 5159    | 0       |
251  | 5635    | 4       |
106  | 6618    | 5       |
88   | 6980    | 6       |
88   | 7246    | 7       |
88   | 7512    | 8       |
88   | 7778    | 9       |
61   | 8045    | 10      |
199  | 8311    | 11      |
200  | 8361    | 0       |
251  | 8577    | 12      |
---------------------------



我们可以使用 `tibble` 包中的 `add_row`:

library(tibble)
library(dplyr)
library(tidyr)

# Rows of df1 that must be followed by a new row (one per value in `acc`).
hit_rows <- which(df1$type == 199)
stopifnot(length(hit_rows) == length(acc))

# Every earlier insertion pushes the later target rows down by one, so the
# i-th insertion goes after original row `hit_rows[i]` shifted by `i - 1`.
insert_after <- hit_rows + seq_along(hit_rows) - 1L

# Insert the rows one at a time; only `type` is set, so `latency` and
# `urevent` of each new row start out as NA.
Reduce(
    function(d, i) add_row(d, type = acc[i], .after = insert_after[i]),
    seq_along(acc),
    df1
) %>%
    # Copy latency down from the row above into the new (NA) rows ...
    fill(latency) %>%
    # ... then add 50 on the inserted rows and set their urevent to 0.
    mutate(
        latency = case_when(type %in% acc ~ latency + 50,
                            TRUE ~ latency),
        urevent = replace_na(urevent, 0)
    )
#   type latency urevent
#1   222    4167       1
#2   222    4433       2
#3   199    5109       3
#4   201    5159       0
#5   251    5635       4
#6   106    6618       5
#7    88    6980       6
#8    88    7246       7
#9    88    7512       8
#10   88    7778       9
#11   61    8045      10
#12  199    8311      11
#13  200    8361       0
#14  251    8577      12

另一种选择是使用 `group_split`:先根据值 199 在 `type` 列中的出现次数创建一个分组列,再按该分组列把数据拆分成列表。

library(purrr)

# Split df1 into chunks, starting a new chunk at every row where type == 199
# (cumsum of the match increases by one at each such row).
# `.keep = FALSE` drops the helper column `grp` from the pieces; the older
# spelling `keep = FALSE` is deprecated and triggers a warning in dplyr >= 1.0.0.
lst1 <- df1 %>%
    group_split(grp = cumsum(type == 199), .keep = FALSE)

# Chunks whose first row is a 199 row; a leading chunk before the first 199
# is left untouched.
i1 <- map_lgl(lst1, ~ .x$type[1] == 199)

# For each such chunk, insert a row right after its first row carrying the
# matching value from `acc` and urevent = 0, fill latency from the row above,
# then add 50 to the latency of the inserted row.
lst1[i1] <- map2(lst1[i1], acc, ~
    .x %>%
        add_row(tibble(type = .y, urevent = 0), .after = 1) %>%
        fill(latency) %>%
        mutate(latency = case_when(type %in% acc ~ latency + 50,
                                   TRUE ~ latency)))

# Stitch the chunks back together in their original order.
df2 <- bind_rows(lst1)
df2
# A tibble: 14 x 3
#    type latency urevent
# * <dbl>   <dbl>   <dbl>
# 1   222    4167       1
# 2   222    4433       2
# 3   199    5109       3
# 4   201    5159       0
# 5   251    5635       4
# 6   106    6618       5
# 7    88    6980       6
# 8    88    7246       7
# 9    88    7512       8
#10    88    7778       9
#11    61    8045      10
#12   199    8311      11
#13   200    8361       0
#14   251    8577      12

或者,另一种选择是先用 `uncount` 扩展行,然后根据重复的行号用 `replace` 相应地替换各列中的值。

df1 %>%
    # Tag every original row with its position so copies can be recognised.
    mutate(rn = row_number()) %>%
    # Rows with type == 199 get weight 2 (duplicated once), all others weight 1.
    uncount(weights = 1 + (type == 199)) %>%
    mutate(
        # TRUE only for the second copy of a duplicated row, i.e. the new row.
        inserted = duplicated(rn),
        type = replace(type, inserted, acc),
        urevent = ifelse(inserted, 0, urevent)
    ) %>%
    group_by(rn) %>%
    # Within each original row's group, the new row takes the first row's
    # latency plus 50; all other rows keep their latency.
    mutate(latency = ifelse(inserted, first(latency) + 50, latency)) %>%
    ungroup() %>%
    select(-rn, -inserted)
# A tibble: 14 x 3
#    type latency urevent
#   <dbl>   <dbl>   <dbl>
# 1   222    4167       1
# 2   222    4433       2
# 3   199    5109       3
# 4   201    5159       0
# 5   251    5635       4
# 6   106    6618       5
# 7    88    6980       6
# 8    88    7246       7
# 9    88    7512       8
#10    88    7778       9
#11    61    8045      10
#12   199    8311      11
#13   200    8361       0
#14   251    8577      12