在 R 中有条件地插入新行,并为这些新行赋值
insert new rows conditionally and add values to the new rows in R
我有一个数据框和一个只有两个数字的向量:201 和 200。
# Example data: three numeric vectors of 12 elements each that become the
# columns of `df1`.
type <- c(222, 222, 199, 251, 106, 88, 88, 88, 88, 61, 199, 251)
latency <- c(4167, 4433, 5109, 5635, 6618, 6980, 7246, 7512, 7778, 8045, 8311, 8577)
urevent <- c(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12)
# Values to insert, in order, one beneath each row where `type == 199`.
acc <- c(201, 200)
# Assign with `<-` (not `=`), consistent with the lines above.
df1 <- data.frame(type, latency, urevent)
我需要在 `df1$type == 199` 的每一行下方插入一个新行,并把 `acc` 中的值依次填入新行的 `type` 列。`type` 中 199 出现的次数与 `acc` 中元素的个数相等。
新行的 `latency` 应取自其上一行的值并加上 50。
新行的 `urevent` 应为 0。
这是所需的输出:
----------------------------
type | latency | urevent |
----------------------------
222 | 4167 | 1 |
222 | 4433 | 2 |
199 | 5109 | 3 |
201 | 5159 | 0 |
251 | 5635 | 4 |
106 | 6618 | 5 |
88 | 6980 | 6 |
88 | 7246 | 7 |
88 | 7512 | 8 |
88 | 7778 | 9 |
61 | 8045 | 10 |
199 | 8311 | 11 |
200 | 8361 | 0 |
251 | 8577 | 12 |
---------------------------
我们可以使用 `tibble` 包中的 `add_row`:
library(tibble)
library(dplyr)
library(tidyr)
# Insert one new row directly beneath every row where `type == 199`.
# The k-th new row gets `type = acc[k]`, the latency of the 199 row above it
# plus 50, and `urevent = 0`. Positions are computed from the data instead of
# being hard-coded, and the new values are set directly so that existing rows
# (including any whose `type` happens to equal a value in `acc`, or that
# contain NA) are left untouched.
rows_199 <- which(df1$type == 199)
# `acc` must supply exactly one value per 199 row.
stopifnot(length(rows_199) == length(acc))
# Walk the positions bottom-up so that an insertion never shifts the index
# of a 199 row that is still to be processed.
Reduce(
  function(d, k) {
    add_row(
      d,
      type = acc[k],
      latency = df1$latency[rows_199[k]] + 50,
      urevent = 0,
      .after = rows_199[k]
    )
  },
  rev(seq_along(rows_199)),
  df1
)
# type latency urevent
#1 222 4167 1
#2 222 4433 2
#3 199 5109 3
#4 201 5159 0
#5 251 5635 4
#6 106 6618 5
#7 88 6980 6
#8 88 7246 7
#9 88 7512 8
#10 88 7778 9
#11 61 8045 10
#12 199 8311 11
#13 200 8361 0
#14 251 8577 12
另一种选择是 `group_split`,它根据值 199 在 `type` 列中的出现次数创建一个分组列,并据此拆分数据:
library(purrr)
# Split `df1` into a list of pieces; a new piece starts at every row where
# `type == 199`. `.keep = FALSE` drops the helper column `grp` from the
# pieces (the older `keep =` spelling is deprecated in current dplyr).
lst1 <- df1 %>%
  group_split(grp = cumsum(type == 199), .keep = FALSE)
# TRUE for the pieces whose first row is a 199 row; the leading piece holds
# the rows before the first 199 and is skipped.
i1 <- map_lgl(lst1, ~ .x$type[1] == 199)
# For each selected piece, add the new row right after its first row:
# `type` comes from `acc`, `latency` is the 199 row's latency plus 50, and
# `urevent` is 0. Setting the values directly leaves all other rows untouched.
lst1[i1] <- map2(
  lst1[i1], acc,
  ~ add_row(
    .x,
    type = .y,
    latency = .x$latency[1] + 50,
    urevent = 0,
    .after = 1
  )
)
# Stitch the pieces back together in their original order and print.
df2 <- bind_rows(lst1)
df2
# A tibble: 14 x 3
# type latency urevent
# * <dbl> <dbl> <dbl>
# 1 222 4167 1
# 2 222 4433 2
# 3 199 5109 3
# 4 201 5159 0
# 5 251 5635 4
# 6 106 6618 5
# 7 88 6980 6
# 8 88 7246 7
# 9 88 7512 8
#10 88 7778 9
#11 61 8045 10
#12 199 8311 11
#13 200 8361 0
#14 251 8577 12
或者,另一种选择是使用 `uncount` 扩展行,然后根据重复的行号用 `replace` 相应地替换各列中的值:
# Duplicate every row where `type == 199`, then rewrite the duplicate:
# `type` comes from `acc`, `urevent` becomes 0, and `latency` is bumped by 50.
df1 %>%
  # Remember the original row number so duplicates can be recognised.
  mutate(rn = row_number()) %>%
  # Weight 2 for 199 rows, 1 for all others.
  uncount(1 + (type == 199)) %>%
  mutate(
    # The second copy of a row is the newly inserted one.
    is_new = duplicated(rn),
    type = replace(type, is_new, acc),
    urevent = replace(urevent, is_new, 0)
  ) %>%
  group_by(rn) %>%
  # Within each original row, every copy after the first gets +50.
  mutate(latency = latency + 50 * (row_number() > 1)) %>%
  ungroup() %>%
  select(-rn, -is_new)
# A tibble: 14 x 3
# type latency urevent
# <dbl> <dbl> <dbl>
# 1 222 4167 1
# 2 222 4433 2
# 3 199 5109 3
# 4 201 5159 0
# 5 251 5635 4
# 6 106 6618 5
# 7 88 6980 6
# 8 88 7246 7
# 9 88 7512 8
#10 88 7778 9
#11 61 8045 10
#12 199 8311 11
#13 200 8361 0
#14 251 8577 12
我有一个数据框和一个只有两个数字的向量:201 和 200。
# Example data: three numeric vectors of 12 elements each that become the
# columns of `df1`.
type <- c(222, 222, 199, 251, 106, 88, 88, 88, 88, 61, 199, 251)
latency <- c(4167, 4433, 5109, 5635, 6618, 6980, 7246, 7512, 7778, 8045, 8311, 8577)
urevent <- c(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12)
# Values to insert, in order, one beneath each row where `type == 199`.
acc <- c(201, 200)
# Assign with `<-` (not `=`), consistent with the lines above.
df1 <- data.frame(type, latency, urevent)
我需要在 `df1$type == 199` 的每一行下方插入一个新行,并把 `acc` 中的值依次填入新行的 `type` 列。`type` 中 199 出现的次数与 `acc` 中元素的个数相等。
新行的 `latency` 应取自其上一行的值并加上 50。
新行的 `urevent` 应为 0。
这是所需的输出:
----------------------------
type | latency | urevent |
----------------------------
222 | 4167 | 1 |
222 | 4433 | 2 |
199 | 5109 | 3 |
201 | 5159 | 0 |
251 | 5635 | 4 |
106 | 6618 | 5 |
88 | 6980 | 6 |
88 | 7246 | 7 |
88 | 7512 | 8 |
88 | 7778 | 9 |
61 | 8045 | 10 |
199 | 8311 | 11 |
200 | 8361 | 0 |
251 | 8577 | 12 |
---------------------------
我们可以使用 `tibble` 包中的 `add_row`:
library(tibble)
library(dplyr)
library(tidyr)
# Insert one new row directly beneath every row where `type == 199`.
# The k-th new row gets `type = acc[k]`, the latency of the 199 row above it
# plus 50, and `urevent = 0`. Positions are computed from the data instead of
# being hard-coded, and the new values are set directly so that existing rows
# (including any whose `type` happens to equal a value in `acc`, or that
# contain NA) are left untouched.
rows_199 <- which(df1$type == 199)
# `acc` must supply exactly one value per 199 row.
stopifnot(length(rows_199) == length(acc))
# Walk the positions bottom-up so that an insertion never shifts the index
# of a 199 row that is still to be processed.
Reduce(
  function(d, k) {
    add_row(
      d,
      type = acc[k],
      latency = df1$latency[rows_199[k]] + 50,
      urevent = 0,
      .after = rows_199[k]
    )
  },
  rev(seq_along(rows_199)),
  df1
)
# type latency urevent
#1 222 4167 1
#2 222 4433 2
#3 199 5109 3
#4 201 5159 0
#5 251 5635 4
#6 106 6618 5
#7 88 6980 6
#8 88 7246 7
#9 88 7512 8
#10 88 7778 9
#11 61 8045 10
#12 199 8311 11
#13 200 8361 0
#14 251 8577 12
另一种选择是 `group_split`,它根据值 199 在 `type` 列中的出现次数创建一个分组列,并据此拆分数据:
library(purrr)
# Split `df1` into a list of pieces; a new piece starts at every row where
# `type == 199`. `.keep = FALSE` drops the helper column `grp` from the
# pieces (the older `keep =` spelling is deprecated in current dplyr).
lst1 <- df1 %>%
  group_split(grp = cumsum(type == 199), .keep = FALSE)
# TRUE for the pieces whose first row is a 199 row; the leading piece holds
# the rows before the first 199 and is skipped.
i1 <- map_lgl(lst1, ~ .x$type[1] == 199)
# For each selected piece, add the new row right after its first row:
# `type` comes from `acc`, `latency` is the 199 row's latency plus 50, and
# `urevent` is 0. Setting the values directly leaves all other rows untouched.
lst1[i1] <- map2(
  lst1[i1], acc,
  ~ add_row(
    .x,
    type = .y,
    latency = .x$latency[1] + 50,
    urevent = 0,
    .after = 1
  )
)
# Stitch the pieces back together in their original order and print.
df2 <- bind_rows(lst1)
df2
# A tibble: 14 x 3
# type latency urevent
# * <dbl> <dbl> <dbl>
# 1 222 4167 1
# 2 222 4433 2
# 3 199 5109 3
# 4 201 5159 0
# 5 251 5635 4
# 6 106 6618 5
# 7 88 6980 6
# 8 88 7246 7
# 9 88 7512 8
#10 88 7778 9
#11 61 8045 10
#12 199 8311 11
#13 200 8361 0
#14 251 8577 12
或者,另一种选择是使用 `uncount` 扩展行,然后根据重复的行号用 `replace` 相应地替换各列中的值:
# Duplicate every row where `type == 199`, then rewrite the duplicate:
# `type` comes from `acc`, `urevent` becomes 0, and `latency` is bumped by 50.
df1 %>%
  # Remember the original row number so duplicates can be recognised.
  mutate(rn = row_number()) %>%
  # Weight 2 for 199 rows, 1 for all others.
  uncount(1 + (type == 199)) %>%
  mutate(
    # The second copy of a row is the newly inserted one.
    is_new = duplicated(rn),
    type = replace(type, is_new, acc),
    urevent = replace(urevent, is_new, 0)
  ) %>%
  group_by(rn) %>%
  # Within each original row, every copy after the first gets +50.
  mutate(latency = latency + 50 * (row_number() > 1)) %>%
  ungroup() %>%
  select(-rn, -is_new)
# A tibble: 14 x 3
# type latency urevent
# <dbl> <dbl> <dbl>
# 1 222 4167 1
# 2 222 4433 2
# 3 199 5109 3
# 4 201 5159 0
# 5 251 5635 4
# 6 106 6618 5
# 7 88 6980 6
# 8 88 7246 7
# 9 88 7512 8
#10 88 7778 9
#11 61 8045 10
#12 199 8311 11
#13 200 8361 0
#14 251 8577 12