如何在 R 中使用 pivot_longer 对数据集的多列进行长格式转换
How do we transform a dataset in R using pivot_longer with multiple columns
我已经查看了 SO 上的很多相关帖子,但还没有找到能满足我需求的解决方法。
具有初始数据集:
# Sample input in wide format: one row per person holding the initial event
# plus the optional prior/after visit details. Every column is character,
# and "" marks a value that is not present.
df <- tibble(
  person             = c("a", "b", "c", "d"),
  initial_event_date = c("01-01-2020", "01-17-2020", "02-11-2020", "04-01-2020"),
  type_initial       = c("repair", "routine", "consult", "repair"),
  visit_prior        = c("N", "Y", "Y", "N"),
  day_cnt_prior      = c("", "-4", "-2", ""),
  prior_visit_type   = c("", "repair", "routine", ""),
  visit_after        = c("Y", "N", "Y", "Y"),
  day_cnt_after      = c("15", "", "22", "12"),
  visit_after_type   = c("follow-up", "", "follow-up", "correction")
)
我想得到类似下面的数据框,因为我打算使用 timevis 包在时间轴图上可视化这些数据。
# Desired long-format result: one row per event, i.e. the initial event of
# each person followed by whichever prior/after visits that person has.
output <- tribble(
  ~person, ~event_date,  ~instance,     ~type,        ~day_cnt,
  "a",     "01-01-2020", "initial",     "repair",     "0",
  "a",     "",           "visit_after", "follow-up",  "15",
  "b",     "01-17-2020", "initial",     "routine",    "0",
  "b",     "",           "visit_prior", "repair",     "-4",
  "c",     "02-11-2020", "initial",     "consult",    "0",
  "c",     "",           "visit_prior", "routine",    "-2",
  "c",     "",           "visit_after", "follow-up",  "22",
  "d",     "04-01-2020", "initial",     "repair",     "0",
  "d",     "",           "visit_after", "correction", "12"
)
我尝试了 pivot_longer 的多种写法,例如:
# Attempt from the question: stack only the three "type" columns.
# The former column names go to `instance`; the stacked values (the visit
# types, not the day counts) go to a column named `day_cnt`.
pivot_longer(
  df,
  cols = c(type_initial, prior_visit_type, visit_after_type),
  names_to = "instance",
  values_to = "day_cnt"
)
任何建议或其他 SO 帖子可能会指出我正在寻找的解决方案?
下面的代码可以作为一个起点:
library(tidyr)

# Starting point: split each selected column name on "_".
# The first piece names the output value column (the ".value" sentinel) and
# the second piece is stored in `instance`.
pivot_longer(
  data = df,
  cols = c(
    initial_event_date,
    type_initial,
    prior_visit_type,
    visit_after_type,
    day_cnt_after
  ),
  names_to = c(".value", "instance"),
  names_sep = "_"
)
示例数据:
# Sample data as a dput()-style literal: a list of nine character columns
# tagged with the tibble classes and four compact row names.
df <- structure(
  list(
    person = c("a", "b", "c", "d"),
    initial_event_date = c(
      "01-01-2020", "01-17-2020", "02-11-2020", "04-01-2020"
    ),
    type_initial = c("repair", "routine", "consult", "repair"),
    visit_prior = c("N", "Y", "Y", "N"),
    day_cnt_prior = c("", "-4", "-2", ""),
    prior_visit_type = c("", "repair", "routine", ""),
    visit_after = c("Y", "N", "Y", "Y"),
    day_cnt_after = c("15", "", "22", "12"),
    visit_after_type = c("follow-up", "", "follow-up", "correction")
  ),
  row.names = c(NA, -4L),
  class = c("tbl_df", "tbl", "data.frame")
)
输出:
person visit_prior day_cnt_prior visit_after instance initial type prior visit day
<chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr>
1 a N "" Y event 01-01-2020 NA NA NA NA
2 a N "" Y initial NA repair NA NA NA
3 a N "" Y visit NA NA "" NA NA
4 a N "" Y after NA NA NA "follow-up" NA
5 a N "" Y cnt NA NA NA NA "15"
6 b Y "-4" N event 01-17-2020 NA NA NA NA
7 b Y "-4" N initial NA routine NA NA NA
8 b Y "-4" N visit NA NA "repair" NA NA
9 b Y "-4" N after NA NA NA "" NA
10 b Y "-4" N cnt NA NA NA NA ""
11 c Y "-2" Y event 02-11-2020 NA NA NA NA
12 c Y "-2" Y initial NA consult NA NA NA
13 c Y "-2" Y visit NA NA "routine" NA NA
14 c Y "-2" Y after NA NA NA "follow-up" NA
15 c Y "-2" Y cnt NA NA NA NA "22"
16 d N "" Y event 04-01-2020 NA NA NA NA
17 d N "" Y initial NA repair NA NA NA
18 d N "" Y visit NA NA "" NA NA
19 d N "" Y after NA NA NA "correction" NA
20 d N "" Y cnt NA NA NA NA "12"
可能不是最优雅的解决方案,但我能够使用以下步骤解决我自己的问题:
# Build the long table by hand: one row for every initial event plus one row
# for each prior/after visit whose flag column is "Y".
# Each piece is renamed to the shared schema
# (person, initial_event_date, visit_type, visit_reason, day_cnt) before
# binding, so no coalesce()/replace_na() patch-up is needed afterwards.
library(dplyr)

# Initial events: every person has one; its day offset is 0.
initial_visits <- df %>%
  select(person, initial_event_date, visit_reason = type_initial) %>%
  mutate(visit_type = "initial", day_cnt = 0L)

# Prior visits: keep only the people flagged "Y".
prior_visits <- df %>%
  filter(visit_prior == "Y") %>%
  select(
    person, initial_event_date,
    visit_reason = prior_visit_type,
    day_cnt = day_cnt_prior
  ) %>%
  mutate(visit_type = "visit_prior", day_cnt = as.integer(day_cnt))

# Follow-up visits: keep only the people flagged "Y".
after_visits <- df %>%
  filter(visit_after == "Y") %>%
  select(
    person, initial_event_date,
    visit_reason = visit_after_type,
    day_cnt = day_cnt_after
  ) %>%
  mutate(visit_type = "visit_after", day_cnt = as.integer(day_cnt))

# arrange() keeps tied rows in their existing order, so within a person the
# rows stay in the order initial, prior, after.
bind_rows(initial_visits, prior_visits, after_visits) %>%
  arrange(person) %>%
  select(person, initial_event_date, visit_type, visit_reason, day_cnt)
我已经查看了 SO 上的很多相关帖子,但还没有找到能满足我需求的解决方法。
具有初始数据集:
# Sample input in wide format: one row per person holding the initial event
# plus the optional prior/after visit details. Every column is character,
# and "" marks a value that is not present.
df <- tibble(
  person             = c("a", "b", "c", "d"),
  initial_event_date = c("01-01-2020", "01-17-2020", "02-11-2020", "04-01-2020"),
  type_initial       = c("repair", "routine", "consult", "repair"),
  visit_prior        = c("N", "Y", "Y", "N"),
  day_cnt_prior      = c("", "-4", "-2", ""),
  prior_visit_type   = c("", "repair", "routine", ""),
  visit_after        = c("Y", "N", "Y", "Y"),
  day_cnt_after      = c("15", "", "22", "12"),
  visit_after_type   = c("follow-up", "", "follow-up", "correction")
)
我想得到类似下面的数据框,因为我打算使用 timevis 包在时间轴图上可视化这些数据。
# Desired long-format result: one row per event, i.e. the initial event of
# each person followed by whichever prior/after visits that person has.
output <- tribble(
  ~person, ~event_date,  ~instance,     ~type,        ~day_cnt,
  "a",     "01-01-2020", "initial",     "repair",     "0",
  "a",     "",           "visit_after", "follow-up",  "15",
  "b",     "01-17-2020", "initial",     "routine",    "0",
  "b",     "",           "visit_prior", "repair",     "-4",
  "c",     "02-11-2020", "initial",     "consult",    "0",
  "c",     "",           "visit_prior", "routine",    "-2",
  "c",     "",           "visit_after", "follow-up",  "22",
  "d",     "04-01-2020", "initial",     "repair",     "0",
  "d",     "",           "visit_after", "correction", "12"
)
我尝试了 pivot_longer 的多种写法,例如:
# Attempt from the question: stack only the three "type" columns.
# The former column names go to `instance`; the stacked values (the visit
# types, not the day counts) go to a column named `day_cnt`.
pivot_longer(
  df,
  cols = c(type_initial, prior_visit_type, visit_after_type),
  names_to = "instance",
  values_to = "day_cnt"
)
任何建议或其他 SO 帖子可能会指出我正在寻找的解决方案?
下面的代码可以作为一个起点:
library(tidyr)

# Starting point: split each selected column name on "_".
# The first piece names the output value column (the ".value" sentinel) and
# the second piece is stored in `instance`.
pivot_longer(
  data = df,
  cols = c(
    initial_event_date,
    type_initial,
    prior_visit_type,
    visit_after_type,
    day_cnt_after
  ),
  names_to = c(".value", "instance"),
  names_sep = "_"
)
示例数据:
# Sample data as a dput()-style literal: a list of nine character columns
# tagged with the tibble classes and four compact row names.
df <- structure(
  list(
    person = c("a", "b", "c", "d"),
    initial_event_date = c(
      "01-01-2020", "01-17-2020", "02-11-2020", "04-01-2020"
    ),
    type_initial = c("repair", "routine", "consult", "repair"),
    visit_prior = c("N", "Y", "Y", "N"),
    day_cnt_prior = c("", "-4", "-2", ""),
    prior_visit_type = c("", "repair", "routine", ""),
    visit_after = c("Y", "N", "Y", "Y"),
    day_cnt_after = c("15", "", "22", "12"),
    visit_after_type = c("follow-up", "", "follow-up", "correction")
  ),
  row.names = c(NA, -4L),
  class = c("tbl_df", "tbl", "data.frame")
)
输出:
person visit_prior day_cnt_prior visit_after instance initial type prior visit day
<chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr>
1 a N "" Y event 01-01-2020 NA NA NA NA
2 a N "" Y initial NA repair NA NA NA
3 a N "" Y visit NA NA "" NA NA
4 a N "" Y after NA NA NA "follow-up" NA
5 a N "" Y cnt NA NA NA NA "15"
6 b Y "-4" N event 01-17-2020 NA NA NA NA
7 b Y "-4" N initial NA routine NA NA NA
8 b Y "-4" N visit NA NA "repair" NA NA
9 b Y "-4" N after NA NA NA "" NA
10 b Y "-4" N cnt NA NA NA NA ""
11 c Y "-2" Y event 02-11-2020 NA NA NA NA
12 c Y "-2" Y initial NA consult NA NA NA
13 c Y "-2" Y visit NA NA "routine" NA NA
14 c Y "-2" Y after NA NA NA "follow-up" NA
15 c Y "-2" Y cnt NA NA NA NA "22"
16 d N "" Y event 04-01-2020 NA NA NA NA
17 d N "" Y initial NA repair NA NA NA
18 d N "" Y visit NA NA "" NA NA
19 d N "" Y after NA NA NA "correction" NA
20 d N "" Y cnt NA NA NA NA "12"
可能不是最优雅的解决方案,但我能够使用以下步骤解决我自己的问题:
# Build the long table by hand: one row for every initial event plus one row
# for each prior/after visit whose flag column is "Y".
# Each piece is renamed to the shared schema
# (person, initial_event_date, visit_type, visit_reason, day_cnt) before
# binding, so no coalesce()/replace_na() patch-up is needed afterwards.
library(dplyr)

# Initial events: every person has one; its day offset is 0.
initial_visits <- df %>%
  select(person, initial_event_date, visit_reason = type_initial) %>%
  mutate(visit_type = "initial", day_cnt = 0L)

# Prior visits: keep only the people flagged "Y".
prior_visits <- df %>%
  filter(visit_prior == "Y") %>%
  select(
    person, initial_event_date,
    visit_reason = prior_visit_type,
    day_cnt = day_cnt_prior
  ) %>%
  mutate(visit_type = "visit_prior", day_cnt = as.integer(day_cnt))

# Follow-up visits: keep only the people flagged "Y".
after_visits <- df %>%
  filter(visit_after == "Y") %>%
  select(
    person, initial_event_date,
    visit_reason = visit_after_type,
    day_cnt = day_cnt_after
  ) %>%
  mutate(visit_type = "visit_after", day_cnt = as.integer(day_cnt))

# arrange() keeps tied rows in their existing order, so within a person the
# rows stay in the order initial, prior, after.
bind_rows(initial_visits, prior_visits, after_visits) %>%
  arrange(person) %>%
  select(person, initial_event_date, visit_type, visit_reason, day_cnt)