R:如果有连续的列相等,如何只保留一个并分配一个新的列名
R: If there are consecutive columns that are equal, how to keep just one and assign a new column name
我有一个客户数据集,他们在哪些商店购物,他们在每家商店购买了什么,以及购买日期。
# Sample purchase data: customer name, day number, store and item.
# Values are laid out one day per line; the first six rows (day 1) all carry
# the string "None" for both Store and Item.
Shop_list <- data.frame(
  Names = c(
    "Adam", "Eve", "Lucy", "Ricky", "Gomez", "Morticia",
    "Adam", "Eve", "Lucy", "Ricky",
    "Adam", "Eve", "Ricky", "Gomez",
    rep(c("Adam", "Eve", "Lucy"), times = 3)
  ),
  Day = rep(c(1, 2, 3, 4, 5, 6), times = c(6, 4, 4, 3, 3, 3)),
  Store = c(
    rep("None", 6),
    "Lowes", "Home Depot", "Lowes", "Home Depot",
    "Lowes", "Home Depot", "Home Depot", "Lowes",
    "None", "Home Depot", "None",
    "None", "None", "Home Depot",
    "Home Depot", "None", "None"
  ),
  Item = c(
    rep("None", 6),
    "Wood", "Soil", "Nails", "Pots",
    "Nails", "Pots", "Soil", "Wood",
    "None", "Seeds", "None",
    "None", "None", "Seeds",
    "Seeds", "None", "None"
  ),
  stringsAsFactors = FALSE
)
我写了一个总结这些数据的函数,
library(dplyr)
library(flextable)
#' Summarise shoppers per store/item for each day as a wide table
#'
#' For every day, counts the distinct customers recorded for each
#' Store/Item combination and formats the count as "n (pct%)", where the
#' percentage is relative to the number of distinct customers seen that day.
#' Each day becomes a column named "Day <d> (N=<customers that day>)".
#'
#' @param data A data frame with columns `Names`, `Day`, `Store` and `Item`.
#' @return A tibble with `Store`, `Item` and one character column per day,
#'   sorted by `Store` then `Item`. Combinations absent on a day are "", and
#'   a repeated `Store` value is blanked after its first row.
Shop_fcn <- function(data) {
  data %>%
    group_by(Day) %>%
    mutate(N_nam = n_distinct(Names)) %>%
    ungroup() %>%
    # Collapse repeated rows so each customer counts once per day/store/item.
    distinct(Day, N_nam, Names, Store, Item) %>%
    count(Day, N_nam, Store, Item, name = "n_nam") %>%
    mutate(
      pct = round(n_nam / N_nam * 100, digits = 1),
      txt = paste0(n_nam, " (", pct, "%)"),
      Day_n = paste0("Day ", Day, " (N=", N_nam, ")")
    ) %>%
    select(Day_n, Store, Item, txt) %>%
    # pivot_wider() lives in tidyr, which the library() calls visible in this
    # script do not attach, so the call is namespaced. values_fill replaces
    # the separate NA -> "" pass over the day columns.
    tidyr::pivot_wider(
      names_from = Day_n,
      values_from = txt,
      values_fill = ""
    ) %>%
    arrange(Store, Item) %>%
    # Rows are sorted by Store, so any repeat of a store name is blanked.
    mutate(Store = if_else(duplicated(Store), "", Store))
}
# Stack Shop_list on top of itself, summarise it, and show the result as a
# flextable.
Shop_day <- Shop_fcn(bind_rows(Shop_list, Shop_list))
flextable(Shop_day)
我得到以下输出。
第 2 天和第 3 天的列是相等的,第 4、5、6 天的列也是如此。我希望把内容相同的相邻列合并,并让合并后的列标题显示为 Day 2 - 3 (N=4) 和 Day 4 - 6 (N=3)。
到目前为止,我已尝试删除重复的列
# Keep only the first of any columns whose contents are identical.
Shop_nodup <- Shop_day[, !duplicated(as.list(Shop_day)), drop = FALSE]
flextable(Shop_nodup)
这给了我
重复的列消失了,但我想不出办法在列标题中标明该列所涵盖的天数范围(即 Day 2 - 3 (N=4) 和 Day 4 - 6 (N=3))。
如果我们需要更改标题,请进行如下更改
library(stringr)
#' Summarise shoppers per store/item, merging adjacent identical day columns
#'
#' For every day, counts the distinct customers recorded for each
#' Store/Item combination and formats the count as "n (pct%)", where the
#' percentage is relative to the number of distinct customers seen that day.
#' Runs of adjacent days (in sorted `Day` order) whose entire columns are
#' identical are collapsed into one column headed
#' "Day <first> - <last> (N=<customers>)"; other days keep the header
#' "Day <d> (N=<customers>)".
#'
#' Days are merged only when the whole column matches and the days sit next
#' to each other, so a header never spans a day whose contents differ.
#'
#' @param data A data frame with columns `Names`, `Day`, `Store` and `Item`.
#' @return A tibble with `Store`, `Item` and one character column per day or
#'   merged day range, in chronological order. Rows are sorted by `Store`
#'   then `Item`; combinations absent from a column are "", and a repeated
#'   `Store` value is blanked after its first row.
Shop_fcn <- function(data) {
  # One row per Day/Store/Item holding the formatted "n (pct%)" cell text.
  cells <- data %>%
    group_by(Day) %>%
    mutate(N_nam = n_distinct(Names)) %>%
    ungroup() %>%
    # Collapse repeated rows so each customer counts once per day/store/item.
    distinct(Day, N_nam, Names, Store, Item) %>%
    count(Day, N_nam, Store, Item, name = "n_nam") %>%
    mutate(
      pct = round(n_nam / N_nam * 100, digits = 1),
      txt = paste0(n_nam, " (", pct, "%)")
    ) %>%
    select(Day, N_nam, Store, Item, txt)

  # Fingerprint each day's complete column (including its N), then number
  # the runs of adjacent days that share a fingerprint.
  day_labels <- cells %>%
    arrange(Day, Store, Item) %>%
    group_by(Day) %>%
    summarise(
      N_nam = first(N_nam),
      signature = paste(N_nam, Store, Item, txt, sep = "\r", collapse = "\n"),
      .groups = "drop"
    ) %>%
    arrange(Day) %>%
    mutate(run = cumsum(signature != lag(signature, default = ""))) %>%
    group_by(run) %>%
    mutate(first_day = min(Day), last_day = max(Day)) %>%
    ungroup() %>%
    mutate(
      span = if_else(
        first_day == last_day,
        as.character(first_day),
        paste(first_day, last_day, sep = " - ")
      ),
      Day_n = paste0("Day ", span, " (N=", N_nam, ")")
    ) %>%
    select(Day, Day_n)

  cells %>%
    inner_join(day_labels, by = "Day") %>%
    # Chronological order here fixes the left-to-right column order below.
    arrange(Day) %>%
    # Every day in a run has identical cells, so one copy per header remains.
    distinct(Day_n, Store, Item, txt) %>%
    # pivot_wider() lives in tidyr, which the library() calls visible in this
    # script do not attach, so the call is namespaced.
    tidyr::pivot_wider(
      names_from = Day_n,
      values_from = txt,
      values_fill = ""
    ) %>%
    arrange(Store, Item) %>%
    # Rows are sorted by Store, so any repeat of a store name is blanked.
    mutate(Store = if_else(duplicated(Store), "", Store))
}
-测试
# Stack Shop_list on top of itself, summarise it, and show the result as a
# flextable.
Shop_day <- Shop_fcn(bind_rows(Shop_list, Shop_list))
flextable(Shop_day)
-输出
我有一个客户数据集,他们在哪些商店购物,他们在每家商店购买了什么,以及购买日期。
# Sample purchase data: customer name, day number, store and item.
# Values are laid out one day per line; the first six rows (day 1) all carry
# the string "None" for both Store and Item.
Shop_list <- data.frame(
  Names = c(
    "Adam", "Eve", "Lucy", "Ricky", "Gomez", "Morticia",
    "Adam", "Eve", "Lucy", "Ricky",
    "Adam", "Eve", "Ricky", "Gomez",
    rep(c("Adam", "Eve", "Lucy"), times = 3)
  ),
  Day = rep(c(1, 2, 3, 4, 5, 6), times = c(6, 4, 4, 3, 3, 3)),
  Store = c(
    rep("None", 6),
    "Lowes", "Home Depot", "Lowes", "Home Depot",
    "Lowes", "Home Depot", "Home Depot", "Lowes",
    "None", "Home Depot", "None",
    "None", "None", "Home Depot",
    "Home Depot", "None", "None"
  ),
  Item = c(
    rep("None", 6),
    "Wood", "Soil", "Nails", "Pots",
    "Nails", "Pots", "Soil", "Wood",
    "None", "Seeds", "None",
    "None", "None", "Seeds",
    "Seeds", "None", "None"
  ),
  stringsAsFactors = FALSE
)
我写了一个总结这些数据的函数,
library(dplyr)
library(flextable)
#' Summarise shoppers per store/item for each day as a wide table
#'
#' For every day, counts the distinct customers recorded for each
#' Store/Item combination and formats the count as "n (pct%)", where the
#' percentage is relative to the number of distinct customers seen that day.
#' Each day becomes a column named "Day <d> (N=<customers that day>)".
#'
#' @param data A data frame with columns `Names`, `Day`, `Store` and `Item`.
#' @return A tibble with `Store`, `Item` and one character column per day,
#'   sorted by `Store` then `Item`. Combinations absent on a day are "", and
#'   a repeated `Store` value is blanked after its first row.
Shop_fcn <- function(data) {
  data %>%
    group_by(Day) %>%
    mutate(N_nam = n_distinct(Names)) %>%
    ungroup() %>%
    # Collapse repeated rows so each customer counts once per day/store/item.
    distinct(Day, N_nam, Names, Store, Item) %>%
    count(Day, N_nam, Store, Item, name = "n_nam") %>%
    mutate(
      pct = round(n_nam / N_nam * 100, digits = 1),
      txt = paste0(n_nam, " (", pct, "%)"),
      Day_n = paste0("Day ", Day, " (N=", N_nam, ")")
    ) %>%
    select(Day_n, Store, Item, txt) %>%
    # pivot_wider() lives in tidyr, which the library() calls visible in this
    # script do not attach, so the call is namespaced. values_fill replaces
    # the separate NA -> "" pass over the day columns.
    tidyr::pivot_wider(
      names_from = Day_n,
      values_from = txt,
      values_fill = ""
    ) %>%
    arrange(Store, Item) %>%
    # Rows are sorted by Store, so any repeat of a store name is blanked.
    mutate(Store = if_else(duplicated(Store), "", Store))
}
# Stack Shop_list on top of itself, summarise it, and show the result as a
# flextable.
Shop_day <- Shop_fcn(bind_rows(Shop_list, Shop_list))
flextable(Shop_day)
我得到以下输出。
第 2 天和第 3 天的列是相等的,第 4、5、6 天的列也是如此。我希望把内容相同的相邻列合并,并让合并后的列标题显示为 Day 2 - 3 (N=4) 和 Day 4 - 6 (N=3)。
到目前为止,我已尝试删除重复的列
# Keep only the first of any columns whose contents are identical.
Shop_nodup <- Shop_day[, !duplicated(as.list(Shop_day)), drop = FALSE]
flextable(Shop_nodup)
这给了我
重复的列消失了,但我想不出办法在列标题中标明该列所涵盖的天数范围(即 Day 2 - 3 (N=4) 和 Day 4 - 6 (N=3))。
如果我们需要更改标题,请进行如下更改
library(stringr)
#' Summarise shoppers per store/item, merging adjacent identical day columns
#'
#' For every day, counts the distinct customers recorded for each
#' Store/Item combination and formats the count as "n (pct%)", where the
#' percentage is relative to the number of distinct customers seen that day.
#' Runs of adjacent days (in sorted `Day` order) whose entire columns are
#' identical are collapsed into one column headed
#' "Day <first> - <last> (N=<customers>)"; other days keep the header
#' "Day <d> (N=<customers>)".
#'
#' Days are merged only when the whole column matches and the days sit next
#' to each other, so a header never spans a day whose contents differ.
#'
#' @param data A data frame with columns `Names`, `Day`, `Store` and `Item`.
#' @return A tibble with `Store`, `Item` and one character column per day or
#'   merged day range, in chronological order. Rows are sorted by `Store`
#'   then `Item`; combinations absent from a column are "", and a repeated
#'   `Store` value is blanked after its first row.
Shop_fcn <- function(data) {
  # One row per Day/Store/Item holding the formatted "n (pct%)" cell text.
  cells <- data %>%
    group_by(Day) %>%
    mutate(N_nam = n_distinct(Names)) %>%
    ungroup() %>%
    # Collapse repeated rows so each customer counts once per day/store/item.
    distinct(Day, N_nam, Names, Store, Item) %>%
    count(Day, N_nam, Store, Item, name = "n_nam") %>%
    mutate(
      pct = round(n_nam / N_nam * 100, digits = 1),
      txt = paste0(n_nam, " (", pct, "%)")
    ) %>%
    select(Day, N_nam, Store, Item, txt)

  # Fingerprint each day's complete column (including its N), then number
  # the runs of adjacent days that share a fingerprint.
  day_labels <- cells %>%
    arrange(Day, Store, Item) %>%
    group_by(Day) %>%
    summarise(
      N_nam = first(N_nam),
      signature = paste(N_nam, Store, Item, txt, sep = "\r", collapse = "\n"),
      .groups = "drop"
    ) %>%
    arrange(Day) %>%
    mutate(run = cumsum(signature != lag(signature, default = ""))) %>%
    group_by(run) %>%
    mutate(first_day = min(Day), last_day = max(Day)) %>%
    ungroup() %>%
    mutate(
      span = if_else(
        first_day == last_day,
        as.character(first_day),
        paste(first_day, last_day, sep = " - ")
      ),
      Day_n = paste0("Day ", span, " (N=", N_nam, ")")
    ) %>%
    select(Day, Day_n)

  cells %>%
    inner_join(day_labels, by = "Day") %>%
    # Chronological order here fixes the left-to-right column order below.
    arrange(Day) %>%
    # Every day in a run has identical cells, so one copy per header remains.
    distinct(Day_n, Store, Item, txt) %>%
    # pivot_wider() lives in tidyr, which the library() calls visible in this
    # script do not attach, so the call is namespaced.
    tidyr::pivot_wider(
      names_from = Day_n,
      values_from = txt,
      values_fill = ""
    ) %>%
    arrange(Store, Item) %>%
    # Rows are sorted by Store, so any repeat of a store name is blanked.
    mutate(Store = if_else(duplicated(Store), "", Store))
}
-测试
# Stack Shop_list on top of itself, summarise it, and show the result as a
# flextable.
Shop_day <- Shop_fcn(bind_rows(Shop_list, Shop_list))
flextable(Shop_day)
-输出