R 数据转换/操作

R Data transformation/manipulation

我有以下格式的数据:

Sender  Action  Recipient   Operation
Sender1 Update  Recipient3  Operation1
Sender2 Update  Recipient4  Operation2
Sender3 Update  Recipient5  Operation3
Sender1 Update  Recipient6  Operation1
Sender2 Delete  Recipient3  Operation4
Sender3 Delete  Recipient4  Operation5
Sender1 Update  Recipient5  Operation1
Sender2 Delete  Recipient6  Operation4
Sender1 Delete  Recipient3  Operation6

我希望将数据转换为以下格式:每个操作(Operation)只占一行,并且列数会根据与该操作相关联的收件人数量动态增加:

Operation   User1   Action    User2       User3      User4
Operation1  Sender1 Update  Recipient3  Recipient6  Recipient5
Operation2  Sender2 Update  Recipient4      
Operation3  Sender3 Update  Recipient5      
Operation4  Sender2 Delete  Recipient3  Recipient6  
Operation5  Sender3 Delete  Recipient4      
Operation6  Sender1 Delete  Recipient3

如何在 R 中完成此操作?

您可以使用 pivot_wider 获取更宽格式的数据。

library(dplyr)

# Reshape to one row per Operation: the sender becomes User1 and each
# recipient gets its own column (User2, User3, ...), numbered in row order
# within the Operation.
result <- df %>%
  rename(User1 = Sender) %>%
  group_by(Operation) %>%
  # +1 because User1 is already taken by the sender.
  mutate(col = paste0("User", row_number() + 1)) %>%
  # Drop the grouping once the per-Operation numbering is done, so the
  # result is a plain (ungrouped) tibble.
  ungroup() %>%
  tidyr::pivot_wider(names_from = col, values_from = Recipient) %>%
  select(Operation, User1, everything())

result

#  Operation  User1   Action User2      User3      User4     
#  <chr>      <chr>   <chr>  <chr>      <chr>      <chr>     
#1 Operation1 Sender1 Update Recipient3 Recipient6 Recipient5
#2 Operation2 Sender2 Update Recipient4 NA         NA        
#3 Operation3 Sender3 Update Recipient5 NA         NA        
#4 Operation4 Sender2 Delete Recipient3 Recipient6 NA        
#5 Operation5 Sender3 Delete Recipient4 NA         NA        
#6 Operation6 Sender1 Delete Recipient3 NA         NA        

数据

# Example input: one row per (Sender, Action, Recipient, Operation) record.
df <- data.frame(
  Sender = c(
    "Sender1", "Sender2", "Sender3", "Sender1", "Sender2", "Sender3",
    "Sender1", "Sender2", "Sender1"
  ),
  Action = c(
    "Update", "Update", "Update", "Update", "Delete", "Delete",
    "Update", "Delete", "Delete"
  ),
  Recipient = c(
    "Recipient3", "Recipient4", "Recipient5", "Recipient6", "Recipient3",
    "Recipient4", "Recipient5", "Recipient6", "Recipient3"
  ),
  Operation = c(
    "Operation1", "Operation2", "Operation3", "Operation1", "Operation4",
    "Operation5", "Operation1", "Operation4", "Operation6"
  ),
  stringsAsFactors = FALSE
)

对于那些习惯并喜欢 data.table 语法的人来说, 一个(可能不是最佳的)版本可能是这样的:

library(data.table)

# data.table version of the same reshape; as.data.table() copies `df`, so the
# by-reference updates below do not touch the original data frame.
DT <- as.data.table(df)
setnames(DT, "Sender", "User1")
# Label each recipient within an Operation as User2, User3, ... (User1 is the
# sender). paste0() is used because paste() inserts a space, which produced
# column names such as "User 1" instead of the requested "User2".
DT[, User := paste0("User", seq_len(.N) + 1L), by = Operation]
# One row per Operation/User1/Action; one column per recipient label.
DT <- dcast(DT, Operation + User1 + Action ~ User, value.var = "Recipient")
DT

使用基础 R 的 reshape:

# Base R variant. `x` numbers the rows within each Operation, shifted by one
# so that the recipient columns start at User.2 (User.1 holds the sender).
reshape(
  transform(
    dat,
    User.1 = Sender,
    User = Recipient,
    x = ave(!is.na(Sender), Operation, FUN = cumsum) + 1
  ),
  direction = "wide",
  idvar = "Operation",
  timevar = "x",
  v.names = "User",
  # The original columns are now carried by User.1 / User.
  drop = c("Recipient", "Sender")
)
#   Action  Operation  User.1     User.2     User.3     User.4
# 1 Update Operation1 Sender1 Recipient3 Recipient6 Recipient5
# 2 Update Operation2 Sender2 Recipient4       <NA>       <NA>
# 3 Update Operation3 Sender3 Recipient5       <NA>       <NA>
# 5 Delete Operation4 Sender2 Recipient3 Recipient6       <NA>
# 6 Delete Operation5 Sender3 Recipient4       <NA>       <NA>
# 9 Delete Operation6 Sender1 Recipient3       <NA>       <NA>

使用 reshape2::dcast:

# reshape2 variant. `Recipient` is overwritten with the target column label
# ("User.2", "User.3", ...), while the recipient values themselves are kept
# in `User` and used as the cast value.
reshape2::dcast(
  transform(
    dat,
    User.1 = Sender,
    User = Recipient,
    Recipient = ave(
      !is.na(Sender), Operation,
      FUN = function(present) paste0("User.", cumsum(present) + 1)
    )
  ),
  ... ~ Recipient,
  value.var = "User"
)[-1]  # drop the first column of the cast result
#   Action  Operation  User.1     User.2     User.3     User.4
# 1 Update Operation1 Sender1 Recipient3 Recipient6 Recipient5
# 2 Update Operation2 Sender2 Recipient4       <NA>       <NA>
# 3 Update Operation3 Sender3 Recipient5       <NA>       <NA>
# 5 Delete Operation4 Sender2 Recipient3 Recipient6       <NA>
# 6 Delete Operation5 Sender3 Recipient4       <NA>       <NA>
# 9 Delete Operation6 Sender1 Recipient3       <NA>       <NA>

数据

# Same example data, parsed from whitespace-delimited text with a header row.
dat <- read.table(
  header = TRUE,
  text = "Sender  Action  Recipient   Operation
Sender1 Update  Recipient3  Operation1
Sender2 Update  Recipient4  Operation2
Sender3 Update  Recipient5  Operation3
Sender1 Update  Recipient6  Operation1
Sender2 Delete  Recipient3  Operation4
Sender3 Delete  Recipient4  Operation5
Sender1 Update  Recipient5  Operation1
Sender2 Delete  Recipient6  Operation4
Sender1 Delete  Recipient3  Operation6"
)