R 数据 transformation/manipulation
R Data transformation/manipulation
我有以下格式的数据:
Sender Action Recipient Operation
Sender1 Update Recipient3 Operation1
Sender2 Update Recipient4 Operation2
Sender3 Update Recipient5 Operation3
Sender1 Update Recipient6 Operation1
Sender2 Delete Recipient3 Operation4
Sender3 Delete Recipient4 Operation5
Sender1 Update Recipient5 Operation1
Sender2 Delete Recipient6 Operation4
Sender1 Delete Recipient3 Operation6
我希望我的数据采用以下格式,每个操作都显示在一行中,并且列会根据与操作相关联的收件人数量动态更新
Operation User1 Action User2 User3 User4
Operation1 Sender1 Update Recipient3 Recipient6 Recipient5
Operation2 Sender2 Update Recipient4
Operation3 Sender3 Update Recipient5
Operation4 Sender2 Delete Recipient3 Recipient6
Operation5 Sender3 Delete Recipient4
Operation6 Sender1 Delete Recipient3
如何在 R 中完成此操作?
您可以使用 pivot_wider
获取更宽格式的数据。
library(dplyr)
df %>%
rename(User1 = Sender) %>%
group_by(Operation) %>%
mutate(col = paste0('User', row_number() + 1)) %>%
tidyr::pivot_wider(names_from = col, values_from = Recipient) %>%
select(Operation, User1, everything()) -> result
result
# Operation User1 Action User2 User3 User4
# <chr> <chr> <chr> <chr> <chr> <chr>
#1 Operation1 Sender1 Update Recipient3 Recipient6 Recipient5
#2 Operation2 Sender2 Update Recipient4 NA NA
#3 Operation3 Sender3 Update Recipient5 NA NA
#4 Operation4 Sender2 Delete Recipient3 Recipient6 NA
#5 Operation5 Sender3 Delete Recipient4 NA NA
#6 Operation6 Sender1 Delete Recipient3 NA NA
数据
df <- structure(list(Sender = c("Sender1", "Sender2", "Sender3", "Sender1",
"Sender2", "Sender3", "Sender1", "Sender2", "Sender1"), Action = c("Update",
"Update", "Update", "Update", "Delete", "Delete", "Update", "Delete",
"Delete"), Recipient = c("Recipient3", "Recipient4", "Recipient5",
"Recipient6", "Recipient3", "Recipient4", "Recipient5", "Recipient6",
"Recipient3"), Operation = c("Operation1", "Operation2", "Operation3",
"Operation1", "Operation4", "Operation5", "Operation1", "Operation4",
"Operation6")), class = "data.frame", row.names = c(NA, -9L))
对于那些习惯并喜欢 data.table 语法的人来说,
一个(可能不是最佳的)版本可能是这样的:
library(data.table)
DT <- data.table(df)
setnames(DT,"Sender","User1")
DT <- dcast(DT[,User:=paste("User", .SD[,.I]), by=Operation],
Operation + User1 + Action~ User, value.var="Recipient")
DT
使用 reshape
.
reshape(transform(dat, User.1=Sender, User=Recipient,
x=ave(!is.na(Sender), Operation, FUN=cumsum) + 1),
v.names="User", timevar="x", idvar="Operation",
drop=c("Recipient", "Sender"),
direction="wide")
# Action Operation User.1 User.2 User.3 User.4
# 1 Update Operation1 Sender1 Recipient3 Recipient6 Recipient5
# 2 Update Operation2 Sender2 Recipient4 <NA> <NA>
# 3 Update Operation3 Sender3 Recipient5 <NA> <NA>
# 5 Delete Operation4 Sender2 Recipient3 Recipient6 <NA>
# 6 Delete Operation5 Sender3 Recipient4 <NA> <NA>
# 9 Delete Operation6 Sender1 Recipient3 <NA> <NA>
或reshape2::dcast
.
reshape2::dcast(transform(dat, User.1=Sender, User=Recipient,
Recipient=ave(!is.na(Sender), Operation,
FUN=function(x) paste0("User.", cumsum(x) + 1))),
... ~ Recipient, value.var="User")[-1]
# Action Operation User.1 User.2 User.3 User.4
# 1 Update Operation1 Sender1 Recipient3 Recipient6 Recipient5
# 2 Update Operation2 Sender2 Recipient4 <NA> <NA>
# 3 Update Operation3 Sender3 Recipient5 <NA> <NA>
# 5 Delete Operation4 Sender2 Recipient3 Recipient6 <NA>
# 6 Delete Operation5 Sender3 Recipient4 <NA> <NA>
# 9 Delete Operation6 Sender1 Recipient3 <NA> <NA>
数据
dat <- read.table(header=TRUE, text="Sender Action Recipient Operation
Sender1 Update Recipient3 Operation1
Sender2 Update Recipient4 Operation2
Sender3 Update Recipient5 Operation3
Sender1 Update Recipient6 Operation1
Sender2 Delete Recipient3 Operation4
Sender3 Delete Recipient4 Operation5
Sender1 Update Recipient5 Operation1
Sender2 Delete Recipient6 Operation4
Sender1 Delete Recipient3 Operation6")
我有以下格式的数据:
Sender Action Recipient Operation
Sender1 Update Recipient3 Operation1
Sender2 Update Recipient4 Operation2
Sender3 Update Recipient5 Operation3
Sender1 Update Recipient6 Operation1
Sender2 Delete Recipient3 Operation4
Sender3 Delete Recipient4 Operation5
Sender1 Update Recipient5 Operation1
Sender2 Delete Recipient6 Operation4
Sender1 Delete Recipient3 Operation6
我希望我的数据采用以下格式,每个操作都显示在一行中,并且列会根据与操作相关联的收件人数量动态更新
Operation User1 Action User2 User3 User4
Operation1 Sender1 Update Recipient3 Recipient6 Recipient5
Operation2 Sender2 Update Recipient4
Operation3 Sender3 Update Recipient5
Operation4 Sender2 Delete Recipient3 Recipient6
Operation5 Sender3 Delete Recipient4
Operation6 Sender1 Delete Recipient3
如何在 R 中完成此操作?
您可以使用 pivot_wider
获取更宽格式的数据。
library(dplyr)
df %>%
rename(User1 = Sender) %>%
group_by(Operation) %>%
mutate(col = paste0('User', row_number() + 1)) %>%
tidyr::pivot_wider(names_from = col, values_from = Recipient) %>%
select(Operation, User1, everything()) -> result
result
# Operation User1 Action User2 User3 User4
# <chr> <chr> <chr> <chr> <chr> <chr>
#1 Operation1 Sender1 Update Recipient3 Recipient6 Recipient5
#2 Operation2 Sender2 Update Recipient4 NA NA
#3 Operation3 Sender3 Update Recipient5 NA NA
#4 Operation4 Sender2 Delete Recipient3 Recipient6 NA
#5 Operation5 Sender3 Delete Recipient4 NA NA
#6 Operation6 Sender1 Delete Recipient3 NA NA
数据
df <- structure(list(Sender = c("Sender1", "Sender2", "Sender3", "Sender1",
"Sender2", "Sender3", "Sender1", "Sender2", "Sender1"), Action = c("Update",
"Update", "Update", "Update", "Delete", "Delete", "Update", "Delete",
"Delete"), Recipient = c("Recipient3", "Recipient4", "Recipient5",
"Recipient6", "Recipient3", "Recipient4", "Recipient5", "Recipient6",
"Recipient3"), Operation = c("Operation1", "Operation2", "Operation3",
"Operation1", "Operation4", "Operation5", "Operation1", "Operation4",
"Operation6")), class = "data.frame", row.names = c(NA, -9L))
对于那些习惯并喜欢 data.table 语法的人来说, 一个(可能不是最佳的)版本可能是这样的:
library(data.table)
DT <- data.table(df)
setnames(DT,"Sender","User1")
DT <- dcast(DT[,User:=paste("User", .SD[,.I]), by=Operation],
Operation + User1 + Action~ User, value.var="Recipient")
DT
使用 reshape
.
reshape(transform(dat, User.1=Sender, User=Recipient,
x=ave(!is.na(Sender), Operation, FUN=cumsum) + 1),
v.names="User", timevar="x", idvar="Operation",
drop=c("Recipient", "Sender"),
direction="wide")
# Action Operation User.1 User.2 User.3 User.4
# 1 Update Operation1 Sender1 Recipient3 Recipient6 Recipient5
# 2 Update Operation2 Sender2 Recipient4 <NA> <NA>
# 3 Update Operation3 Sender3 Recipient5 <NA> <NA>
# 5 Delete Operation4 Sender2 Recipient3 Recipient6 <NA>
# 6 Delete Operation5 Sender3 Recipient4 <NA> <NA>
# 9 Delete Operation6 Sender1 Recipient3 <NA> <NA>
或reshape2::dcast
.
reshape2::dcast(transform(dat, User.1=Sender, User=Recipient,
Recipient=ave(!is.na(Sender), Operation,
FUN=function(x) paste0("User.", cumsum(x) + 1))),
... ~ Recipient, value.var="User")[-1]
# Action Operation User.1 User.2 User.3 User.4
# 1 Update Operation1 Sender1 Recipient3 Recipient6 Recipient5
# 2 Update Operation2 Sender2 Recipient4 <NA> <NA>
# 3 Update Operation3 Sender3 Recipient5 <NA> <NA>
# 5 Delete Operation4 Sender2 Recipient3 Recipient6 <NA>
# 6 Delete Operation5 Sender3 Recipient4 <NA> <NA>
# 9 Delete Operation6 Sender1 Recipient3 <NA> <NA>
数据
dat <- read.table(header=TRUE, text="Sender Action Recipient Operation
Sender1 Update Recipient3 Operation1
Sender2 Update Recipient4 Operation2
Sender3 Update Recipient5 Operation3
Sender1 Update Recipient6 Operation1
Sender2 Delete Recipient3 Operation4
Sender3 Delete Recipient4 Operation5
Sender1 Update Recipient5 Operation1
Sender2 Delete Recipient6 Operation4
Sender1 Delete Recipient3 Operation6")