R(arules)将数据框转换为事务并删除 NA
R (arules) Convert dataframe into transactions and remove NA
我有一个数据框。我的目的是将数据框转换为交易数据(transactions),以便使用 R 中的 arules 包进行购物篮分析。我在网上查阅了一些关于将数据框转换为交易数据的资料(原文此处的链接已缺失),但按照这些做法得到的结果与我预期的不同。
### Print a reproducible definition of `df`. The structure(...) call that
### follows is presumably the text dput() emitted: six character columns
### (a transaction ID plus five item categories), six rows, and NA wherever
### no item is recorded for a category.
dput(df)
structure(list(Transaction_ID = c("A001", "A002", "A003", "A004", "A005", "A006"),
Fruits = c(NA, "Apple", "Orange", NA, "Pear", "Grape"),
Vegetables = c(NA, NA, NA, "Potato", NA, "Yam"),
Personal = c("ToothP", "ToothP", NA, "ToothB", "ToothB", NA),
Drink = c("Coff", NA, "Coff", "Milk", "Milk", "Coff"),
Other = c(NA, NA, NA, NA, "Promo", NA)),
.Names = c("Transaction_ID", "Fruits", "Vegetables", "Personal", "Drink", "Other"),
class = c("tbl_df", "tbl", "data.frame"), row.names = c(NA, -6L))
下面是我的数据帧结构
Transaction_ID Fruits Vegetables Personal Drink Other
A001 NA NA ToothP Coff NA
A002 Apple NA ToothP NA NA
A003 Orange NA NA Coff NA
A004 NA Potato ToothB Milk NA
A005 Pear NA ToothB Milk Promo
A006 Grape Yam NA Coff NA
class 每列
### Report the class of each column of `df`.
sapply(df, class)
Transaction_ID Fruits Vegetables Personal Drink Other
"character" "character" "character" "character" "character" "character"
将数据帧转换为交易数据
### Attempt to build an arules "transactions" object from the five item
### columns via split(), then print it with inspect().
### NOTE(review): split() groups its first argument by a single factor `f`;
### the further positional arguments are not treated as additional item
### columns -- confirm against ?split.
data <- as(split(df[,"Fruits"], df[,"Vegetables"],df[,"Personal"], df[,"Drink"], df[,"Other"]), "transactions")
inspect(data)
我得到的结果
[1] {NA,NA,ToothP,Coff,NA}
[2] {Apple,NA,ToothP,NA,NA}
[3] {Orange,NA,NA,Coff,NA}
[4] {NA,Potato,ToothB,Milk,NA}
[5] {Pear,NA,ToothB,Milk,Promo}
[6] {Grape,Yam,NA,Coff,NA}
交易数据转换成功,但请问有什么方法可以去掉 NA 项吗?因为如果它们仍然保留在交易列表中,NA 会被当作一个项目。
我可以向您推荐这个解决方案,但我不知道这是否是您正在寻找的解决方案。
dput(df)
### Example data rebuilt as a plain data.frame in which every column is a
### factor. NA marks a category with no item for that transaction.
df <- data.frame(
  Transaction_ID = factor(c("A001", "A002", "A003", "A004", "A005", "A006")),
  Fruits = factor(c(NA, "Apple", "Orange", NA, "Pear", "Grape")),
  Vegetables = factor(c(NA, NA, NA, "Potato", NA, "Yam")),
  Personal = factor(c("ToothP", "ToothP", NA, "ToothB", "ToothB", NA)),
  Drink = factor(c("Coff", NA, "Coff", "Milk", "Milk", "Coff")),
  Other = factor(c(NA, NA, NA, NA, "Promo", NA))
)
Class 每列
注意各列的 class 都是 "factor"
### Report the class of each column of `df`.
sapply(df, class)
Transaction_ID Fruits Vegetables Personal Drink Other
"factor" "factor" "factor" "factor" "factor" "factor"
将数据帧转换为交易数据
### Coerce the all-factor data frame to "transactions" and print it.
### Transaction_ID is still a column of `df` here, so it is encoded as an
### item as well (visible in the printed result that follows).
data <- as(df, "transactions")
inspect(data)
我得到的结果
items transactionID
[1] {Transaction_ID=A001,
Personal=ToothP,
Drink=Coff} 1
[2] {Transaction_ID=A002,
Fruits=Apple,
Personal=ToothP} 2
[3] {Transaction_ID=A003,
Fruits=Orange,
Drink=Coff} 3
[4] {Transaction_ID=A004,
Vegetables=Potato,
Personal=ToothB,
Drink=Milk} 4
[5] {Transaction_ID=A005,
Fruits=Pear,
Personal=ToothB,
Drink=Milk,
Other=Promo} 5
[6] {Transaction_ID=A006,
Fruits=Grape,
Vegetables=Yam,
Drink=Coff} 6
我在这里 convert data frame in r to transaction or an itemMatrix 找到了部分解决方案。而且似乎你的命令
### The command from the question, quoted here for reference.
data <- as(split(df[,"Fruits"], df[,"Vegetables"],df[,"Personal"], df[,"Drink"], df[,"Other"]), "transactions")
inspect(data)
仅适用于仅包含两列的 data.frame。
奥古斯塔里是对的。这是同时处理事务 ID 的完整代码。
library("arules")
library("dplyr") ### for tbl_df (the tibble class that `df` carries)

### Example data: one row per transaction, one column per item category.
### NA means the transaction has no item in that category.
df <- structure(list(Transaction_ID = c("A001", "A002", "A003", "A004", "A005", "A006"),
Fruits = c(NA, "Apple", "Orange", NA, "Pear", "Grape"),
Vegetables = c(NA, NA, NA, "Potato", NA, "Yam"),
Personal = c("ToothP", "ToothP", NA, "ToothB", "ToothB", NA),
Drink = c("Coff", NA, "Coff", "Milk", "Milk", "Coff"),
Other = c(NA, NA, NA, NA, "Promo", NA)),
.Names = c("Transaction_ID", "Fruits", "Vegetables", "Personal", "Drink", "Other"),
class = c("tbl_df", "tbl", "data.frame"), row.names = c(NA, -6L))

### remove transaction IDs: keep them aside and drop the column by name (not
### by position), so the IDs are not encoded as items
tid <- as.character(df[["Transaction_ID"]])
df <- df[setdiff(names(df), "Transaction_ID")]

### make all columns factors; seq_along() is also safe when no columns remain
for (i in seq_along(df)) {
  df[[i]] <- as.factor(df[[i]])
}

### coerce the factor columns to a transactions object
trans <- as(df, "transactions")

### set transactionIDs
transactionInfo(trans)[["transactionID"]] <- tid

inspect(trans)
items transactionID
[1] {Personal=ToothP,Drink=Coff} A001
[2] {Fruits=Apple,Personal=ToothP} A002
[3] {Fruits=Orange,Drink=Coff} A003
[4] {Vegetables=Potato,Personal=ToothB,Drink=Milk} A004
[5] {Fruits=Pear,Personal=ToothB,Drink=Milk,Other=Promo} A005
[6] {Fruits=Grape,Vegetables=Yam,Drink=Coff} A006
我有一个数据框。我的目的是将数据框转换为交易数据(transactions),以便使用 R 中的 arules 包进行购物篮分析。我在网上查阅了一些关于将数据框转换为交易数据的资料(原文此处的链接已缺失),但按照这些做法得到的结果与我预期的不同。
### Print a reproducible definition of `df`. The structure(...) call that
### follows is presumably the text dput() emitted: six character columns
### (a transaction ID plus five item categories), six rows, and NA wherever
### no item is recorded for a category.
dput(df)
structure(list(Transaction_ID = c("A001", "A002", "A003", "A004", "A005", "A006"),
Fruits = c(NA, "Apple", "Orange", NA, "Pear", "Grape"),
Vegetables = c(NA, NA, NA, "Potato", NA, "Yam"),
Personal = c("ToothP", "ToothP", NA, "ToothB", "ToothB", NA),
Drink = c("Coff", NA, "Coff", "Milk", "Milk", "Coff"),
Other = c(NA, NA, NA, NA, "Promo", NA)),
.Names = c("Transaction_ID", "Fruits", "Vegetables", "Personal", "Drink", "Other"),
class = c("tbl_df", "tbl", "data.frame"), row.names = c(NA, -6L))
下面是我的数据帧结构
Transaction_ID Fruits Vegetables Personal Drink Other
A001 NA NA ToothP Coff NA
A002 Apple NA ToothP NA NA
A003 Orange NA NA Coff NA
A004 NA Potato ToothB Milk NA
A005 Pear NA ToothB Milk Promo
A006 Grape Yam NA Coff NA
class 每列
### Report the class of each column of `df`.
sapply(df, class)
Transaction_ID Fruits Vegetables Personal Drink Other
"character" "character" "character" "character" "character" "character"
将数据帧转换为交易数据
### Attempt to build an arules "transactions" object from the five item
### columns via split(), then print it with inspect().
### NOTE(review): split() groups its first argument by a single factor `f`;
### the further positional arguments are not treated as additional item
### columns -- confirm against ?split.
data <- as(split(df[,"Fruits"], df[,"Vegetables"],df[,"Personal"], df[,"Drink"], df[,"Other"]), "transactions")
inspect(data)
我得到的结果
[1] {NA,NA,ToothP,Coff,NA}
[2] {Apple,NA,ToothP,NA,NA}
[3] {Orange,NA,NA,Coff,NA}
[4] {NA,Potato,ToothB,Milk,NA}
[5] {Pear,NA,ToothB,Milk,Promo}
[6] {Grape,Yam,NA,Coff,NA}
交易数据转换成功,但请问有什么方法可以去掉 NA 项吗?因为如果它们仍然保留在交易列表中,NA 会被当作一个项目。
我可以向您推荐这个解决方案,但我不知道这是否是您正在寻找的解决方案。
dput(df)
### Example data rebuilt as a plain data.frame in which every column is a
### factor. NA marks a category with no item for that transaction.
df <- data.frame(
  Transaction_ID = factor(c("A001", "A002", "A003", "A004", "A005", "A006")),
  Fruits = factor(c(NA, "Apple", "Orange", NA, "Pear", "Grape")),
  Vegetables = factor(c(NA, NA, NA, "Potato", NA, "Yam")),
  Personal = factor(c("ToothP", "ToothP", NA, "ToothB", "ToothB", NA)),
  Drink = factor(c("Coff", NA, "Coff", "Milk", "Milk", "Coff")),
  Other = factor(c(NA, NA, NA, NA, "Promo", NA))
)
每列的 class —— 注意各列的 class 都是 "factor"
### Report the class of each column of `df`.
sapply(df, class)
Transaction_ID Fruits Vegetables Personal Drink Other
"factor" "factor" "factor" "factor" "factor" "factor"
将数据帧转换为交易数据
### Coerce the all-factor data frame to "transactions" and print it.
### Transaction_ID is still a column of `df` here, so it is encoded as an
### item as well (visible in the printed result that follows).
data <- as(df, "transactions")
inspect(data)
我得到的结果
items transactionID
[1] {Transaction_ID=A001,
Personal=ToothP,
Drink=Coff} 1
[2] {Transaction_ID=A002,
Fruits=Apple,
Personal=ToothP} 2
[3] {Transaction_ID=A003,
Fruits=Orange,
Drink=Coff} 3
[4] {Transaction_ID=A004,
Vegetables=Potato,
Personal=ToothB,
Drink=Milk} 4
[5] {Transaction_ID=A005,
Fruits=Pear,
Personal=ToothB,
Drink=Milk,
Other=Promo} 5
[6] {Transaction_ID=A006,
Fruits=Grape,
Vegetables=Yam,
Drink=Coff} 6
我在这里 convert data frame in r to transaction or an itemMatrix 找到了部分解决方案。而且似乎你的命令
### The command from the question, quoted here for reference.
data <- as(split(df[,"Fruits"], df[,"Vegetables"],df[,"Personal"], df[,"Drink"], df[,"Other"]), "transactions")
inspect(data)
仅适用于仅包含两列的 data.frame。
奥古斯塔里是对的。这是同时处理事务 ID 的完整代码。
library("arules")
library("dplyr") ### for tbl_df (the tibble class that `df` carries)

### Example data: one row per transaction, one column per item category.
### NA means the transaction has no item in that category.
df <- structure(list(Transaction_ID = c("A001", "A002", "A003", "A004", "A005", "A006"),
Fruits = c(NA, "Apple", "Orange", NA, "Pear", "Grape"),
Vegetables = c(NA, NA, NA, "Potato", NA, "Yam"),
Personal = c("ToothP", "ToothP", NA, "ToothB", "ToothB", NA),
Drink = c("Coff", NA, "Coff", "Milk", "Milk", "Coff"),
Other = c(NA, NA, NA, NA, "Promo", NA)),
.Names = c("Transaction_ID", "Fruits", "Vegetables", "Personal", "Drink", "Other"),
class = c("tbl_df", "tbl", "data.frame"), row.names = c(NA, -6L))

### remove transaction IDs: keep them aside and drop the column by name (not
### by position), so the IDs are not encoded as items
tid <- as.character(df[["Transaction_ID"]])
df <- df[setdiff(names(df), "Transaction_ID")]

### make all columns factors; seq_along() is also safe when no columns remain
for (i in seq_along(df)) {
  df[[i]] <- as.factor(df[[i]])
}

### coerce the factor columns to a transactions object
trans <- as(df, "transactions")

### set transactionIDs
transactionInfo(trans)[["transactionID"]] <- tid

inspect(trans)
items transactionID
[1] {Personal=ToothP,Drink=Coff} A001
[2] {Fruits=Apple,Personal=ToothP} A002
[3] {Fruits=Orange,Drink=Coff} A003
[4] {Vegetables=Potato,Personal=ToothB,Drink=Milk} A004
[5] {Fruits=Pear,Personal=ToothB,Drink=Milk,Other=Promo} A005
[6] {Fruits=Grape,Vegetables=Yam,Drink=Coff} A006