在 arules 中如何将稀疏数据框转换为事务(transactions)?
In arules how to turn a sparse dataframe into transactions?
嗨,我有一个像这样的杂货订单的稀疏数据框
library(arules)
# Toy grocery data: each row is one order, each column a 0/1 factor for an item.
a_df <- data.frame(
  apple = as.factor(c(1, 0, 0, 0, 1, 1)),
  banana = as.factor(c(0, 1, 1, 0, 0, 0)),
  peeler = as.factor(c(1, 0, 0, 0, 1, 1))
)
# Coerce the factor columns to transactions; every column/level pair
# (including "<item>=0") becomes its own item.
a_tran <- as(a_df, "transactions")
inspect(a_tran)
# Mine rules with at least two items, using 0.5 for both support and confidence.
rules <- apriori(
  a_tran,
  parameter = list(minlen = 2, supp = 0.5, conf = 0.5)
)
inspect(rules)
但是结果包含 0(未订购的商品),如下所示:
lhs rhs support confidence lift count
{banana=0} => {apple=1} 0.5 0.6 1.2 3
如何忽略数据框中的 0,或者将数据框转换为类似下面的形式:
order 1: apple, peeler
order 2: banana
谢谢。
这里有几个选项
library(magrittr)
# Locate every (row, column) position whose value is 1, i.e. an ordered item.
# Use TRUE rather than T: T is an ordinary variable that can be reassigned.
idx <- which(a_df == 1, arr.ind = TRUE)
# Group the item (column) names by order (row) number. Orders without any 1
# never appear in idx, so they are absent from the resulting list.
(lst <- split(names(a_df)[idx[, 2]], idx[, 1]))
# $`1`
# [1] "apple" "peeler"
#
# $`2`
# [1] "banana"
#
# $`3`
# [1] "banana"
#
# $`5`
# [1] "apple" "peeler"
#
# $`6`
# [1] "apple" "peeler"
# Mine association rules from `x` with fixed thresholds.
#
# x:   any object that can be coerced to "transactions" via as().
# app: optional value passed on as apriori()'s `appearance` argument
#      (NULL by default).
# Returns whatever apriori() returns for the coerced transactions.
rules <- function(x, app = NULL) {
  trans <- as(x, "transactions")
  mining_params <- list(minlen = 2, supp = 0.5, conf = 0.5)
  apriori(trans, parameter = mining_params, appearance = app)
}
# Option 1: mine the item list, which holds only the ordered items.
inspect(rules(lst))
# Option 2: mine the full data frame, then drop every rule whose lhs or rhs
# contains an item matching "0".
inspect(subset(rules(a_df), !(lhs %pin% "0" | rhs %pin% "0")))
# Option 3: tell apriori up front that the "<item>=0" items may appear nowhere.
inspect(
  rules(
    a_df,
    list(none = paste(names(a_df), "0", sep = "="), default = "both")
  )
)
看起来你的数据是一个完整的 0-1 矩阵。这是最快的方法:
# Comparing against "1" gives a logical matrix (TRUE where the item was
# ordered); coercing it makes one transaction per row with only the TRUE items.
trans <- as(object = a_df == "1", Class = "transactions")
inspect(x = trans)
items
[1] {apple,peeler}
[2] {banana}
[3] {banana}
[4] {}
[5] {apple,peeler}
[6] {apple,peeler}
现在您可以挖掘规则了。
嗨,我有一个像这样的杂货订单的稀疏数据框
library(arules)
# Toy grocery data: each row is one order, each column a 0/1 factor for an item.
a_df <- data.frame(
  apple = as.factor(c(1, 0, 0, 0, 1, 1)),
  banana = as.factor(c(0, 1, 1, 0, 0, 0)),
  peeler = as.factor(c(1, 0, 0, 0, 1, 1))
)
# Coerce the factor columns to transactions; every column/level pair
# (including "<item>=0") becomes its own item.
a_tran <- as(a_df, "transactions")
inspect(a_tran)
# Mine rules with at least two items, using 0.5 for both support and confidence.
rules <- apriori(
  a_tran,
  parameter = list(minlen = 2, supp = 0.5, conf = 0.5)
)
inspect(rules)
但是结果包含 0(未订购的商品),如下所示:
lhs rhs support confidence lift count
{banana=0} => {apple=1} 0.5 0.6 1.2 3
如何忽略数据框中的 0,或者将数据框转换为类似下面的形式:
order 1: apple, peeler
order 2: banana
谢谢。
这里有几个选项
library(magrittr)
# Locate every (row, column) position whose value is 1, i.e. an ordered item.
# Use TRUE rather than T: T is an ordinary variable that can be reassigned.
idx <- which(a_df == 1, arr.ind = TRUE)
# Group the item (column) names by order (row) number. Orders without any 1
# never appear in idx, so they are absent from the resulting list.
(lst <- split(names(a_df)[idx[, 2]], idx[, 1]))
# $`1`
# [1] "apple" "peeler"
#
# $`2`
# [1] "banana"
#
# $`3`
# [1] "banana"
#
# $`5`
# [1] "apple" "peeler"
#
# $`6`
# [1] "apple" "peeler"
# Mine association rules from `x` with fixed thresholds.
#
# x:   any object that can be coerced to "transactions" via as().
# app: optional value passed on as apriori()'s `appearance` argument
#      (NULL by default).
# Returns whatever apriori() returns for the coerced transactions.
rules <- function(x, app = NULL) {
  trans <- as(x, "transactions")
  mining_params <- list(minlen = 2, supp = 0.5, conf = 0.5)
  apriori(trans, parameter = mining_params, appearance = app)
}
# Option 1: mine the item list, which holds only the ordered items.
inspect(rules(lst))
# Option 2: mine the full data frame, then drop every rule whose lhs or rhs
# contains an item matching "0".
inspect(subset(rules(a_df), !(lhs %pin% "0" | rhs %pin% "0")))
# Option 3: tell apriori up front that the "<item>=0" items may appear nowhere.
inspect(
  rules(
    a_df,
    list(none = paste(names(a_df), "0", sep = "="), default = "both")
  )
)
看起来你的数据是一个完整的 0-1 矩阵。这是最快的方法:
# Comparing against "1" gives a logical matrix (TRUE where the item was
# ordered); coercing it makes one transaction per row with only the TRUE items.
trans <- as(object = a_df == "1", Class = "transactions")
inspect(x = trans)
items
[1] {apple,peeler}
[2] {banana}
[3] {banana}
[4] {}
[5] {apple,peeler}
[6] {apple,peeler}
现在您可以挖掘规则了。