在 R 中用数据集(查找表)中的规则代替一长串 ifelse 语句
using ifelse statement but from dataset in R
要创建属性,我可以这样做
# Derive dat$clas from the text in dat$stuff using a chain of nested ifelse()
# calls. The patterns are tested top to bottom with grepl(), so each row gets
# the label paired with the first pattern found in its text.
# Rows that match none of the six patterns fall through to "no".
dat$clas <- ifelse(grepl("den", dat$stuff), "bak",
ifelse(grepl("kro", dat$stuff), "bak1",
ifelse(grepl("ris", dat$stuff), "bak3",
ifelse(grepl("muka", dat$stuff), "rty",
ifelse(grepl("chlo", dat$stuff), "cos",
ifelse(grepl("prokl", dat$stuff), "gig", "no"))))))
但是我有很多属性,假设有200个属性。
使用这样的ifelse语句,写起来时间长,代码也长。
我可以从数据框中使用它吗?
templatedata<-prod clas
den bak
kro bak1
ris bak3
muka rty
chlo cos
prokl gig
)
# Lookup table: `prod` holds the key to search for, `class` the label to assign.
# Both columns are factors with explicitly listed levels, so the object does
# not depend on the session's collation order.
# The keys are stored without trailing tab characters so that each one can be
# used directly as a substring / regex pattern against workingdataset$prod
# (a key such as "kro\t" would never match "kro serdgt").
templatedata <- data.frame(
  prod = factor(
    c("den", "kro", "ris", "muka", "chlo", "prokl"),
    levels = c("chlo", "den", "kro", "muka", "prokl", "ris")
  ),
  class = factor(
    c("bak", "bak1", "bak3", "rty", "cos", "gig"),
    levels = c("bak", "bak1", "bak3", "cos", "gig", "rty")
  )
)
所以
workingdataset<-(
prod
den sg
kro serdgt
ris szdg
muka aszgt
chlo sdgt
prokl zfdsgr
den zdasfh)
# Example input: a data frame with one factor column `prod` holding seven
# free-text product strings. Levels are listed explicitly so the factor codes
# do not depend on the session's collation order.
# NOTE(review): the trailing ")" in "den zdasfh)" looks like a paste artifact;
# it is kept so the data stays exactly as supplied.
workingdataset <- data.frame(
  prod = factor(
    c("den sg", "kro serdgt", "ris szdg", "muka aszgt",
      "chlo sdgt", "prokl zfdsgr", "den zdasfh)"),
    levels = c("chlo sdgt", "den sg", "den zdasfh)", "kro serdgt",
               "muka aszgt", "prokl zfdsgr", "ris szdg")
  )
)
也就是说,我想利用模板数据为工作数据集中的每一行取得对应的属性。
期望的输出 workingdataset
如下所示:
prod clas
den sg bak
kro serdgt bak1
ris szdg bak3
muka aszgt rty
chlo sdgt cos
prokl zfdsgr gig
den zdasfh bak
怎么做
您可以使用 David Robinson 创建的精彩 fuzzyjoin 包来实现这一点。它允许您使用模糊逻辑连接两个表,包括字符串距离或正则表达式。在这里,我们将使用正则表达式。
library(fuzzyjoin)
library(magrittr)
# Example input: a data frame with one factor column `prod` holding seven
# free-text product strings. Levels are listed explicitly so the factor codes
# do not depend on the session's collation order.
# NOTE(review): the trailing ")" in "den zdasfh)" looks like a paste artifact;
# it is kept so the data stays exactly as supplied.
workingdataset <- data.frame(
  prod = factor(
    c("den sg", "kro serdgt", "ris szdg", "muka aszgt",
      "chlo sdgt", "prokl zfdsgr", "den zdasfh)"),
    levels = c("chlo sdgt", "den sg", "den zdasfh)", "kro serdgt",
               "muka aszgt", "prokl zfdsgr", "ris szdg")
  )
)
# Lookup table: `prod` holds the key to search for, `class` the label to assign.
# Both columns are factors with explicitly listed levels, so the object does
# not depend on the session's collation order.
templatedata <- data.frame(
  prod = factor(
    c("den", "kro", "ris", "muka", "chlo", "prokl"),
    levels = c("chlo", "den", "kro", "muka", "prokl", "ris")
  ),
  class = factor(
    c("bak", "bak1", "bak3", "rty", "cos", "gig"),
    levels = c("bak", "bak1", "bak3", "cos", "gig", "rty")
  )
)
# Regex join: keep each workingdataset row whose `prod` text matches a
# templatedata `prod` pattern, and attach that pattern's `class`.
regex_inner_join(workingdataset, templatedata, by = "prod")
prod.x prod.y class
1 den sg den bak
2 kro serdgt kro bak1
3 ris szdg ris bak3
4 muka aszgt muka rty
5 chlo sdgt chlo cos
6 prokl zfdsgr prokl gig
7 den zdasfh) den bak
要创建属性,我可以这样做
# Derive dat$clas from the text in dat$stuff using a chain of nested ifelse()
# calls. The patterns are tested top to bottom with grepl(), so each row gets
# the label paired with the first pattern found in its text.
# Rows that match none of the six patterns fall through to "no".
dat$clas <- ifelse(grepl("den", dat$stuff), "bak",
ifelse(grepl("kro", dat$stuff), "bak1",
ifelse(grepl("ris", dat$stuff), "bak3",
ifelse(grepl("muka", dat$stuff), "rty",
ifelse(grepl("chlo", dat$stuff), "cos",
ifelse(grepl("prokl", dat$stuff), "gig", "no"))))))
但是我有很多属性,假设有200个属性。 使用这样的ifelse语句,写起来时间长,代码也长。 我可以从数据框中使用它吗?
templatedata<-prod clas
den bak
kro bak1
ris bak3
muka rty
chlo cos
prokl gig
)
# Lookup table: `prod` holds the key to search for, `class` the label to assign.
# Both columns are factors with explicitly listed levels, so the object does
# not depend on the session's collation order.
# The keys are stored without trailing tab characters so that each one can be
# used directly as a substring / regex pattern against workingdataset$prod
# (a key such as "kro\t" would never match "kro serdgt").
templatedata <- data.frame(
  prod = factor(
    c("den", "kro", "ris", "muka", "chlo", "prokl"),
    levels = c("chlo", "den", "kro", "muka", "prokl", "ris")
  ),
  class = factor(
    c("bak", "bak1", "bak3", "rty", "cos", "gig"),
    levels = c("bak", "bak1", "bak3", "cos", "gig", "rty")
  )
)
所以
workingdataset<-(
prod
den sg
kro serdgt
ris szdg
muka aszgt
chlo sdgt
prokl zfdsgr
den zdasfh)
# Example input: a data frame with one factor column `prod` holding seven
# free-text product strings. Levels are listed explicitly so the factor codes
# do not depend on the session's collation order.
# NOTE(review): the trailing ")" in "den zdasfh)" looks like a paste artifact;
# it is kept so the data stays exactly as supplied.
workingdataset <- data.frame(
  prod = factor(
    c("den sg", "kro serdgt", "ris szdg", "muka aszgt",
      "chlo sdgt", "prokl zfdsgr", "den zdasfh)"),
    levels = c("chlo sdgt", "den sg", "den zdasfh)", "kro serdgt",
               "muka aszgt", "prokl zfdsgr", "ris szdg")
  )
)
也就是说,我想利用模板数据为工作数据集中的每一行取得对应的属性。
期望的输出 workingdataset
如下所示:
prod clas
den sg bak
kro serdgt bak1
ris szdg bak3
muka aszgt rty
chlo sdgt cos
prokl zfdsgr gig
den zdasfh bak
怎么做
您可以使用 David Robinson 创建的精彩 fuzzyjoin 包来实现这一点。它允许您使用模糊逻辑连接两个表,包括字符串距离或正则表达式。在这里,我们将使用正则表达式。
library(fuzzyjoin)
library(magrittr)
# Example input: a data frame with one factor column `prod` holding seven
# free-text product strings. Levels are listed explicitly so the factor codes
# do not depend on the session's collation order.
# NOTE(review): the trailing ")" in "den zdasfh)" looks like a paste artifact;
# it is kept so the data stays exactly as supplied.
workingdataset <- data.frame(
  prod = factor(
    c("den sg", "kro serdgt", "ris szdg", "muka aszgt",
      "chlo sdgt", "prokl zfdsgr", "den zdasfh)"),
    levels = c("chlo sdgt", "den sg", "den zdasfh)", "kro serdgt",
               "muka aszgt", "prokl zfdsgr", "ris szdg")
  )
)
# Lookup table: `prod` holds the key to search for, `class` the label to assign.
# Both columns are factors with explicitly listed levels, so the object does
# not depend on the session's collation order.
templatedata <- data.frame(
  prod = factor(
    c("den", "kro", "ris", "muka", "chlo", "prokl"),
    levels = c("chlo", "den", "kro", "muka", "prokl", "ris")
  ),
  class = factor(
    c("bak", "bak1", "bak3", "rty", "cos", "gig"),
    levels = c("bak", "bak1", "bak3", "cos", "gig", "rty")
  )
)
# Regex join: keep each workingdataset row whose `prod` text matches a
# templatedata `prod` pattern, and attach that pattern's `class`.
regex_inner_join(workingdataset, templatedata, by = "prod")
prod.x prod.y class
1 den sg den bak
2 kro serdgt kro bak1
3 ris szdg ris bak3
4 muka aszgt muka rty
5 chlo sdgt chlo cos
6 prokl zfdsgr prokl gig
7 den zdasfh) den bak