如果可能,将列转换为数字在函数中不起作用
Converting columns to numeric if possible does not work within a function
我有如下示例数据:
library(data.table)
set.seed(1)
DT <- data.table(panelID = sample(50,50), # Creates a panel ID
Country = c(rep("Albania",30),rep("Belarus",50), rep("Chilipepper",20)),
some_NA = sample(0:5, 6),
some_NA_factor = sample(0:5, 6),
Group = c(rep(1,20),rep(2,20),rep(3,20),rep(4,20),rep(5,20)),
Time = rep(seq(as.Date("2010-01-03"), length=20, by="1 month") - 1,5),
wt = 15*round(runif(100)/10,2),
Income = round(rnorm(10,-5,5),2),
Happiness = sample(10,10),
Sex = round(rnorm(10,0.75,0.3),2),
Age = sample(100,100),
Educ = round(rnorm(10,0.75,0.3),2))
DT [, uniqueID := .I] # Creates a unique ID #
DT$some_NA_factor <- factor(DT$some_NA_factor)
DT$Group <- as.character(DT$Group)
DT2 <- copy(DT)
这就是我想要做的,如果可能的话,将列(在本例中为第 5 列 Group
)转换为数字。
dfs <- c("DT", "DT2")
conv_to_num_check <- function(z) is.character(z) && (mean(grepl("^ *-?[\d.]+(?:e-?\d+)?$", z, perl = TRUE), na.rm=TRUE)>0.9)
for (i in length(dfs)) {
cols <- which(sapply(get(dfs[i]), conv_to_num_check))
setDT(get(dfs[i]))[, (cols) := lapply(.SD, as.numeric), .SDcols = cols]
}
但是当我检查 class 时:
class(DT$Group) # Is character
当我这样做时:
cols <- which(sapply(DT, conv_to_num_check))
setDT(DT)[, (cols) := lapply(.SD, as.numeric), .SDcols = cols]
class(DT$Group) # Is numeric
有效..我做错了什么?
只是 for (i in length(dfs))
行中的一个小错误,因为 length(dfs)
只是 returns 2
:
for (i in length(dfs)) {
print(i)
}
# [1] 2
如果您将其更改为:
,它将起作用
for (i in seq_along(dfs)) {
cols <- which(sapply(get(dfs[i]), conv_to_num_check))
setDT(get(dfs[i]))[, (cols) := lapply(.SD, as.numeric), .SDcols = cols]
}
我有如下示例数据:
library(data.table)
set.seed(1)
DT <- data.table(panelID = sample(50,50), # Creates a panel ID
Country = c(rep("Albania",30),rep("Belarus",50), rep("Chilipepper",20)),
some_NA = sample(0:5, 6),
some_NA_factor = sample(0:5, 6),
Group = c(rep(1,20),rep(2,20),rep(3,20),rep(4,20),rep(5,20)),
Time = rep(seq(as.Date("2010-01-03"), length=20, by="1 month") - 1,5),
wt = 15*round(runif(100)/10,2),
Income = round(rnorm(10,-5,5),2),
Happiness = sample(10,10),
Sex = round(rnorm(10,0.75,0.3),2),
Age = sample(100,100),
Educ = round(rnorm(10,0.75,0.3),2))
DT [, uniqueID := .I] # Creates a unique ID #
DT$some_NA_factor <- factor(DT$some_NA_factor)
DT$Group <- as.character(DT$Group)
DT2 <- copy(DT)
这就是我想要做的,如果可能的话,将列(在本例中为第 5 列 Group
)转换为数字。
dfs <- c("DT", "DT2")
conv_to_num_check <- function(z) is.character(z) && (mean(grepl("^ *-?[\d.]+(?:e-?\d+)?$", z, perl = TRUE), na.rm=TRUE)>0.9)
for (i in length(dfs)) {
cols <- which(sapply(get(dfs[i]), conv_to_num_check))
setDT(get(dfs[i]))[, (cols) := lapply(.SD, as.numeric), .SDcols = cols]
}
但是当我检查 class 时:
class(DT$Group) # Is character
当我这样做时:
cols <- which(sapply(DT, conv_to_num_check))
setDT(DT)[, (cols) := lapply(.SD, as.numeric), .SDcols = cols]
class(DT$Group) # Is numeric
有效..我做错了什么?
只是 for (i in length(dfs))
行中的一个小错误,因为 length(dfs)
只是 returns 2
:
for (i in length(dfs)) {
print(i)
}
# [1] 2
如果您将其更改为:
,它将起作用for (i in seq_along(dfs)) {
cols <- which(sapply(get(dfs[i]), conv_to_num_check))
setDT(get(dfs[i]))[, (cols) := lapply(.SD, as.numeric), .SDcols = cols]
}