在 R 中将多个参数作为列表传递
Passing multiple arguments as a list in R
我希望将参数列表作为向量传递给 R 中的另一个命令。我不想每次都重复同一组参数。
下面这段代码需要对 t1 到 t6 这 6 个数据帧的 `$full_text` 列各运行一次,共运行 6 次:
library(quanteda)
# Split the text column of t1 into word tokens, discarding numbers,
# punctuation, symbols, separators, Twitter characters, hyphens and URLs.
t1t <- tokens(
  t1$full_text,
  what = "word",
  remove_numbers = TRUE,
  remove_punct = TRUE,
  remove_symbols = TRUE,
  remove_separators = TRUE,
  remove_twitter = TRUE,
  remove_hyphens = TRUE,
  remove_url = TRUE
)

# Lower-case every token, then drop the default stopwords.
t1t <- tokens_select(tokens_tolower(t1t), stopwords(), selection = "remove")

# Flatten to a plain vector and keep each distinct token once.
t1t <- unique(unlist(t1t))

# Convert to a data frame; the variable name is kept so that the resulting
# column is still called `t1t`.
t1t <- as.data.frame(t1t)
t1t <- as.data.frame.matrix(t1t)
有没有办法把这组参数只写一次,然后在每次调用时重复传入?
如错误消息中所述,`tokens()` 期望的输入是字符向量、语料库(corpus)或 tokens 对象,而您传给它的是数据帧。请改为传入相应的文本列。
`tokens()` 也可以处理向量,因此您可以将多个列合并为一个向量一起传入。
library(quanteda)
# Concatenate the text columns of several data frames into one character
# vector and tokenise them in a single call. `colname` is a placeholder for
# the actual text column.
tokens(
  c(t1$colname, t2$colname, t3$colname),
  what = "word",
  remove_numbers = TRUE,
  remove_punct = TRUE,
  remove_symbols = TRUE,
  remove_separators = TRUE,
  remove_twitter = TRUE,
  remove_hyphens = TRUE,
  remove_url = TRUE
)
更新:下面以 `?tokens` 帮助页面中的示例文本为例。
# Sample data: t1 is a one-row data frame whose full_text column holds a
# single string; stringsAsFactors = FALSE keeps the column as character.
t1 <- data.frame(full_text = "#textanalysis is MY <3 4U @myhandle gr8 #stuff :-)",
stringsAsFactors = FALSE)
# Sample data: t2 has two rows of text. Both string literals continue across
# source lines, so they contain literal newlines in addition to the \n and \t
# escapes.
t2 <- data.frame(full_text = c("This is in 999 different ways,\n up and down;
left and right!", "@kenbenoit working: on #quanteda 2day\t4ever,
http://textasdata.com?page=123."), stringsAsFactors = FALSE)
我们可以创建一个函数将其应用于所有数据帧
#' Extract the distinct, lower-cased, non-stopword tokens of a text input
#'
#' Tokenises `x` into words (dropping numbers, punctuation, symbols,
#' separators, Twitter characters, hyphens and URLs), lower-cases the tokens,
#' removes the default stopwords and keeps each remaining token once.
#'
#' @param x Input passed straight to `tokens()`; the example below calls this
#'   with the `full_text` character column of a data frame.
#' @return A one-column data frame (column `t1t`) with one row per distinct
#'   token. The value is returned visibly so a direct call prints it.
complete_function <- function(x) {
  # NOTE(review): remove_twitter and remove_hyphens appear to be deprecated in
  # quanteda >= 2.0 -- confirm against the installed version before relying
  # on them.
  t1t <- tokens(
    x,
    what = "word",
    remove_numbers = TRUE,
    remove_punct = TRUE,
    remove_symbols = TRUE,
    remove_separators = TRUE,
    remove_twitter = TRUE,
    remove_hyphens = TRUE,
    remove_url = TRUE
  )
  t1t <- tokens_tolower(t1t)
  t1t <- tokens_select(t1t, stopwords(), selection = "remove")

  # Flatten all documents into one vector and de-duplicate.
  t1t <- unique(unlist(t1t))

  # Keep the variable name `t1t`: as.data.frame() derives the column name
  # from it.
  t1t <- as.data.frame(t1t)

  # Ending on a plain expression (not an assignment) returns the result
  # visibly.
  as.data.frame.matrix(t1t)
}
然后使用 `mget` 获取数据帧 `t1`、`t2`、`t3` 等,并将该函数应用于每个数据帧的 `full_text` 列。
# Fetch every object named "t" followed only by digits (t1, t2, ...) and run
# complete_function() on each data frame's full_text column.
# The backslash is doubled because "\d" is not a valid escape in an R string;
# the trailing `$` keeps helper objects such as `t1t` from being matched.
lapply(
  mget(ls(pattern = "^t\\d+$")),
  function(x) complete_function(x$full_text)
)
#$t1
# t1t
#1 textanalysis
#2 4u
#3 myhandle
#4 gr8
#5 stuff
#$t2
# t1t
#1 different
#2 ways
#3 left
#4 right
#5 kenbenoit
#6 working
#7 quanteda
#8 2day
#9 4ever
我希望将参数列表作为向量传递给 R 中的另一个命令。我不想每次都重复同一组参数。
下面这段代码需要对 t1 到 t6 这 6 个数据帧的 `$full_text` 列各运行一次,共运行 6 次:
library(quanteda)
# Split the text column of t1 into word tokens, discarding numbers,
# punctuation, symbols, separators, Twitter characters, hyphens and URLs.
t1t <- tokens(
  t1$full_text,
  what = "word",
  remove_numbers = TRUE,
  remove_punct = TRUE,
  remove_symbols = TRUE,
  remove_separators = TRUE,
  remove_twitter = TRUE,
  remove_hyphens = TRUE,
  remove_url = TRUE
)

# Lower-case every token, then drop the default stopwords.
t1t <- tokens_select(tokens_tolower(t1t), stopwords(), selection = "remove")

# Flatten to a plain vector and keep each distinct token once.
t1t <- unique(unlist(t1t))

# Convert to a data frame; the variable name is kept so that the resulting
# column is still called `t1t`.
t1t <- as.data.frame(t1t)
t1t <- as.data.frame.matrix(t1t)
有没有办法把这组参数只写一次,然后在每次调用时重复传入?
如错误消息中所述,`tokens()` 期望的输入是字符向量、语料库(corpus)或 tokens 对象,而您传给它的是数据帧。请改为传入相应的文本列。
`tokens()` 也可以处理向量,因此您可以将多个列合并为一个向量一起传入。
library(quanteda)
# Concatenate the text columns of several data frames into one character
# vector and tokenise them in a single call. `colname` is a placeholder for
# the actual text column.
tokens(
  c(t1$colname, t2$colname, t3$colname),
  what = "word",
  remove_numbers = TRUE,
  remove_punct = TRUE,
  remove_symbols = TRUE,
  remove_separators = TRUE,
  remove_twitter = TRUE,
  remove_hyphens = TRUE,
  remove_url = TRUE
)
更新:下面的示例基于 `?tokens` 帮助页面。
# Sample data: t1 is a one-row data frame whose full_text column holds a
# single string; stringsAsFactors = FALSE keeps the column as character.
t1 <- data.frame(full_text = "#textanalysis is MY <3 4U @myhandle gr8 #stuff :-)",
stringsAsFactors = FALSE)
# Sample data: t2 has two rows of text. Both string literals continue across
# source lines, so they contain literal newlines in addition to the \n and \t
# escapes.
t2 <- data.frame(full_text = c("This is in 999 different ways,\n up and down;
left and right!", "@kenbenoit working: on #quanteda 2day\t4ever,
http://textasdata.com?page=123."), stringsAsFactors = FALSE)
我们可以创建一个函数将其应用于所有数据帧
#' Extract the distinct, lower-cased, non-stopword tokens of a text input
#'
#' Tokenises `x` into words (dropping numbers, punctuation, symbols,
#' separators, Twitter characters, hyphens and URLs), lower-cases the tokens,
#' removes the default stopwords and keeps each remaining token once.
#'
#' @param x Input passed straight to `tokens()`; the example below calls this
#'   with the `full_text` character column of a data frame.
#' @return A one-column data frame (column `t1t`) with one row per distinct
#'   token. The value is returned visibly so a direct call prints it.
complete_function <- function(x) {
  # NOTE(review): remove_twitter and remove_hyphens appear to be deprecated in
  # quanteda >= 2.0 -- confirm against the installed version before relying
  # on them.
  t1t <- tokens(
    x,
    what = "word",
    remove_numbers = TRUE,
    remove_punct = TRUE,
    remove_symbols = TRUE,
    remove_separators = TRUE,
    remove_twitter = TRUE,
    remove_hyphens = TRUE,
    remove_url = TRUE
  )
  t1t <- tokens_tolower(t1t)
  t1t <- tokens_select(t1t, stopwords(), selection = "remove")

  # Flatten all documents into one vector and de-duplicate.
  t1t <- unique(unlist(t1t))

  # Keep the variable name `t1t`: as.data.frame() derives the column name
  # from it.
  t1t <- as.data.frame(t1t)

  # Ending on a plain expression (not an assignment) returns the result
  # visibly.
  as.data.frame.matrix(t1t)
}
然后使用 `mget` 获取数据帧 `t1`、`t2`、`t3` 等,并将该函数应用于每个数据帧的 `full_text` 列。
# Fetch every object named "t" followed only by digits (t1, t2, ...) and run
# complete_function() on each data frame's full_text column.
# The backslash is doubled because "\d" is not a valid escape in an R string;
# the trailing `$` keeps helper objects such as `t1t` from being matched.
lapply(
  mget(ls(pattern = "^t\\d+$")),
  function(x) complete_function(x$full_text)
)
#$t1
# t1t
#1 textanalysis
#2 4u
#3 myhandle
#4 gr8
#5 stuff
#$t2
# t1t
#1 different
#2 ways
#3 left
#4 right
#5 kenbenoit
#6 working
#7 quanteda
#8 2day
#9 4ever