如何使用 'by' 并选择某些列在 data.table 中应用自定义函数
How to apply a custom function in a data.table using 'by' and selecting certain columns
我有以下data.table
# id category sales Num share
# 1: 1 a -0.25174915 -0.2130797 -0.67909764
# 2: 2 a -0.35569766 0.6014930 0.35201386
# 3: 3 a -0.31600957 0.4398968 -1.15475814
# 4: 4 b -0.54113762 -2.3497952 0.64503654
# 5: 5 c -0.25174915 -0.2130797 -0.67909764
# 6: 6 b -0.35569766 0.6014930 0.35201386
# 7: 7 c -0.31600957 0.4398968 -1.15475814
# 8: 8 a -0.54113762 -2.3497952 0.64503654
目前我正在执行以下操作以获得每个类别的 PCA 分析:
# Fit the PCA on the rows of category "a" only. The level is a string, so it
# is quoted; TRUE is spelled out because T is an ordinary, reassignable name.
PCA <- prcomp(
  df[category == "a", .(sales, Num, share)],
  center = TRUE, scale. = TRUE
)
# Store the scores by reference, and only in the rows they were computed for:
# apply() yields one value per category-"a" row, not one per row of df.
df[
  category == "a",
  score := apply(.SD, 1, function(x) sum(x * PCA$rotation)),
  .SDcols = c("sales", "Num", "share")
]
但是这个操作只针对一个特定的类别,我想对df中的所有类别单独执行。
我正在尝试以下操作:
# Attempted helper: fit a PCA on the `y` selection of `df`, then score each row.
#   df: table holding the data.
#   y:  selector used as `df[, y]`.
# NOTE(review): how `df[, y]` resolves depends on the class of df and on what
# the caller passes as y -- confirm against the call below.
pca.weighting <- function(df,y) {
# PCA with centring and scaling on the selected data.
PCA <- prcomp(df[, y], center = T, scale. = T)
# Per row: sum of the element-wise product of the row with the rotation matrix.
scores <- apply(df[, y, ], 1, function(x) sum(x*PCA$rotation))
return(scores)
}
# Per-category attempt: lapply walks the .SD columns one at a time, so `x` is a
# single column, and `df` names the whole table rather than the current group.
df[, lapply(.SD, function(x) pca.weighting(df,x) ), by = 'category', .SDcols = c('sales', 'Num', 'share')]
但是我除了错误什么也没有得到。我究竟做错了什么?任何见解都将不胜感激。
可能是这样的:
# Score every row of `df` by projecting it onto each principal axis and adding
# the projections together.
#
#   df: numeric columns to analyse (for example the per-group .SD of a
#       data.table).
#   y:  kept in the signature for call compatibility; the body never
#       evaluates it.
#
# Returns one numeric score per row of `df`.
pca.weighting <- function(df, y) {
  # Loadings are taken from a PCA fitted on the centred, unit-variance data ...
  loadings <- prcomp(df, center = TRUE, scale. = TRUE)$rotation
  # ... and applied to the values exactly as supplied (no centring or scaling).
  projected <- as.matrix(df) %*% loadings
  rowSums(projected)
}
# Run pca.weighting once per category on that group's sales..share columns.
# No second argument is passed: pca.weighting never reads `y`, and `x` is not
# defined anywhere, so passing it only worked because it was never evaluated.
DT[, pca.weighting(.SD), by = category, .SDcols = sales:share]
输出:
category V1
1: a -0.41538278
2: a 1.15660781
3: a -0.38428373
4: a -0.06165818
5: b -1.26069494
6: b -0.45360065
7: c 0.45628037
8: c 1.54026745
数据:
library(data.table)
# Rebuild the example table from inline text: one header row followed by the
# eight whitespace-separated data rows.
DT <- fread("id category sales Num share
1 a -0.25174915 -0.2130797 -0.67909764
2 a -0.35569766 0.6014930 0.35201386
3 a -0.31600957 0.4398968 -1.15475814
4 b -0.54113762 -2.3497952 0.64503654
5 c -0.25174915 -0.2130797 -0.67909764
6 b -0.35569766 0.6014930 0.35201386
7 c -0.31600957 0.4398968 -1.15475814
8 a -0.54113762 -2.3497952 0.64503654")
我有以下data.table
# id category sales Num share
# 1: 1 a -0.25174915 -0.2130797 -0.67909764
# 2: 2 a -0.35569766 0.6014930 0.35201386
# 3: 3 a -0.31600957 0.4398968 -1.15475814
# 4: 4 b -0.54113762 -2.3497952 0.64503654
# 5: 5 c -0.25174915 -0.2130797 -0.67909764
# 6: 6 b -0.35569766 0.6014930 0.35201386
# 7: 7 c -0.31600957 0.4398968 -1.15475814
# 8: 8 a -0.54113762 -2.3497952 0.64503654
目前我正在执行以下操作以获得每个类别的 PCA 分析:
# Fit the PCA on the rows of category "a" only. The level is a string, so it
# is quoted; TRUE is spelled out because T is an ordinary, reassignable name.
PCA <- prcomp(
  df[category == "a", .(sales, Num, share)],
  center = TRUE, scale. = TRUE
)
# Store the scores by reference, and only in the rows they were computed for:
# apply() yields one value per category-"a" row, not one per row of df.
df[
  category == "a",
  score := apply(.SD, 1, function(x) sum(x * PCA$rotation)),
  .SDcols = c("sales", "Num", "share")
]
但是这个操作只针对一个特定的类别,我想对df中的所有类别单独执行。 我正在尝试以下操作:
# Attempted helper: fit a PCA on the `y` selection of `df`, then score each row.
#   df: table holding the data.
#   y:  selector used as `df[, y]`.
# NOTE(review): how `df[, y]` resolves depends on the class of df and on what
# the caller passes as y -- confirm against the call below.
pca.weighting <- function(df,y) {
# PCA with centring and scaling on the selected data.
PCA <- prcomp(df[, y], center = T, scale. = T)
# Per row: sum of the element-wise product of the row with the rotation matrix.
scores <- apply(df[, y, ], 1, function(x) sum(x*PCA$rotation))
return(scores)
}
# Per-category attempt: lapply walks the .SD columns one at a time, so `x` is a
# single column, and `df` names the whole table rather than the current group.
df[, lapply(.SD, function(x) pca.weighting(df,x) ), by = 'category', .SDcols = c('sales', 'Num', 'share')]
但是我除了错误什么也没有得到。我究竟做错了什么?任何见解都将不胜感激。
可能是这样的:
# Score every row of `df` by projecting it onto each principal axis and adding
# the projections together.
#
#   df: numeric columns to analyse (for example the per-group .SD of a
#       data.table).
#   y:  kept in the signature for call compatibility; the body never
#       evaluates it.
#
# Returns one numeric score per row of `df`.
pca.weighting <- function(df, y) {
  # Loadings are taken from a PCA fitted on the centred, unit-variance data ...
  loadings <- prcomp(df, center = TRUE, scale. = TRUE)$rotation
  # ... and applied to the values exactly as supplied (no centring or scaling).
  projected <- as.matrix(df) %*% loadings
  rowSums(projected)
}
# Run pca.weighting once per category on that group's sales..share columns.
# No second argument is passed: pca.weighting never reads `y`, and `x` is not
# defined anywhere, so passing it only worked because it was never evaluated.
DT[, pca.weighting(.SD), by = category, .SDcols = sales:share]
输出:
category V1
1: a -0.41538278
2: a 1.15660781
3: a -0.38428373
4: a -0.06165818
5: b -1.26069494
6: b -0.45360065
7: c 0.45628037
8: c 1.54026745
数据:
library(data.table)
# Rebuild the example table from inline text: one header row followed by the
# eight whitespace-separated data rows.
DT <- fread("id category sales Num share
1 a -0.25174915 -0.2130797 -0.67909764
2 a -0.35569766 0.6014930 0.35201386
3 a -0.31600957 0.4398968 -1.15475814
4 b -0.54113762 -2.3497952 0.64503654
5 c -0.25174915 -0.2130797 -0.67909764
6 b -0.35569766 0.6014930 0.35201386
7 c -0.31600957 0.4398968 -1.15475814
8 a -0.54113762 -2.3497952 0.64503654")