R - Lapply 相关分析函数
R - Lapply Function for correlation analysis
我在处理 R 数据集时遇到了一个问题。
如何使用 lapply 函数按股票代码分别计算销售额(Sales)与股价(Stockprice_quarterly)之间的相关性,以便快速查看?我尝试了下面的代码,但它不起作用:
# Asker's original attempt, kept exactly as posted (reported as not working).
# NOTE(review): cor() is applied to the whole subset, which in the sample data
# also contains the character columns TickerSymbol and Quarter; cor() needs
# numeric input. Sales and Stockprice_quarterly are referenced as bare names
# that are not defined inside this function, and image() draws a plot rather
# than returning a correlation -- confirm before reusing any of this.
my_correlation <- function(subset_df) {
subset_correlation <- image(cor(subset_df), x=Sales, y=Stockprice_quarterly)
subset_correlation
}
# Call my_correlation() once per distinct ticker, passing only that ticker's
# rows of Nasdaq_100; lapply() collects the results in a list (one element per
# ticker, in order of first appearance).
ss <- lapply(unique(Nasdaq_100$TickerSymbol), function(ticker)
my_correlation(subset(Nasdaq_100, Nasdaq_100$TickerSymbol == ticker)))
这是我创建的示例,用于显示我的数据集的结构:
TickerSymbol Quarter Sales Stockprice_quarterly
AMD 31.03.2021 $0.45 502.500
AMD 31.12.2020 $1.47 361.100
AMD 30.09.2020 $0.32 280.700
AMD 30.06.2020 $0.13 377.400
AMD 31.03.2020 $0.14 296.900
AMD 31.12.2019 $0.15 274.800
AMD 30.09.2019 $0.11 561.200
AMD 30.06.2019 $0.03 548.650
AMD 31.03.2019 $0.01 509.977
AAPL 31.03.2021 $1.40 359.038
AAPL 31.12.2020 $1.68 358.514
AAPL 30.09.2020 $0.75 357.991
AAPL 30.06.2020 $0.65 357.467
AAPL 31.03.2020 $0.64 356.944
AAPL 31.12.2019 $1.25 356.421
AAPL 30.09.2019 $0.77 355.897
AAPL 30.06.2019 $0.55 355.374
AAPL 31.03.2019 $0.62 354.851
EBAY 31.03.2021 $0.92 325.020
EBAY 31.12.2020 $1.39 324.496
EBAY 30.09.2020 $0.94 323.973
EBAY 30.06.2020 $1.05 323.449
EBAY 31.03.2020 $4.51 322.926
EBAY 31.12.2019 $0.69 322.403
EBAY 30.09.2019 $0.37 321.879
EBAY 30.06.2019 $0.46 321.356
EBAY 31.03.2019 $0.57 320.833
在此先感谢您的帮助!
Sales 中有一个 $ 符号。也许 Sales 在数据导入期间被转换成了字符向量?您可以删除该符号并将其转换为数字。以下是 my_correlation() 的两种可能变体:一种使用 subset(),另一种使用 [。
# Remove $ sign
# fixed = TRUE makes sub() match a literal "$". As a regular expression "$" is
# the end-of-string anchor, and "\$" is not a valid escape inside an R string
# literal (the script would not even parse).
dat$Sales <- as.numeric(sub("$", "", dat$Sales, fixed = TRUE))
# First variation: subset() picks both the rows and the columns.
#
# ticker_subset: the ticker symbol whose rows should be used.
# data: data frame with TickerSymbol, Sales and Stockprice_quarterly columns.
# Returns the 2 x 2 correlation matrix of Sales and Stockprice_quarterly for
# that ticker.
my_correlation_1 <- function(ticker_subset, data) {
  ticker_rows <- subset(
    data,
    subset = TickerSymbol == ticker_subset,
    select = c(Sales, Stockprice_quarterly)
  )
  cor(ticker_rows)
}
# Run the first variation for every distinct ticker and label each result
# with its ticker symbol.
mycor1 <- setNames(
  lapply(unique(dat$TickerSymbol), my_correlation_1, data = dat),
  unique(dat$TickerSymbol)
)
# Second variation: plain `[` indexing with a logical row mask and column
# names given as strings.
#
# ticker_subset: the ticker symbol whose rows should be used.
# data: data frame with TickerSymbol, Sales and Stockprice_quarterly columns.
# Returns the 2 x 2 correlation matrix of Sales and Stockprice_quarterly for
# that ticker.
my_correlation_2 <- function(ticker_subset, data) {
  is_ticker <- data$TickerSymbol == ticker_subset
  value_cols <- c("Sales", "Stockprice_quarterly")
  cor(data[is_ticker, value_cols])
}
# Same per-ticker application for the second variation, then print the
# resulting named list.
mycor2 <- setNames(
  lapply(unique(dat$TickerSymbol), my_correlation_2, data = dat),
  unique(dat$TickerSymbol)
)
mycor2
# $AMD
# Sales Stockprice_quarterly
# Sales 1.0000000 -0.2261417
# Stockprice_quarterly -0.2261417 1.0000000
#
# $AAPL
# Sales Stockprice_quarterly
# Sales 1.0000000 0.6531391
# Stockprice_quarterly 0.6531391 1.0000000
#
# $EBAY
# Sales Stockprice_quarterly
# Sales 1.0000000 0.2032839
# Stockprice_quarterly 0.2032839 1.0000000
数据:
# Example data: 9 quarters for each of 3 tickers (27 rows).
# Sales is deliberately stored as character with a leading "$", mirroring the
# import problem described in the question. The "$" amounts had been mangled
# in this listing; they are restored so that the correlations printed in the
# answers (AMD -0.2261417, AAPL 0.6531391, EBAY 0.2032839) are reproduced.
dat <- structure(
  list(
    TickerSymbol = rep(c("AMD", "AAPL", "EBAY"), each = 9L),
    Quarter = rep(
      c(
        "31.03.2021", "31.12.2020", "30.09.2020", "30.06.2020", "31.03.2020",
        "31.12.2019", "30.09.2019", "30.06.2019", "31.03.2019"
      ),
      times = 3L
    ),
    Sales = c(
      "$0.45", "$1.47", "$0.32", "$0.13", "$0.14", "$0.15", "$0.11", "$0.03",
      "$0.01",
      "$1.40", "$1.68", "$0.75", "$0.65", "$0.64", "$1.25", "$0.77", "$0.55",
      "$0.62",
      "$0.92", "$1.39", "$0.94", "$1.05", "$4.51", "$0.69", "$0.37", "$0.46",
      "$0.57"
    ),
    Stockprice_quarterly = c(
      502.5, 361.1, 280.7, 377.4, 296.9, 274.8, 561.2, 548.65, 509.977,
      359.038, 358.514, 357.991, 357.467, 356.944, 356.421, 355.897, 355.374,
      354.851,
      325.02, 324.496, 323.973, 323.449, 322.926, 322.403, 321.879, 321.356,
      320.833
    )
  ),
  class = "data.frame",
  row.names = c(NA, -27L)
)
使用 tidyverse 的方案:
library(tidyverse)

# Per-ticker correlation matrices of Sales vs. Stockprice_quarterly.
# parse_number() strips the "$" and converts Sales to numeric.
# split() names every list element by its own TickerSymbol value, so labels
# cannot drift from the data. The earlier group_split() + set_names(unique())
# combination mislabelled results: group_split() returns groups ordered by
# the sorted key (AAPL, AMD, EBAY) while unique() keeps first-appearance order
# (AMD, AAPL, EBAY), which swapped the AMD and AAPL matrices.
# The pipeline starts from `dat`, the example data frame.
dat %>%
  mutate(Sales = parse_number(Sales)) %>%
  split(.$TickerSymbol) %>%
  map(~ cor(select(.data = .x, Sales, Stockprice_quarterly)))
# $AAPL
#                          Sales Stockprice_quarterly
# Sales                1.0000000            0.6531391
# Stockprice_quarterly 0.6531391            1.0000000
#
# $AMD
#                           Sales Stockprice_quarterly
# Sales                 1.0000000           -0.2261417
# Stockprice_quarterly -0.2261417            1.0000000
#
# $EBAY
#                          Sales Stockprice_quarterly
# Sales                1.0000000            0.2032839
# Stockprice_quarterly 0.2032839            1.0000000
我在处理 R 数据集时遇到了一个问题。如何使用 lapply 函数按股票代码分别计算销售额(Sales)与股价(Stockprice_quarterly)之间的相关性,以便快速查看?我尝试了下面的代码,但它不起作用:
# Asker's original attempt, kept exactly as posted (reported as not working).
# NOTE(review): cor() is applied to the whole subset, which in the sample data
# also contains the character columns TickerSymbol and Quarter; cor() needs
# numeric input. Sales and Stockprice_quarterly are referenced as bare names
# that are not defined inside this function, and image() draws a plot rather
# than returning a correlation -- confirm before reusing any of this.
my_correlation <- function(subset_df) {
subset_correlation <- image(cor(subset_df), x=Sales, y=Stockprice_quarterly)
subset_correlation
}
# Call my_correlation() once per distinct ticker, passing only that ticker's
# rows of Nasdaq_100; lapply() collects the results in a list (one element per
# ticker, in order of first appearance).
ss <- lapply(unique(Nasdaq_100$TickerSymbol), function(ticker)
my_correlation(subset(Nasdaq_100, Nasdaq_100$TickerSymbol == ticker)))
这是我创建的示例,用于显示我的数据集的结构:
TickerSymbol Quarter Sales Stockprice_quarterly
AMD 31.03.2021 $0.45 502.500
AMD 31.12.2020 $1.47 361.100
AMD 30.09.2020 $0.32 280.700
AMD 30.06.2020 $0.13 377.400
AMD 31.03.2020 $0.14 296.900
AMD 31.12.2019 $0.15 274.800
AMD 30.09.2019 $0.11 561.200
AMD 30.06.2019 $0.03 548.650
AMD 31.03.2019 $0.01 509.977
AAPL 31.03.2021 $1.40 359.038
AAPL 31.12.2020 $1.68 358.514
AAPL 30.09.2020 $0.75 357.991
AAPL 30.06.2020 $0.65 357.467
AAPL 31.03.2020 $0.64 356.944
AAPL 31.12.2019 $1.25 356.421
AAPL 30.09.2019 $0.77 355.897
AAPL 30.06.2019 $0.55 355.374
AAPL 31.03.2019 $0.62 354.851
EBAY 31.03.2021 $0.92 325.020
EBAY 31.12.2020 $1.39 324.496
EBAY 30.09.2020 $0.94 323.973
EBAY 30.06.2020 $1.05 323.449
EBAY 31.03.2020 $4.51 322.926
EBAY 31.12.2019 $0.69 322.403
EBAY 30.09.2019 $0.37 321.879
EBAY 30.06.2019 $0.46 321.356
EBAY 31.03.2019 $0.57 320.833
在此先感谢您的帮助!
Sales 中有一个 $ 符号。也许 Sales 在数据导入期间被转换成了字符向量?您可以删除该符号并将其转换为数字。以下是 my_correlation() 的两种可能变体:一种使用 subset(),另一种使用 [。
# Remove $ sign
# fixed = TRUE makes sub() match a literal "$". As a regular expression "$" is
# the end-of-string anchor, and "\$" is not a valid escape inside an R string
# literal (the script would not even parse).
dat$Sales <- as.numeric(sub("$", "", dat$Sales, fixed = TRUE))
# First variation: subset() picks both the rows and the columns.
#
# ticker_subset: the ticker symbol whose rows should be used.
# data: data frame with TickerSymbol, Sales and Stockprice_quarterly columns.
# Returns the 2 x 2 correlation matrix of Sales and Stockprice_quarterly for
# that ticker.
my_correlation_1 <- function(ticker_subset, data) {
  ticker_rows <- subset(
    data,
    subset = TickerSymbol == ticker_subset,
    select = c(Sales, Stockprice_quarterly)
  )
  cor(ticker_rows)
}
# Run the first variation for every distinct ticker and label each result
# with its ticker symbol.
mycor1 <- setNames(
  lapply(unique(dat$TickerSymbol), my_correlation_1, data = dat),
  unique(dat$TickerSymbol)
)
# Second variation: plain `[` indexing with a logical row mask and column
# names given as strings.
#
# ticker_subset: the ticker symbol whose rows should be used.
# data: data frame with TickerSymbol, Sales and Stockprice_quarterly columns.
# Returns the 2 x 2 correlation matrix of Sales and Stockprice_quarterly for
# that ticker.
my_correlation_2 <- function(ticker_subset, data) {
  is_ticker <- data$TickerSymbol == ticker_subset
  value_cols <- c("Sales", "Stockprice_quarterly")
  cor(data[is_ticker, value_cols])
}
# Same per-ticker application for the second variation, then print the
# resulting named list.
mycor2 <- setNames(
  lapply(unique(dat$TickerSymbol), my_correlation_2, data = dat),
  unique(dat$TickerSymbol)
)
mycor2
# $AMD
# Sales Stockprice_quarterly
# Sales 1.0000000 -0.2261417
# Stockprice_quarterly -0.2261417 1.0000000
#
# $AAPL
# Sales Stockprice_quarterly
# Sales 1.0000000 0.6531391
# Stockprice_quarterly 0.6531391 1.0000000
#
# $EBAY
# Sales Stockprice_quarterly
# Sales 1.0000000 0.2032839
# Stockprice_quarterly 0.2032839 1.0000000
数据:
# Example data: 9 quarters for each of 3 tickers (27 rows).
# Sales is deliberately stored as character with a leading "$", mirroring the
# import problem described in the question. The "$" amounts had been mangled
# in this listing; they are restored so that the correlations printed in the
# answers (AMD -0.2261417, AAPL 0.6531391, EBAY 0.2032839) are reproduced.
dat <- structure(
  list(
    TickerSymbol = rep(c("AMD", "AAPL", "EBAY"), each = 9L),
    Quarter = rep(
      c(
        "31.03.2021", "31.12.2020", "30.09.2020", "30.06.2020", "31.03.2020",
        "31.12.2019", "30.09.2019", "30.06.2019", "31.03.2019"
      ),
      times = 3L
    ),
    Sales = c(
      "$0.45", "$1.47", "$0.32", "$0.13", "$0.14", "$0.15", "$0.11", "$0.03",
      "$0.01",
      "$1.40", "$1.68", "$0.75", "$0.65", "$0.64", "$1.25", "$0.77", "$0.55",
      "$0.62",
      "$0.92", "$1.39", "$0.94", "$1.05", "$4.51", "$0.69", "$0.37", "$0.46",
      "$0.57"
    ),
    Stockprice_quarterly = c(
      502.5, 361.1, 280.7, 377.4, 296.9, 274.8, 561.2, 548.65, 509.977,
      359.038, 358.514, 357.991, 357.467, 356.944, 356.421, 355.897, 355.374,
      354.851,
      325.02, 324.496, 323.973, 323.449, 322.926, 322.403, 321.879, 321.356,
      320.833
    )
  ),
  class = "data.frame",
  row.names = c(NA, -27L)
)
使用 tidyverse 的方案:
library(tidyverse)

# Per-ticker correlation matrices of Sales vs. Stockprice_quarterly.
# parse_number() strips the "$" and converts Sales to numeric.
# split() names every list element by its own TickerSymbol value, so labels
# cannot drift from the data. The earlier group_split() + set_names(unique())
# combination mislabelled results: group_split() returns groups ordered by
# the sorted key (AAPL, AMD, EBAY) while unique() keeps first-appearance order
# (AMD, AAPL, EBAY), which swapped the AMD and AAPL matrices.
# The pipeline starts from `dat`, the example data frame.
dat %>%
  mutate(Sales = parse_number(Sales)) %>%
  split(.$TickerSymbol) %>%
  map(~ cor(select(.data = .x, Sales, Stockprice_quarterly)))
# $AAPL
#                          Sales Stockprice_quarterly
# Sales                1.0000000            0.6531391
# Stockprice_quarterly 0.6531391            1.0000000
#
# $AMD
#                           Sales Stockprice_quarterly
# Sales                 1.0000000           -0.2261417
# Stockprice_quarterly -0.2261417            1.0000000
#
# $EBAY
#                          Sales Stockprice_quarterly
# Sales                1.0000000            0.2032839
# Stockprice_quarterly 0.2032839            1.0000000