R - 使用 lapply 函数进行相关性分析

R - Lapply Function for correlation analysis

我在处理一个 R 数据集时遇到了问题:如何使用 lapply 函数,按股票代码(TickerSymbol)分别计算销售额(Sales)与股价(Stockprice_quarterly)之间的相关性,以便快速查阅?我尝试了下面的代码,但不起作用:

# Original attempt from the question (reported as not working); kept verbatim.
# Takes the rows belonging to one ticker and tries to draw their correlation
# matrix with image(), returning whatever image() returns.
my_correlation <- function(subset_df) {
  # NOTE(review): cor() is given every column of subset_df; per the sample
  # table that includes the text columns TickerSymbol and Quarter, which cor()
  # presumably rejects. Sales and Stockprice_quarterly are bare names that are
  # not defined inside this function.
  subset_correlation <- image(cor(subset_df), x=Sales, y=Stockprice_quarterly)
  subset_correlation
}

# Call my_correlation() once per distinct ticker symbol, passing only that
# ticker's rows of Nasdaq_100; `ss` collects the per-ticker results in a list.
ss <- lapply(unique(Nasdaq_100$TickerSymbol), function(ticker) 
  my_correlation(subset(Nasdaq_100, Nasdaq_100$TickerSymbol == ticker)))

这是我创建的示例,用于显示我的数据集的结构:

TickerSymbol Quarter Sales Stockprice_quarterly
AMD 31.03.2021 [=12=].45 502.500
AMD 31.12.2020 .47 361.100
AMD 30.09.2020 [=12=].32 280.700
AMD 30.06.2020 [=12=].13 377.400
AMD 31.03.2020 [=12=].14 296.900
AMD 31.12.2019 [=12=].15 274.800
AMD 30.09.2019 [=12=].11 561.200
AMD 30.06.2019 [=12=].03 548.650
AMD 31.03.2019 [=12=].01 509.977
AAPL 31.03.2021 .40 359.038
AAPL 31.12.2020 .68 358.514
AAPL 30.09.2020 [=12=].75 357.991
AAPL 30.06.2020 [=12=].65 357.467
AAPL 31.03.2020 [=12=].64 356.944
AAPL 31.12.2019 .25 356.421
AAPL 30.09.2019 [=12=].77 355.897
AAPL 30.06.2019 [=12=].55 355.374
AAPL 31.03.2019 [=12=].62 354.851
EBAY 31.03.2021 [=12=].92 325.020
EBAY 31.12.2020 .39 324.496
EBAY 30.09.2020 [=12=].94 323.973
EBAY 30.06.2020 .05 323.449
EBAY 31.03.2020 .51 322.926
EBAY 31.12.2019 [=12=].69 322.403
EBAY 30.09.2019 [=12=].37 321.879
EBAY 30.06.2019 [=12=].46 321.356
EBAY 31.03.2019 [=12=].57 320.833

在此先感谢您的帮助!

Sales 中有一个 $ 符号。也许 Sales 在数据导入期间被转换为字符向量?您可以删除符号并将其转换为数字。以下是 my_correlation() 的两种可能变体 - 一种使用 subset(),另一种使用 [.

# Remove the first "$" from each Sales string and convert the column to numeric.
# fixed = TRUE matches a literal "$": as a regular expression a bare "$" means
# end-of-string, and "\$" is not a valid escape in an R string literal.
dat$Sales <- as.numeric(sub("$", "", dat$Sales, fixed = TRUE))

# First variation
# Correlation matrix of Sales and Stockprice_quarterly for a single ticker,
# selecting rows and columns with subset().
#
# ticker_subset: ticker symbol whose rows are kept.
# data:          data frame with TickerSymbol, Sales and Stockprice_quarterly.
# Returns the 2 x 2 matrix produced by cor().
my_correlation_1 <- function(ticker_subset, data) {
  ticker_rows <- subset(
    data,
    subset = TickerSymbol == ticker_subset,
    select = c(Sales, Stockprice_quarterly)
  )
  cor(ticker_rows)
}

# One correlation matrix per distinct ticker, as a list named by ticker symbol.
mycor1 <- setNames(
  lapply(unique(dat$TickerSymbol), my_correlation_1, data = dat),
  unique(dat$TickerSymbol)
)

# Second variation
# Correlation matrix of Sales and Stockprice_quarterly for a single ticker,
# selecting rows and columns with `[` indexing.
#
# ticker_subset: ticker symbol whose rows are kept.
# data:          data frame with TickerSymbol, Sales and Stockprice_quarterly.
# Returns the 2 x 2 matrix produced by cor().
my_correlation_2 <- function(ticker_subset, data) {
  is_ticker <- data$TickerSymbol == ticker_subset
  wanted_cols <- c("Sales", "Stockprice_quarterly")
  cor(data[is_ticker, wanted_cols])
}

# One correlation matrix per distinct ticker, as a list named by ticker symbol.
mycor2 <- setNames(
  lapply(unique(dat$TickerSymbol), my_correlation_2, data = dat),
  unique(dat$TickerSymbol)
)

# Print the named list of per-ticker correlation matrices.
mycor2

# $AMD
#                           Sales Stockprice_quarterly
# Sales                 1.0000000           -0.2261417
# Stockprice_quarterly -0.2261417            1.0000000
# 
# $AAPL
#                          Sales Stockprice_quarterly
# Sales                1.0000000            0.6531391
# Stockprice_quarterly 0.6531391            1.0000000
# 
# $EBAY
#                          Sales Stockprice_quarterly
# Sales                1.0000000            0.2032839
# Stockprice_quarterly 0.2032839            1.0000000

数据:

# Sample data mirroring the question's table: 27 rows (3 tickers x 9 quarters).
# TickerSymbol, Quarter and Sales are character; Stockprice_quarterly is numeric.
# NOTE(review): the Sales strings (e.g. "[=11=].45", ".47") look mangled by text
# extraction -- presumably they were dollar amounts with a leading "$"; confirm
# against the original data before relying on the printed correlations.
dat <- structure(list(TickerSymbol = c("AMD", "AMD", "AMD", "AMD", "AMD", 
"AMD", "AMD", "AMD", "AMD", "AAPL", "AAPL", "AAPL", "AAPL", "AAPL", 
"AAPL", "AAPL", "AAPL", "AAPL", "EBAY", "EBAY", "EBAY", "EBAY", 
"EBAY", "EBAY", "EBAY", "EBAY", "EBAY"), Quarter = c("31.03.2021", 
"31.12.2020", "30.09.2020", "30.06.2020", "31.03.2020", "31.12.2019", 
"30.09.2019", "30.06.2019", "31.03.2019", "31.03.2021", "31.12.2020", 
"30.09.2020", "30.06.2020", "31.03.2020", "31.12.2019", "30.09.2019", 
"30.06.2019", "31.03.2019", "31.03.2021", "31.12.2020", "30.09.2020", 
"30.06.2020", "31.03.2020", "31.12.2019", "30.09.2019", "30.06.2019", 
"31.03.2019"), Sales = c("[=11=].45", ".47", "[=11=].32", "[=11=].13", 
"[=11=].14", "[=11=].15", "[=11=].11", "[=11=].03", "[=11=].01", ".40", ".68", 
"[=11=].75", "[=11=].65", "[=11=].64", ".25", "[=11=].77", "[=11=].55", "[=11=].62", 
"[=11=].92", ".39", "[=11=].94", ".05", ".51", "[=11=].69", "[=11=].37", 
"[=11=].46", "[=11=].57"), Stockprice_quarterly = c(502.5, 361.1, 280.7, 
377.4, 296.9, 274.8, 561.2, 548.65, 509.977, 359.038, 358.514, 
357.991, 357.467, 356.944, 356.421, 355.897, 355.374, 354.851, 
325.02, 324.496, 323.973, 323.449, 322.926, 322.403, 321.879, 
321.356, 320.833)), class = "data.frame", row.names = c(NA, -27L
))

使用 tidyverse 的写法:

library(tidyverse)

# Per-ticker correlation matrix of Sales vs. Stockprice_quarterly, returned as
# a list named by ticker symbol. `dat` is the sample data frame defined in the
# data section.
dat %>%
  mutate(
    # Strip the currency symbol and parse the remaining text as a number.
    Sales = parse_number(Sales),
    # Freeze the tickers' order of first appearance as factor levels so the
    # resulting list keeps that order instead of being sorted alphabetically.
    TickerSymbol = fct_inorder(TickerSymbol)
  ) %>%
  # split() names each piece after its own ticker. group_split() returns
  # unnamed pieces sorted by key, so naming them afterwards with
  # unique(TickerSymbol) attaches labels to the wrong groups.
  split(.$TickerSymbol) %>%
  map(~ cor(select(.data = .x, Sales, Stockprice_quarterly)))

$AMD
                          Sales Stockprice_quarterly
Sales                 1.0000000           -0.2261417
Stockprice_quarterly -0.2261417            1.0000000

$AAPL
                         Sales Stockprice_quarterly
Sales                1.0000000            0.6531391
Stockprice_quarterly 0.6531391            1.0000000

$EBAY
                         Sales Stockprice_quarterly
Sales                1.0000000            0.2032839
Stockprice_quarterly 0.2032839            1.0000000