计算具有正数和负数的列中的四分位数(仅正数)的函数
Function to calculate quartiles (only positive numbers) in a column with positive and negative numbers
我想计算显示差距的列中数字的第一个四分位数。此列中既有正数也有负数,因此我只想过滤掉正数并计算四分位数(第 1 位),但我遇到了函数问题(请参见下面的代码)
#我已经尝试使用“>0”和“0.25”的函数来获得第一个四分位数,但不知何故失败了,我不知道为什么。
library(quantmod)
Symbols <- c("FB","AAPL","AMZN","NFLX","GOOG")
getSymbols(Symbols,from="2018-06-01")
stock_data = sapply(.GlobalEnv, is.xts)
all_stocks <- do.call(list, mget(names(stock_data)[stock_data]))
# Calculate gap
Gap_function <- function(x) {
stock_name <- stringi::stri_extract(names(x)[1], regex = "^[A-Z]+")
stock_name <- paste0(stock_name, ".Gap")
column_names <- c(names(x), stock_name)
x$rgap <- quantmod::Op(x) - lag(quantmod::Cl(x))
x <- setNames(x, column_names)
return(x)
}
all_stocks <- lapply(all_stocks, Gap_function)
# Calculate 1st Quantile of positive numbers in .GAP column, currently it is not working.
# I don't know how to indicate to use only positive numbers (>0) and (1st quartile 0.25)
Quartile1_function <- function(x) {
stock_name <- stringi::stri_extract(names(x)[1], regex = "^[A-Z]+")
stock_name <- paste0(stock_name, ".GapQ1")
column_names <- c(names(x), stock_name)
x$quartile1 <- (quantile(x,na.rm=TRUE [,grep(".Gap" >0, colnames(x))], 0.25))
x <- setNames(x, column_names)
return(x)
}
all_stocks <- lapply(all_stocks, Quartile1_function)
#The desired result is to get a 1st quartile of .Gap column (taking only positive numbers) in a new column =(7.609986)
#If you run this code you will get AMZN quartiles only for positive numbers in the .Gap column. but I don't know what is failing to get a function to do this for all stocks.
quantile(all_stocks$AMZN$AMZN.Gap[which(all_stocks$AMZN$AMZN.Gap >0)])
0% 25% 50% 75% 100%
0.060059 7.609986 11.709961 21.319946 88.640015
是这样的吗?
x <- c(-10,-5,-7,0:10)
quantile(x[x>=0],0.25)
# 25%
# 2.5
我想你想要像下面这样的东西。这将 return xxx.GapQ1 中的数据,其中数字为正,它们将被分为 NA 1、2、3 和 4。NA 是 0(和 0)以下的数字。您可以稍后过滤掉不需要的内容。
Quartile1_function <- function(x) {
stock_name <- stringi::stri_extract(names(x)[1], regex = "^[A-Z]+")
stock_name <- paste0(stock_name, ".GapQ1")
column_names <- c(names(x), stock_name)
# use this grep the column name of interest and plug that in the next section.
column_of_interest <- grep(".Gap", colnames(x))
# Find the quantiles based only on positive numbers.
# The negative numbers or 0 will be returned as NA
x$quartile1 <- as.integer(cut(x[, column_of_interest],
quantile(x[x[, column_of_interest] > 0, column_of_interest], na.rm = TRUE),
include.lowest = TRUE))
names(x) <- column_names
return(x)
}
all_stocks <- lapply(all_stocks, Quartile1_function)
head(all_stocks$AMZN)
AMZN.Open AMZN.High AMZN.Low AMZN.Close AMZN.Volume AMZN.Adjusted AMZN.Gap AMZN.GapQ1
2018-06-01 1637.03 1646.73 1635.09 1641.54 3313400 1641.54 NA NA
2018-06-04 1648.90 1665.68 1645.49 1665.27 3187700 1665.27 7.359985 1
2018-06-05 1672.99 1699.00 1670.06 1696.35 4782200 1696.35 7.719970 2
2018-06-06 1704.51 1714.50 1686.47 1695.75 5473200 1695.75 8.160034 2
2018-06-07 1698.56 1699.90 1676.11 1689.30 3765700 1689.30 2.810059 1
2018-06-08 1681.12 1689.44 1673.01 1683.99 2955100 1683.99 -8.180054 NA
我想计算显示差距的列中数字的第一个四分位数。此列中既有正数也有负数,因此我只想过滤掉正数并计算四分位数(第 1 位),但我遇到了函数问题(请参见下面的代码)
#我已经尝试使用“>0”和“0.25”的函数来获得第一个四分位数,但不知何故失败了,我不知道为什么。
library(quantmod)
Symbols <- c("FB","AAPL","AMZN","NFLX","GOOG")
getSymbols(Symbols,from="2018-06-01")
stock_data = sapply(.GlobalEnv, is.xts)
all_stocks <- do.call(list, mget(names(stock_data)[stock_data]))
# Calculate gap
Gap_function <- function(x) {
stock_name <- stringi::stri_extract(names(x)[1], regex = "^[A-Z]+")
stock_name <- paste0(stock_name, ".Gap")
column_names <- c(names(x), stock_name)
x$rgap <- quantmod::Op(x) - lag(quantmod::Cl(x))
x <- setNames(x, column_names)
return(x)
}
all_stocks <- lapply(all_stocks, Gap_function)
# Calculate 1st Quantile of positive numbers in .GAP column, currently it is not working.
# I don't know how to indicate to use only positive numbers (>0) and (1st quartile 0.25)
Quartile1_function <- function(x) {
stock_name <- stringi::stri_extract(names(x)[1], regex = "^[A-Z]+")
stock_name <- paste0(stock_name, ".GapQ1")
column_names <- c(names(x), stock_name)
x$quartile1 <- (quantile(x,na.rm=TRUE [,grep(".Gap" >0, colnames(x))], 0.25))
x <- setNames(x, column_names)
return(x)
}
all_stocks <- lapply(all_stocks, Quartile1_function)
#The desired result is to get a 1st quartile of .Gap column (taking only positive numbers) in a new column =(7.609986)
#If you run this code you will get AMZN quartiles only for positive numbers in the .Gap column. but I don't know what is failing to get a function to do this for all stocks.
quantile(all_stocks$AMZN$AMZN.Gap[which(all_stocks$AMZN$AMZN.Gap >0)])
0% 25% 50% 75% 100%
0.060059 7.609986 11.709961 21.319946 88.640015
是这样的吗?
x <- c(-10,-5,-7,0:10)
quantile(x[x>=0],0.25)
# 25%
# 2.5
我想你想要像下面这样的东西。这将 return xxx.GapQ1 中的数据,其中数字为正,它们将被分为 NA 1、2、3 和 4。NA 是 0(和 0)以下的数字。您可以稍后过滤掉不需要的内容。
Quartile1_function <- function(x) {
stock_name <- stringi::stri_extract(names(x)[1], regex = "^[A-Z]+")
stock_name <- paste0(stock_name, ".GapQ1")
column_names <- c(names(x), stock_name)
# use this grep the column name of interest and plug that in the next section.
column_of_interest <- grep(".Gap", colnames(x))
# Find the quantiles based only on positive numbers.
# The negative numbers or 0 will be returned as NA
x$quartile1 <- as.integer(cut(x[, column_of_interest],
quantile(x[x[, column_of_interest] > 0, column_of_interest], na.rm = TRUE),
include.lowest = TRUE))
names(x) <- column_names
return(x)
}
all_stocks <- lapply(all_stocks, Quartile1_function)
head(all_stocks$AMZN)
AMZN.Open AMZN.High AMZN.Low AMZN.Close AMZN.Volume AMZN.Adjusted AMZN.Gap AMZN.GapQ1
2018-06-01 1637.03 1646.73 1635.09 1641.54 3313400 1641.54 NA NA
2018-06-04 1648.90 1665.68 1645.49 1665.27 3187700 1665.27 7.359985 1
2018-06-05 1672.99 1699.00 1670.06 1696.35 4782200 1696.35 7.719970 2
2018-06-06 1704.51 1714.50 1686.47 1695.75 5473200 1695.75 8.160034 2
2018-06-07 1698.56 1699.90 1676.11 1689.30 3765700 1689.30 2.810059 1
2018-06-08 1681.12 1689.44 1673.01 1683.99 2955100 1683.99 -8.180054 NA