高效的数据整理(股票数据)
Data wrangling in efficient way (Stock data)
以下是我开始使用的代码:
library(quantmod)
library(timetk)
library(dplyr)
library(tibble)
library(tidyr)
# Download window and the symbols to fetch.
mdate <- "2019-05-01"
edate <- "2019-05-03"
tickers <- c("MMM", "C", "AAPL")

# Start from NULL so the first cbind() has something to bind onto; without this
# initialisation the loop stops with "object 'Open_Raw' not found".
Open_Raw <- NULL
for (ticker in tickers) {
  # Column 1 of each downloaded series is bound on as one more column.
  Open_Raw <- cbind(
    Open_Raw,
    getSymbols(ticker, from = mdate, to = edate, auto.assign = FALSE)[, 1]
  )
}

# Show the combined series as a tibble.
timetk::tk_tbl(Open_Raw)

# Keep everything except the first column of the tibble (presumably the date
# index that tk_tbl() adds) and label the remaining columns by ticker symbol.
Open_Raw <- timetk::tk_tbl(Open_Raw)[, -1]
colnames(Open_Raw) <- tickers
## Open, High and Low prices
# Download each ticker once and reuse the result for every price series
# (previously every symbol was fetched again for each series).
Price_Data <- lapply(tickers, function(ticker) {
  getSymbols(ticker, from = mdate, to = edate, auto.assign = FALSE)
})

# Column positions in each downloaded series -- assumes the default
# getSymbols() layout of Open, High, Low, Close, Volume, Adjusted.
open_col <- 1L
high_col <- 2L
low_col <- 3L

# Merge column `col` of every downloaded series side by side,
# giving one column per ticker.
bind_price_column <- function(col) {
  do.call(merge, lapply(Price_Data, function(prices) prices[, col]))
}

## Open Price
Open_Raw <- bind_price_column(open_col)
Topen_Raw <- t(Open_Raw)

## High Price
High_Raw <- bind_price_column(high_col)
Thigh_Raw <- t(High_Raw)

## Low Price
# Low is column 3; reading column 2 here would just repeat the High prices.
Low_Raw <- bind_price_column(low_col)
Tlow_Raw <- t(Low_Raw)

## write in the same way for Close, Adjusted and Volume, and;
## Cbind Open and High
Open_High <- cbind(Topen_Raw, Thigh_Raw)
## Cbind Open_High and Low_Raw
Open_to_Low <- cbind(Open_High, Tlow_Raw)
如您所见,前两列是开盘价,第三和第四列是最高价。用这些代码可以得到我想要的输出,但当我尝试导入数千只股票的数据时会报错,因此这种写法无法使用。如果可能的话,我还想在各组数据(开盘价、最高价、最低价、收盘价、调整后价格和成交量)之间插入一个空列作为分隔。
我该怎么做才能做到这一点?
基础 R 解决方案:
# Transpose `data` so that each of its columns becomes a row, keeping the
# original column label in `price_var`, then split that label on its dots.
# Assumes labels look like "<ticker>.<price variable>" -- confirm against `data`.
td_data <- local({
  flipped <- data.frame(price_var = row.names(t(data)), t(data), row.names = NULL)
  series_label <- flipped[["price_var"]]
  # Everything before the first dot.
  flipped[["ticker_cd"]] <- as.factor(gsub("[.].*", "", series_label))
  # Everything after the last dot.
  flipped[["price_var"]] <- as.factor(gsub(".*[.]", "", series_label))
  flipped
})
# Reshape: one group of rows per `price_var` level, bound side by side.
do.call("cbind", split(td_data, td_data[["price_var"]]))
以下是我开始使用的代码:
library(quantmod)
library(timetk)
library(dplyr)
library(tibble)
library(tidyr)
# Download window and the symbols to fetch.
mdate <- "2019-05-01"
edate <- "2019-05-03"
tickers <- c("MMM", "C", "AAPL")

# Start from NULL so the first cbind() has something to bind onto; without this
# initialisation the loop stops with "object 'Open_Raw' not found".
Open_Raw <- NULL
for (ticker in tickers) {
  # Column 1 of each downloaded series is bound on as one more column.
  Open_Raw <- cbind(
    Open_Raw,
    getSymbols(ticker, from = mdate, to = edate, auto.assign = FALSE)[, 1]
  )
}

# Show the combined series as a tibble.
timetk::tk_tbl(Open_Raw)

# Keep everything except the first column of the tibble (presumably the date
# index that tk_tbl() adds) and label the remaining columns by ticker symbol.
Open_Raw <- timetk::tk_tbl(Open_Raw)[, -1]
colnames(Open_Raw) <- tickers
## Open, High and Low prices
# Download each ticker once and reuse the result for every price series
# (previously every symbol was fetched again for each series).
Price_Data <- lapply(tickers, function(ticker) {
  getSymbols(ticker, from = mdate, to = edate, auto.assign = FALSE)
})

# Column positions in each downloaded series -- assumes the default
# getSymbols() layout of Open, High, Low, Close, Volume, Adjusted.
open_col <- 1L
high_col <- 2L
low_col <- 3L

# Merge column `col` of every downloaded series side by side,
# giving one column per ticker.
bind_price_column <- function(col) {
  do.call(merge, lapply(Price_Data, function(prices) prices[, col]))
}

## Open Price
Open_Raw <- bind_price_column(open_col)
Topen_Raw <- t(Open_Raw)

## High Price
High_Raw <- bind_price_column(high_col)
Thigh_Raw <- t(High_Raw)

## Low Price
# Low is column 3; reading column 2 here would just repeat the High prices.
Low_Raw <- bind_price_column(low_col)
Tlow_Raw <- t(Low_Raw)

## write in the same way for Close, Adjusted and Volume, and;
## Cbind Open and High
Open_High <- cbind(Topen_Raw, Thigh_Raw)
## Cbind Open_High and Low_Raw
Open_to_Low <- cbind(Open_High, Tlow_Raw)
如您所见,前两列是开盘价,第三和第四列是最高价。用这些代码可以得到我想要的输出,但当我尝试导入数千只股票的数据时会报错,因此这种写法无法使用。如果可能的话,我还想在各组数据(开盘价、最高价、最低价、收盘价、调整后价格和成交量)之间插入一个空列作为分隔。
我该怎么做才能做到这一点?
基础 R 解决方案:
# Transpose `data` so that each of its columns becomes a row, keeping the
# original column label in `price_var`, then split that label on its dots.
# Assumes labels look like "<ticker>.<price variable>" -- confirm against `data`.
td_data <- local({
  flipped <- data.frame(price_var = row.names(t(data)), t(data), row.names = NULL)
  series_label <- flipped[["price_var"]]
  # Everything before the first dot.
  flipped[["ticker_cd"]] <- as.factor(gsub("[.].*", "", series_label))
  # Everything after the last dot.
  flipped[["price_var"]] <- as.factor(gsub(".*[.]", "", series_label))
  flipped
})
# Reshape: one group of rows per `price_var` level, bound side by side.
do.call("cbind", split(td_data, td_data[["price_var"]]))