难以在 R 中绘制时间序列
Difficulty Plotting Time Series in R
我一直在尝试在 R 中绘制时间序列数据。我在网上咨询了几个不同的来源,但不知怎的,我在创建这个图时仍然遇到问题。我在下面模拟了一些数据,代表一家虚构公司从 2014 年到 2016 年收到的每日信息:
#create data
date_decision_made = seq(as.Date("2014/1/1"), as.Date("2016/1/1"),by="day")
date_decision_made <- format(as.Date(date_decision_made), "%Y/%m/%d")
property_damages_in_dollars <- rnorm(731,100,10)
car_damages_in_dollars <- rnorm(731,105,8)
other_damages_in_dollars <- rnorm(731,104,9)
location <- c("canada","usa")
location <- sample(location, 731, replace=TRUE, prob=c(0.3, 0.7))
type_of_house <- c("single","townhome", "rental" )
type_of_house<- sample(type_of_house , 731, replace=TRUE, prob=c(0.5, 0.3, 0.2))
response_variable <- c("claim_approved","claim_rejected")
response_variable<- sample(response_variable, 731, replace=TRUE, prob=c(0.4, 0.6))
final_dataset <- cbind(date_decision_made, property_damages_in_dollars, car_damages_in_dollars, other_damages_in_dollars, location, type_of_house, response_variable)
final_dataset <- as.data.frame(final_dataset)
final_dataset$other_damages_in_dollars = as.numeric(final_dataset$other_damages_in_dollars)
final_dataset$property_damages_in_dollars = as.numeric(final_dataset$property_damages_in_dollars)
final_dataset$car_damages_in_dollars = as.numeric(final_dataset$car_damages_in_dollars)
prop_damage <-subset(final_dataset, select = c(date_decision_made, property_damages_in_dollars))
car_damage <-subset(final_dataset, select = c(date_decision_made, car_damages_in_dollars))
other_damage <-subset(final_dataset, select = c(date_decision_made, other_damages_in_dollars))
new <-subset(final_dataset, select = c(date_decision_made, property_damages_in_dollars, car_damages_in_dollars, other_damages_in_dollars))
基于此数据,我尝试将此数据绘制为 R 中的时间序列。我尝试了几种方法,但都产生了错误。我试图解决这些问题,但我似乎无法弄清楚。有人可以帮我吗?
#first way (error)
library(ggplot2)
library(reshape2) library(dplyr)
ggplot() + geom_line(data = prop_damage, aes(x = date_decision_made, y = property_damages_in_dollars, group = 1), color = "red")
+ scale_x_date(date_breaks = "days" , date_labels = "%b %d %a")+
geom_line(data = car_damage, aes(x = date_decision_made, y = car_damages_in_dollars, group =1 ), color = "blue")
+ geom_line(data = other_damage, aes(x = date_decision_made, y = other_damages_in_dollars, group =1), color = "green")
+ xlab('data_date') + ylab('percent.change')
#second way (error)
ggplot(data = new, aes(x = date_decision_made)) +
geom_line(aes(y = property_damages_in_dollars, colour = "property_damages_in_dollars")) +
geom_line(aes(y = car_damages_in_dollars, colour = "car_damages_in_dollars")) +
geom_line(aes(y =other_damages_in_dollars, colour = "other_damages_in_dollars")) +
scale_colour_manual("",
breaks = c("property_damages_in_dollars", "car_damages_in_dollars", "other_damages_in_dollars"),
values = c("red", "green", "blue")) +
xlab(" ") +
scale_y_continuous("Dollars", limits = c(0,10000)) +
labs(title="demo graph")
#3rd way error
##Subset the necessary columns
dd_sub = new[,c(1,2,3,4)]
##Then rearrange your data frame
library(reshape2)
dd = melt(dd_sub, id=c("date_decision_made"))
ggplot(dd) + geom_line(aes(x=date_decision_made, y=value, colour=variable, group=1)) + scale_x_date(date_breaks = "days" , date_labels = "%b %d %a")+ scale_colour_manual(values=c("red","green","blue"))
#4th error
mymts = ts(new,
frequency = 1,
start = c(2014, 1))
autoplot(mymts) +
ggtitle("Time Series Plot") +
theme(plot.title = element_text(hjust = 0.5))
#5th Method error
x1 = ts(prop_damage, frequency = 1, start = c(2014,1))
x2 = ts(other_damage, frequency = 1, start = c(2014,1))
ts.plot(x, y, gpars = list(col = c("black", "red")))
ts.plot(date_decision_made,gpars= list(col=rainbow(10)))
#6th method error
##Subset the necessary columns
dd_sub = new[,c(1,2,3,4)]
##Then rearrange your data frame
library(reshape2)
dd = melt(dd_sub, id=c("date_decision_made"))
qplot(date_decision_made,value,data=dd,geom='line',color=variable)
#7th way error
x1 = ts(prop_damage, frequency = 1, start = c(2014,1))
x2 = ts(other_damage, frequency = 1, start = c(2014,1))
comb_ts <- cbind(x1, x2)
plot.ts(comb_ts, plot.type = "single")
有人可以告诉我我在这些代码中做错了什么吗?谢谢
试试这个方法,注意日期。由于年代久远,日期很多:
#Data
##Then rearrange your data frame
library(reshape2)
dd = melt(dd_sub, id=c("date_decision_made"))
dd$date_decision_made <- as.Date(as.character(dd$date_decision_made),'%Y/%m/%d')
#Plot
ggplot(dd) +
geom_line(aes(x=date_decision_made, y=value, colour=variable, group=1))+
scale_x_date(date_breaks = "months" ,breaks = '12 months', date_labels = "%b %d %a")+
scale_colour_manual(values=c("red","green","blue"))+
theme(axis.text.x = element_text(angle=90))
输出:
另一种选择是使用 pivot_longer
来自 tidyr
library(dplyr)
library(tidyr)
library(ggplot2)
library(lubridate)
dd_sub %>%
pivot_longer(cols = -date_decision_made) %>%
mutate(date_decision_made = ymd(date_decision_made)) %>%
ggplot() +
geom_line(aes(x = date_decision_made, y=value, colour = name, group = 1)) +
scale_x_date(date_breaks = "months", breaks = '12 months',
date_labels = "%b %d %a")+
scale_colour_manual(values=c("red","green","blue"))+
theme(axis.text.x = element_text(angle = 90))
我一直在尝试在 R 中绘制时间序列数据。我在网上咨询了几个不同的来源,但不知怎的,我在创建这个图时仍然遇到问题。我在下面模拟了一些数据,代表一家虚构公司从 2014 年到 2016 年收到的每日信息:
#create data
date_decision_made = seq(as.Date("2014/1/1"), as.Date("2016/1/1"),by="day")
date_decision_made <- format(as.Date(date_decision_made), "%Y/%m/%d")
property_damages_in_dollars <- rnorm(731,100,10)
car_damages_in_dollars <- rnorm(731,105,8)
other_damages_in_dollars <- rnorm(731,104,9)
location <- c("canada","usa")
location <- sample(location, 731, replace=TRUE, prob=c(0.3, 0.7))
type_of_house <- c("single","townhome", "rental" )
type_of_house<- sample(type_of_house , 731, replace=TRUE, prob=c(0.5, 0.3, 0.2))
response_variable <- c("claim_approved","claim_rejected")
response_variable<- sample(response_variable, 731, replace=TRUE, prob=c(0.4, 0.6))
final_dataset <- cbind(date_decision_made, property_damages_in_dollars, car_damages_in_dollars, other_damages_in_dollars, location, type_of_house, response_variable)
final_dataset <- as.data.frame(final_dataset)
final_dataset$other_damages_in_dollars = as.numeric(final_dataset$other_damages_in_dollars)
final_dataset$property_damages_in_dollars = as.numeric(final_dataset$property_damages_in_dollars)
final_dataset$car_damages_in_dollars = as.numeric(final_dataset$car_damages_in_dollars)
prop_damage <-subset(final_dataset, select = c(date_decision_made, property_damages_in_dollars))
car_damage <-subset(final_dataset, select = c(date_decision_made, car_damages_in_dollars))
other_damage <-subset(final_dataset, select = c(date_decision_made, other_damages_in_dollars))
new <-subset(final_dataset, select = c(date_decision_made, property_damages_in_dollars, car_damages_in_dollars, other_damages_in_dollars))
基于此数据,我尝试将此数据绘制为 R 中的时间序列。我尝试了几种方法,但都产生了错误。我试图解决这些问题,但我似乎无法弄清楚。有人可以帮我吗?
#first way (error)
library(ggplot2)
library(reshape2) library(dplyr)
ggplot() + geom_line(data = prop_damage, aes(x = date_decision_made, y = property_damages_in_dollars, group = 1), color = "red")
+ scale_x_date(date_breaks = "days" , date_labels = "%b %d %a")+
geom_line(data = car_damage, aes(x = date_decision_made, y = car_damages_in_dollars, group =1 ), color = "blue")
+ geom_line(data = other_damage, aes(x = date_decision_made, y = other_damages_in_dollars, group =1), color = "green")
+ xlab('data_date') + ylab('percent.change')
#second way (error)
ggplot(data = new, aes(x = date_decision_made)) +
geom_line(aes(y = property_damages_in_dollars, colour = "property_damages_in_dollars")) +
geom_line(aes(y = car_damages_in_dollars, colour = "car_damages_in_dollars")) +
geom_line(aes(y =other_damages_in_dollars, colour = "other_damages_in_dollars")) +
scale_colour_manual("",
breaks = c("property_damages_in_dollars", "car_damages_in_dollars", "other_damages_in_dollars"),
values = c("red", "green", "blue")) +
xlab(" ") +
scale_y_continuous("Dollars", limits = c(0,10000)) +
labs(title="demo graph")
#3rd way error
##Subset the necessary columns
dd_sub = new[,c(1,2,3,4)]
##Then rearrange your data frame
library(reshape2)
dd = melt(dd_sub, id=c("date_decision_made"))
ggplot(dd) + geom_line(aes(x=date_decision_made, y=value, colour=variable, group=1)) + scale_x_date(date_breaks = "days" , date_labels = "%b %d %a")+ scale_colour_manual(values=c("red","green","blue"))
#4th error
mymts = ts(new,
frequency = 1,
start = c(2014, 1))
autoplot(mymts) +
ggtitle("Time Series Plot") +
theme(plot.title = element_text(hjust = 0.5))
#5th Method error
x1 = ts(prop_damage, frequency = 1, start = c(2014,1))
x2 = ts(other_damage, frequency = 1, start = c(2014,1))
ts.plot(x, y, gpars = list(col = c("black", "red")))
ts.plot(date_decision_made,gpars= list(col=rainbow(10)))
#6th method error
##Subset the necessary columns
dd_sub = new[,c(1,2,3,4)]
##Then rearrange your data frame
library(reshape2)
dd = melt(dd_sub, id=c("date_decision_made"))
qplot(date_decision_made,value,data=dd,geom='line',color=variable)
#7th way error
x1 = ts(prop_damage, frequency = 1, start = c(2014,1))
x2 = ts(other_damage, frequency = 1, start = c(2014,1))
comb_ts <- cbind(x1, x2)
plot.ts(comb_ts, plot.type = "single")
有人可以告诉我我在这些代码中做错了什么吗?谢谢
试试这个方法,注意日期。由于年代久远,日期很多:
#Data
##Then rearrange your data frame
library(reshape2)
dd = melt(dd_sub, id=c("date_decision_made"))
dd$date_decision_made <- as.Date(as.character(dd$date_decision_made),'%Y/%m/%d')
#Plot
ggplot(dd) +
geom_line(aes(x=date_decision_made, y=value, colour=variable, group=1))+
scale_x_date(date_breaks = "months" ,breaks = '12 months', date_labels = "%b %d %a")+
scale_colour_manual(values=c("red","green","blue"))+
theme(axis.text.x = element_text(angle=90))
输出:
另一种选择是使用 pivot_longer
来自 tidyr
library(dplyr)
library(tidyr)
library(ggplot2)
library(lubridate)
dd_sub %>%
pivot_longer(cols = -date_decision_made) %>%
mutate(date_decision_made = ymd(date_decision_made)) %>%
ggplot() +
geom_line(aes(x = date_decision_made, y=value, colour = name, group = 1)) +
scale_x_date(date_breaks = "months", breaks = '12 months',
date_labels = "%b %d %a")+
scale_colour_manual(values=c("red","green","blue"))+
theme(axis.text.x = element_text(angle = 90))