难以在 R 中绘制时间序列

Difficulty Plotting Time Series in R

我一直在尝试在 R 中绘制时间序列数据。我在网上咨询了几个不同的来源,但不知怎的,我在创建这个图时仍然遇到问题。我在下面模拟了一些数据,代表一家虚构公司从 2014 年到 2016 年收到的每日信息:

# Create data ----
# Simulate one record per day for a fictional company, 2014-01-01 to 2016-01-01.
date_decision_made <- seq(as.Date("2014/1/1"), as.Date("2016/1/1"), by = "day")

# Derive the sample size from the date sequence instead of repeating a literal.
n_obs <- length(date_decision_made)

# The dates are deliberately stored as "YYYY/MM/DD" text: the plotting snippets
# further down parse exactly this format back into Date objects.
date_decision_made <- format(date_decision_made, "%Y/%m/%d")

# Simulated damage amounts (normal draws; arguments are n, mean, sd).
property_damages_in_dollars <- rnorm(n_obs, 100, 10)
car_damages_in_dollars <- rnorm(n_obs, 105, 8)
other_damages_in_dollars <- rnorm(n_obs, 104, 9)

# Simulated categorical attributes, drawn with the given probabilities.
location <- sample(
  c("canada", "usa"), n_obs,
  replace = TRUE, prob = c(0.3, 0.7)
)
type_of_house <- sample(
  c("single", "townhome", "rental"), n_obs,
  replace = TRUE, prob = c(0.5, 0.3, 0.2)
)
response_variable <- sample(
  c("claim_approved", "claim_rejected"), n_obs,
  replace = TRUE, prob = c(0.4, 0.6)
)

# Build the data frame directly. Going through cbind() would first create a
# character matrix, forcing the numeric columns to be converted to text and
# parsed back again; data.frame() keeps each column's own type.
final_dataset <- data.frame(
  date_decision_made,
  property_damages_in_dollars,
  car_damages_in_dollars,
  other_damages_in_dollars,
  location,
  type_of_house,
  response_variable,
  stringsAsFactors = FALSE
)

# Two-column (date, amount) views, one per damage type.
prop_damage <- final_dataset[
  , c("date_decision_made", "property_damages_in_dollars")
]
car_damage <- final_dataset[
  , c("date_decision_made", "car_damages_in_dollars")
]
other_damage <- final_dataset[
  , c("date_decision_made", "other_damages_in_dollars")
]

# Date plus all three damage columns; this is the table the plots work from.
new <- final_dataset[, c(
  "date_decision_made",
  "property_damages_in_dollars",
  "car_damages_in_dollars",
  "other_damages_in_dollars"
)]

基于此数据,我尝试将此数据绘制为 R 中的时间序列。我尝试了几种方法,但都产生了错误。我试图解决这些问题,但我似乎无法弄清楚。有人可以帮我吗?

#first way (error)
# Attempt 1: overlay three geom_line() layers, one per damage type, each fed
# from its own two-column data frame, on a date-formatted x axis.
# NOTE(review): likely causes of failure, judging from the code as written:
#   - the reshape2 and dplyr library() calls share a single line with no `;`
#     or newline between them, which R cannot parse;
#   - several lines of the plot expression BEGIN with `+`. R ends a statement
#     at a newline once the expression so far is complete, so the first
#     ggplot() + geom_line() line stands alone and each following `+ ...`
#     line is parsed as a separate statement. The `+` has to END the previous
#     line for the layers to be chained;
#   - date_decision_made was converted to text by format() when the data was
#     created, whereas scale_x_date() is meant for a Date column;
#   - date_breaks = "days" asks for one axis break per day of the range.
# The y label 'percent.change' does not match the plotted dollar amounts.
library(ggplot2)
library(reshape2) library(dplyr)

ggplot() + geom_line(data = prop_damage, aes(x = date_decision_made, y = property_damages_in_dollars, group = 1), color = "red") 
+ scale_x_date(date_breaks = "days" , date_labels = "%b %d %a")+ 
geom_line(data = car_damage, aes(x = date_decision_made, y = car_damages_in_dollars, group =1 ), color = "blue") 
+ geom_line(data = other_damage, aes(x = date_decision_made, y = other_damages_in_dollars, group =1), color = "green") 
+ xlab('data_date') + ylab('percent.change')

#second way (error)
# Attempt 2: one ggplot() on the wide table `new`, with one geom_line() per
# damage column; the colour aesthetic is set to a constant label per layer so
# that scale_colour_manual() can build a legend from those labels.
# NOTE(review): likely problems, judging from the code as written:
#   - date_decision_made is text (it was produced by format()), so the x axis
#     is discrete and no group aesthetic is given; ggplot2 then usually cannot
#     connect the points into a line -- confirm the exact message;
#   - limits = c(0,10000) is far wider than the simulated values, which were
#     drawn with means of about 100-105, so any lines would be squeezed
#     against the bottom of the panel.

ggplot(data = new, aes(x = date_decision_made)) +
  geom_line(aes(y = property_damages_in_dollars, colour = "property_damages_in_dollars")) +
  geom_line(aes(y = car_damages_in_dollars, colour = "car_damages_in_dollars")) +
  geom_line(aes(y =other_damages_in_dollars, colour = "other_damages_in_dollars")) +

  scale_colour_manual("", 
                      breaks = c("property_damages_in_dollars", "car_damages_in_dollars", "other_damages_in_dollars"),
                      values = c("red", "green", "blue")) +
  xlab(" ") +
  scale_y_continuous("Dollars", limits = c(0,10000)) + 
  labs(title="demo graph")

#3rd way error
# Attempt 3: reshape `new` to long format with reshape2::melt() and draw one
# coloured line per damage type.

## Subset the necessary columns
## (`new` was built with exactly these four columns, so this keeps all of them)
dd_sub = new[,c(1,2,3,4)]
## Then rearrange your data frame: the three damage columns are stacked into
## `variable` / `value`, keyed by date_decision_made
library(reshape2)
dd = melt(dd_sub, id=c("date_decision_made"))


# NOTE(review): likely problems, judging from the code as written:
#   - date_decision_made is still text here, while scale_x_date() is meant for
#     a Date column -- presumably the source of the error; confirm;
#   - group=1 places all rows in one group although colour is mapped to
#     `variable`;
#   - date_breaks = "days" asks for one axis break per day of the range.
ggplot(dd) + geom_line(aes(x=date_decision_made, y=value, colour=variable, group=1)) + scale_x_date(date_breaks = "days" , date_labels = "%b %d %a")+  scale_colour_manual(values=c("red","green","blue"))


#4th error
# Attempt 4: turn the wide table into a multivariate ts object and plot it
# with autoplot().
# NOTE(review): likely problems, judging from the code as written:
#   - `new` still contains the text column date_decision_made, so ts() is
#     given a non-numeric column alongside the three numeric ones;
#   - frequency = 1 with start = c(2014, 1) declares one observation per time
#     unit beginning at 2014, whereas the rows are daily records;
#   - no package that supplies an autoplot() method for ts objects is loaded
#     anywhere in the visible code (presumably forecast or ggfortify was
#     intended -- confirm).

mymts = ts(new,
           frequency = 1,
           start = c(2014, 1))

autoplot(mymts) +
  ggtitle("Time Series Plot") +
  theme(plot.title = element_text(hjust = 0.5))

#5th Method error
# Attempt 5: build ts objects for two damage types and draw them with
# ts.plot().
# NOTE(review): likely problems, judging from the code as written:
#   - the series are stored as x1 and x2, but ts.plot() is called with `x` and
#     `y`, which are not defined anywhere in the visible code;
#   - prop_damage and other_damage each still contain the text date column, so
#     ts() is given a non-numeric column;
#   - the second ts.plot() call is passed date_decision_made itself, which is
#     a character vector rather than a numeric series.

x1 = ts(prop_damage, frequency = 1, start = c(2014,1))
x2 = ts(other_damage, frequency = 1, start = c(2014,1))
ts.plot(x, y, gpars = list(col = c("black", "red")))
ts.plot(date_decision_made,gpars= list(col=rainbow(10)))



#6th method error
# Attempt 6: same long-format reshaping as the third attempt, plotted with
# qplot() instead of ggplot().


## Subset the necessary columns
## (`new` was built with exactly these four columns, so this keeps all of them)
dd_sub = new[,c(1,2,3,4)]
## Then rearrange your data frame: the three damage columns are stacked into
## `variable` / `value`, keyed by date_decision_made
library(reshape2)
dd = melt(dd_sub, id=c("date_decision_made"))

# NOTE(review): date_decision_made is still text at this point, so the x axis
# is discrete; presumably that is why no usable line is drawn -- confirm.
# NOTE(review): qplot() is deprecated in recent ggplot2 releases -- confirm
# against the installed version.
qplot(date_decision_made,value,data=dd,geom='line',color=variable)

#7th way error
# Attempt 7: build two ts objects, bind them column-wise and draw every column
# in a single panel with plot.ts().
# NOTE(review): likely problems, judging from the code as written:
#   - prop_damage and other_damage each still contain the text date column, so
#     each ts object is built from a date column plus an amount column, and
#     the combined object carries the date column twice;
#   - frequency = 1 with start = c(2014,1) declares one observation per time
#     unit beginning at 2014, whereas the rows are daily records.

x1 = ts(prop_damage, frequency = 1, start = c(2014,1))
x2 = ts(other_damage, frequency = 1, start = c(2014,1))

comb_ts <- cbind(x1, x2) 
plot.ts(comb_ts, plot.type = "single")

有人可以告诉我我在这些代码中做错了什么吗?谢谢

试试下面这个方法,处理日期时要小心。由于数据跨越多年,日期数量很多,因此需要先把日期列转换为 Date 类型,并减少坐标轴上的刻度数量:

# Data ----
# Reshape to long format: the damage columns of dd_sub are stacked into
# `variable` (column name) and `value` (amount), keyed by date_decision_made.
library(reshape2)
library(ggplot2)
dd <- melt(dd_sub, id.vars = "date_decision_made")

# The dates are stored as "YYYY/MM/DD" text; scale_x_date() needs real Date
# values, so parse them before plotting.
dd$date_decision_made <- as.Date(
  as.character(dd$date_decision_made),
  format = "%Y/%m/%d"
)

# Plot ----
# Group by `variable` so each damage type is drawn as its own line; a constant
# group would join all three series into a single zig-zagging path.
# Only date_breaks is supplied: one labelled tick per month keeps the axis
# readable over a multi-year range.
ggplot(dd) +
  geom_line(
    aes(x = date_decision_made, y = value, colour = variable, group = variable)
  ) +
  scale_x_date(date_breaks = "1 month", date_labels = "%b %d %a") +
  scale_colour_manual(values = c("red", "green", "blue")) +
  theme(axis.text.x = element_text(angle = 90))

输出:

另一种选择是使用 tidyr 包中的 pivot_longer:

library(dplyr)
library(tidyr)
library(ggplot2)
library(lubridate)

# Reshape to long format (pivot_longer() names the new columns `name` and
# `value` by default), parse the text dates into Date values with ymd(), then
# draw one line per damage type.
dd_sub %>%
  pivot_longer(cols = -date_decision_made) %>%
  mutate(date_decision_made = ymd(date_decision_made)) %>%
  ggplot() +
  # Group by `name` so each damage type is its own line; a constant group
  # would join all three series into a single zig-zagging path.
  geom_line(
    aes(x = date_decision_made, y = value, colour = name, group = name)
  ) +
  # Only date_breaks is supplied: one labelled tick per month.
  scale_x_date(date_breaks = "1 month", date_labels = "%b %d %a") +
  scale_colour_manual(values = c("red", "green", "blue")) +
  theme(axis.text.x = element_text(angle = 90))