如何在 R 上创建速率
How to create rate on R
我想更改我的数据,以便它给出行人与该州人口的比率。我使用的是线性模型,我的汇总值如下所示:
系数:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 0.087061 0.029876 2.914 0.00438 **
intersection 0.009192 0.003086 2.978 0.00362 **
在这里,我的 beta 值交集是 .009192,这没有意义,因为与人口较少的州相比,这个值可能算不了什么。
下面是我的数据的压缩版本,没有我使用的所有列,但这里是 csv 的 link,以防有人想从 there 下载它。
> head(c)
# A tibble: 6 x 15
STATE STATENAME PEDS PERSONS PERMVIT PERNOTMVIT COUNTY COUNTYNAME CITY DAY MONTH YEAR LATITUDE LONGITUD
<dbl> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 1 Alabama 0 3 3 0 81 LEE (81) 2340 7 2 2019 32.7 -85.3
2 1 Alabama 0 2 2 0 55 ETOWAH (55) 1280 23 1 2019 34.0 -86.1
3 1 Alabama 0 4 4 0 29 CLEBURNE (29) 0 22 1 2019 33.7 -85.4
4 1 Alabama 1 1 1 1 55 ETOWAH (55) 2562 22 1 2019 34.0 -86.1
5 1 Alabama 0 1 1 0 3 BALDWIN (3) 0 18 1 2019 30.7 -87.8
6 1 Alabama 0 2 2 0 85 LOWNDES (85) 0 7 1 2019 32.2 -86.4
# … with 1 more variable: FATALS <dbl>
这是我在整个过程中运行的代码。我不知道如何更改它,以便每个值都是一个比率(值如 peds 或 type_int)
#Libraries
rm(list=ls()) # this is to clear anything in memory
library(leaflet)
library(tidyverse)
library(ggmap)
library(leaflet.extras)
library(htmltools)
library(ggplot2)
library(maps)
library(mapproj)
library(mapdata)
library(zoo)
library(tsibble)
setwd("~/Desktop/Statistics790/DataSets/FARS2019NationalCSV")
df <- read.csv("accident.csv")
state <- unique(df$STATE)
for(i in state){
df1<- df %>%
filter(STATE==i) %>%
dplyr::select(c(STATE,PEDS,DAY,MONTH,YEAR,TYP_INT)) %>%
mutate(date = as.Date(as.character(paste(YEAR, MONTH, DAY, sep = "-"),"%Y-%m-%d"))) %>% # create a date
group_by(date) %>% # Group by State id and date
# summarise_at(.vars = vars(PEDS), sum)
summarise(pedday=sum(PEDS),intersection=mean(TYP_INT))
#ts1<-ts(df,start=c(2019,1,1), frequency=365)
setwd("~/Desktop/Statistics790/States_ts/figures")
plots<-df1 %>%
ggplot()+
geom_line(aes(x=date,y=pedday))+ylim(0,13)+
theme_bw()
ggsave(paste0("state_",i,".png"),width=8,height=6, )
ts1<-ts(df1,start=c(2019,1,1), frequency=365)
setwd("~/Desktop/Statistics790/States_ts")
ts1 %>% write.csv(paste0("state_",i,".csv"),row.names = F)
#Plots
}
#date1<- as.character(df$date)
#df1<- df%>% filter(STATE=="1")
#ts2<-xts(df,order.by = as.Date(df$date,"%Y-%m-%d"))
setwd("~/Desktop/Statistics790/States_ts")
cat("\f")
#df <- read.csv(paste0("state_1.csv"))
#print("------Linear Model------")
#summary(lm(pedday~weather,data=df))
for(i in state){
print(paste0("-------------------------Analysis for State: ",i," -------------------------------"))
df <- read.csv(paste0("state_",i,".csv"))
print("------Linear Model------")
print(summary(lm(pedday~intersection,data=df)))
}
从评论中整理我的答案:您需要从美国人口普查等外部来源获取州人口数据 https://www.census.gov/data/tables/time-series/demo/popest/2010s-state-total.html#par_textimage_1574439295,读入,将其加入您的数据集,然后计算行人比率每个人口,按比例缩放以便于在图表上阅读。您可以通过将一些计算从循环中取出来使您的代码更快。下面的代码假设人口普查数据被称为 'census.csv' 并且有列 'Geographic Area' 用于状态和 'X2019' 用于最新可用的人口数据。
pop <- read.csv('census.csv')
df <- read.csv('accidents.csv') %>%
left_join(pop, by = c('STATENAME' = 'Geographic Area') %>%
mutate(rate = (PEDS / X2019) * <scale>) %>%
mutate(date = as.Date(as.character(paste(YEAR, MONTH, DAY, sep = "-"),"%Y-%m-%d")))
left_join 将匹配州名称并根据其州为每一行赋予人口值,无论有多少行。
我想更改我的数据,以便它给出行人与该州人口的比率。我使用的是线性模型,我的汇总值如下所示: 系数:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 0.087061 0.029876 2.914 0.00438 **
intersection 0.009192 0.003086 2.978 0.00362 **
在这里,我的 beta 值交集是 .009192,这没有意义,因为与人口较少的州相比,这个值可能算不了什么。
下面是我的数据的压缩版本,没有我使用的所有列,但这里是 csv 的 link,以防有人想从 there 下载它。
> head(c)
# A tibble: 6 x 15
STATE STATENAME PEDS PERSONS PERMVIT PERNOTMVIT COUNTY COUNTYNAME CITY DAY MONTH YEAR LATITUDE LONGITUD
<dbl> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 1 Alabama 0 3 3 0 81 LEE (81) 2340 7 2 2019 32.7 -85.3
2 1 Alabama 0 2 2 0 55 ETOWAH (55) 1280 23 1 2019 34.0 -86.1
3 1 Alabama 0 4 4 0 29 CLEBURNE (29) 0 22 1 2019 33.7 -85.4
4 1 Alabama 1 1 1 1 55 ETOWAH (55) 2562 22 1 2019 34.0 -86.1
5 1 Alabama 0 1 1 0 3 BALDWIN (3) 0 18 1 2019 30.7 -87.8
6 1 Alabama 0 2 2 0 85 LOWNDES (85) 0 7 1 2019 32.2 -86.4
# … with 1 more variable: FATALS <dbl>
这是我在整个过程中运行的代码。我不知道如何更改它,以便每个值都是一个比率(值如 peds 或 type_int)
#Libraries
rm(list=ls()) # this is to clear anything in memory
library(leaflet)
library(tidyverse)
library(ggmap)
library(leaflet.extras)
library(htmltools)
library(ggplot2)
library(maps)
library(mapproj)
library(mapdata)
library(zoo)
library(tsibble)
setwd("~/Desktop/Statistics790/DataSets/FARS2019NationalCSV")
df <- read.csv("accident.csv")
state <- unique(df$STATE)
for(i in state){
df1<- df %>%
filter(STATE==i) %>%
dplyr::select(c(STATE,PEDS,DAY,MONTH,YEAR,TYP_INT)) %>%
mutate(date = as.Date(as.character(paste(YEAR, MONTH, DAY, sep = "-"),"%Y-%m-%d"))) %>% # create a date
group_by(date) %>% # Group by State id and date
# summarise_at(.vars = vars(PEDS), sum)
summarise(pedday=sum(PEDS),intersection=mean(TYP_INT))
#ts1<-ts(df,start=c(2019,1,1), frequency=365)
setwd("~/Desktop/Statistics790/States_ts/figures")
plots<-df1 %>%
ggplot()+
geom_line(aes(x=date,y=pedday))+ylim(0,13)+
theme_bw()
ggsave(paste0("state_",i,".png"),width=8,height=6, )
ts1<-ts(df1,start=c(2019,1,1), frequency=365)
setwd("~/Desktop/Statistics790/States_ts")
ts1 %>% write.csv(paste0("state_",i,".csv"),row.names = F)
#Plots
}
#date1<- as.character(df$date)
#df1<- df%>% filter(STATE=="1")
#ts2<-xts(df,order.by = as.Date(df$date,"%Y-%m-%d"))
setwd("~/Desktop/Statistics790/States_ts")
cat("\f")
#df <- read.csv(paste0("state_1.csv"))
#print("------Linear Model------")
#summary(lm(pedday~weather,data=df))
for(i in state){
print(paste0("-------------------------Analysis for State: ",i," -------------------------------"))
df <- read.csv(paste0("state_",i,".csv"))
print("------Linear Model------")
print(summary(lm(pedday~intersection,data=df)))
}
从评论中整理我的答案:您需要从美国人口普查等外部来源获取州人口数据 https://www.census.gov/data/tables/time-series/demo/popest/2010s-state-total.html#par_textimage_1574439295,读入,将其加入您的数据集,然后计算行人比率每个人口,按比例缩放以便于在图表上阅读。您可以通过将一些计算从循环中取出来使您的代码更快。下面的代码假设人口普查数据被称为 'census.csv' 并且有列 'Geographic Area' 用于状态和 'X2019' 用于最新可用的人口数据。
pop <- read.csv('census.csv')
df <- read.csv('accidents.csv') %>%
left_join(pop, by = c('STATENAME' = 'Geographic Area') %>%
mutate(rate = (PEDS / X2019) * <scale>) %>%
mutate(date = as.Date(as.character(paste(YEAR, MONTH, DAY, sep = "-"),"%Y-%m-%d")))
left_join 将匹配州名称并根据其州为每一行赋予人口值,无论有多少行。