计算 R 中每个 ID 每年的中位数并绘制结果
calculating medians per year per ID in R and plotting the outcome
数据集:
structure(list(ID = c(1234, 1234, 1234, 1234, 1234, 1234, 1234,
1234, 8769, 8769, 8769, 8769, 8769, 7457, 7457, 7457, 7457, 7457,
7457, 55667, 55667, 55667, 55667, 55667, 55667, 55667, 3789,
3789, 3789, 3789, 3789, 3789), date_of_bloods = structure(c(978307200,
981072000, 1173052800, 1175731200, 1367798400, 1465171200, 1467936000,
1659916800, 1072915200, 1075680000, 1173052800, 1175731200, 1367798400,
978307200, 981072000, 1173052800, 1175731200, 1367798400, 1465171200,
978307200, 981072000, 1173052800, 1270425600, 1273104000, 1465171200,
1467936000, 1270425600, 1367798400, 1465171200, 1465257600, 1465344000,
1465430400), class = c("POSIXct", "POSIXt"), tzone = "UTC"),
result = c(90, 80, 60, 40, 25, 22, 22, 21, 70, 65, 43, 23,
22, 90, 90, 88, 86, 76, 74, 58, 46, 35, 34, 33, 30, 24, 76,
67, 56, 34, 33, 23), `mutation type` = c(1, 1, 1, 1, 1, 1,
1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3,
3, 1, 1, 1, 1, 1, 1)), row.names = c(NA, -32L), class = "data.frame")
我希望每个 ID 每年的结果中位数采用一种格式,其中年份仅为 0、1、2、3 等,以确保队列之间的一致性,然后绘制这些线并标明其突变类别。
我完成了:
filtered$date_of_bloods <-format(filtered$date_of_bloods,format="%Y")
#split into individual ID groups
a <- with(filtered, split(filtered, list(ID)))
#aggregate median results per year
medianfunc <- function(y) {aggregate(results ~ date_of_bloods, data = y, median)}
medians <- sapply(a, medianfunc)
# do lm per ID cohort and get slope of lines
g<- as.data.frame(medians)
coefLM <- function(x) {coef(lm(date_of_bloods ~ results, data = x))}
coefs<- sapply(g, coefLM)
实际年份并不重要,为了统一起见,我希望每个 ID 为 0、1、2、3、4 等。我不知道该怎么做?然后我想用某种形式的想法绘制这些数据(每个 ID 的年血液中位数),以了解它们属于哪个突变类别。
我希望这不是一个太宽泛的问题。
非常感谢
你的命名结构不清楚,如果你提供的数据叫df
那么你可以这样做:
df$year <-format(df$date_of_bloods,format="%Y")
aggregate(result ~ year + ID, data = df, median)
year ID result
1 2001 1234 85.0
2 2007 1234 50.0
3 2013 1234 25.0
4 2016 1234 22.0
5 2022 1234 21.0
6 2010 3789 76.0
7 2013 3789 67.0
8 2016 3789 33.5
9 2001 7457 90.0
10 2007 7457 87.0
11 2013 7457 76.0
12 2016 7457 74.0
13 2004 8769 67.5
14 2007 8769 33.0
15 2013 8769 22.0
16 2001 55667 52.0
17 2007 55667 35.0
18 2010 55667 33.5
19 2016 55667 27.0
您可以试试这个(filtered
是您包含的 dput()
)。希望对您有所帮助:
library(dplyr)
library(lubridate)
library(ggplot2)
library(broom)
#Data
filtered %>% mutate(year=year(date_of_bloods)) %>%
group_by(ID,year,`mutation type`) %>% summarise(med=median(result)) -> df1
#Variables
df1 %>% ungroup()%>% mutate(ID=as.factor(ID),
year=as.factor(year),
`mutation type`=as.factor(`mutation type`)) -> df1
#Plot
ggplot(df1,aes(x=ID,y=med,fill=`mutation type`,color=year,group=year))+
geom_line()
对于模型:
#Models
fits <- df1 %>%group_by(ID) %>%
do(fitmodel = lm(med ~ year, data = .))
#Coefs
dfCoef = tidy(fits, fitmodel)
# A tibble: 10 x 6
# Groups: ID [5]
ID term estimate std.error statistic p.value
<dbl> <chr> <dbl> <dbl> <dbl> <dbl>
1 1234 (Intercept) 6329. 1546. 4.09 0.0264
2 1234 year -3.13 0.769 -4.07 0.0268
3 3789 (Intercept) 14318. 4746. 3.02 0.204
4 3789 year -7.08 2.36 -3.00 0.205
5 7457 (Intercept) 2409. 403. 5.98 0.0269
6 7457 year -1.16 0.201 -5.78 0.0287
7 8769 (Intercept) 9268. 4803. 1.93 0.304
8 8769 year -4.60 2.39 -1.92 0.306
9 55667 (Intercept) 3294. 759. 4.34 0.0492
10 55667 year -1.62 0.378 -4.29 0.0503
所需情节代码:
#Plot 2
#Data modifications
df1 %>% mutate(year2=as.numeric(year)-1) -> df2
df2 %>% mutate(year2=factor(year2,levels = sort(unique(year2)))) -> df2
#Plot 2
ggplot(df2,aes(x=year2,y=med,color=ID,group=ID))+
facet_wrap(.~`mutation type`)+
geom_line()
数据集:
structure(list(ID = c(1234, 1234, 1234, 1234, 1234, 1234, 1234,
1234, 8769, 8769, 8769, 8769, 8769, 7457, 7457, 7457, 7457, 7457,
7457, 55667, 55667, 55667, 55667, 55667, 55667, 55667, 3789,
3789, 3789, 3789, 3789, 3789), date_of_bloods = structure(c(978307200,
981072000, 1173052800, 1175731200, 1367798400, 1465171200, 1467936000,
1659916800, 1072915200, 1075680000, 1173052800, 1175731200, 1367798400,
978307200, 981072000, 1173052800, 1175731200, 1367798400, 1465171200,
978307200, 981072000, 1173052800, 1270425600, 1273104000, 1465171200,
1467936000, 1270425600, 1367798400, 1465171200, 1465257600, 1465344000,
1465430400), class = c("POSIXct", "POSIXt"), tzone = "UTC"),
result = c(90, 80, 60, 40, 25, 22, 22, 21, 70, 65, 43, 23,
22, 90, 90, 88, 86, 76, 74, 58, 46, 35, 34, 33, 30, 24, 76,
67, 56, 34, 33, 23), `mutation type` = c(1, 1, 1, 1, 1, 1,
1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3,
3, 1, 1, 1, 1, 1, 1)), row.names = c(NA, -32L), class = "data.frame")
我希望每个 ID 每年的结果中位数采用一种格式,其中年份仅为 0、1、2、3 等,以确保队列之间的一致性,然后绘制这些线并标明其突变类别。
我完成了:
filtered$date_of_bloods <-format(filtered$date_of_bloods,format="%Y")
#split into individual ID groups
a <- with(filtered, split(filtered, list(ID)))
#aggregate median results per year
medianfunc <- function(y) {aggregate(results ~ date_of_bloods, data = y, median)}
medians <- sapply(a, medianfunc)
# do lm per ID cohort and get slope of lines
g<- as.data.frame(medians)
coefLM <- function(x) {coef(lm(date_of_bloods ~ results, data = x))}
coefs<- sapply(g, coefLM)
实际年份并不重要,为了统一起见,我希望每个 ID 为 0、1、2、3、4 等。我不知道该怎么做?然后我想用某种形式的想法绘制这些数据(每个 ID 的年血液中位数),以了解它们属于哪个突变类别。
我希望这不是一个太宽泛的问题。
非常感谢
你的命名结构不清楚,如果你提供的数据叫df
那么你可以这样做:
df$year <-format(df$date_of_bloods,format="%Y")
aggregate(result ~ year + ID, data = df, median)
year ID result
1 2001 1234 85.0
2 2007 1234 50.0
3 2013 1234 25.0
4 2016 1234 22.0
5 2022 1234 21.0
6 2010 3789 76.0
7 2013 3789 67.0
8 2016 3789 33.5
9 2001 7457 90.0
10 2007 7457 87.0
11 2013 7457 76.0
12 2016 7457 74.0
13 2004 8769 67.5
14 2007 8769 33.0
15 2013 8769 22.0
16 2001 55667 52.0
17 2007 55667 35.0
18 2010 55667 33.5
19 2016 55667 27.0
您可以试试这个(filtered
是您包含的 dput()
)。希望对您有所帮助:
library(dplyr)
library(lubridate)
library(ggplot2)
library(broom)
#Data
filtered %>% mutate(year=year(date_of_bloods)) %>%
group_by(ID,year,`mutation type`) %>% summarise(med=median(result)) -> df1
#Variables
df1 %>% ungroup()%>% mutate(ID=as.factor(ID),
year=as.factor(year),
`mutation type`=as.factor(`mutation type`)) -> df1
#Plot
ggplot(df1,aes(x=ID,y=med,fill=`mutation type`,color=year,group=year))+
geom_line()
对于模型:
#Models
fits <- df1 %>%group_by(ID) %>%
do(fitmodel = lm(med ~ year, data = .))
#Coefs
dfCoef = tidy(fits, fitmodel)
# A tibble: 10 x 6
# Groups: ID [5]
ID term estimate std.error statistic p.value
<dbl> <chr> <dbl> <dbl> <dbl> <dbl>
1 1234 (Intercept) 6329. 1546. 4.09 0.0264
2 1234 year -3.13 0.769 -4.07 0.0268
3 3789 (Intercept) 14318. 4746. 3.02 0.204
4 3789 year -7.08 2.36 -3.00 0.205
5 7457 (Intercept) 2409. 403. 5.98 0.0269
6 7457 year -1.16 0.201 -5.78 0.0287
7 8769 (Intercept) 9268. 4803. 1.93 0.304
8 8769 year -4.60 2.39 -1.92 0.306
9 55667 (Intercept) 3294. 759. 4.34 0.0492
10 55667 year -1.62 0.378 -4.29 0.0503
所需情节代码:
#Plot 2
#Data modifications
df1 %>% mutate(year2=as.numeric(year)-1) -> df2
df2 %>% mutate(year2=factor(year2,levels = sort(unique(year2)))) -> df2
#Plot 2
ggplot(df2,aes(x=year2,y=med,color=ID,group=ID))+
facet_wrap(.~`mutation type`)+
geom_line()