如何检查非时间序列数据中的增加和减少趋势

How to check for the increasing and decreasing trend in a non-time series data

我有以下数据框:

dat <- structure(list(peptide_name = c("P2", "P2", "P2", "P2", "P2", 
"P2", "P4", "P4", "P4", "P4", "P4", "P4", "P1", "P1", "P1", "P1", 
"P1", "P1", "P3", "P3", "P3", "P3", "P3", "P3"), dose = c("0mM", 
"0.3mM", "1mM", "3mM", "10mM", "20mM", "0mM", "0.3mM", "1mM", 
"3mM", "10mM", "20mM", "0mM", "0.3mM", "1mM", "3mM", "10mM", 
"20mM", "0mM", "0.3mM", "1mM", "3mM", "10mM", "20mM"), prolif_score = c(1, 
1.174927114, 1.279883382, 1.752186589, 1.994169096, 2.358600583, 
1, 1.046454768, 1.339853301, 1.293398533, 1.026894866, 1.17264410515205, 
1, 0.928020566, 0.920308483, 1.071979434, 1.195372751, 1.524421594, 
1, 1.293233083, 1.483709273, 1.468671679, 1.192982456, 0.463659148
)), row.names = c(NA, -24L), class = c("tbl_df", "tbl", "data.frame"
))

剧情是这样的:

我想要的是一个可以区分向上(P2,P1)的指标 和非向上(P4 和 P3)趋势。如您所见,用于此的 linear model 的 R 值没有用。 例如 P3 中的 R 分数与 P2 和 P1 一样为正。

我如何在 R 中做到这一点?

这是我必须创建该图的代码:

library(tidyverse)
library(broom)
library(ggpubr)

lm_rsq_dat <- dat %>% 
    mutate(dose = as.numeric(gsub("mM", "", dose))) %>% 
    group_by(peptide_name) %>% 
    do(model = glance(lm(dose ~ prolif_score, data = .))) %>% 
    unnest(model) %>% 
    arrange(desc(adj.r.squared)) %>%
    dplyr::select(peptide_name, adj.r.squared) %>% 
    print(n = 100)  
  

# Plot --------------------------------------------------------------------
plot_dat <- dat %>% 
  left_join(lm_rsq_dat, by = "peptide_name") %>%
  mutate(r.squared = formatC(adj.r.squared, format = "e", digits = 2)) %>% 
  mutate(npeptide_name = paste0( peptide_name, " (R=", r.squared, ")")) #%>% 

nspn <- plot_dat %>% 
  dplyr::select(peptide_name, npeptide_name, adj.r.squared) %>% 
  arrange(match(peptide_name, rsq_dat$peptide_name)) %>% 
  unique() %>% 
  pull(npeptide_name)


plot_dat <- plot_dat %>% 
  mutate(npeptide_name = factor(npeptide_name, levels =   nspn))


end_dat <- plot_dat %>% 
  filter(dose == "20mM")

  ggline(plot_dat,
         y = "prolif_score", x = "dose",
         color = "npeptide_name", 
         size = 1, 
         facet.by = "npeptide_name", scales = "free_y",
         palette = get_palette("npg",  length( dat$peptide_name))
  ) +
  xlab("Dose") +
  ylab("Prolif. Score") +
  grids(linetype = "dashed")  +
  rremove("legend") + 
theme(axis.text.x=element_text(angle = 60, hjust = 0.5, vjust = 0.5, size = 12)) 

Kendall 和 Spearman 相关性测试都评估 rank-based 相关性,因此读出一个值与另一个值单调变化的程度。这可以通过简单地 运行 cor(x, y, method = "kendall").

获得
library(tidyverse)
  
dat <- structure(list(peptide_name = c("P2", "P2", "P2", "P2", "P2", "P2", "P4", "P4", "P4", "P4", "P4", "P4", "P1", "P1", "P1", "P1", "P1", "P1", "P3", "P3", "P3", "P3", "P3", "P3"), dose = c("0mM", "0.3mM", "1mM", "3mM", "10mM", "20mM", "0mM", "0.3mM", "1mM", "3mM", "10mM", "20mM", "0mM", "0.3mM", "1mM", "3mM", "10mM", "20mM", "0mM", "0.3mM", "1mM", "3mM", "10mM", "20mM"), prolif_score = c(1, 1.174927114, 1.279883382, 1.752186589, 1.994169096, 2.358600583, 1, 1.046454768, 1.339853301, 1.293398533, 1.026894866, 1.17264410515205, 1, 0.928020566, 0.920308483, 1.071979434, 1.195372751, 1.524421594, 1, 1.293233083, 1.483709273, 1.468671679, 1.192982456, 0.463659148)), row.names = c(NA, -24L), class = c("tbl_df", "tbl", "data.frame"))

dat_proc <- dat %>% 
  mutate(dose = parse_number(dose),
         unit = "nM",
         peptide = parse_number(peptide_name)) 

dat_proc %>% 
  group_split(peptide_name) %>% 
  map(~cor(.x$prolif_score, .x$dose, method = "kendall")) %>%
  map(data.frame) %>% 
  map(rename, kendall = 1) %>% 
  bind_rows(.id = "peptide") %>% 
  mutate(peptide = as.numeric(peptide)) %>% 
  left_join(dat_proc, .) %>% 
  ggplot(aes(factor(dose), prolif_score, color = kendall)) +
  geom_point() +
  geom_text(aes(x = 0, y = 2, label = paste0("kendall correlation \n coefficent = ", kendall)), hjust = -0.2) +
  geom_line(aes(group = peptide)) +
  facet_wrap(~peptide, ncol = 2)
#> Joining, by = "peptide"

reprex package (v2.0.1)

于 2022-03-10 创建