R:如何创建统计检验的散点图?
R: How to create scatter plot for statistical test?
我想创建一个基因散点图,x 轴是突变患者的数量,统计检验的结果是y 轴。
本质上,x 轴表示 res.sig 数据框 Group1 列中值为 Responder 的突变患者数量,每个点用 Hugo_Symbol 信息标注;y 轴表示 res.sig 数据框中 fdr 的 -log10 值。
# Clinical enrichment on the "Response" clinical feature of the MAF object `d`
response.ce = clinicalEnrichment(maf=d, clinicalFeature="Response")
# Significant associations: keep rows of the group-wise comparison table
# with p-value < 0.05
res.sig <- response.ce$groupwise_comparision[p_value < 0.05]
# Genes with the number of mutated patients
library(dplyr)
# x: for the Responder rows, the FIRST CHARACTER of n_mutated_group1.
# NOTE(review): substr() returns a character vector, so x holds one-character
# strings ("9", "6", ...), not numbers; any count of 10 or more would be
# truncated to its leading digit.
x <- res.sig %>%
filter(Group1=='Responder') %>%
mutate(first_letter = substr(n_mutated_group1, 1, 1)) %>%
pull(first_letter)
# ann: gene symbols of the same Responder rows, used as point labels
ann <- res.sig %>%
filter(Group1=="Responder") %>%
pull(Hugo_Symbol)
# FDR of the same Responder rows (raw values, not yet -log10 transformed)
y <- res.sig %>%
filter(Group1=="Responder") %>%
pull(fdr)
# Scatter plot, written to the file Rplot06.png
png("Rplot06.png")
# Points are drawn at (x, -log10(y)); identical (x, y) pairs overplot each other
plot(x, -log10(y), type="p", xlim=c(0,25), main="Scatterplot of statistically significant mutated genes", xlab="Number of mutated patients", ylab="-log10(fdr)", pch=19)
# NOTE(review): labels are positioned at y-1, i.e. the raw fdr minus 1, which
# is negative for the fdr values in the dput() output below, whereas the points
# sit at -log10(y). The labels therefore appear to fall outside the plotted
# y-range, which would explain why none are visible.
text(x, y-1, labels=ann)
# Close the PNG device so the file is written out
dev.off()
我的图只显示了三个点(没有标签),尽管我期望有 7 个带标签的点(对应于 x 和 y 轴上的 7 个值)。
res.sig
> dput(res.sig)
structure(list(Hugo_Symbol = c("ERCC2", "ERCC2", "AKAP9", "AKAP9",
"HERC1", "HERC1", "HECTD1", "HECTD1", "MACF1", "MACF1", "MROH2B",
"MROH2B", "KMT2C", "KMT2C"), Group1 = c("Non-Responder", "Responder",
"Non-Responder", "Responder", "Non-Responder", "Responder", "Non-Responder",
"Responder", "Non-Responder", "Responder", "Non-Responder", "Responder",
"Non-Responder", "Responder"), Group2 = c("Rest", "Rest", "Rest",
"Rest", "Rest", "Rest", "Rest", "Rest", "Rest", "Rest", "Rest",
"Rest", "Rest", "Rest"), n_mutated_group1 = c("0 of 25", "9",
"0 of 25", "6", "0 of 25", "6", "0 of 25", "6", "0 of 25", "6",
"0 of 25", "6", "1 of 25", "7"), n_mutated_group2 = c("9 of 25",
"0 of 25", "6 of 25", "0 of 25", "6 of 25", "0 of 25", "6 of 25",
"0 of 25", "6 of 25", "0 of 25", "6 of 25", "0 of 25", "7 of 25",
"1 of 25"), p_value = c(0.00163083541184905, 0.00163083541184905,
0.022289766970618, 0.022289766970618, 0.022289766970618, 0.022289766970618,
0.022289766970618, 0.022289766970618, 0.022289766970618, 0.022289766970618,
0.022289766970618, 0.022289766970618, 0.0487971536957187, 0.0487971536957187
), OR = c(0, Inf, 0, Inf, 0, Inf, 0, Inf, 0, Inf, 0, Inf, 0.111488645279478,
8.96952328636894), OR_low = c(0, 2.56647319276964, 0, 1.33358819424024,
0, 1.33358819424024, 0, 1.33358819424024, 0, 1.33358819424024,
0, 1.33358819424024, 0.00228988507629356, 1.0079479819766), OR_high = c(0.38963976043749,
Inf, 0.749856668137133, Inf, 0.749856668137133, Inf, 0.749856668137133,
Inf, 0.749856668137133, Inf, 0.749856668137133, Inf, 0.992114690322592,
436.703138665198), fdr = c(0.109265972593886, 0.109265972593886,
0.248902397838568, 0.248902397838568, 0.248902397838568, 0.248902397838568,
0.248902397838568, 0.248902397838568, 0.248902397838568, 0.248902397838568,
0.248902397838568, 0.248902397838568, 0.467058471087594, 0.467058471087594
)), row.names = c(NA, -14L), class = c("data.table", "data.frame"
), .internal.selfref = <pointer: 0x000002adab171ef0>, index = structure(integer(0), "`__Group1`" = c(1L,
3L, 5L, 7L, 9L, 11L, 13L, 2L, 4L, 6L, 8L, 10L, 12L, 14L)))
My plot only shows three points (no labels), although I'm expecting 7 labelled points (corresponding to the 7 values on the x and y axes).
所有 7 个点都在那里,只是其中 5 个是相同的,因此它们被绘制在彼此之上,导致“缺失”点。只有3个唯一点。标签没有显示,是因为 text() 的纵坐标用的是 y-1(原始 fdr 减 1,结果为负值),而点是按 -log10(y) 绘制的,所以标签落在了绘图区域之外。
如果你使用 ggplot
那么你可以“抖动”这些点来增加一些噪音:
# Jittered scatter plot of the significant genes.
# Five of the seven genes share the same (x, -log10(fdr)) coordinates, so a
# small amount of random noise is added to pull the overlapping points apart.
# Inputs (defined earlier): x = mutated-patient counts as text,
# y = raw fdr values, ann = gene symbols.
library(ggplot2)

# One seeded position object shared by both layers: with the same seed each
# label receives exactly the same offset as its point. This replaces the
# global set.seed() call and keeps the figure reproducible.
jitter_pos <- position_jitter(height = 0.05, seed = 6)

data.frame(x = as.numeric(x), y = -log10(y), gene = ann) %>%
  ggplot(aes(x = x, y = y, label = gene)) +
  geom_point(position = jitter_pos, alpha = 0.5, size = 3) +
  # vjust = -1 lifts each gene symbol just above its point
  geom_text(position = jitter_pos, vjust = -1) +
  xlim(0, 25)
我想创建一个基因散点图,x 轴是突变患者的数量,统计检验的结果是y 轴。
本质上,x 轴表示 res.sig 数据框 Group1 列中值为 Responder 的突变患者数量,每个点用 Hugo_Symbol 信息标注;y 轴表示 res.sig 数据框中 fdr 的 -log10 值。
# Clinical enrichment on the "Response" clinical feature of the MAF object `d`
response.ce = clinicalEnrichment(maf=d, clinicalFeature="Response")
# Significant associations: keep rows of the group-wise comparison table
# with p-value < 0.05
res.sig <- response.ce$groupwise_comparision[p_value < 0.05]
# Genes with the number of mutated patients
library(dplyr)
# x: for the Responder rows, the FIRST CHARACTER of n_mutated_group1.
# NOTE(review): substr() returns a character vector, so x holds one-character
# strings ("9", "6", ...), not numbers; any count of 10 or more would be
# truncated to its leading digit.
x <- res.sig %>%
filter(Group1=='Responder') %>%
mutate(first_letter = substr(n_mutated_group1, 1, 1)) %>%
pull(first_letter)
# ann: gene symbols of the same Responder rows, used as point labels
ann <- res.sig %>%
filter(Group1=="Responder") %>%
pull(Hugo_Symbol)
# FDR of the same Responder rows (raw values, not yet -log10 transformed)
y <- res.sig %>%
filter(Group1=="Responder") %>%
pull(fdr)
# Scatter plot, written to the file Rplot06.png
png("Rplot06.png")
# Points are drawn at (x, -log10(y)); identical (x, y) pairs overplot each other
plot(x, -log10(y), type="p", xlim=c(0,25), main="Scatterplot of statistically significant mutated genes", xlab="Number of mutated patients", ylab="-log10(fdr)", pch=19)
# NOTE(review): labels are positioned at y-1, i.e. the raw fdr minus 1, which
# is negative for the fdr values in the dput() output below, whereas the points
# sit at -log10(y). The labels therefore appear to fall outside the plotted
# y-range, which would explain why none are visible.
text(x, y-1, labels=ann)
# Close the PNG device so the file is written out
dev.off()
我的图只显示了三个点(没有标签),尽管我期望有 7 个带标签的点(对应于 x 和 y 轴上的 7 个值)。
res.sig
> dput(res.sig)
structure(list(Hugo_Symbol = c("ERCC2", "ERCC2", "AKAP9", "AKAP9",
"HERC1", "HERC1", "HECTD1", "HECTD1", "MACF1", "MACF1", "MROH2B",
"MROH2B", "KMT2C", "KMT2C"), Group1 = c("Non-Responder", "Responder",
"Non-Responder", "Responder", "Non-Responder", "Responder", "Non-Responder",
"Responder", "Non-Responder", "Responder", "Non-Responder", "Responder",
"Non-Responder", "Responder"), Group2 = c("Rest", "Rest", "Rest",
"Rest", "Rest", "Rest", "Rest", "Rest", "Rest", "Rest", "Rest",
"Rest", "Rest", "Rest"), n_mutated_group1 = c("0 of 25", "9",
"0 of 25", "6", "0 of 25", "6", "0 of 25", "6", "0 of 25", "6",
"0 of 25", "6", "1 of 25", "7"), n_mutated_group2 = c("9 of 25",
"0 of 25", "6 of 25", "0 of 25", "6 of 25", "0 of 25", "6 of 25",
"0 of 25", "6 of 25", "0 of 25", "6 of 25", "0 of 25", "7 of 25",
"1 of 25"), p_value = c(0.00163083541184905, 0.00163083541184905,
0.022289766970618, 0.022289766970618, 0.022289766970618, 0.022289766970618,
0.022289766970618, 0.022289766970618, 0.022289766970618, 0.022289766970618,
0.022289766970618, 0.022289766970618, 0.0487971536957187, 0.0487971536957187
), OR = c(0, Inf, 0, Inf, 0, Inf, 0, Inf, 0, Inf, 0, Inf, 0.111488645279478,
8.96952328636894), OR_low = c(0, 2.56647319276964, 0, 1.33358819424024,
0, 1.33358819424024, 0, 1.33358819424024, 0, 1.33358819424024,
0, 1.33358819424024, 0.00228988507629356, 1.0079479819766), OR_high = c(0.38963976043749,
Inf, 0.749856668137133, Inf, 0.749856668137133, Inf, 0.749856668137133,
Inf, 0.749856668137133, Inf, 0.749856668137133, Inf, 0.992114690322592,
436.703138665198), fdr = c(0.109265972593886, 0.109265972593886,
0.248902397838568, 0.248902397838568, 0.248902397838568, 0.248902397838568,
0.248902397838568, 0.248902397838568, 0.248902397838568, 0.248902397838568,
0.248902397838568, 0.248902397838568, 0.467058471087594, 0.467058471087594
)), row.names = c(NA, -14L), class = c("data.table", "data.frame"
), .internal.selfref = <pointer: 0x000002adab171ef0>, index = structure(integer(0), "`__Group1`" = c(1L,
3L, 5L, 7L, 9L, 11L, 13L, 2L, 4L, 6L, 8L, 10L, 12L, 14L)))
My plot only shows three points (no labels), although I'm expecting 7 labelled points (corresponding to the 7 values on the x and y axes).
所有 7 个点都在那里,只是其中 5 个是相同的,因此它们被绘制在彼此之上,导致“缺失”点。只有3个唯一点。标签没有显示,是因为 text() 的纵坐标用的是 y-1(原始 fdr 减 1,结果为负值),而点是按 -log10(y) 绘制的,所以标签落在了绘图区域之外。
如果你使用 ggplot
那么你可以“抖动”这些点来增加一些噪音:
# Jittered scatter plot of the significant genes.
# Five of the seven genes share the same (x, -log10(fdr)) coordinates, so a
# small amount of random noise is added to pull the overlapping points apart.
# Inputs (defined earlier): x = mutated-patient counts as text,
# y = raw fdr values, ann = gene symbols.
library(ggplot2)

# One seeded position object shared by both layers: with the same seed each
# label receives exactly the same offset as its point. This replaces the
# global set.seed() call and keeps the figure reproducible.
jitter_pos <- position_jitter(height = 0.05, seed = 6)

data.frame(x = as.numeric(x), y = -log10(y), gene = ann) %>%
  ggplot(aes(x = x, y = y, label = gene)) +
  geom_point(position = jitter_pos, alpha = 0.5, size = 3) +
  # vjust = -1 lifts each gene symbol just above its point
  geom_text(position = jitter_pos, vjust = -1) +
  xlim(0, 25)