Geom_density bin 宽度
Geom_density bin width
我有一个数据框:
# Reproducible example data for the question, written as a structure() literal
# (looks like dput() output).
# Columns: snp, closest_tss and closest_gene are list columns; min_dist is a
# numeric vector and chrom is a character vector. row.names = c(NA, 100L)
# declares 100 rows.
# Note: chrom contains exactly one "Y" entry; its min_dist value is 547.
dummyDF<-structure(list(snp = list(8399674L, 11696479L, 15916970L, 22829279L,
657163L, 20414154L, 23358775L, 2585288L, 15523632L, 22648550L,
19467126L, 15737142L, 1639261L, 11707283L, 16799668L, 10661430L,
8142421L, 9337952L, 8468957L, 8142472L, 4635689L, 16360746L,
23178865L, 15579170L, 8430979L, 16729557L, 10365980L, 8387614L,
12954259L, 11510303L, 15528925L, 23783949L, 3067157L, 9553180L,
9553178L, 9985294L, 23851154L, 23412260L, 9797873L, 11131961L,
21263476L, 8864537L, 10738260L, 10738260L, 15048533L, 10738261L,
10738261L, 7148103L, 9780586L, 15672503L, 17275491L, 15328638L,
21757656L, 19899793L, 16365499L, 4472195L, 6351441L, 19363898L,
18924664L, 9089091L, 20902568L, 20726967L, 23341032L, 23174994L,
25263347L, 19537953L, 19491191L, 3263728L, 19565585L, 20967128L,
14911386L, 16443836L, 15334684L, 8959580L, 9196703L, 18192428L,
6138916L, 14839671L, 14839672L, 2308671L, 4299259L, 19650366L,
9902435L, 834683L, 9881329L, 2442104L, 15499962L, 13826671L,
25273050L, 31396800L, 13073515L, 10004867L, 4820755L, 19748093L,
11555014L, 16300654L, 4642172L, 6372702L, 22944605L, 3051791L),
closest_tss = list(8399677L, 11696450L, 15916939L, 22829238L,
657111L, 20414086L, 23358856L, 2585206L, 15523716L, 22648452L,
19467028L, 15737032L, 1639135L, 11707410L, 16799809L,
10661576L, 8142267L, 9338156L, 8468753L, 8142267L, 4635899L,
16360535L, 23179080L, 15578954L, 8431200L, 16729779L,
10366210L, 8387892L, 12954538L, 11510588L, 15529229L,
23784254L, 3067468L, 9553514L, 9553514L, 9984951L, 23850811L,
23412606L, 9797525L, 11132319L, 21263113L, 8864905L,
10737889L, 10737889L, 15048904L, 10737889L, 10737889L,
7147729L, 9780208L, 15672119L, 17275876L, 15328243L,
21757241L, 19899376L, 16365920L, 4471766L, 6350960L,
19364395L, 18925170L, 9089597L, 20902057L, 20726455L,
23340513L, 23174460L, 25262811L, 19538497L, 19491735L,
3263181L, 19566137L, 20967687L, 14910818L, 16443251L,
15334095L, 8958978L, 9196101L, 18191802L, 6139550L, 14839034L,
14839034L, 2308028L, 4298594L, 19651031L, 9903116L, 835379L,
9882029L, 2442825L, 15499222L, 13827469L, 25273849L,
31395992L, 13072697L, 10004048L, 4819934L, 19748914L,
11554190L, 16301485L, 4643004L, 6373578L, 22945486L,
3052675L), min_dist = c(-3, 29, 31, 41, 52, 68, -81,
82, -84, 98, 98, 110, 126, -127, -141, -146, 154, -204, 204,
205, -210, 211, -215, 216, -221, -222, -230, -278, -279,
-285, -304, -305, -311, -334, -336, 343, 343, -346, 348,
-358, 363, -368, 371, 371, -371, 372, 372, 374, 378, 384,
-385, 395, 415, 417, -421, 429, 481, -497, -506, -506, 511,
512, 519, 534, 536, -544, -544, 547, -552, -559, 568, 585,
589, 602, 602, 626, -634, 637, 638, 643, 665, -665, -681,
-696, -700, -721, 740, -798, -799, 808, 818, 819, 821, -821,
824, -831, -832, -876, -881, -884), chrom = c("3L", "X",
"2L", "X", "X", "X", "3R", "X", "3L", "2R", "3L", "X", "3L",
"3L", "2L", "3L", "2R", "2L", "X", "2R", "X", "2R", "3R",
"2L", "2R", "2L", "X", "2R", "X", "3L", "3L", "3L", "X",
"X", "X", "2L", "2R", "3R", "3L", "3L", "3L", "2L", "2L",
"2L", "3L", "2L", "2L", "2R", "X", "2R", "2R", "2R", "2L",
"X", "2L", "3L", "2L", "2L", "3L", "X", "2R", "2L", "3R",
"2L", "3R", "2L", "2R", "Y", "2L", "2L", "X", "2R", "2L",
"3L", "X", "3R", "2L", "X", "X", "2L", "3L", "3R", "2L",
"3R", "X", "X", "2L", "3R", "3R", "3R", "2R", "2L", "2R",
"X", "3R", "3L", "2L", "2L", "X", "3L"), closest_gene = list(
"CG7120", "PhKgamma", "CG13244", "fog", "SkpA", "CG15458",
"RpS3", "CG13760", "CG7945", "CG34445", "CG14096", "Arp6",
"metl", "CG11658", "Ugt36Bc", "CG8003", "Lpin", "Eaat1",
"CG12116", "Lpin", "CG12179", "CG6262", "Irk1", "kek3",
"Lcp1", "CG31812", "l(1)G0289", "pdm3", "hec", "CG33272",
"obst-H", "CG43968", "CG4116", "l(1)G0320", "l(1)G0320",
"bib", "ytr", "CG4374", "Zasp67", "CG32074", "Eip78C",
"CG32988", "CG12299", "CG12299", "Sytbeta", "CG12299",
"CG12299", "Spn43Aa", "CG1354", "fus", "mbl", "CG8160",
"step", "Hers", "jhamt", "CG32248", "CG9498", "CG13086",
"CG43049", "CG7033", "otp", "CG9316", "pnt", "CG12567",
"CG11858", "CG13082", "Oseg6", "CG46193", "CG13077",
"CG42238", "CG14406", "CG4409", "esg", "CG13313", "CG12118",
"CG14313", "CG13991", "CG32590", "CG32590", "CG16995",
"dyl", "CG31219", "Muc30E", "Myo81F", "CG9689", "tko",
"lace", "Mst87F", "CG11878", "Gcn2", "Drl-2", "CG4747",
"CG17528", "Alr", "CG14708", "CG4982", "CG15635", "slam",
"CG41106", "CG1271")), .Names = c("snp", "closest_tss",
"min_dist", "chrom", "closest_gene"), row.names = c(NA, 100L), class = "data.frame")
当我使用 geom_density 绘制时,我得到:
# Density of min_dist over all rows of dummyDF (no grouping).
# The x axis is restricted to [-1000, 1000] and a dotted vertical line marks 0.
p <- ggplot(dummyDF) +
  geom_density(aes(x = min_dist), alpha = 0.3) +
  scale_x_continuous(name = "Distance to TSS", limits = c(-1000, 1000)) +
  geom_vline(xintercept = 0, colour = "black", linetype = "dotted")
但是当我尝试按 chrom 列填充颜色(fill)时,我得到:
# Density of min_dist with one filled curve per value of chrom.
# The x axis is restricted to [-1000, 1000] and a dotted vertical line marks 0.
p <- ggplot(dummyDF) +
  geom_density(aes(x = min_dist, fill = chrom), alpha = 0.3) +
  scale_x_continuous(name = "Distance to TSS", limits = c(-1000, 1000)) +
  geom_vline(xintercept = 0, colour = "black", linetype = "dotted")
然而,绘制为直方图的相同数据看起来还不错:
# Histogram of min_dist (500 bins) with bars filled by chrom.
# The x axis is restricted to [-1000, 1000] and a dotted vertical line marks 0.
p <- ggplot(dummyDF) +
  geom_histogram(aes(x = min_dist, fill = chrom), alpha = 0.6, bins = 500) +
  scale_x_continuous(name = "Distance to TSS", limits = c(-1000, 1000)) +
  geom_vline(xintercept = 0, colour = "black", linetype = "dotted")
# Print the plot.
p
为什么密度图没有显示任何内容?
这是否与正在选择的 bin 大小有关?
Y 染色体上只有一个观测值(min_dist = 547),因此该组的密度在这一点上恰好为 1.00。而其他染色体的密度在 min_dist 的大部分范围内约为 $10^{-4}$ 量级,所以整张图看起来像一条密度为 0 的平线,外加一个位于 min_dist ≈ 500 处、密度为 1 的尖峰。您可以通过添加 + facet_wrap(~chrom, scales = "free_y") 来查看这一点。
我有一个数据框:
# Reproducible example data for the question, written as a structure() literal
# (looks like dput() output).
# Columns: snp, closest_tss and closest_gene are list columns; min_dist is a
# numeric vector and chrom is a character vector. row.names = c(NA, 100L)
# declares 100 rows.
# Note: chrom contains exactly one "Y" entry; its min_dist value is 547.
dummyDF<-structure(list(snp = list(8399674L, 11696479L, 15916970L, 22829279L,
657163L, 20414154L, 23358775L, 2585288L, 15523632L, 22648550L,
19467126L, 15737142L, 1639261L, 11707283L, 16799668L, 10661430L,
8142421L, 9337952L, 8468957L, 8142472L, 4635689L, 16360746L,
23178865L, 15579170L, 8430979L, 16729557L, 10365980L, 8387614L,
12954259L, 11510303L, 15528925L, 23783949L, 3067157L, 9553180L,
9553178L, 9985294L, 23851154L, 23412260L, 9797873L, 11131961L,
21263476L, 8864537L, 10738260L, 10738260L, 15048533L, 10738261L,
10738261L, 7148103L, 9780586L, 15672503L, 17275491L, 15328638L,
21757656L, 19899793L, 16365499L, 4472195L, 6351441L, 19363898L,
18924664L, 9089091L, 20902568L, 20726967L, 23341032L, 23174994L,
25263347L, 19537953L, 19491191L, 3263728L, 19565585L, 20967128L,
14911386L, 16443836L, 15334684L, 8959580L, 9196703L, 18192428L,
6138916L, 14839671L, 14839672L, 2308671L, 4299259L, 19650366L,
9902435L, 834683L, 9881329L, 2442104L, 15499962L, 13826671L,
25273050L, 31396800L, 13073515L, 10004867L, 4820755L, 19748093L,
11555014L, 16300654L, 4642172L, 6372702L, 22944605L, 3051791L),
closest_tss = list(8399677L, 11696450L, 15916939L, 22829238L,
657111L, 20414086L, 23358856L, 2585206L, 15523716L, 22648452L,
19467028L, 15737032L, 1639135L, 11707410L, 16799809L,
10661576L, 8142267L, 9338156L, 8468753L, 8142267L, 4635899L,
16360535L, 23179080L, 15578954L, 8431200L, 16729779L,
10366210L, 8387892L, 12954538L, 11510588L, 15529229L,
23784254L, 3067468L, 9553514L, 9553514L, 9984951L, 23850811L,
23412606L, 9797525L, 11132319L, 21263113L, 8864905L,
10737889L, 10737889L, 15048904L, 10737889L, 10737889L,
7147729L, 9780208L, 15672119L, 17275876L, 15328243L,
21757241L, 19899376L, 16365920L, 4471766L, 6350960L,
19364395L, 18925170L, 9089597L, 20902057L, 20726455L,
23340513L, 23174460L, 25262811L, 19538497L, 19491735L,
3263181L, 19566137L, 20967687L, 14910818L, 16443251L,
15334095L, 8958978L, 9196101L, 18191802L, 6139550L, 14839034L,
14839034L, 2308028L, 4298594L, 19651031L, 9903116L, 835379L,
9882029L, 2442825L, 15499222L, 13827469L, 25273849L,
31395992L, 13072697L, 10004048L, 4819934L, 19748914L,
11554190L, 16301485L, 4643004L, 6373578L, 22945486L,
3052675L), min_dist = c(-3, 29, 31, 41, 52, 68, -81,
82, -84, 98, 98, 110, 126, -127, -141, -146, 154, -204, 204,
205, -210, 211, -215, 216, -221, -222, -230, -278, -279,
-285, -304, -305, -311, -334, -336, 343, 343, -346, 348,
-358, 363, -368, 371, 371, -371, 372, 372, 374, 378, 384,
-385, 395, 415, 417, -421, 429, 481, -497, -506, -506, 511,
512, 519, 534, 536, -544, -544, 547, -552, -559, 568, 585,
589, 602, 602, 626, -634, 637, 638, 643, 665, -665, -681,
-696, -700, -721, 740, -798, -799, 808, 818, 819, 821, -821,
824, -831, -832, -876, -881, -884), chrom = c("3L", "X",
"2L", "X", "X", "X", "3R", "X", "3L", "2R", "3L", "X", "3L",
"3L", "2L", "3L", "2R", "2L", "X", "2R", "X", "2R", "3R",
"2L", "2R", "2L", "X", "2R", "X", "3L", "3L", "3L", "X",
"X", "X", "2L", "2R", "3R", "3L", "3L", "3L", "2L", "2L",
"2L", "3L", "2L", "2L", "2R", "X", "2R", "2R", "2R", "2L",
"X", "2L", "3L", "2L", "2L", "3L", "X", "2R", "2L", "3R",
"2L", "3R", "2L", "2R", "Y", "2L", "2L", "X", "2R", "2L",
"3L", "X", "3R", "2L", "X", "X", "2L", "3L", "3R", "2L",
"3R", "X", "X", "2L", "3R", "3R", "3R", "2R", "2L", "2R",
"X", "3R", "3L", "2L", "2L", "X", "3L"), closest_gene = list(
"CG7120", "PhKgamma", "CG13244", "fog", "SkpA", "CG15458",
"RpS3", "CG13760", "CG7945", "CG34445", "CG14096", "Arp6",
"metl", "CG11658", "Ugt36Bc", "CG8003", "Lpin", "Eaat1",
"CG12116", "Lpin", "CG12179", "CG6262", "Irk1", "kek3",
"Lcp1", "CG31812", "l(1)G0289", "pdm3", "hec", "CG33272",
"obst-H", "CG43968", "CG4116", "l(1)G0320", "l(1)G0320",
"bib", "ytr", "CG4374", "Zasp67", "CG32074", "Eip78C",
"CG32988", "CG12299", "CG12299", "Sytbeta", "CG12299",
"CG12299", "Spn43Aa", "CG1354", "fus", "mbl", "CG8160",
"step", "Hers", "jhamt", "CG32248", "CG9498", "CG13086",
"CG43049", "CG7033", "otp", "CG9316", "pnt", "CG12567",
"CG11858", "CG13082", "Oseg6", "CG46193", "CG13077",
"CG42238", "CG14406", "CG4409", "esg", "CG13313", "CG12118",
"CG14313", "CG13991", "CG32590", "CG32590", "CG16995",
"dyl", "CG31219", "Muc30E", "Myo81F", "CG9689", "tko",
"lace", "Mst87F", "CG11878", "Gcn2", "Drl-2", "CG4747",
"CG17528", "Alr", "CG14708", "CG4982", "CG15635", "slam",
"CG41106", "CG1271")), .Names = c("snp", "closest_tss",
"min_dist", "chrom", "closest_gene"), row.names = c(NA, 100L), class = "data.frame")
当我使用 geom_density 绘制时,我得到:
# Density of min_dist over all rows of dummyDF (no grouping).
# The x axis is restricted to [-1000, 1000] and a dotted vertical line marks 0.
p <- ggplot(dummyDF) +
  geom_density(aes(x = min_dist), alpha = 0.3) +
  scale_x_continuous(name = "Distance to TSS", limits = c(-1000, 1000)) +
  geom_vline(xintercept = 0, colour = "black", linetype = "dotted")
但是当我尝试按 chrom 列填充颜色(fill)时,我得到:
# Density of min_dist with one filled curve per value of chrom.
# The x axis is restricted to [-1000, 1000] and a dotted vertical line marks 0.
p <- ggplot(dummyDF) +
  geom_density(aes(x = min_dist, fill = chrom), alpha = 0.3) +
  scale_x_continuous(name = "Distance to TSS", limits = c(-1000, 1000)) +
  geom_vline(xintercept = 0, colour = "black", linetype = "dotted")
然而,绘制为直方图的相同数据看起来还不错:
# Histogram of min_dist (500 bins) with bars filled by chrom.
# The x axis is restricted to [-1000, 1000] and a dotted vertical line marks 0.
p <- ggplot(dummyDF) +
  geom_histogram(aes(x = min_dist, fill = chrom), alpha = 0.6, bins = 500) +
  scale_x_continuous(name = "Distance to TSS", limits = c(-1000, 1000)) +
  geom_vline(xintercept = 0, colour = "black", linetype = "dotted")
# Print the plot.
p
为什么密度图没有显示任何内容?
这是否与正在选择的 bin 大小有关?
Y 染色体上只有一个观测值(min_dist = 547),因此该组的密度在这一点上恰好为 1.00。而其他染色体的密度在 min_dist 的大部分范围内约为 $10^{-4}$ 量级,所以整张图看起来像一条密度为 0 的平线,外加一个位于 min_dist ≈ 500 处、密度为 1 的尖峰。您可以通过添加 + facet_wrap(~chrom, scales = "free_y")
.