Geom_density bin 宽度

Geom_density bin width

我有一个数据框:

# Example data frame used by the plots below. It has five columns (snp,
# closest_tss, min_dist, chrom, closest_gene) and row.names declares 100 rows.
# NOTE: snp, closest_tss and closest_gene are built with list(), so they are
# list columns; min_dist (numeric) and chrom (character) are plain vectors
# built with c().
# NOTE: the value "Y" occurs exactly once in chrom; every other chromosome
# label occurs several times.
dummyDF<-structure(list(snp = list(8399674L, 11696479L, 15916970L, 22829279L, 
    657163L, 20414154L, 23358775L, 2585288L, 15523632L, 22648550L, 
    19467126L, 15737142L, 1639261L, 11707283L, 16799668L, 10661430L, 
    8142421L, 9337952L, 8468957L, 8142472L, 4635689L, 16360746L, 
    23178865L, 15579170L, 8430979L, 16729557L, 10365980L, 8387614L, 
    12954259L, 11510303L, 15528925L, 23783949L, 3067157L, 9553180L, 
    9553178L, 9985294L, 23851154L, 23412260L, 9797873L, 11131961L, 
    21263476L, 8864537L, 10738260L, 10738260L, 15048533L, 10738261L, 
    10738261L, 7148103L, 9780586L, 15672503L, 17275491L, 15328638L, 
    21757656L, 19899793L, 16365499L, 4472195L, 6351441L, 19363898L, 
    18924664L, 9089091L, 20902568L, 20726967L, 23341032L, 23174994L, 
    25263347L, 19537953L, 19491191L, 3263728L, 19565585L, 20967128L, 
    14911386L, 16443836L, 15334684L, 8959580L, 9196703L, 18192428L, 
    6138916L, 14839671L, 14839672L, 2308671L, 4299259L, 19650366L, 
    9902435L, 834683L, 9881329L, 2442104L, 15499962L, 13826671L, 
    25273050L, 31396800L, 13073515L, 10004867L, 4820755L, 19748093L, 
    11555014L, 16300654L, 4642172L, 6372702L, 22944605L, 3051791L), 
    closest_tss = list(8399677L, 11696450L, 15916939L, 22829238L, 
        657111L, 20414086L, 23358856L, 2585206L, 15523716L, 22648452L, 
        19467028L, 15737032L, 1639135L, 11707410L, 16799809L, 
        10661576L, 8142267L, 9338156L, 8468753L, 8142267L, 4635899L, 
        16360535L, 23179080L, 15578954L, 8431200L, 16729779L, 
        10366210L, 8387892L, 12954538L, 11510588L, 15529229L, 
        23784254L, 3067468L, 9553514L, 9553514L, 9984951L, 23850811L, 
        23412606L, 9797525L, 11132319L, 21263113L, 8864905L, 
        10737889L, 10737889L, 15048904L, 10737889L, 10737889L, 
        7147729L, 9780208L, 15672119L, 17275876L, 15328243L, 
        21757241L, 19899376L, 16365920L, 4471766L, 6350960L, 
        19364395L, 18925170L, 9089597L, 20902057L, 20726455L, 
        23340513L, 23174460L, 25262811L, 19538497L, 19491735L, 
        3263181L, 19566137L, 20967687L, 14910818L, 16443251L, 
        15334095L, 8958978L, 9196101L, 18191802L, 6139550L, 14839034L, 
        14839034L, 2308028L, 4298594L, 19651031L, 9903116L, 835379L, 
        9882029L, 2442825L, 15499222L, 13827469L, 25273849L, 
        31395992L, 13072697L, 10004048L, 4819934L, 19748914L, 
        11554190L, 16301485L, 4643004L, 6373578L, 22945486L, 
        3052675L), min_dist = c(-3, 29, 31, 41, 52, 68, -81, 
    82, -84, 98, 98, 110, 126, -127, -141, -146, 154, -204, 204, 
    205, -210, 211, -215, 216, -221, -222, -230, -278, -279, 
    -285, -304, -305, -311, -334, -336, 343, 343, -346, 348, 
    -358, 363, -368, 371, 371, -371, 372, 372, 374, 378, 384, 
    -385, 395, 415, 417, -421, 429, 481, -497, -506, -506, 511, 
    512, 519, 534, 536, -544, -544, 547, -552, -559, 568, 585, 
    589, 602, 602, 626, -634, 637, 638, 643, 665, -665, -681, 
    -696, -700, -721, 740, -798, -799, 808, 818, 819, 821, -821, 
    824, -831, -832, -876, -881, -884), chrom = c("3L", "X", 
    "2L", "X", "X", "X", "3R", "X", "3L", "2R", "3L", "X", "3L", 
    "3L", "2L", "3L", "2R", "2L", "X", "2R", "X", "2R", "3R", 
    "2L", "2R", "2L", "X", "2R", "X", "3L", "3L", "3L", "X", 
    "X", "X", "2L", "2R", "3R", "3L", "3L", "3L", "2L", "2L", 
    "2L", "3L", "2L", "2L", "2R", "X", "2R", "2R", "2R", "2L", 
    "X", "2L", "3L", "2L", "2L", "3L", "X", "2R", "2L", "3R", 
    "2L", "3R", "2L", "2R", "Y", "2L", "2L", "X", "2R", "2L", 
    "3L", "X", "3R", "2L", "X", "X", "2L", "3L", "3R", "2L", 
    "3R", "X", "X", "2L", "3R", "3R", "3R", "2R", "2L", "2R", 
    "X", "3R", "3L", "2L", "2L", "X", "3L"), closest_gene = list(
        "CG7120", "PhKgamma", "CG13244", "fog", "SkpA", "CG15458", 
        "RpS3", "CG13760", "CG7945", "CG34445", "CG14096", "Arp6", 
        "metl", "CG11658", "Ugt36Bc", "CG8003", "Lpin", "Eaat1", 
        "CG12116", "Lpin", "CG12179", "CG6262", "Irk1", "kek3", 
        "Lcp1", "CG31812", "l(1)G0289", "pdm3", "hec", "CG33272", 
        "obst-H", "CG43968", "CG4116", "l(1)G0320", "l(1)G0320", 
        "bib", "ytr", "CG4374", "Zasp67", "CG32074", "Eip78C", 
        "CG32988", "CG12299", "CG12299", "Sytbeta", "CG12299", 
        "CG12299", "Spn43Aa", "CG1354", "fus", "mbl", "CG8160", 
        "step", "Hers", "jhamt", "CG32248", "CG9498", "CG13086", 
        "CG43049", "CG7033", "otp", "CG9316", "pnt", "CG12567", 
        "CG11858", "CG13082", "Oseg6", "CG46193", "CG13077", 
        "CG42238", "CG14406", "CG4409", "esg", "CG13313", "CG12118", 
        "CG14313", "CG13991", "CG32590", "CG32590", "CG16995", 
        "dyl", "CG31219", "Muc30E", "Myo81F", "CG9689", "tko", 
        "lace", "Mst87F", "CG11878", "Gcn2", "Drl-2", "CG4747", 
        "CG17528", "Alr", "CG14708", "CG4982", "CG15635", "slam", 
        "CG41106", "CG1271")), .Names = c("snp", "closest_tss", 
"min_dist", "chrom", "closest_gene"), row.names = c(NA, 100L), class = "data.frame")

当我使用 geom_density 绘制时,我得到:

# Single density curve of min_dist over all rows. The x axis is labelled
# "Distance to TSS" and limited to [-1000, 1000]; a dotted black vertical
# line marks x = 0. The plot is stored in `p` but not printed here.
p <- ggplot(dummyDF) +
  geom_density(aes(min_dist), alpha = 0.3) +
  scale_x_continuous("Distance to TSS", limits = c(-1000, 1000)) +
  geom_vline(xintercept = 0, colour = "black", linetype = "dotted")

但是当我尝试按 chrom 列填充颜色时,我得到:

# Same density plot, but with the fill aesthetic mapped to chrom, which
# yields one density curve per chromosome label. Axis label, x limits and
# the dotted line at x = 0 are unchanged. The plot is stored in `p` but not
# printed here.
p <- ggplot(dummyDF) +
  geom_density(aes(min_dist, fill = chrom), alpha = 0.3) +
  scale_x_continuous("Distance to TSS", limits = c(-1000, 1000)) +
  geom_vline(xintercept = 0, colour = "black", linetype = "dotted")

然而,绘制为直方图的相同数据看起来还不错:

  # Histogram of min_dist with bars filled by chrom, using 500 bins over the
  # same x range as the density plots, plus the dotted line at x = 0.
  p <- ggplot(dummyDF) +
    geom_histogram(aes(min_dist, fill = chrom), alpha = 0.6, bins = 500) +
    scale_x_continuous("Distance to TSS", limits = c(-1000, 1000)) +
    geom_vline(xintercept = 0, colour = "black", linetype = "dotted")
  # Evaluating `p` at top level prints the plot.
  p

为什么密度图没有显示任何内容?

这是否与正在选择的 bin 大小有关?

Y 染色体上只有一个观测值,也就是说那个点的密度正好是 1.00。由于其他染色体的密度在 min_dist 的大部分范围内约为 $10^{-4}$,整张图看起来就像一条密度为 0 的平线,外加一个在 min_dist ≈ 500 处、密度为 1 的尖峰。您可以通过添加 + facet_wrap(~chrom, scales = "free_y") 来分别查看每条染色体的密度曲线。