表示基因组区间数据,需要一些帮助

Representing Genomic intervals data. Some help needed

我正在尝试获取基因组数据的特定表示。我的数据包含位于不同染色体上、具有特定起始和结束位置的片段。在下面的表格中你会看到三列:第一列是染色体的编号 (Chr),第二列是感兴趣片段的起始位置 (pos1),第三列是感兴趣片段的结束位置 (pos2):

Chr pos1    pos2
1   12900000    13700000
1   21200000    21500000
1   45300000    45700000
1   45900000    46600000
1   49400000    50600000
1   51000000    52000000
1   52500000    53000000
1   73400000    74100000
1   92700000    93400000
1   114100000   114400000
1   115400000   115500000
1   145700000   146500000
1   155200000   155900000
1   173500000   175000000
1   186400000   186700000
2   24100000    24500000
2   32000000    32400000
2   61500000    61800000
2   63000000    64300000
2   72500000    73100000
2   78700000    78700000
2   81800000    82200000
2   87000000    88300000
2   95600000    97100000
2   97800000    98300000
2   99700000    100100000
2   110600000   111400000
2   135900000   136500000
2   148600000   148900000
2   155900000   156400000
2   162400000   162600000
2   176300000   176400000
2   186400000   187500000
2   189200000   189700000
2   190000000   190300000
2   193800000   195000000
2   197600000   197600000
2   200900000   201200000
2   203800000   204400000
2   214300000   214600000
3   17400000    17800000
3   44400000    44800000
3   47100000    48500000
3   48800000    51800000
3   52600000    52800000
3   57500000    57800000
3   80700000    81000000
3   83200000    83400000
3   85500000    86000000
3   89600000    90400000
3   99500000    100000000
3   101100000   101400000
3   120700000   121200000
3   135900000   136700000
3   158000000   158300000
3   162500000   163200000
3   163400000   164200000
3   180700000   181000000
4   19200000    19800000
4   33800000    34300000
4   46700000    46700000
4   48400000    49000000
4   52800000    53200000
4   64700000    65000000
4   69400000    69900000
4   74200000    74300000
4   97100000    97400000
4   98600000    99000000
4   103800000   103900000
4   107000000   107500000
4   119100000   119600000
4   120300000   120600000
4   123200000   123500000
4   127800000   128100000
4   128700000   129200000
4   144800000   145000000
4   145800000   146000000
4   151400000   152000000
4   219200000   219800000
4   233800000   234300000
4   246700000   246700000
4   248400000   249000000
5   15500000    15500000
5   36900000    37700000
5   41800000    42200000
5   43500000    44200000
5   44600000    46300000
5   49600000    50100000
5   60000000    60400000
5   63400000    63700000
5   86600000    87000000
5   87200000    87900000
5   93100000    93500000
5   130000000   131300000
5   137200000   137600000
6   26500000    27100000
6   27500000    28500000
6   32800000    33500000
6   34800000    34900000
6   44800000    45300000
6   49000000    49300000
6   62100000    63800000
6   64300000    64600000
6   69900000    70100000
6   75000000    75000000
6   76200000    76600000
6   86300000    86900000
6   115400000   115900000
6   126700000   127100000
6   140500000   140700000
6   140900000   141000000
6   145900000   146600000
7   55900000    56700000
7   56900000    57200000
7   64400000    66400000
7   69100000    69900000
7   84300000    84500000
7   91400000    92000000
7   99800000    100200000
7   102300000   103000000
7   110900000   111200000
7   117900000   118300000
7   124600000   124800000
7   133200000   133600000
8   36100000    36400000
8   42800000    43700000
8   47100000    49000000
8   50200000    50800000
8   51200000    51300000
8   67600000    68200000
8   71600000    72000000
8   76700000    77000000
8   86600000    86900000
8   99600000    100800000
8   104600000   105000000
8   111500000   111800000
8   113000000   113300000
8   114000000   114500000
9   11400000    11800000
9   15700000    15900000
9   30400000    30900000
9   33800000    34200000
9   84600000    85100000
9   95100000    95600000
9   97600000    98100000
9   99400000    100000000
9   102400000   103200000
9   123900000   124200000
9   125700000   126000000
9   126400000   126600000
9   127700000   128700000
9   129800000   130100000
10  11400000    11800000
10  15700000    15900000
10  30400000    30900000
10  33800000    34200000
10  84600000    85100000
10  95100000    95600000
10  97600000    98100000
10  99400000    100000000
10  102400000   103200000
10  123900000   124200000
10  125700000   126000000
10  126400000   126600000
10  127700000   128700000
10  129800000   130100000
11  10000000    10300000
11  14500000    14900000
11  28000000    28500000
11  31200000    31800000
11  38600000    38600000
11  38800000    39000000
11  46500000    51500000
11  54900000    56400000
11  57600000    57900000
11  66300000    66600000
11  71500000    71800000
11  84800000    85500000
11  89500000    89900000
11  111600000   111900000
12  33800000    34800000
12  38100000    39200000
12  39800000    40000000
12  46800000    47100000
12  74200000    74600000
12  82700000    83100000
12  85700000    86000000
12  86400000    87700000
12  88800000    88800000
12  110400000   111200000
12  112000000   112900000
12  123400000   123900000
13  20100000    20100000
13  35600000    35900000
13  52700000    53300000
13  55800000    58600000
13  60400000    60700000
13  65600000    65900000
13  81300000    81300000
13  83600000    83700000
13  87100000    87900000
13  89300000    90000000
13  96500000    96800000
14  37600000    38000000
14  45400000    46000000
14  60400000    61500000
14  66800000    67800000
14  68400000    68700000
14  89000000    89300000
15  28700000    29200000
15  43000000    44200000
15  49400000    50000000
15  57200000    57300000
15  64500000    65100000
15  72200000    72900000
15  77500000    77800000
15  82600000    83400000
16  14600000    15500000
16  18300000    18800000
16  32000000    35000000
16  66900000    68400000
16  69100000    69300000
16  71600000    72200000
17  18400000    18600000
17  19900000    20600000
17  28100000    28500000
17  44200000    44800000
17  58900000    59000000
18  18700000    19400000
18  26200000    26600000
18  46700000    47000000
18  50700000    50900000
19  21300000    21700000
19  37200000    38400000
19  42700000    43800000
20  25300000    26200000
20  32600000    33800000
20  34000000    34800000
22  28400000    29100000
22  30100000    30600000
22  40700000    41700000

我的意图是用如下图表示此数据:

我需要用红色线段表示 pos1 和 pos2 之间的区间,用黑线表示每条 Chr 的全长。每条 Chr 的长度如下:

    chr      long
 [1,]   1 250000000
 [2,]   2 250000000
 [3,]   3 200000000
 [4,]   4 200000000
 [5,]   5 182000000
 [6,]   6 171000000
 [7,]   7 160000000
 [8,]   8 146000000
 [9,]   9 139000000
[10,]  10 133900000
[11,]  11 136000000
[12,]  12 134000000
[13,]  13 115000000
[14,]  14 108000000
[15,]  15 102000000
[16,]  16  91000000
[17,]  17  84000000
[18,]  18  81000000
[19,]  19  59000000
[20,]  20  64000000
[21,]  21  49000000
[22,]  22  52000000

我希望能够创建一个像上图一样的图形,并且可以稍微改变它,例如,如果我只想表示许多不同人群的 Chr1,而不是某个特定人群的所有 Chr。

我真的希望你能帮助我,因为我真的不知道如何开始。谢谢。

您可以使用 ggplot2 中的 geom_segment:先用染色体长度数据 (d_chr) 为每条染色体绘制黑线,然后用片段数据 (d) 在 pos1 和 pos2 之间绘制更粗的 (lwd=3) 红色线段:

注意:数据中 4 号染色体的最后四个片段(219200000–249000000)超出了该染色体给定的长度(200000000),因此图中这些红色线段会画在黑线末端之外。

这是生成该图的代码:

library(ggplot2)
# Segments of interest, one row per segment (241 rows):
#   Chr  - chromosome number (integer; chromosome 21 has no rows)
#   pos1 - start position of the segment
#   pos2 - end position of the segment (equal to pos1 for some rows)
# The literal is in dput() form so the example is fully reproducible.
# NOTE: the last four chromosome 4 rows (pos1 219200000..248400000) lie
# beyond the chromosome 4 length of 200000000 given in d_chr.
d <- structure(list(Chr = c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 
5L, 5L, 5L, 5L, 5L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 
6L, 6L, 6L, 6L, 6L, 6L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 
7L, 7L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 
9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 10L, 
10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 
11L, 11L, 11L, 11L, 11L, 11L, 11L, 11L, 11L, 11L, 11L, 11L, 11L, 
11L, 12L, 12L, 12L, 12L, 12L, 12L, 12L, 12L, 12L, 12L, 12L, 12L, 
13L, 13L, 13L, 13L, 13L, 13L, 13L, 13L, 13L, 13L, 13L, 14L, 14L, 
14L, 14L, 14L, 14L, 15L, 15L, 15L, 15L, 15L, 15L, 15L, 15L, 16L, 
16L, 16L, 16L, 16L, 16L, 17L, 17L, 17L, 17L, 17L, 18L, 18L, 18L, 
18L, 19L, 19L, 19L, 20L, 20L, 20L, 22L, 22L, 22L), pos1 = c(12900000L, 
21200000L, 45300000L, 45900000L, 49400000L, 51000000L, 52500000L, 
73400000L, 92700000L, 114100000L, 115400000L, 145700000L, 155200000L, 
173500000L, 186400000L, 24100000L, 32000000L, 61500000L, 63000000L, 
72500000L, 78700000L, 81800000L, 87000000L, 95600000L, 97800000L, 
99700000L, 110600000L, 135900000L, 148600000L, 155900000L, 162400000L, 
176300000L, 186400000L, 189200000L, 190000000L, 193800000L, 197600000L, 
200900000L, 203800000L, 214300000L, 17400000L, 44400000L, 47100000L, 
48800000L, 52600000L, 57500000L, 80700000L, 83200000L, 85500000L, 
89600000L, 99500000L, 101100000L, 120700000L, 135900000L, 158000000L, 
162500000L, 163400000L, 180700000L, 19200000L, 33800000L, 46700000L, 
48400000L, 52800000L, 64700000L, 69400000L, 74200000L, 97100000L, 
98600000L, 103800000L, 107000000L, 119100000L, 120300000L, 123200000L, 
127800000L, 128700000L, 144800000L, 145800000L, 151400000L, 219200000L, 
233800000L, 246700000L, 248400000L, 15500000L, 36900000L, 41800000L, 
43500000L, 44600000L, 49600000L, 60000000L, 63400000L, 86600000L, 
87200000L, 93100000L, 130000000L, 137200000L, 26500000L, 27500000L, 
32800000L, 34800000L, 44800000L, 49000000L, 62100000L, 64300000L, 
69900000L, 75000000L, 76200000L, 86300000L, 115400000L, 126700000L, 
140500000L, 140900000L, 145900000L, 55900000L, 56900000L, 64400000L, 
69100000L, 84300000L, 91400000L, 99800000L, 102300000L, 110900000L, 
117900000L, 124600000L, 133200000L, 36100000L, 42800000L, 47100000L, 
50200000L, 51200000L, 67600000L, 71600000L, 76700000L, 86600000L, 
99600000L, 104600000L, 111500000L, 113000000L, 114000000L, 11400000L, 
15700000L, 30400000L, 33800000L, 84600000L, 95100000L, 97600000L, 
99400000L, 102400000L, 123900000L, 125700000L, 126400000L, 127700000L, 
129800000L, 11400000L, 15700000L, 30400000L, 33800000L, 84600000L, 
95100000L, 97600000L, 99400000L, 102400000L, 123900000L, 125700000L, 
126400000L, 127700000L, 129800000L, 10000000L, 14500000L, 28000000L, 
31200000L, 38600000L, 38800000L, 46500000L, 54900000L, 57600000L, 
66300000L, 71500000L, 84800000L, 89500000L, 111600000L, 33800000L, 
38100000L, 39800000L, 46800000L, 74200000L, 82700000L, 85700000L, 
86400000L, 88800000L, 110400000L, 112000000L, 123400000L, 20100000L, 
35600000L, 52700000L, 55800000L, 60400000L, 65600000L, 81300000L, 
83600000L, 87100000L, 89300000L, 96500000L, 37600000L, 45400000L, 
60400000L, 66800000L, 68400000L, 89000000L, 28700000L, 43000000L, 
49400000L, 57200000L, 64500000L, 72200000L, 77500000L, 82600000L, 
14600000L, 18300000L, 32000000L, 66900000L, 69100000L, 71600000L, 
18400000L, 19900000L, 28100000L, 44200000L, 58900000L, 18700000L, 
26200000L, 46700000L, 50700000L, 21300000L, 37200000L, 42700000L, 
25300000L, 32600000L, 34000000L, 28400000L, 30100000L, 40700000L
), pos2 = c(13700000L, 21500000L, 45700000L, 46600000L, 50600000L, 
52000000L, 53000000L, 74100000L, 93400000L, 114400000L, 115500000L, 
146500000L, 155900000L, 175000000L, 186700000L, 24500000L, 32400000L, 
61800000L, 64300000L, 73100000L, 78700000L, 82200000L, 88300000L, 
97100000L, 98300000L, 100100000L, 111400000L, 136500000L, 148900000L, 
156400000L, 162600000L, 176400000L, 187500000L, 189700000L, 190300000L, 
195000000L, 197600000L, 201200000L, 204400000L, 214600000L, 17800000L, 
44800000L, 48500000L, 51800000L, 52800000L, 57800000L, 81000000L, 
83400000L, 86000000L, 90400000L, 100000000L, 101400000L, 121200000L, 
136700000L, 158300000L, 163200000L, 164200000L, 181000000L, 19800000L, 
34300000L, 46700000L, 49000000L, 53200000L, 65000000L, 69900000L, 
74300000L, 97400000L, 99000000L, 103900000L, 107500000L, 119600000L, 
120600000L, 123500000L, 128100000L, 129200000L, 145000000L, 146000000L, 
152000000L, 219800000L, 234300000L, 246700000L, 249000000L, 15500000L, 
37700000L, 42200000L, 44200000L, 46300000L, 50100000L, 60400000L, 
63700000L, 87000000L, 87900000L, 93500000L, 131300000L, 137600000L, 
27100000L, 28500000L, 33500000L, 34900000L, 45300000L, 49300000L, 
63800000L, 64600000L, 70100000L, 75000000L, 76600000L, 86900000L, 
115900000L, 127100000L, 140700000L, 141000000L, 146600000L, 56700000L, 
57200000L, 66400000L, 69900000L, 84500000L, 92000000L, 100200000L, 
103000000L, 111200000L, 118300000L, 124800000L, 133600000L, 36400000L, 
43700000L, 49000000L, 50800000L, 51300000L, 68200000L, 72000000L, 
77000000L, 86900000L, 100800000L, 105000000L, 111800000L, 113300000L, 
114500000L, 11800000L, 15900000L, 30900000L, 34200000L, 85100000L, 
95600000L, 98100000L, 100000000L, 103200000L, 124200000L, 126000000L, 
126600000L, 128700000L, 130100000L, 11800000L, 15900000L, 30900000L, 
34200000L, 85100000L, 95600000L, 98100000L, 100000000L, 103200000L, 
124200000L, 126000000L, 126600000L, 128700000L, 130100000L, 10300000L, 
14900000L, 28500000L, 31800000L, 38600000L, 39000000L, 51500000L, 
56400000L, 57900000L, 66600000L, 71800000L, 85500000L, 89900000L, 
111900000L, 34800000L, 39200000L, 40000000L, 47100000L, 74600000L, 
83100000L, 86000000L, 87700000L, 88800000L, 111200000L, 112900000L, 
123900000L, 20100000L, 35900000L, 53300000L, 58600000L, 60700000L, 
65900000L, 81300000L, 83700000L, 87900000L, 90000000L, 96800000L, 
38000000L, 46000000L, 61500000L, 67800000L, 68700000L, 89300000L, 
29200000L, 44200000L, 50000000L, 57300000L, 65100000L, 72900000L, 
77800000L, 83400000L, 15500000L, 18800000L, 35000000L, 68400000L, 
69300000L, 72200000L, 18600000L, 20600000L, 28500000L, 44800000L, 
59000000L, 19400000L, 26600000L, 47000000L, 50900000L, 21700000L, 
38400000L, 43800000L, 26200000L, 33800000L, 34800000L, 29100000L, 
30600000L, 41700000L)), .Names = c("Chr", "pos1", "pos2"), row.names = c(NA, 
-241L), class = "data.frame")
# Chromosome lengths, one row per chromosome (1..22):
#   Chr  - chromosome number (integer), same column name as in the segment
#          data so both layers of the plot can map y = Chr
#   long - total length of the chromosome (integer)
d_chr <- data.frame(
  Chr = 1:22,
  long = c(
    250000000L, 250000000L, 200000000L, 200000000L, 182000000L, 171000000L,
    160000000L, 146000000L, 139000000L, 133900000L, 136000000L, 134000000L,
    115000000L, 108000000L, 102000000L, 91000000L, 84000000L, 81000000L,
    59000000L, 64000000L, 49000000L, 52000000L
  )
)

# One horizontal track per chromosome (y = Chr):
#   layer 1: a line from 0 to the chromosome length, taken from d_chr
#            (drawn in geom_segment's default colour);
#   layer 2: a thick red segment from pos1 to pos2 for every row of d,
#            which is the plot's default data.
ggplot(data = d) +
  geom_segment(
    data = d_chr,
    mapping = aes(x = 0, xend = long, y = Chr, yend = Chr)
  ) +
  geom_segment(
    mapping = aes(x = pos1, xend = pos2, y = Chr, yend = Chr),
    colour = "red",
    lwd = 3
  ) +
  labs(x = "Position (bp)", y = "Chromosome")