如何让 ggrepel 将(某些)标签移动到美国地图边界之外?

How do I make ggrepel move (some) labels outside US map boundaries?

我正在尝试使用 ggrepel 创建我的第一张地图,但如您所见,结果是一团糟:标签全都重叠在一起。我映射和标注的大部分位置都集中在东北部,因此标签相互重叠。我如何让一些标签移到地图边界之外(可以说是放到海洋里)?这是我用来创建这个怪物的代码:

# Question code: US base map, one repelled label per college, and points
# sized by undergraduate enrollment added as a later layer.
plot_usmap(fill = "light blue", alpha = 0.5) +
  ggrepel::geom_label_repel(
    data = top_18_2_transformed,
    mapping = aes(x = x, y = y, label = INSTNM),
    # Label box appearance.
    family = "Avenir Next",
    size = 3,
    fill = "gray99",
    alpha = 1.0,
    label.padding = unit(0.75, "mm"),
    label.r = unit(0.2, "lines"),
    label.size = unit(0.15, "mm"),
    # Repulsion settings.
    nudge_x = 20,
    nudge_y = 20,
    box.padding = 0.3,
    point.padding = NA,
    max.overlaps = 30,
    seed = 1000,
    # Connector segments between each label and its point.
    min.segment.length = 0.1,
    segment.color = "black",
    segment.size = 1
  ) +
  geom_point(
    data = top_18_2_transformed,
    mapping = aes(x = x, y = y, size = UGDS),
    alpha = 0.75,
    color = "red"
  ) +
  labs(
    size = "Undergrad Enrollment",
    title = "Select Colleges"
  ) +
  theme(legend.position = "right")

这是我有问题的地图的图片:

提前感谢您提供的任何更正。

2022 年 3 月 31 日更新:这是 dput(top_18_2_transformed)

structure(list(lon = c(-74.659365, -122.167359, -78.937624, -75.19391, 
-71.093226, -77.073463, -118.125878, -117.709837, -71.222839, 
-79.941993, -72.926688, -76.483084, -73.961885, -71.169242, -74.025334, 
-75.380236, -70.624084, -71.118313), lat = c(40.348732, 37.429434, 
36.001135, 39.950929, 42.359243, 38.908809, 34.137349, 34.106515, 
42.385995, 40.44357, 41.311158, 42.4472, 40.808286, 42.336213, 
40.744776, 40.606822, 41.739072, 42.374471), UNITID = c(186131, 
243744, 198419, 215062, 166683, 131496, 110404, 115409, 164739, 
211440, 130794, 190415, 190150, 164924, 186867, 213543, 166692, 
166027), OPEID = c(262700, 130500, 292000, 337800, 217800, 144500, 
113100, 117100, 212400, 324200, 142600, 271100, 270700, 212800, 
263900, 328900, 218100, 215500), OPEID6 = c(2627, 1305, 2920, 
3378, 2178, 1445, 1131, 1171, 2124, 3242, 1426, 2711, 2707, 2128, 
2639, 3289, 2181, 2155), INSTNM = c("Princeton University", "Stanford University", 
"Duke University", "University of Pennsylvania", "Massachusetts Institute of Technology", 
"Georgetown University", "California Institute of Technology", 
"Harvey Mudd College", "Bentley University", "Carnegie Mellon University", 
"Yale University", "Cornell University", "Columbia University in the City of New York", 
"Boston College", "Stevens Institute of Technology", "Lehigh University", 
"Massachusetts Maritime Academy", "Harvard University"), CITY = c("Princeton", 
"Stanford", "Durham", "Philadelphia", "Cambridge", "Washington", 
"Pasadena", "Claremont", "Waltham", "Pittsburgh", "New Haven", 
"Ithaca", "New York", "Chestnut Hill", "Hoboken", "Bethlehem", 
"Buzzards Bay", "Cambridge"), STABBR = c("NJ", "CA", "NC", "PA", 
"MA", "DC", "CA", "CA", "MA", "PA", "CT", "NY", "NY", "MA", "NJ", 
"PA", "MA", "MA"), ZIP = c("08544-0070", "94305", "27708", "19104-6303", 
"02139-4307", "20057-0001", "91125", "91711", "02452-4705", "15213-3890", 
"6520", "14853", "10027", "2467", "07030-5991", "18015", "02532-1803", 
"2138"), ACCREDAGENCY = c("Middle States Commission on Higher Education", 
"Western Association of Schools and Colleges Senior Colleges and University Commission", 
"Southern Association of Colleges and Schools Commission on Colleges", 
"Middle States Commission on Higher Education", "New England Commission on Higher Education", 
"Middle States Commission on Higher Education", "Western Association of Schools and Colleges Senior Colleges and University Commission", 
"Western Association of Schools and Colleges Senior Colleges and University Commission", 
"New England Commission on Higher Education", "Middle States Commission on Higher Education", 
"New England Commission on Higher Education", "Middle States Commission on Higher Education", 
"Middle States Commission on Higher Education", "New England Commission on Higher Education", 
"Middle States Commission on Higher Education", "Middle States Commission on Higher Education", 
"New England Commission on Higher Education", "New England Commission on Higher Education"
), INSTURL = c("www.princeton.edu/", "www.stanford.edu/", "www.duke.edu/", 
"www.upenn.edu/", "web.mit.edu/", "www.georgetown.edu/", "www.caltech.edu/", 
"https://www.hmc.edu/", "www.bentley.edu/", "www.cmu.edu/", "https://www.yale.edu/", 
"www.cornell.edu/", "www.columbia.edu/", "www.bc.edu/", "www.stevens.edu/", 
"www.lehigh.edu/", "https://www.maritime.edu/", "www.harvard.edu/"
), SCH_DEG = c(3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 
3, 3), PREDDEG = c(3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 
3, 3, 3, 3), HIGHDEG = c(4, 4, 4, 4, 4, 4, 4, 3, 4, 4, 4, 4, 
4, 4, 4, 4, 4, 4), REGION = c(2, 8, 5, 2, 1, 2, 8, 8, 1, 2, 1, 
2, 2, 1, 2, 2, 1, 1), CCBASIC = c(15, 15, 15, 15, 15, 15, 15, 
21, 18, 15, 15, 15, 15, 15, 16, 16, 22, 15), ADM_RATE = c(0.0578, 
0.0434, 0.076, 0.0766, 0.067, 0.1436, 0.0642, 0.1367, 0.4672, 
0.1544, 0.0608, 0.1085, 0.0545, 0.2722, 0.3996, 0.321, 0.9146, 
0.0464), ACTCM25 = c(33, 32, 33, 33, 34, 31, 35, 33, 27, 33, 
33, 32, 33, 31, 31, 29, 19, 33), ACTCM75 = c(35, 35, 35, 35, 
36, 35, 36, 35, 31, 35, 35, 35, 35, 34, 34, 33, 24, 35), SAT_AVG = c(1517, 
1503, 1522, 1511, 1547, 1473, 1557, 1526, 1327, 1513, 1517, 1487, 
1511, 1437, 1429, 1380, 1100, 1517), DISTANCEONLY = c(0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), UGDS = c(5308, 
6994, 6546, 10774, 4516, 7141, 938, 893, 4157, 6535, 6089, 14976, 
8221, 9637, 3641, 5164, 1654, 7547), CURROPER = c(1, 1, 1, 1, 
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1), COSTT4_A = c(70900, 
71587, 75105, 75303, 70240, 73840, 72084, 76953, 68577, 72265, 
73900, 73879, 76907, 73053, 68734, 68383, 27858, 73485), COSTT4_P = c("NULL", 
"NULL", "NULL", "NULL", "NULL", "NULL", "NULL", "NULL", "NULL", 
"NULL", "NULL", "NULL", "NULL", "NULL", "NULL", "NULL", "NULL", 
"NULL"), TUITIONFEE_IN = c(52800, 53529, 58031, 57770, 53790, 
56058, 54600, 58660, 51830, 57119, 55500, 57222, 61788, 57910, 
54014, 55240, 10018, 51925), TUITIONFEE_OUT = c(52800, 53529, 
58031, 57770, 53790, 56058, 54600, 58660, 51830, 57119, 55500, 
57222, 61788, 57910, 54014, 55240, 25752, 51925), AVGFACSAL = c(20724, 
20865, 16863, 18277, 19624, 15798, 20595, 14397, 14592, 12296, 
19830, 15574, 19431, 15599, 15318, 13763, 8928, 20988), PFTFAC = c("0.835", 
"0.9881", "0.9364", "0.7779", "0.9885", "0.4815", "0.9289", "0.8992", 
"0.6696", "0.9161", "0.717", "0.9074", "0.4521", "0.6662", "1", 
"0.8392", "0.5867", "0.862"), C150_4 = c(0.979, 0.9432, 0.9462, 
0.96, 0.954, 0.9491, 0.9357, 0.9167, 0.8952, 0.9049, 0.972, 0.9453, 
0.9549, 0.9404, 0.8473, 0.8981, 0.7629, 0.971), RET_FT4 = c(0.9768, 
0.9876, 0.9827, 0.9808, 0.9946, 0.9679, 0.9826, 0.9744, 0.9201, 
0.9732, 0.9892, 0.9748, 0.9853, 0.9467, 0.9394, 0.9349, 0.8672, 
0.9722), RET_PT4 = c("NULL", "NULL", "NULL", "0.9245", "NULL", 
"0.6667", "NULL", "NULL", "NULL", "NULL", "NULL", "NULL", "0.95", 
"NULL", "NULL", "NULL", "NULL", "NULL"), MD_EARN_WNE_P10 = c("95689", 
"97798", "93115", "103246", "111222", "96375", "112166", "108988", 
"107974", "99998", "88655", "91176", "89871", "93021", "98159", 
"95033", "91668", "84918"), PCT25_EARN_WNE_P10 = c("52729", "61965", 
"61558", "65218", "67120", "61372", "67501", "69466", "73117", 
"62003", "60311", "59566", "56005", "62006", "72669", "65644", 
"68187", "56301"), PCT75_EARN_WNE_P10 = c("167686", "172245", 
"151838", "174907", "169465", "147685", "175675", "173725", "146079", 
"159483", "146102", "147189", "141158", "147010", "127298", "134075", 
"129421", "153746"), MD_EARN_WNE_P6 = c("84713", "88873", "77260", 
"80445", "112623", "71107", "129420", "112059", "78514", "87824", 
"72046", "78779", "79434", "70858", "82237", "79832", "79354", 
"77816"), GRAD_DEBT_MDN_SUPP = c("10450", "12000", "13500", "16763", 
"13418", "16500", "PrivacySuppressed", "22089", "25000", "22014", 
"13142", "14500", "21500", "18000", "27000", "23000", "26000", 
"12665"), GRAD_DEBT_MDN10YR_SUPP = c("104.4654099", "119.9602793", 
"134.9553142", "167.5745134", "134.1355856", "164.945384", "PrivacySuppressed", 
"220.8168841", "249.9172485", "220.0671323", "131.3764992", "144.9520041", 
"214.9288337", "179.9404189", "269.9106283", "229.9238686", "259.9139384", 
"126.6080781"), C100_4 = c(0.898, 0.7288, 0.8831, 0.8571, 0.8691, 
0.9076, 0.8434, 0.8565, 0.8479, 0.7599, 0.8777, 0.8694, 0.8635, 
0.9003, 0.4566, 0.8003, 0.6322, 0.8476), ICLEVEL = c(1, 1, 1, 
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1), OPENADMP = c(2, 
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2), GRADS = c("2997", 
"10253", "10037", "14803", "6990", "12080", "1299", "NULL", "1086", 
"7562", "7517", "8984", "23235", "4846", "3624", "1775", "97", 
"21592"), ACCREDCODE = c("MSACHE", "WASCSR", "SACSCC", "MSACHE", 
"NECHE", "MSACHE", "WASCSR", "WASCSR", "NECHE", "MSACHE", "NECHE", 
"MSACHE", "MSACHE", "NECHE", "MSACHE", "MSACHE", "NECHE", "NECHE"
), RET_FT4_POOLED = c(0.9788, 0.9879, 0.9793, 0.9821, 0.9909, 
0.9651, 0.9806, 0.9716, 0.9262, 0.97, 0.9892, 0.9741, 0.9825, 
0.9479, 0.9423, 0.9378, 0.8633, 0.9817), C100_4_POOLED = c(0.8856, 
0.739, 0.8788, 0.8546, 0.8602, 0.9009, 0.8242, 0.8551, 0.8326, 
0.7546, 0.8772, 0.8766, 0.8677, 0.8918, 0.4515, 0.7621, 0.5955, 
0.8573), BOOKSUPPLY = c("1050", "1245", "1434", "1358", "820", 
"1200", "1428", "800", "1300", "1000", "1050", "970", "1294", 
"1250", "1200", "1000", "1500", "1000"), ADMCON7 = c(1, 1, 1, 
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 5, 1, 1, 1), MDCOMP_ALL = c(0.5845, 
0.5845, 0.5845, 0.5845, 0.5845, 0.5845, 0.5845, 0.5845, 0.5845, 
0.5845, 0.5845, 0.5845, 0.5845, 0.5845, 0.5845, 0.5845, 0.5845, 
0.5845), MDCOST_ALL = c(15387.5, 15387.5, 15387.5, 15387.5, 15387.5, 
15387.5, 15387.5, 15387.5, 15387.5, 15387.5, 15387.5, 15387.5, 
15387.5, 15387.5, 15387.5, 15387.5, 15387.5, 15387.5), MDEARN_ALL = c(37078, 
37078, 37078, 37078, 37078, 37078, 37078, 37078, 37078, 37078, 
37078, 37078, 37078, 37078, 37078, 37078, 37078, 37078), PPTUG_EF = c(0, 
0, 0.0031, 0.0537, 0.0064, 0.0214, 0, 0.0011, 0.0118, 0.017, 
2e-04, 3e-04, 0.0633, 0.0127, 0, 0.0128, 0.023, 0.0745), INEXPFTE = c(60048, 
113338, 68756, 56874, 80756, 31693, 105185, 34419, 15842, 28167, 
57231, 29893, 96463, 23266, 12504, 24995, 9687, 46272), C150_4_POOLED = c(0.9712, 
0.9435, 0.9512, 0.9574, 0.9477, 0.9452, 0.9278, 0.9179, 0.8917, 
0.8968, 0.969, 0.9452, 0.9566, 0.9297, 0.8608, 0.886, 0.7484, 
0.974), GRAD_DEBT_MDN = c("10450", "12000", "13500", "16763", 
"13418", "16500", "17747", "22089", "25000", "22014", "13142", 
"14500", "21500", "18000", "27000", "23000", "26000", "12665"
), x = c(2107384.76948701, -1933340.27810509, 1876178.25472949, 
2077243.02501463, 2314261.77712267, 1955381.08673633, -1660141.85673732, 
-1623368.30493136, 2303424.70345276, 1678023.03854027, 2211596.23078863, 
1896995.53745184, 2147624.50302849, 2309370.68277906, 2144734.86774305, 
2041573.64168227, 2373567.48443726, 2311783.20749272), y = c(-188894.792987744, 
-582296.149881856, -762721.806918975, -245389.810253038, 123275.753360416, 
-404107.357328073, -1027748.36033576, -1039201.65863312, 122405.777575308, 
-300870.762534603, -39714.5927185968, -7748.73302456512, -121333.925485063, 
118650.586978148, -129820.607837031, -179439.260821836, 71069.0976923304, 
124173.1993115)), class = "data.frame", row.names = c(NA, -18L
))

通过一些数据操作,您可以将标签移到美国地图的两侧,并绘制线段将标签连接到对应的大学:

# Order the colleges by decreasing y so that the first row of each subset
# receives the highest label position assigned below.
top_18_2_transformed <-
  top_18_2_transformed[order(-top_18_2_transformed$y), ]

# Split the colleges by the sign of x. A point lying exactly on x == 0 is
# assigned to the east so that it still receives a label.
colleges_east <- top_18_2_transformed[top_18_2_transformed$x >= 0, ]
colleges_west <- top_18_2_transformed[top_18_2_transformed$x < 0, ]

# Each side's labels share one x position and are stacked downwards in steps
# of 250000 from a fixed starting y. The positions are sized from the number
# of rows, so the code keeps working when the number of colleges on a side
# changes (including a side with no colleges at all).
colleges_west$lab_x <- rep(-2300000, nrow(colleges_west))
colleges_west$lab_y <- seq(
  from = -1000000, by = -250000, length.out = nrow(colleges_west)
)
colleges_east$lab_x <- rep(2800000, nrow(colleges_east))
colleges_east$lab_y <- seq(
  from = 1000000, by = -250000, length.out = nrow(colleges_east)
)


# Map with manually positioned labels: text in two columns either side of the
# map, enrollment-sized points, and straight segments joining each point to
# its label.
plot_usmap(fill = "light blue", alpha = 0.5) +
  # West labels are right-aligned (hjust = 1) at their label position.
  geom_text(
    data = colleges_west,
    mapping = aes(
      x = lab_x,
      y = lab_y,
      label = stringr::str_wrap(INSTNM, width = 25)
    ),
    size = 3,
    hjust = 1,
    lineheight = 0.8
  ) +
  # East labels are left-aligned (hjust = 0) at their label position.
  geom_text(
    data = colleges_east,
    mapping = aes(
      x = lab_x,
      y = lab_y,
      label = stringr::str_wrap(INSTNM, width = 25)
    ),
    size = 3,
    hjust = 0,
    lineheight = 0.8
  ) +
  geom_point(
    data = top_18_2_transformed,
    mapping = aes(x = x, y = y, size = UGDS),
    alpha = 0.75,
    color = "red"
  ) +
  # Segments stop 100000 units short of the label position on the map side.
  geom_segment(
    data = colleges_east,
    mapping = aes(x = x, y = y, xend = lab_x - 100000, yend = lab_y)
  ) +
  geom_segment(
    data = colleges_west,
    mapping = aes(x = x, y = y, xend = lab_x + 100000, yend = lab_y)
  ) +
  labs(
    size = "Undergrad Enrollment",
    title = "Select Colleges"
  ) +
  theme(
    legend.direction = "horizontal",
    legend.position = c(0.35, 0)
  ) +
  # Widen the visible area so the label columns fit beside the map.
  coord_cartesian(
    xlim = c(-3500000, 4000000),
    ylim = c(-3000000, 1500000)
  )

看起来 ggrepel::geom_*_repel() 既不接受在 aes() 内指定 xlim,也不能用 list 代替 vector 来分别约束西海岸和东海岸的标签。但是,您可以将它们拆分成两个单独的图层,这样更容易控制。下面我写了一个函数来生成这样的图层,以避免重复图层代码。之后您需要自行调整 xlim 和 expand_limits() 中使用的具体数值,使图形在您的图形设备等条件下看起来合适。

另外恕我直言,读者很难从这张图中轻松获取大量信息。东北部的点大多重叠,标签太多,即使间距安排得非常好,要逐一对应也有些困难。更好的做法可能是放大该区域绘图,避免在图中显示大片没有数据的空白,或以其他方式提高图的易读性。

library(tidyverse)
library(ggrepel)
library(usmap)

#' Build a repelled text-label layer constrained to one side of the map
#'
#' Creates a `geom_text_repel()` layer labelling each row of `d` with its
#' `INSTNM`. The horizontal label limits are chosen from the data: when every
#' non-missing `x` in `d` is positive, `east_xlim` is used; otherwise
#' `west_xlim` is used. Call it once per side with data already split by the
#' sign of `x`.
#'
#' @param d Data frame with columns `x`, `y` and `INSTNM`.
#' @param east_xlim Length-2 numeric passed as `xlim` when all `x` values are
#'   positive. Defaults to `c(2.5e6, NA)`.
#' @param west_xlim Length-2 numeric passed as `xlim` otherwise. Defaults to
#'   `c(NA, -2e6)`.
#' @return The layer object returned by `geom_text_repel()`, to be added to a
#'   plot with `+`.
college_layers <- function(d,
                           east_xlim = c(2.5e6, NA),
                           west_xlim = c(NA, -2e6)) {
  # na.rm = TRUE keeps a missing x from turning the condition into NA, which
  # `if` would reject with an error.
  all_east <- all(d$x > 0, na.rm = TRUE)
  label_xlim <- if (all_east) east_xlim else west_xlim

  geom_text_repel(
    data = d,
    aes(x, y, label = INSTNM),
    xlim = label_xlim,
    # Infinite limits: see the ggrepel documentation on xlim/ylim, where
    # Inf/-Inf disables repulsion away from the panel edges.
    ylim = c(-Inf, Inf),
    size = 3,
    force = 20,
    box.padding = 0.3,
    max.overlaps = 30,
    point.padding = NA,
    alpha = 1.0,
    min.segment.length = 0.1,
    segment.color = "black",
    segment.size = 1,
    seed = 1000
  )
}

# Draw the base map and the enrollment-sized points, then add one label layer
# for the colleges with x > 0 and another for those with x < 0.
plot_usmap(fill = "light blue", alpha = 0.5) +
  geom_point(
    mapping = aes(x = x, y = y, size = UGDS),
    data = d,
    alpha = 0.75,
    color = "red"
  ) +
  college_layers(d = dplyr::filter(d, x > 0)) +
  college_layers(d = dplyr::filter(d, x < 0)) +
  # Extend both axes beyond the map so there is room for the labels.
  expand_limits(
    x = c(-3.9e6, 4.6e6),
    y = c(-3e6, 2e6)
  ) +
  labs(
    size = "Undergrad Enrollment",
    title = "Select Colleges"
  ) +
  theme(
    plot.title = element_text(hjust = 0.5),
    legend.direction = "horizontal",
    legend.position = c(0.35, 0)
  )

由 reprex package (v2.0.1) 于 2022-04-01 创建

数据:

# Example data used by the plotting code: an 18-row data.frame with one row
# per college.
#   INSTNM - institution name (used as the label text)
#   x, y   - plot coordinates passed to aes(x, y)
#   UGDS   - value mapped to point size (legend title "Undergrad Enrollment")
d <- structure(list(INSTNM = c("Princeton University", "Stanford University", 
"Duke University", "University of Pennsylvania", "Massachusetts Institute of Technology", 
"Georgetown University", "California Institute of Technology", 
"Harvey Mudd College", "Bentley University", "Carnegie Mellon University", 
"Yale University", "Cornell University", "Columbia University in the City of New York", 
"Boston College", "Stevens Institute of Technology", "Lehigh University", 
"Massachusetts Maritime Academy", "Harvard University"), x = c(2107384.76948701, 
-1933340.27810509, 1876178.25472949, 2077243.02501463, 2314261.77712267, 
1955381.08673633, -1660141.85673732, -1623368.30493136, 2303424.70345276, 
1678023.03854027, 2211596.23078863, 1896995.53745184, 2147624.50302849, 
2309370.68277906, 2144734.86774305, 2041573.64168227, 2373567.48443726, 
2311783.20749272), y = c(-188894.792987744, -582296.149881856, 
-762721.806918975, -245389.810253038, 123275.753360416, -404107.357328073, 
-1027748.36033576, -1039201.65863312, 122405.777575308, -300870.762534603, 
-39714.5927185968, -7748.73302456512, -121333.925485063, 118650.586978148, 
-129820.607837031, -179439.260821836, 71069.0976923304, 124173.1993115
), UGDS = c(5308, 6994, 6546, 10774, 4516, 7141, 938, 893, 4157, 
6535, 6089, 14976, 8221, 9637, 3641, 5164, 1654, 7547)), class = "data.frame", row.names = c(NA, 
-18L))