在多列上聚合 sf 对象
Aggregating sf object over multiple columns
我正在处理一些地理参考工业数据。坐标系指某产业项目所在城市的质心。
structure(list(date = c("01dec2013", "01jul2003", "01nov2008",
"01dec2017", "01dec2017", "01dec2003"), company = c("Shwe Taung",
"PetroChina Exploration and Development", "Repsol SA", "Repsol SA",
"Ipsen Pharmaceutical", "Ceva Laval"), parent_company = c("Shwe Taung",
"China National Petroleum (CNPC)", "Repsol SA", "Repsol SA",
"Ipsen Pharmaceutical", "Ceva Sante Animale"), Website = c("www.shwetaunggroup.com",
"www.cnpc.com.cn", "www.repsol.com", "www.repsol.com", "www.ipsen.com",
"www.ceva.com"), revenues_usd_ml = c(NA, 394554.53, 53215.45,
53215.45, 1760.671, 967.152), Headcount = c(NA, 1396144L, 24634L,
24634L, NA, 3500L), r_d_exp = c(NA, NA, 77.67, 77.67, NA, NA),
est_year = c(NA, 1988L, 1927L, 1927L, 1929L, 1989L), o_country = c("Myanmar",
"China", "Spain", "Spain", "France", "France"), o_state = c("Rangoon (Yangon)",
"Beijing Municipality", "Comunidad de Madrid", "Comunidad de Madrid",
"Ile-de-France", "Sud-Ouest (FR)"), o_admin = c("Not Specified",
"Not Specified", "Madrid", "Madrid", "Ile-de-France", "Not Specified"
), o_city = c("Rangoon (Yangon)", "Beijing", "Madrid", "Madrid",
"Paris", "Not Specified"), country = c("Algeria", "Algeria",
"Algeria", "Algeria", "Algeria", "Algeria"), state = c("Adrar",
"Adrar", "Adrar", "Adrar", "Adrar", "Adrar"), region = c("Not Specified",
"Not Specified", "Not Specified", "Not Specified", "Not Specified",
"Not Specified"), city = c("Adrar", "Adrar", "Reggane", "Reggane",
"Sidi Abdallah", "Sidi Abdallah"), free_zone = c("", "",
"", "", "", ""), relocation = c("", "", "", "", "", ""),
sector = c("Building materials", "Coal, oil & gas", "Coal, oil & gas",
"Coal, oil & gas", "Pharmaceuticals", "Healthcare"), sub_sector = c("Cement & concrete products",
"Oil & gas extraction", "Oil & gas extraction", "Oil & gas extraction",
"Pharmaceutical preparations", "Other (Healthcare)"), cluster = c("Construction",
"Energy", "Energy", "Energy", "Life sciences", "Life sciences"
), activity = c("Manufacturing", "Extraction", "Extraction",
"Extraction", "Manufacturing", "Manufacturing"), fdi_jobs = c(351L,
145L, 235L, 227L, 150L, 45L), est_fdi_jobs = c("Yes", "Yes",
"Yes", "Yes", "No", "No"), capital = c(139.9, 350, 565, 299.7,
29.55, 2.5), est_capital = c("Yes", "No", "No", "Yes", "No",
"No"), fdi_type = c("New", "New", "New", "Expansion", "New",
"New"), fdi_status = c("Announced", "Announced", "Announced",
"Opened", "Announced", "Opened"), source = c("geocode", "geocode",
"opencagegeo", "opencagegeo", "opencagegeo", "opencagegeo"
), year = c(2013L, 2003L, 2008L, 2017L, 2017L, 2003L), code_d = c(12L,
12L, 12L, 12L, 12L, 12L), income_d = c("MIDLW", "MIDLW",
"MIDLW", "MIDLW", "MIDLW", "MIDLW"), continent_d = c("Africa",
"Africa", "Africa", "Africa", "Africa", "Africa"), lang_d = c("Arabic",
"Arabic", "Arabic", "Arabic", "Arabic", "Arabic"), landlocked = c(0L,
0L, 0L, 0L, 0L, 0L), iso_d = c("DZA", "DZA", "DZA", "DZA",
"DZA", "DZA"), isic = c(26L, 11L, 11L, 11L, 24L, 85L), isic4 = c(2695L,
1110L, 1110L, 1110L, 2411L, 8519L), sector_eora = c("Petroleum, Chemical and Non-Metallic Mineral Products",
"Mining and Quarrying", "Mining and Quarrying", "Mining and Quarrying",
"Petroleum, Chemical and Non-Metallic Mineral Products",
"Mining and Quarrying"), n_proj = c(1, 1, 1, 1, 1, 1), geometry = structure(list(
structure(c(-0.287488, 27.87338), class = c("XY", "POINT",
"sfg")), structure(c(-0.287488, 27.87338), class = c("XY",
"POINT", "sfg")), structure(c(0.1751507, 26.7207267), class = c("XY",
"POINT", "sfg")), structure(c(0.1751507, 26.7207267), class = c("XY",
"POINT", "sfg")), structure(c(5.0152099, 35.726173), class = c("XY",
"POINT", "sfg")), structure(c(5.0152099, 35.726173), class = c("XY",
"POINT", "sfg"))), class = c("sfc_POINT", "sfc"), precision = 0, bbox = structure(c(xmin = -0.287488,
ymin = 26.7207267, xmax = 5.0152099, ymax = 35.726173), class = "bbox"), crs = structure(list(
input = "+proj=longlat +datum=WGS84 +ellps=WGS84 +towgs84=0,0,0",
wkt = "BOUNDCRS[\n SOURCECRS[\n GEOGCRS[\"unknown\",\n DATUM[\"World Geodetic System 1984\",\n ELLIPSOID[\"WGS 84\",6378137,298.257223563,\n LENGTHUNIT[\"metre\",1]],\n ID[\"EPSG\",6326]],\n PRIMEM[\"Greenwich\",0,\n ANGLEUNIT[\"degree\",0.0174532925199433],\n ID[\"EPSG\",8901]],\n CS[ellipsoidal,2],\n AXIS[\"longitude\",east,\n ORDER[1],\n ANGLEUNIT[\"degree\",0.0174532925199433,\n ID[\"EPSG\",9122]]],\n AXIS[\"latitude\",north,\n ORDER[2],\n ANGLEUNIT[\"degree\",0.0174532925199433,\n ID[\"EPSG\",9122]]]]],\n TARGETCRS[\n GEOGCRS[\"WGS 84\",\n DATUM[\"World Geodetic System 1984\",\n ELLIPSOID[\"WGS 84\",6378137,298.257223563,\n LENGTHUNIT[\"metre\",1]]],\n PRIMEM[\"Greenwich\",0,\n ANGLEUNIT[\"degree\",0.0174532925199433]],\n CS[ellipsoidal,2],\n AXIS[\"latitude\",north,\n ORDER[1],\n ANGLEUNIT[\"degree\",0.0174532925199433]],\n AXIS[\"longitude\",east,\n ORDER[2],\n ANGLEUNIT[\"degree\",0.0174532925199433]],\n ID[\"EPSG\",4326]]],\n ABRIDGEDTRANSFORMATION[\"Transformation from unknown to WGS84\",\n METHOD[\"Geocentric translations (geog2D domain)\",\n ID[\"EPSG\",9603]],\n PARAMETER[\"X-axis translation\",0,\n ID[\"EPSG\",8605]],\n PARAMETER[\"Y-axis translation\",0,\n ID[\"EPSG\",8606]],\n PARAMETER[\"Z-axis translation\",0,\n ID[\"EPSG\",8607]]]]"), class = "crs"), n_empty = 0L)), sf_column = "geometry", agr = structure(c(date = NA_integer_,
company = NA_integer_, parent_company = NA_integer_, Website = NA_integer_,
revenues_usd_ml = NA_integer_, Headcount = NA_integer_, r_d_exp = NA_integer_,
est_year = NA_integer_, o_country = NA_integer_, o_state = NA_integer_,
o_admin = NA_integer_, o_city = NA_integer_, country = NA_integer_,
state = NA_integer_, region = NA_integer_, city = NA_integer_,
free_zone = NA_integer_, relocation = NA_integer_, sector = NA_integer_,
sub_sector = NA_integer_, cluster = NA_integer_, activity = NA_integer_,
fdi_jobs = NA_integer_, est_fdi_jobs = NA_integer_, capital = NA_integer_,
est_capital = NA_integer_, fdi_type = NA_integer_, fdi_status = NA_integer_,
source = NA_integer_, year = NA_integer_, code_d = NA_integer_,
income_d = NA_integer_, continent_d = NA_integer_, lang_d = NA_integer_,
landlocked = NA_integer_, iso_d = NA_integer_, isic = NA_integer_,
isic4 = NA_integer_, sector_eora = NA_integer_, n_proj = NA_integer_
), .Label = c("constant", "aggregate", "identity"), class = "factor"), row.names = c(NA,
6L), class = c("sf", "data.frame"))
我想统计每个部门(变量:sector
)每个点城市(变量:geometry
)的项目数(变量: n_proj
)。
我的想法是aggregate
两列:
FDI_sf_ag1 <- aggregate(FDI_sf["n_proj"] ~ FDI_sf["geometry"] + FDI_sf["sector"], FDI_sf, sum)
但是它返回了这个错误:
invalid type (list) for variable 'FDI_sf["n_proj"]'
然后,我重新尝试取消列出列:
FDI_sf_ag1 <- aggregate(unlist(FDI_sf["n_proj"]) ~ cbind(unlist(FDI_sf["geometry"]),unlist(FDI_sf["sector"])), FDI_sf, sum)
但它返回另一个错误:
Error in aggregate.data.frame(lhs, mf[-1L], FUN = FUN, ...) : arguments must have same length In addition: Warning message: In cbind(unlist(FDI_sf["geometry"]), unlist(FDI_sf["sector"])) : number of rows of result is not a multiple of vector length (arg 1)
问题:如何聚合 n_project
几何(sfc 对象)和扇区?
aggregate(fdi_sf[, "n_proj"],
with(fdi_sf, list(sector=sector, city=city)),
sum)
Simple feature collection with 5 features and 3 fields
Attribute-geometry relationship: 0 constant, 1 aggregate, 2 identity
Geometry type: POINT
Dimension: XY
Bounding box: xmin: -0.287488 ymin: 26.72073 xmax: 5.01521 ymax: 35.72617
CRS: +proj=longlat +datum=WGS84 +ellps=WGS84 +towgs84=0,0,0
sector city n_proj
1 Building materials Adrar 1
2 Coal, oil & gas Adrar 1
3 Coal, oil & gas Reggane 2
4 Healthcare Sidi Abdallah 1
5 Pharmaceuticals Sidi Abdallah 1
geometry
1 POINT (-0.287488 27.87338)
2 POINT (-0.287488 27.87338)
3 POINT (0.1751507 26.72073)
4 POINT (5.01521 35.72617)
5 POINT (5.01521 35.72617)
我正在处理一些地理参考工业数据。坐标系指某产业项目所在城市的质心。
structure(list(date = c("01dec2013", "01jul2003", "01nov2008",
"01dec2017", "01dec2017", "01dec2003"), company = c("Shwe Taung",
"PetroChina Exploration and Development", "Repsol SA", "Repsol SA",
"Ipsen Pharmaceutical", "Ceva Laval"), parent_company = c("Shwe Taung",
"China National Petroleum (CNPC)", "Repsol SA", "Repsol SA",
"Ipsen Pharmaceutical", "Ceva Sante Animale"), Website = c("www.shwetaunggroup.com",
"www.cnpc.com.cn", "www.repsol.com", "www.repsol.com", "www.ipsen.com",
"www.ceva.com"), revenues_usd_ml = c(NA, 394554.53, 53215.45,
53215.45, 1760.671, 967.152), Headcount = c(NA, 1396144L, 24634L,
24634L, NA, 3500L), r_d_exp = c(NA, NA, 77.67, 77.67, NA, NA),
est_year = c(NA, 1988L, 1927L, 1927L, 1929L, 1989L), o_country = c("Myanmar",
"China", "Spain", "Spain", "France", "France"), o_state = c("Rangoon (Yangon)",
"Beijing Municipality", "Comunidad de Madrid", "Comunidad de Madrid",
"Ile-de-France", "Sud-Ouest (FR)"), o_admin = c("Not Specified",
"Not Specified", "Madrid", "Madrid", "Ile-de-France", "Not Specified"
), o_city = c("Rangoon (Yangon)", "Beijing", "Madrid", "Madrid",
"Paris", "Not Specified"), country = c("Algeria", "Algeria",
"Algeria", "Algeria", "Algeria", "Algeria"), state = c("Adrar",
"Adrar", "Adrar", "Adrar", "Adrar", "Adrar"), region = c("Not Specified",
"Not Specified", "Not Specified", "Not Specified", "Not Specified",
"Not Specified"), city = c("Adrar", "Adrar", "Reggane", "Reggane",
"Sidi Abdallah", "Sidi Abdallah"), free_zone = c("", "",
"", "", "", ""), relocation = c("", "", "", "", "", ""),
sector = c("Building materials", "Coal, oil & gas", "Coal, oil & gas",
"Coal, oil & gas", "Pharmaceuticals", "Healthcare"), sub_sector = c("Cement & concrete products",
"Oil & gas extraction", "Oil & gas extraction", "Oil & gas extraction",
"Pharmaceutical preparations", "Other (Healthcare)"), cluster = c("Construction",
"Energy", "Energy", "Energy", "Life sciences", "Life sciences"
), activity = c("Manufacturing", "Extraction", "Extraction",
"Extraction", "Manufacturing", "Manufacturing"), fdi_jobs = c(351L,
145L, 235L, 227L, 150L, 45L), est_fdi_jobs = c("Yes", "Yes",
"Yes", "Yes", "No", "No"), capital = c(139.9, 350, 565, 299.7,
29.55, 2.5), est_capital = c("Yes", "No", "No", "Yes", "No",
"No"), fdi_type = c("New", "New", "New", "Expansion", "New",
"New"), fdi_status = c("Announced", "Announced", "Announced",
"Opened", "Announced", "Opened"), source = c("geocode", "geocode",
"opencagegeo", "opencagegeo", "opencagegeo", "opencagegeo"
), year = c(2013L, 2003L, 2008L, 2017L, 2017L, 2003L), code_d = c(12L,
12L, 12L, 12L, 12L, 12L), income_d = c("MIDLW", "MIDLW",
"MIDLW", "MIDLW", "MIDLW", "MIDLW"), continent_d = c("Africa",
"Africa", "Africa", "Africa", "Africa", "Africa"), lang_d = c("Arabic",
"Arabic", "Arabic", "Arabic", "Arabic", "Arabic"), landlocked = c(0L,
0L, 0L, 0L, 0L, 0L), iso_d = c("DZA", "DZA", "DZA", "DZA",
"DZA", "DZA"), isic = c(26L, 11L, 11L, 11L, 24L, 85L), isic4 = c(2695L,
1110L, 1110L, 1110L, 2411L, 8519L), sector_eora = c("Petroleum, Chemical and Non-Metallic Mineral Products",
"Mining and Quarrying", "Mining and Quarrying", "Mining and Quarrying",
"Petroleum, Chemical and Non-Metallic Mineral Products",
"Mining and Quarrying"), n_proj = c(1, 1, 1, 1, 1, 1), geometry = structure(list(
structure(c(-0.287488, 27.87338), class = c("XY", "POINT",
"sfg")), structure(c(-0.287488, 27.87338), class = c("XY",
"POINT", "sfg")), structure(c(0.1751507, 26.7207267), class = c("XY",
"POINT", "sfg")), structure(c(0.1751507, 26.7207267), class = c("XY",
"POINT", "sfg")), structure(c(5.0152099, 35.726173), class = c("XY",
"POINT", "sfg")), structure(c(5.0152099, 35.726173), class = c("XY",
"POINT", "sfg"))), class = c("sfc_POINT", "sfc"), precision = 0, bbox = structure(c(xmin = -0.287488,
ymin = 26.7207267, xmax = 5.0152099, ymax = 35.726173), class = "bbox"), crs = structure(list(
input = "+proj=longlat +datum=WGS84 +ellps=WGS84 +towgs84=0,0,0",
wkt = "BOUNDCRS[\n SOURCECRS[\n GEOGCRS[\"unknown\",\n DATUM[\"World Geodetic System 1984\",\n ELLIPSOID[\"WGS 84\",6378137,298.257223563,\n LENGTHUNIT[\"metre\",1]],\n ID[\"EPSG\",6326]],\n PRIMEM[\"Greenwich\",0,\n ANGLEUNIT[\"degree\",0.0174532925199433],\n ID[\"EPSG\",8901]],\n CS[ellipsoidal,2],\n AXIS[\"longitude\",east,\n ORDER[1],\n ANGLEUNIT[\"degree\",0.0174532925199433,\n ID[\"EPSG\",9122]]],\n AXIS[\"latitude\",north,\n ORDER[2],\n ANGLEUNIT[\"degree\",0.0174532925199433,\n ID[\"EPSG\",9122]]]]],\n TARGETCRS[\n GEOGCRS[\"WGS 84\",\n DATUM[\"World Geodetic System 1984\",\n ELLIPSOID[\"WGS 84\",6378137,298.257223563,\n LENGTHUNIT[\"metre\",1]]],\n PRIMEM[\"Greenwich\",0,\n ANGLEUNIT[\"degree\",0.0174532925199433]],\n CS[ellipsoidal,2],\n AXIS[\"latitude\",north,\n ORDER[1],\n ANGLEUNIT[\"degree\",0.0174532925199433]],\n AXIS[\"longitude\",east,\n ORDER[2],\n ANGLEUNIT[\"degree\",0.0174532925199433]],\n ID[\"EPSG\",4326]]],\n ABRIDGEDTRANSFORMATION[\"Transformation from unknown to WGS84\",\n METHOD[\"Geocentric translations (geog2D domain)\",\n ID[\"EPSG\",9603]],\n PARAMETER[\"X-axis translation\",0,\n ID[\"EPSG\",8605]],\n PARAMETER[\"Y-axis translation\",0,\n ID[\"EPSG\",8606]],\n PARAMETER[\"Z-axis translation\",0,\n ID[\"EPSG\",8607]]]]"), class = "crs"), n_empty = 0L)), sf_column = "geometry", agr = structure(c(date = NA_integer_,
company = NA_integer_, parent_company = NA_integer_, Website = NA_integer_,
revenues_usd_ml = NA_integer_, Headcount = NA_integer_, r_d_exp = NA_integer_,
est_year = NA_integer_, o_country = NA_integer_, o_state = NA_integer_,
o_admin = NA_integer_, o_city = NA_integer_, country = NA_integer_,
state = NA_integer_, region = NA_integer_, city = NA_integer_,
free_zone = NA_integer_, relocation = NA_integer_, sector = NA_integer_,
sub_sector = NA_integer_, cluster = NA_integer_, activity = NA_integer_,
fdi_jobs = NA_integer_, est_fdi_jobs = NA_integer_, capital = NA_integer_,
est_capital = NA_integer_, fdi_type = NA_integer_, fdi_status = NA_integer_,
source = NA_integer_, year = NA_integer_, code_d = NA_integer_,
income_d = NA_integer_, continent_d = NA_integer_, lang_d = NA_integer_,
landlocked = NA_integer_, iso_d = NA_integer_, isic = NA_integer_,
isic4 = NA_integer_, sector_eora = NA_integer_, n_proj = NA_integer_
), .Label = c("constant", "aggregate", "identity"), class = "factor"), row.names = c(NA,
6L), class = c("sf", "data.frame"))
我想统计每个部门(变量:sector
)每个点城市(变量:geometry
)的项目数(变量: n_proj
)。
我的想法是aggregate
两列:
FDI_sf_ag1 <- aggregate(FDI_sf["n_proj"] ~ FDI_sf["geometry"] + FDI_sf["sector"], FDI_sf, sum)
但是它返回了这个错误:
invalid type (list) for variable 'FDI_sf["n_proj"]'
然后,我重新尝试取消列出列:
FDI_sf_ag1 <- aggregate(unlist(FDI_sf["n_proj"]) ~ cbind(unlist(FDI_sf["geometry"]),unlist(FDI_sf["sector"])), FDI_sf, sum)
但它返回另一个错误:
Error in aggregate.data.frame(lhs, mf[-1L], FUN = FUN, ...) : arguments must have same length In addition: Warning message: In cbind(unlist(FDI_sf["geometry"]), unlist(FDI_sf["sector"])) : number of rows of result is not a multiple of vector length (arg 1)
问题:如何聚合 n_project
几何(sfc 对象)和扇区?
aggregate(fdi_sf[, "n_proj"],
with(fdi_sf, list(sector=sector, city=city)),
sum)
Simple feature collection with 5 features and 3 fields
Attribute-geometry relationship: 0 constant, 1 aggregate, 2 identity
Geometry type: POINT
Dimension: XY
Bounding box: xmin: -0.287488 ymin: 26.72073 xmax: 5.01521 ymax: 35.72617
CRS: +proj=longlat +datum=WGS84 +ellps=WGS84 +towgs84=0,0,0
sector city n_proj
1 Building materials Adrar 1
2 Coal, oil & gas Adrar 1
3 Coal, oil & gas Reggane 2
4 Healthcare Sidi Abdallah 1
5 Pharmaceuticals Sidi Abdallah 1
geometry
1 POINT (-0.287488 27.87338)
2 POINT (-0.287488 27.87338)
3 POINT (0.1751507 26.72073)
4 POINT (5.01521 35.72617)
5 POINT (5.01521 35.72617)