使用 geopandas (Python) 从空间数据框进行空间分箱

Question

我想进行空间分箱（使用中位数作为聚合函数）从包含在经度和纬度位置测量的污染物值的 CSV 文件开始。
生成的地图应该是这样的：

但适用于应用于城市范围的数据。在这方面，我发现这个 tutorial 接近我想要做的事情，但我无法获得想要的结果。我认为我遗漏了一些有关如何正确使用 dissolve 和绘制结果数据的信息（最好使用 Folium）任何有用的示例代码？

Answer 1

您还没有提供示例数据。所以我使用全球地震作为范围/范围的加利福尼亚点和几何图形
使用shapely.geometry.box()
我展示了 中位数 的使用以及另一个 aggfunc 来证明可以计算多个指标
已使用 folium 绘图。此功能是 geopandas 0.10.0 https://geopandas.org/en/stable/docs/user_guide/interactive_mapping.html

import geopandas as gpd
import shapely.geometry
import numpy as np

# equivalent of CSV, all earthquake points globally
gdf_e = gpd.read_file(
    "https://earthquake.usgs.gov/earthquakes/feed/v1.0/summary/all_month.geojson"
)

# get geometry of bounding area.  Have selected a state rather than a city
gdf_CA = gpd.read_file(
    "https://raw.githubusercontent.com/glynnbird/usstatesgeojson/master/california.geojson"
).loc[:, ["geometry"]]

BOXES = 50
a, b, c, d = gdf_CA.total_bounds

# create a grid for Califormia, could be a city
gdf_grid = gpd.GeoDataFrame(
    geometry=[
        shapely.geometry.box(minx, miny, maxx, maxy)
        for minx, maxx in zip(np.linspace(a, c, BOXES), np.linspace(a, c, BOXES)[1:])
        for miny, maxy in zip(np.linspace(b, d, BOXES), np.linspace(b, d, BOXES)[1:])
    ],
    crs="epsg:4326",
)

# remove grid boxes created outside actual geometry
gdf_grid = gdf_grid.sjoin(gdf_CA).drop(columns="index_right")

# get earthquakes that have occured within one of the grid geometries
gdf_e_CA = gdf_e.loc[:, ["geometry", "mag"]].sjoin(gdf_grid)
# get median magnitude of eargquakes in grid
gdf_grid = gdf_grid.join(
    gdf_e_CA.dissolve(by="index_right", aggfunc="median").drop(columns="geometry")
)
# how many earthquakes in the grid
gdf_grid = gdf_grid.join(
    gdf_e_CA.dissolve(by="index_right", aggfunc=lambda d: len(d))
    .drop(columns="geometry")
    .rename(columns={"mag": "number"})
)

# drop grids geometries that have no measures and create folium map
m = gdf_grid.dropna().explore(column="mag")
# for good measure - boundary on map too
gdf_CA["geometry"].apply(lambda g: shapely.geometry.MultiLineString([p.exterior for p in g.geoms])).explore(m=m)

Answer 2

我想将 pandas DataFrame 转换为启用空间的 geopandas 一个，如：

df=pd.read_csv('../Desktop/test_esri.csv')
df.head()

然后转换使用：

gdf = geopandas.GeoDataFrame(
    df, geometry=geopandas.points_from_xy(df.long, df.lat))
from pyproj import crs
crs_epsg = crs.CRS.from_epsg(4326)
gdf=gdf.set_crs('epsg:4326')

然后我想叠加一个空间网格：

import numpy as np
import shapely
from pyproj import crs
# total area for the grid
xmin, ymin, xmax, ymax= gdf.total_bounds
# how many cells across and down
n_cells=30
cell_size = (xmax-xmin)/n_cells
# projection of the grid
# crs = "+proj=sinu +lon_0=0 +x_0=0 +y_0=0 +a=6371007.181 +b=6371007.181 +units=m +no_defs"
# create the cells in a loop
grid_cells = []
for x0 in np.arange(xmin, xmax+cell_size, cell_size ):
    for y0 in np.arange(ymin, ymax+cell_size, cell_size):
        # bounds
        x1 = x0-cell_size
        y1 = y0+cell_size
        grid_cells.append( shapely.geometry.box(x0, y0, x1, y1)  )
cell = geopandas.GeoDataFrame(grid_cells, columns=['geometry'], 
                                 crs=crs.CRS('epsg:4326'))

然后将网格与 geopandas 数据框合并：

merged = geopandas.sjoin(gdf, cell, how='left', predicate='within')

最终在“溶解”中计算所需的指标：

# Compute stats per grid cell -- aggregate fires to grid cells with dissolve
dissolve = merged.dissolve(by="index_right", aggfunc="median")

但我想我在“单元格”网格上做错了什么，我想不通！！可以找到使用的 csv 文件的摘录 here。

Answer 3

感谢@Rob Raymond，
终于用下面的代码解决了：

import pandas as pd
import geopandas as gpd
import pyproj
import matplotlib.pyplot as plt
import numpy as np
import shapely
from folium import plugins

df=pd.read_csv('../Desktop/test_esri.csv')
gdf_monica = gpd.GeoDataFrame(
    df, geometry=gpd.points_from_xy(df.long, df.lat))
gdf_monica=gdf_monica.set_crs('epsg:4326')

gdf_area = gpd.read_file('https://raw.githubusercontent.com/openpolis/geojson-italy/master/geojson/limits_IT_municipalities.geojson')#.loc[:, ["geometry"]]

gdf_area =gdf_area[gdf_area['name']=='Portici'].loc[:,['geometry']]

BOXES = 50
a, b, c, d = gdf_area.total_bounds


gdf_grid = gpd.GeoDataFrame(
    geometry=[
        shapely.geometry.box(minx, miny, maxx, maxy)
        for minx, maxx in zip(np.linspace(a, c, BOXES), np.linspace(a, c, BOXES)[1:])
        for miny, maxy in zip(np.linspace(b, d, BOXES), np.linspace(b, d, BOXES)[1:])
    ],
    crs="epsg:4326",
)

# remove grid boxes created outside actual geometry
gdf_grid = gdf_grid.sjoin(gdf_area).drop(columns="index_right")


gdf_monica_binned = gdf_monica.loc[:, ["geometry", "CO"]].sjoin(gdf_grid)
# get median magnitude of CO pollutant
gdf_grid = gdf_grid.join(
    gdf_monica_binned.dissolve(by="index_right", aggfunc="median").drop(columns="geometry")
)
# how many earthquakes in the grid
gdf_grid = gdf_grid.join(
    gdf_monica_binned.dissolve(by="index_right", aggfunc=lambda d: len(d))
    .drop(columns="geometry")
    .rename(columns={"CO": "number"})
)

# drop grids geometries that have no measures and create folium map
m = gdf_grid.dropna().explore(column="CO")
# for good measure - boundary on map too
gdf_area["geometry"].apply(lambda g: shapely.geometry.MultiLineString([p.exterior for p in g.geoms])).explore(m=m)

产生：

如您所知，我对空间分析知之甚少。如果不使用描述感兴趣点所在的几何形状的 geojson 数据，我将无法获得正确的结果。如果有人可以添加更多见解...谢谢！

使用 geopandas (Python) 从空间数据框进行空间分箱

Spatial binning from a spatial dataframe using geopandas (Python)

python

geospatial

binning

geopandas