使用 geopandas (Python) 从空间数据框进行空间分箱
Spatial binning from a spatial dataframe using geopandas (Python)
我想进行空间分箱(使用中位数作为聚合函数)
从包含在经度和纬度位置测量的污染物值的 CSV 文件开始。
生成的地图应该是这样的:
但适用于应用于城市范围的数据。
在这方面,我发现这个 tutorial 接近我想要做的事情,但我无法获得想要的结果。
我认为我遗漏了一些有关如何正确使用 dissolve
和绘制结果数据的信息(最好使用 Folium
)
任何有用的示例代码?
- 您还没有提供示例数据。所以我使用全球地震作为范围/范围的加利福尼亚点和几何图形
- 使用
shapely.geometry.box()
创建网格很简单
- 我展示了 中位数 的使用以及另一个
aggfunc
来证明可以计算多个指标
- 已使用 folium 绘图。此功能是 geopandas 0.10.0 https://geopandas.org/en/stable/docs/user_guide/interactive_mapping.html
中的新增功能
import geopandas as gpd
import shapely.geometry
import numpy as np
# equivalent of CSV, all earthquake points globally
gdf_e = gpd.read_file(
"https://earthquake.usgs.gov/earthquakes/feed/v1.0/summary/all_month.geojson"
)
# get geometry of bounding area. Have selected a state rather than a city
gdf_CA = gpd.read_file(
"https://raw.githubusercontent.com/glynnbird/usstatesgeojson/master/california.geojson"
).loc[:, ["geometry"]]
BOXES = 50
a, b, c, d = gdf_CA.total_bounds
# create a grid for Califormia, could be a city
gdf_grid = gpd.GeoDataFrame(
geometry=[
shapely.geometry.box(minx, miny, maxx, maxy)
for minx, maxx in zip(np.linspace(a, c, BOXES), np.linspace(a, c, BOXES)[1:])
for miny, maxy in zip(np.linspace(b, d, BOXES), np.linspace(b, d, BOXES)[1:])
],
crs="epsg:4326",
)
# remove grid boxes created outside actual geometry
gdf_grid = gdf_grid.sjoin(gdf_CA).drop(columns="index_right")
# get earthquakes that have occured within one of the grid geometries
gdf_e_CA = gdf_e.loc[:, ["geometry", "mag"]].sjoin(gdf_grid)
# get median magnitude of eargquakes in grid
gdf_grid = gdf_grid.join(
gdf_e_CA.dissolve(by="index_right", aggfunc="median").drop(columns="geometry")
)
# how many earthquakes in the grid
gdf_grid = gdf_grid.join(
gdf_e_CA.dissolve(by="index_right", aggfunc=lambda d: len(d))
.drop(columns="geometry")
.rename(columns={"mag": "number"})
)
# drop grids geometries that have no measures and create folium map
m = gdf_grid.dropna().explore(column="mag")
# for good measure - boundary on map too
gdf_CA["geometry"].apply(lambda g: shapely.geometry.MultiLineString([p.exterior for p in g.geoms])).explore(m=m)
我想将 pandas DataFrame 转换为启用空间的 geopandas 一个,如:
df=pd.read_csv('../Desktop/test_esri.csv')
df.head()
然后转换使用:
gdf = geopandas.GeoDataFrame(
df, geometry=geopandas.points_from_xy(df.long, df.lat))
from pyproj import crs
crs_epsg = crs.CRS.from_epsg(4326)
gdf=gdf.set_crs('epsg:4326')
然后我想叠加一个空间网格:
import numpy as np
import shapely
from pyproj import crs
# total area for the grid
xmin, ymin, xmax, ymax= gdf.total_bounds
# how many cells across and down
n_cells=30
cell_size = (xmax-xmin)/n_cells
# projection of the grid
# crs = "+proj=sinu +lon_0=0 +x_0=0 +y_0=0 +a=6371007.181 +b=6371007.181 +units=m +no_defs"
# create the cells in a loop
grid_cells = []
for x0 in np.arange(xmin, xmax+cell_size, cell_size ):
for y0 in np.arange(ymin, ymax+cell_size, cell_size):
# bounds
x1 = x0-cell_size
y1 = y0+cell_size
grid_cells.append( shapely.geometry.box(x0, y0, x1, y1) )
cell = geopandas.GeoDataFrame(grid_cells, columns=['geometry'],
crs=crs.CRS('epsg:4326'))
然后将网格与 geopandas 数据框合并:
merged = geopandas.sjoin(gdf, cell, how='left', predicate='within')
最终在“溶解”中计算所需的指标:
# Compute stats per grid cell -- aggregate fires to grid cells with dissolve
dissolve = merged.dissolve(by="index_right", aggfunc="median")
但我想我在“单元格”网格上做错了什么,我想不通!!
可以找到使用的 csv 文件的摘录 here。
感谢@Rob Raymond,
终于用下面的代码解决了:
import pandas as pd
import geopandas as gpd
import pyproj
import matplotlib.pyplot as plt
import numpy as np
import shapely
from folium import plugins
df=pd.read_csv('../Desktop/test_esri.csv')
gdf_monica = gpd.GeoDataFrame(
df, geometry=gpd.points_from_xy(df.long, df.lat))
gdf_monica=gdf_monica.set_crs('epsg:4326')
gdf_area = gpd.read_file('https://raw.githubusercontent.com/openpolis/geojson-italy/master/geojson/limits_IT_municipalities.geojson')#.loc[:, ["geometry"]]
gdf_area =gdf_area[gdf_area['name']=='Portici'].loc[:,['geometry']]
BOXES = 50
a, b, c, d = gdf_area.total_bounds
gdf_grid = gpd.GeoDataFrame(
geometry=[
shapely.geometry.box(minx, miny, maxx, maxy)
for minx, maxx in zip(np.linspace(a, c, BOXES), np.linspace(a, c, BOXES)[1:])
for miny, maxy in zip(np.linspace(b, d, BOXES), np.linspace(b, d, BOXES)[1:])
],
crs="epsg:4326",
)
# remove grid boxes created outside actual geometry
gdf_grid = gdf_grid.sjoin(gdf_area).drop(columns="index_right")
gdf_monica_binned = gdf_monica.loc[:, ["geometry", "CO"]].sjoin(gdf_grid)
# get median magnitude of CO pollutant
gdf_grid = gdf_grid.join(
gdf_monica_binned.dissolve(by="index_right", aggfunc="median").drop(columns="geometry")
)
# how many earthquakes in the grid
gdf_grid = gdf_grid.join(
gdf_monica_binned.dissolve(by="index_right", aggfunc=lambda d: len(d))
.drop(columns="geometry")
.rename(columns={"CO": "number"})
)
# drop grids geometries that have no measures and create folium map
m = gdf_grid.dropna().explore(column="CO")
# for good measure - boundary on map too
gdf_area["geometry"].apply(lambda g: shapely.geometry.MultiLineString([p.exterior for p in g.geoms])).explore(m=m)
产生:
如您所知,我对空间分析知之甚少。如果不使用描述感兴趣点所在的几何形状的 geojson 数据,我将无法获得正确的结果。
如果有人可以添加更多见解...谢谢!
我想进行空间分箱(使用中位数作为聚合函数)
从包含在经度和纬度位置测量的污染物值的 CSV 文件开始。
生成的地图应该是这样的:
但适用于应用于城市范围的数据。
在这方面,我发现这个 tutorial 接近我想要做的事情,但我无法获得想要的结果。
我认为我遗漏了一些有关如何正确使用 dissolve
和绘制结果数据的信息(最好使用 Folium
)
任何有用的示例代码?
- 您还没有提供示例数据。所以我使用全球地震作为范围/范围的加利福尼亚点和几何图形
- 使用
shapely.geometry.box()
创建网格很简单
- 我展示了 中位数 的使用以及另一个
aggfunc
来证明可以计算多个指标 - 已使用 folium 绘图。此功能是 geopandas 0.10.0 https://geopandas.org/en/stable/docs/user_guide/interactive_mapping.html 中的新增功能
import geopandas as gpd
import shapely.geometry
import numpy as np
# equivalent of CSV, all earthquake points globally
gdf_e = gpd.read_file(
"https://earthquake.usgs.gov/earthquakes/feed/v1.0/summary/all_month.geojson"
)
# get geometry of bounding area. Have selected a state rather than a city
gdf_CA = gpd.read_file(
"https://raw.githubusercontent.com/glynnbird/usstatesgeojson/master/california.geojson"
).loc[:, ["geometry"]]
BOXES = 50
a, b, c, d = gdf_CA.total_bounds
# create a grid for Califormia, could be a city
gdf_grid = gpd.GeoDataFrame(
geometry=[
shapely.geometry.box(minx, miny, maxx, maxy)
for minx, maxx in zip(np.linspace(a, c, BOXES), np.linspace(a, c, BOXES)[1:])
for miny, maxy in zip(np.linspace(b, d, BOXES), np.linspace(b, d, BOXES)[1:])
],
crs="epsg:4326",
)
# remove grid boxes created outside actual geometry
gdf_grid = gdf_grid.sjoin(gdf_CA).drop(columns="index_right")
# get earthquakes that have occured within one of the grid geometries
gdf_e_CA = gdf_e.loc[:, ["geometry", "mag"]].sjoin(gdf_grid)
# get median magnitude of eargquakes in grid
gdf_grid = gdf_grid.join(
gdf_e_CA.dissolve(by="index_right", aggfunc="median").drop(columns="geometry")
)
# how many earthquakes in the grid
gdf_grid = gdf_grid.join(
gdf_e_CA.dissolve(by="index_right", aggfunc=lambda d: len(d))
.drop(columns="geometry")
.rename(columns={"mag": "number"})
)
# drop grids geometries that have no measures and create folium map
m = gdf_grid.dropna().explore(column="mag")
# for good measure - boundary on map too
gdf_CA["geometry"].apply(lambda g: shapely.geometry.MultiLineString([p.exterior for p in g.geoms])).explore(m=m)
我想将 pandas DataFrame 转换为启用空间的 geopandas 一个,如:
df=pd.read_csv('../Desktop/test_esri.csv')
df.head()
然后转换使用:
gdf = geopandas.GeoDataFrame(
df, geometry=geopandas.points_from_xy(df.long, df.lat))
from pyproj import crs
crs_epsg = crs.CRS.from_epsg(4326)
gdf=gdf.set_crs('epsg:4326')
然后我想叠加一个空间网格:
import numpy as np
import shapely
from pyproj import crs
# total area for the grid
xmin, ymin, xmax, ymax= gdf.total_bounds
# how many cells across and down
n_cells=30
cell_size = (xmax-xmin)/n_cells
# projection of the grid
# crs = "+proj=sinu +lon_0=0 +x_0=0 +y_0=0 +a=6371007.181 +b=6371007.181 +units=m +no_defs"
# create the cells in a loop
grid_cells = []
for x0 in np.arange(xmin, xmax+cell_size, cell_size ):
for y0 in np.arange(ymin, ymax+cell_size, cell_size):
# bounds
x1 = x0-cell_size
y1 = y0+cell_size
grid_cells.append( shapely.geometry.box(x0, y0, x1, y1) )
cell = geopandas.GeoDataFrame(grid_cells, columns=['geometry'],
crs=crs.CRS('epsg:4326'))
然后将网格与 geopandas 数据框合并:
merged = geopandas.sjoin(gdf, cell, how='left', predicate='within')
最终在“溶解”中计算所需的指标:
# Compute stats per grid cell -- aggregate fires to grid cells with dissolve
dissolve = merged.dissolve(by="index_right", aggfunc="median")
但我想我在“单元格”网格上做错了什么,我想不通!! 可以找到使用的 csv 文件的摘录 here。
感谢@Rob Raymond,
终于用下面的代码解决了:
import pandas as pd
import geopandas as gpd
import pyproj
import matplotlib.pyplot as plt
import numpy as np
import shapely
from folium import plugins
df=pd.read_csv('../Desktop/test_esri.csv')
gdf_monica = gpd.GeoDataFrame(
df, geometry=gpd.points_from_xy(df.long, df.lat))
gdf_monica=gdf_monica.set_crs('epsg:4326')
gdf_area = gpd.read_file('https://raw.githubusercontent.com/openpolis/geojson-italy/master/geojson/limits_IT_municipalities.geojson')#.loc[:, ["geometry"]]
gdf_area =gdf_area[gdf_area['name']=='Portici'].loc[:,['geometry']]
BOXES = 50
a, b, c, d = gdf_area.total_bounds
gdf_grid = gpd.GeoDataFrame(
geometry=[
shapely.geometry.box(minx, miny, maxx, maxy)
for minx, maxx in zip(np.linspace(a, c, BOXES), np.linspace(a, c, BOXES)[1:])
for miny, maxy in zip(np.linspace(b, d, BOXES), np.linspace(b, d, BOXES)[1:])
],
crs="epsg:4326",
)
# remove grid boxes created outside actual geometry
gdf_grid = gdf_grid.sjoin(gdf_area).drop(columns="index_right")
gdf_monica_binned = gdf_monica.loc[:, ["geometry", "CO"]].sjoin(gdf_grid)
# get median magnitude of CO pollutant
gdf_grid = gdf_grid.join(
gdf_monica_binned.dissolve(by="index_right", aggfunc="median").drop(columns="geometry")
)
# how many earthquakes in the grid
gdf_grid = gdf_grid.join(
gdf_monica_binned.dissolve(by="index_right", aggfunc=lambda d: len(d))
.drop(columns="geometry")
.rename(columns={"CO": "number"})
)
# drop grids geometries that have no measures and create folium map
m = gdf_grid.dropna().explore(column="CO")
# for good measure - boundary on map too
gdf_area["geometry"].apply(lambda g: shapely.geometry.MultiLineString([p.exterior for p in g.geoms])).explore(m=m)
产生:
如您所知,我对空间分析知之甚少。如果不使用描述感兴趣点所在的几何形状的 geojson 数据,我将无法获得正确的结果。 如果有人可以添加更多见解...谢谢!