如何从不同距离的TIF文件中提取不同的土地利用面积?
How to extracts the different land use areas from TIF file within different distances?
我有一个 TIF 格式的土地利用光栅数据文件和一个 SHP 格式的矢量点文件,想使用 python 库(如 geopandas)编写一个 Python 脚本来创建具有不同的缓冲区矢量点的距离,并自动从不同距离内的 TIF 文件中提取不同的土地利用区域。我该怎么做?
非常感谢!
- 已使用此处的土地使用 TIF https://www.eea.europa.eu/data-and-maps/data/global-land-cover-250m
- 这真的归结为一些技巧
- 与 CRS 非常一致。已将形状文件投影到 TIF 的 CRS
- 鉴于此 TIF CRS 以米为单位 geopandas / shapely
buffer()
有意义
- 终于使用了这个文档化的技术https://rasterio.readthedocs.io/en/latest/topics/masking-by-shapefile.html#
完整代码
from pathlib import Path
from zipfile import ZipFile
import requests
import rasterio, rasterio.mask, rasterio.plot
import geopandas as gpd
import shapely
import numpy as np
import random
import matplotlib.pyplot as plt
# geta raster..
url = "https://www.eea.europa.eu/data-and-maps/data/global-land-cover-250m/zipped-tif-files-250m-raster/zipped-tif-files-250m-raster/at_download/file"
f = Path.cwd().joinpath("land-cover.zip")
if not f.exists():
r = requests.get(url, stream=True, headers={"User-Agent": "XY"})
with open(f, "wb") as fd:
for chunk in r.iter_content(chunk_size=128):
fd.write(chunk)
zfile = ZipFile(f)
tif = [f for f in zfile.infolist() if "/" not in f.filename]
zfile.extractall(path=f.stem, members=tif)
# open the raster
src = rasterio.open(list(Path.cwd().joinpath(f.stem).glob("*.tif"))[0])
# some points - cities within bounds of GeoTIFF
gdf = (
gpd.read_file(gpd.datasets.get_path("naturalearth_cities"))
.to_crs(src.crs)
.clip(shapely.geometry.box(*src.bounds))
).sample(15)
# random distance for each point
gdf["geometry"] = gdf["geometry"].apply(lambda p: p.buffer(random.randint(10**5, 5*10**5)))
# shapes to mask raster
shapes = [f["geometry"] for f in gdf["geometry"].__geo_interface__["features"]]
# mask the raster, with points buffered to distance
out_image, out_transform = rasterio.mask.mask(src, shapes, crop=True)
out_meta = src.meta
# save and reload masked raster
out_meta.update({"driver": "GTiff",
"height": out_image.shape[1],
"width": out_image.shape[2],
"transform": out_transform})
f = Path.cwd().joinpath("land-cover/masked")
if not f.exists(): f.mkdir()
f = f.joinpath("land-maksed.tif")
with rasterio.open(f, "w", **out_meta) as dest:
dest.write(out_image)
msk = rasterio.open(f)
# let's have a look at what we've done..
fig, ax = plt.subplots()
# transform rasterio plot to real world coords
extent=msk.bounds
ax = rasterio.plot.show(msk, extent=extent, ax=ax)
输出
不同大小的圆圈使用了随机点的随机缓冲半径。
- 你的问题表明你想从 GeoTIFF 中提取(剪辑)土地使用数据
- comment 然后声明您需要与缓冲点相关联的使用像素计数。这个答案是这样做的
- 剪辑 xarray 到缓冲点
- 运行 分析这个剪裁的 xarray 以获得计数
- 将使用列添加回 geopandas 数据框作为列
import rioxarray
from pathlib import Path
import geopandas as gpd
import shapely.geometry
import pandas as pd
import numpy as np
# geta raster..
url = "https://www.eea.europa.eu/data-and-maps/data/global-land-cover-250m/zipped-tif-files-250m-raster/zipped-tif-files-250m-raster/at_download/file"
f = Path.cwd().joinpath("land-cover.zip")
if not f.exists():
r = requests.get(url, stream=True, headers={"User-Agent": "XY"})
with open(f, "wb") as fd:
for chunk in r.iter_content(chunk_size=128):
fd.write(chunk)
zfile = ZipFile(f)
tif = [f for f in zfile.infolist() if "/" not in f.filename]
zfile.extractall(path=f.stem, members=tif)
# open the raster
f = list(Path.cwd().joinpath(f.stem).glob("*.tif"))[0]
xds = rioxarray.open_rasterio(f)
# reduce size of tif for purpose of dev / testing turnaround speed
devmode = True
if devmode == True:
_f = f.parent.joinpath("smaller/smaller.tif")
if not _f.parent.exists():
_f.parent.mkdir()
if not _f.exists():
box = shapely.geometry.box(
*shapely.geometry.box(*xds.rio.bounds()).centroid.buffer(5 * 10**5).bounds
)
xa_cut = xds.rio.clip([box.__geo_interface__], drop=True)
xa_cut.rio.to_raster(_f)
xds = rioxarray.open_rasterio(_f)
# some points - cities within bounds of GeoTIFF
gdf = (
gpd.read_file(gpd.datasets.get_path("naturalearth_cities"))
.to_crs(xds.rio.crs)
.clip(shapely.geometry.box(*xds.rio.bounds()))
).reset_index(drop=True)
# random distance for each point
np.random.seed(7)
gdf["radius"] = np.random.randint(5 * 10**4, 10**5, len(gdf))
gdf["geometry"] = gdf.apply(lambda r: r["geometry"].buffer(r["radius"]), axis=1)
def land_usage(g):
xa = xds.rio.clip([g], drop=True)
xa = xa.where(xa != 0, drop=True)
counts = xa.groupby(xa).count().drop("spatial_ref").squeeze().to_dict()
return {
str(int(k)): v
for k, v in zip(counts["coords"]["group"]["data"], counts["data"])
}
# add counts of land use as colums to buffered points
gdf = gdf.join(gdf["geometry"].apply(land_usage).apply(pd.Series))
输出
name
radius
2
4
6
13
16
20
22
14
15
17
19
0
Budapest
99689
70923
2984
2083
2342
347318
1055
17074
nan
nan
nan
nan
1
Bratislava
60742
19247
8750
1085
629
146807
2281
6355
nan
nan
nan
nan
2
Vienna
88467
63181
66289
13330
837
237979
2525
8631
nan
nan
nan
nan
3
Prague
50919
2301
20483
3799
15
98426
nan
5079
nan
nan
nan
nan
4
Berlin
63927
1719
44207
18960
21410
78925
4171
18038
3337
251
14070
nan
5
København
84140
6460
6326
11066
1732
141932
133341
16274
665
683
11198
6
6
Warsaw
52583
1998
11428
9947
10557
77328
1700
8836
5251
109
11599
nan
7
Vilnius
78232
4826
41176
23952
17708
86706
3889
2637
4547
62
21022
nan
我有一个 TIF 格式的土地利用光栅数据文件和一个 SHP 格式的矢量点文件,想使用 python 库(如 geopandas)编写一个 Python 脚本来创建具有不同的缓冲区矢量点的距离,并自动从不同距离内的 TIF 文件中提取不同的土地利用区域。我该怎么做? 非常感谢!
- 已使用此处的土地使用 TIF https://www.eea.europa.eu/data-and-maps/data/global-land-cover-250m
- 这真的归结为一些技巧
- 与 CRS 非常一致。已将形状文件投影到 TIF 的 CRS
- 鉴于此 TIF CRS 以米为单位 geopandas / shapely
buffer()
有意义 - 终于使用了这个文档化的技术https://rasterio.readthedocs.io/en/latest/topics/masking-by-shapefile.html#
完整代码
from pathlib import Path
from zipfile import ZipFile
import requests
import rasterio, rasterio.mask, rasterio.plot
import geopandas as gpd
import shapely
import numpy as np
import random
import matplotlib.pyplot as plt
# geta raster..
url = "https://www.eea.europa.eu/data-and-maps/data/global-land-cover-250m/zipped-tif-files-250m-raster/zipped-tif-files-250m-raster/at_download/file"
f = Path.cwd().joinpath("land-cover.zip")
if not f.exists():
r = requests.get(url, stream=True, headers={"User-Agent": "XY"})
with open(f, "wb") as fd:
for chunk in r.iter_content(chunk_size=128):
fd.write(chunk)
zfile = ZipFile(f)
tif = [f for f in zfile.infolist() if "/" not in f.filename]
zfile.extractall(path=f.stem, members=tif)
# open the raster
src = rasterio.open(list(Path.cwd().joinpath(f.stem).glob("*.tif"))[0])
# some points - cities within bounds of GeoTIFF
gdf = (
gpd.read_file(gpd.datasets.get_path("naturalearth_cities"))
.to_crs(src.crs)
.clip(shapely.geometry.box(*src.bounds))
).sample(15)
# random distance for each point
gdf["geometry"] = gdf["geometry"].apply(lambda p: p.buffer(random.randint(10**5, 5*10**5)))
# shapes to mask raster
shapes = [f["geometry"] for f in gdf["geometry"].__geo_interface__["features"]]
# mask the raster, with points buffered to distance
out_image, out_transform = rasterio.mask.mask(src, shapes, crop=True)
out_meta = src.meta
# save and reload masked raster
out_meta.update({"driver": "GTiff",
"height": out_image.shape[1],
"width": out_image.shape[2],
"transform": out_transform})
f = Path.cwd().joinpath("land-cover/masked")
if not f.exists(): f.mkdir()
f = f.joinpath("land-maksed.tif")
with rasterio.open(f, "w", **out_meta) as dest:
dest.write(out_image)
msk = rasterio.open(f)
# let's have a look at what we've done..
fig, ax = plt.subplots()
# transform rasterio plot to real world coords
extent=msk.bounds
ax = rasterio.plot.show(msk, extent=extent, ax=ax)
输出
不同大小的圆圈使用了随机点的随机缓冲半径。
- 你的问题表明你想从 GeoTIFF 中提取(剪辑)土地使用数据
- comment 然后声明您需要与缓冲点相关联的使用像素计数。这个答案是这样做的
- 剪辑 xarray 到缓冲点
- 运行 分析这个剪裁的 xarray 以获得计数
- 将使用列添加回 geopandas 数据框作为列
import rioxarray
from pathlib import Path
import geopandas as gpd
import shapely.geometry
import pandas as pd
import numpy as np
# geta raster..
url = "https://www.eea.europa.eu/data-and-maps/data/global-land-cover-250m/zipped-tif-files-250m-raster/zipped-tif-files-250m-raster/at_download/file"
f = Path.cwd().joinpath("land-cover.zip")
if not f.exists():
r = requests.get(url, stream=True, headers={"User-Agent": "XY"})
with open(f, "wb") as fd:
for chunk in r.iter_content(chunk_size=128):
fd.write(chunk)
zfile = ZipFile(f)
tif = [f for f in zfile.infolist() if "/" not in f.filename]
zfile.extractall(path=f.stem, members=tif)
# open the raster
f = list(Path.cwd().joinpath(f.stem).glob("*.tif"))[0]
xds = rioxarray.open_rasterio(f)
# reduce size of tif for purpose of dev / testing turnaround speed
devmode = True
if devmode == True:
_f = f.parent.joinpath("smaller/smaller.tif")
if not _f.parent.exists():
_f.parent.mkdir()
if not _f.exists():
box = shapely.geometry.box(
*shapely.geometry.box(*xds.rio.bounds()).centroid.buffer(5 * 10**5).bounds
)
xa_cut = xds.rio.clip([box.__geo_interface__], drop=True)
xa_cut.rio.to_raster(_f)
xds = rioxarray.open_rasterio(_f)
# some points - cities within bounds of GeoTIFF
gdf = (
gpd.read_file(gpd.datasets.get_path("naturalearth_cities"))
.to_crs(xds.rio.crs)
.clip(shapely.geometry.box(*xds.rio.bounds()))
).reset_index(drop=True)
# random distance for each point
np.random.seed(7)
gdf["radius"] = np.random.randint(5 * 10**4, 10**5, len(gdf))
gdf["geometry"] = gdf.apply(lambda r: r["geometry"].buffer(r["radius"]), axis=1)
def land_usage(g):
xa = xds.rio.clip([g], drop=True)
xa = xa.where(xa != 0, drop=True)
counts = xa.groupby(xa).count().drop("spatial_ref").squeeze().to_dict()
return {
str(int(k)): v
for k, v in zip(counts["coords"]["group"]["data"], counts["data"])
}
# add counts of land use as colums to buffered points
gdf = gdf.join(gdf["geometry"].apply(land_usage).apply(pd.Series))
输出
name | radius | 2 | 4 | 6 | 13 | 16 | 20 | 22 | 14 | 15 | 17 | 19 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | Budapest | 99689 | 70923 | 2984 | 2083 | 2342 | 347318 | 1055 | 17074 | nan | nan | nan | nan |
1 | Bratislava | 60742 | 19247 | 8750 | 1085 | 629 | 146807 | 2281 | 6355 | nan | nan | nan | nan |
2 | Vienna | 88467 | 63181 | 66289 | 13330 | 837 | 237979 | 2525 | 8631 | nan | nan | nan | nan |
3 | Prague | 50919 | 2301 | 20483 | 3799 | 15 | 98426 | nan | 5079 | nan | nan | nan | nan |
4 | Berlin | 63927 | 1719 | 44207 | 18960 | 21410 | 78925 | 4171 | 18038 | 3337 | 251 | 14070 | nan |
5 | København | 84140 | 6460 | 6326 | 11066 | 1732 | 141932 | 133341 | 16274 | 665 | 683 | 11198 | 6 |
6 | Warsaw | 52583 | 1998 | 11428 | 9947 | 10557 | 77328 | 1700 | 8836 | 5251 | 109 | 11599 | nan |
7 | Vilnius | 78232 | 4826 | 41176 | 23952 | 17708 | 86706 | 3889 | 2637 | 4547 | 62 | 21022 | nan |