如何从不同距离的TIF文件中提取不同的土地利用面积?

How to extracts the different land use areas from TIF file within different distances?

我有一个 TIF 格式的土地利用光栅数据文件和一个 SHP 格式的矢量点文件,想使用 python 库(如 geopandas)编写一个 Python 脚本来创建具有不同的缓冲区矢量点的距离,并自动从不同距离内的 TIF 文件中提取不同的土地利用区域。我该怎么做? 非常感谢!

完整代码

from pathlib import Path
from zipfile import ZipFile
import requests
import rasterio, rasterio.mask, rasterio.plot
import geopandas as gpd
import shapely
import numpy as np
import random
import matplotlib.pyplot as plt

# geta raster..
url = "https://www.eea.europa.eu/data-and-maps/data/global-land-cover-250m/zipped-tif-files-250m-raster/zipped-tif-files-250m-raster/at_download/file"

f = Path.cwd().joinpath("land-cover.zip")
if not f.exists():
    r = requests.get(url, stream=True, headers={"User-Agent": "XY"})
    with open(f, "wb") as fd:
        for chunk in r.iter_content(chunk_size=128):
            fd.write(chunk)
    zfile = ZipFile(f)
    tif = [f for f in zfile.infolist() if "/" not in f.filename]
    zfile.extractall(path=f.stem, members=tif)

# open the raster
src = rasterio.open(list(Path.cwd().joinpath(f.stem).glob("*.tif"))[0])

# some points - cities within bounds of GeoTIFF
gdf = (
    gpd.read_file(gpd.datasets.get_path("naturalearth_cities"))
    .to_crs(src.crs)
    .clip(shapely.geometry.box(*src.bounds))
).sample(15)

# random distance for each point
gdf["geometry"] = gdf["geometry"].apply(lambda p: p.buffer(random.randint(10**5, 5*10**5)))

# shapes to mask raster
shapes = [f["geometry"] for f in gdf["geometry"].__geo_interface__["features"]]

# mask the raster, with points buffered to distance
out_image, out_transform = rasterio.mask.mask(src, shapes, crop=True)
out_meta = src.meta

# save and reload masked raster
out_meta.update({"driver": "GTiff",
                 "height": out_image.shape[1],
                 "width": out_image.shape[2],
                 "transform": out_transform})

f = Path.cwd().joinpath("land-cover/masked")
if not f.exists(): f.mkdir()
f = f.joinpath("land-maksed.tif")
with rasterio.open(f, "w", **out_meta) as dest:
    dest.write(out_image)
msk = rasterio.open(f)

# let's have a look at what we've done..
fig, ax = plt.subplots()

# transform rasterio plot to real world coords
extent=msk.bounds
ax = rasterio.plot.show(msk, extent=extent, ax=ax)

输出

不同大小的圆圈使用了随机点的随机缓冲半径。

  • 你的问题表明你想从 GeoTIFF 中提取(剪辑)土地使用数据
  • comment 然后声明您需要与缓冲点相关联的使用像素计数。这个答案是这样做的
    • 剪辑 xarray 到缓冲点
    • 运行 分析这个剪裁的 xarray 以获得计数
    • 将使用列添加回 geopandas 数据框作为列
import rioxarray
from pathlib import Path
import geopandas as gpd
import shapely.geometry
import pandas as pd
import numpy as np

# geta raster..
url = "https://www.eea.europa.eu/data-and-maps/data/global-land-cover-250m/zipped-tif-files-250m-raster/zipped-tif-files-250m-raster/at_download/file"

f = Path.cwd().joinpath("land-cover.zip")
if not f.exists():
    r = requests.get(url, stream=True, headers={"User-Agent": "XY"})
    with open(f, "wb") as fd:
        for chunk in r.iter_content(chunk_size=128):
            fd.write(chunk)
    zfile = ZipFile(f)
    tif = [f for f in zfile.infolist() if "/" not in f.filename]
    zfile.extractall(path=f.stem, members=tif)

# open the raster
f = list(Path.cwd().joinpath(f.stem).glob("*.tif"))[0]
xds = rioxarray.open_rasterio(f)

# reduce size of tif for purpose of dev / testing turnaround speed
devmode = True
if devmode == True:
    _f = f.parent.joinpath("smaller/smaller.tif")
    if not _f.parent.exists():
        _f.parent.mkdir()
    if not _f.exists():
        box = shapely.geometry.box(
            *shapely.geometry.box(*xds.rio.bounds()).centroid.buffer(5 * 10**5).bounds
        )
        xa_cut = xds.rio.clip([box.__geo_interface__], drop=True)
        xa_cut.rio.to_raster(_f)
    xds = rioxarray.open_rasterio(_f)

# some points - cities within bounds of GeoTIFF
gdf = (
    gpd.read_file(gpd.datasets.get_path("naturalearth_cities"))
    .to_crs(xds.rio.crs)
    .clip(shapely.geometry.box(*xds.rio.bounds()))
).reset_index(drop=True)

# random distance for each point
np.random.seed(7)
gdf["radius"] = np.random.randint(5 * 10**4, 10**5, len(gdf))
gdf["geometry"] = gdf.apply(lambda r: r["geometry"].buffer(r["radius"]), axis=1)

def land_usage(g):
    xa = xds.rio.clip([g], drop=True)
    xa = xa.where(xa != 0, drop=True)
    counts = xa.groupby(xa).count().drop("spatial_ref").squeeze().to_dict()

    return {
        str(int(k)): v
        for k, v in zip(counts["coords"]["group"]["data"], counts["data"])
    }


# add counts of land use as colums to buffered points
gdf = gdf.join(gdf["geometry"].apply(land_usage).apply(pd.Series))

输出

name radius 2 4 6 13 16 20 22 14 15 17 19
0 Budapest 99689 70923 2984 2083 2342 347318 1055 17074 nan nan nan nan
1 Bratislava 60742 19247 8750 1085 629 146807 2281 6355 nan nan nan nan
2 Vienna 88467 63181 66289 13330 837 237979 2525 8631 nan nan nan nan
3 Prague 50919 2301 20483 3799 15 98426 nan 5079 nan nan nan nan
4 Berlin 63927 1719 44207 18960 21410 78925 4171 18038 3337 251 14070 nan
5 København 84140 6460 6326 11066 1732 141932 133341 16274 665 683 11198 6
6 Warsaw 52583 1998 11428 9947 10557 77328 1700 8836 5251 109 11599 nan
7 Vilnius 78232 4826 41176 23952 17708 86706 3889 2637 4547 62 21022 nan