提高屏蔽 xarray 文件的创建速度

Increase speed creation for masked xarray file

我目前正在尝试使用蒙版网格将矩形 xarray 文件裁剪成国家的形状。您可以在下面找到我当前的解决方案(使用更简单和更小的阵列)。代码有效,我根据 1 和 0 获得了所需的掩码。问题在于这样一个事实,即当 运行 在真实国家形状(更大更复杂)上的代码需要 30 多分钟才能 运行。因为我在这里使用的是非常基本的操作,比如嵌套 for 循环,所以我也尝试了不同的替代方法,比如列表方法。但是,在对过程进行计时时,它并没有改进下面的代码。我想知道是否有更快的方法来获得此掩码(矢量化?),或者我是否应该以不同的方式解决问题(尝试探索 xarray 的属性,但尚未找到任何解决此问题的方法)。

代码如下:

import geopandas as gpd
from shapely.geometry import Polygon, Point
import pandas as pd
import numpy as np 
import xarray as xr

df = pd.read_csv('Brazil_borders.csv',index_col=0)
lats = np.array([-20, -5, -5, -20,])
lons = np.array([-60, -60, -30, -30])
lats2 = np.array([-10.25, -10.75, -11.25, -11.75, -12.25, -12.75, -13.25, -13.75,
       -14.25, -14.75, -15.25, -15.75, -16.25, -16.75, -17.25, -17.75,
       -18.25, -18.75, -19.25, -19.75, -20.25, -20.75, -21.25, -21.75,
       -22.25, -22.75, -23.25, -23.75, -24.25, -24.75, -25.25, -25.75,
       -26.25, -26.75, -27.25, -27.75, -28.25, -28.75, -29.25, -29.75,
       -30.25, -30.75, -31.25, -31.75, -32.25, -32.75])
lons2 = np.array([-61.75, -61.25, -60.75, -60.25, -59.75, -59.25, -58.75, -58.25,
       -57.75, -57.25, -56.75, -56.25, -55.75, -55.25, -54.75, -54.25,
       -53.75, -53.25, -52.75, -52.25, -51.75, -51.25, -50.75, -50.25,
       -49.75, -49.25, -48.75, -48.25, -47.75, -47.25, -46.75, -46.25,
       -45.75, -45.25, -44.75, -44.25])
points = []
for i in range(len(lats)):
        _= [lats[i],lons[i]]
        points.append(_)
poly_proj = Polygon(points)    

mask = np.zeros((len(lats2),len(lons2)))     # Mask with the dataset's shape and size.                                       

for i in range(len(lats2)):                  # Iteration to verify if a given coordinate is within the polygon's area                                    
    for j in range(len(lons2)):                                                  
        grid_point = Point(lats2[i], lons2[j])                  
        if grid_point.within(poly_proj):  
            mask[i][j] = 1    
bool_final = mask
bool_final

基于列表方法的替代方案,但处理时间更差(根据 timeit):

lats = np.array([-20, -5, -5, -20,])
lons = np.array([-60, -60, -30, -30])
lats2 = np.array([-10.25, -10.75, -11.25, -11.75, -12.25, -12.75, -13.25, -13.75,
       -14.25, -14.75, -15.25, -15.75, -16.25, -16.75, -17.25, -17.75,
       -18.25, -18.75, -19.25, -19.75, -20.25, -20.75, -21.25, -21.75,
       -22.25, -22.75, -23.25, -23.75, -24.25, -24.75, -25.25, -25.75,
       -26.25, -26.75, -27.25, -27.75, -28.25, -28.75, -29.25, -29.75,
       -30.25, -30.75, -31.25, -31.75, -32.25, -32.75])
lons2 = np.array([-61.75, -61.25, -60.75, -60.25, -59.75, -59.25, -58.75, -58.25,
       -57.75, -57.25, -56.75, -56.25, -55.75, -55.25, -54.75, -54.25,
       -53.75, -53.25, -52.75, -52.25, -51.75, -51.25, -50.75, -50.25,
       -49.75, -49.25, -48.75, -48.25, -47.75, -47.25, -46.75, -46.25,
       -45.75, -45.25, -44.75, -44.25])
points = []
for i in range(len(lats)):
        _= [lats[i],lons[i]]
        points.append(_)
poly_proj = Polygon(points)    

grid_point = [Point(lats2[i],lons2[j]) for i in range(len(lats2)) for j in range(len(lons2))]                 
mask = [1 if grid_point[i].within(poly_proj) else 0 for i in range(len(grid_point))] 
bool_final2 = np.reshape(mask,(((len(lats2)),(len(lons2)))))

提前致谢!

基于 snowman2, I created this simple function that provides a much faster solution by using geopandas and rioxarray. Instead of using a list of latitudes and longitudes, one has to use a shapefile with the desired shape to be masked (Instructions 从坐标列表创建 GeoDataFrame 的回答。

import xarray as xr
import geopandas as gpd
import rioxarray
from shapely.geometry import mapping

def mask_shape_border (DS,shape_shp): #Inputs are the dataset to be cropped and the address of the mask file (.shp )
    if 'lat' in DS:                     #Some datasets use lat/lon, others latitude/longitude
            DS.rio.set_spatial_dims(x_dim="lon", y_dim="lat", inplace=True)
    elif 'latitude' in DS:
            DS.rio.set_spatial_dims(x_dim="longitude", y_dim="latitude", inplace=True)
    else:
        print("Error: check latitude and longitude variable names.")

    DS.rio.write_crs("epsg:4326", inplace=True)
    mask = gpd.read_file(shape_shp, crs="epsg:4326")
    DS_clipped = DS.rio.clip(mask.geometry.apply(mapping), mask.crs, drop=False)

    return(DS_clipped)