从 zip 文件中提取并合并多个 shapefile,且不保存到磁盘
extract and merge several shapefiles from zipfile without saving to disc
我第一次在 Python 中使用 zipfiles :-/
手头的任务如下(主要要求是不向磁盘写入任何内容)
鉴于此 url:http://shapefiles.fews.net.s3.amazonaws.com/ALL_HFIC.zip
- 获取 zip 文件
- 从 zip 归档(archive)中提取文件名包含 `Africa` 的 shapefile。
- 将所有文件合并为一个 shapefile(将所有文件读入 geopandas)。
- 转换为 geoJson。
这是我目前的代码结构 - 但我一直收到属性错误
AttributeError: 'ZipFile' object has no attribute 'seek'
import io
import zipfile
import pandas as pd
import geopandas as gpd
# util funcs
# True when the archive member name contains the substring "Africa".
is_africa = lambda string: "Africa" in string
# True when the archive member name ends with 'shp'.
is_shape = lambda string: string.endswith('shp')
# get_zip() defined in module
# NOTE(review): get_zip and URL are not shown in this snippet; the return value
# is presumably a requests-style response exposing .content -- confirm.
filebytes = io.BytesIO(get_zip(url=URL).content)
# get the zipfile object
myzipfile = zipfile.ZipFile(filebytes)
# instantiate empty list where to store the shapefiles of interest.
shapefiles = []
# below code adapted from:
# NOTE(review): zip_file is not defined anywhere in this snippet. If it stands
# for myzipfile (already a ZipFile object), wrapping it in ZipFile again would
# presumably explain the reported "'ZipFile' object has no attribute 'seek'"
# error -- confirm against the real script.
with zipfile.ZipFile(zip_file, 'r') as zf:
    for file_name in zf.namelist():
        if is_africa(file_name) and is_shape(file_name):
            # Raw bytes of the matching archive member only.
            data = zf.read(file_name)
            shapefiles.append(data)
# below code adapted from
# NOTE(review): read_file is handed the raw bytes of each matching member here;
# other members of the archive are never read. Verify that geopandas accepts
# this input.
gdf_africa = gpd.GeoDataFrame(pd.concat([gpd.read_file(i) for i in shapefiles],
                                        ignore_index=True),
                              crs=gpd.read_file(shapefiles[0]).crs)
# Writes the merged frame to "output.json" using the GeoJSON driver.
gdf_africa.to_file("output.json", driver="GeoJSON")
此代码从 URL 请求 ZipFile,将 ZipFile 读取到流中并提取非洲 ShapeFile 的名称。
import io
from zipfile import ZipFile

# Archive holding every FEWS NET shapefile; it is fetched into memory only.
ARCHIVE_URL = 'http://shapefiles.fews.net.s3.amazonaws.com/ALL_HFIC.zip'


# util funcs
def is_africa(string):
    """Return True when *string* (an archive member name) contains "Africa"."""
    return "Africa" in string


def is_shape(string):
    """Return True when *string* names a ``.shp`` archive member."""
    # Match the extension including the dot so a name that merely ends in the
    # letters "shp" is not mistaken for a shapefile.
    return string.endswith('.shp')


def africa_shapefile_names(zip_bytes):
    """Return the names of the Africa ``.shp`` members of an in-memory zip.

    Args:
        zip_bytes: The raw bytes of a zip archive.

    Returns:
        A list of member names, in archive order, for which both
        ``is_africa`` and ``is_shape`` are true.
    """
    with ZipFile(io.BytesIO(zip_bytes)) as zf:
        return [
            file_name
            for file_name in zf.namelist()
            if is_africa(file_name) and is_shape(file_name)
        ]


def main():
    """Download the archive and print every Africa shapefile member name."""
    # Imported here so the helpers above stay importable without requests.
    import requests

    response = requests.get(ARCHIVE_URL)
    # Fail loudly on an HTTP error instead of trying to unzip an error page.
    response.raise_for_status()
    for file_name in africa_shapefile_names(response.content):
        print(file_name)


if __name__ == "__main__":
    main()
# Output
ALL_HFIC/ALL_HFIC/East Africa/EA_200907_CS.shp
ALL_HFIC/ALL_HFIC/East Africa/EA_200910_CS.shp
ALL_HFIC/ALL_HFIC/East Africa/EA_201001_CS.shp
ALL_HFIC/ALL_HFIC/East Africa/EA_201004_CS.shp
我以前从未使用过 shapefile 或 geopandas,过去 4 个小时一直在研究它们的用法。我已经能够输出一个 JSON 文件,但不确定其中的数据是否满足您的需要。
import io
import json
import posixpath
from zipfile import ZipFile

# Archive holding every FEWS NET shapefile; it is fetched into memory only.
ARCHIVE_URL = 'http://shapefiles.fews.net.s3.amazonaws.com/ALL_HFIC.zip'


# util funcs
def is_africa(string):
    """Return True when *string* (an archive member name) contains "Africa"."""
    return "Africa" in string


def is_shape(string):
    """Return True when *string* names a ``.shp`` archive member."""
    return string.endswith('.shp')


def sibling_member(shp_name, suffix):
    """Return the member name sharing *shp_name*'s stem but ending in *suffix*.

    Zip member names always use forward slashes, hence ``posixpath``.
    """
    return posixpath.splitext(shp_name)[0] + suffix


def open_reader(zf, shp_name):
    """Build a pyshp ``Reader`` for *shp_name* straight from the open zip *zf*.

    The ``.shp`` and ``.dbf`` members (and ``.shx`` when the archive has one)
    are copied into ``BytesIO`` buffers, so nothing is extracted to disk.
    Passing the bare member name to ``Reader`` would instead look for a file
    on the local filesystem.

    Raises:
        KeyError: If the ``.shp`` or its ``.dbf`` sibling is not in the archive.
    """
    # pyshp; imported here so the pure helpers stay importable without it.
    import shapefile

    parts = {
        "shp": io.BytesIO(zf.read(shp_name)),
        "dbf": io.BytesIO(zf.read(sibling_member(shp_name, ".dbf"))),
    }
    shx_name = sibling_member(shp_name, ".shx")
    if shx_name in zf.namelist():
        parts["shx"] = io.BytesIO(zf.read(shx_name))
    return shapefile.Reader(**parts)


def features_from_reader(reader):
    """Return one GeoJSON ``Feature`` dict per shape record of *reader*."""
    # The first entry of reader.fields is skipped, as in the original script
    # (pyshp lists its DeletionFlag pseudo-field there).
    field_names = [field[0] for field in reader.fields[1:]]
    return [
        dict(
            type="Feature",
            geometry=sr.shape.__geo_interface__,
            properties=dict(zip(field_names, sr.record)),
        )
        for sr in reader.shapeRecords()
    ]


def main():
    """Download the archive and write all Africa features to one GeoJSON file."""
    # Imported here so the helpers above stay importable without requests.
    import requests

    response = requests.get(ARCHIVE_URL)
    # Fail loudly on an HTTP error instead of trying to unzip an error page.
    response.raise_for_status()

    # instantiate empty list where to store the features of interest.
    africa_data = []
    with ZipFile(io.BytesIO(response.content)) as zf:
        for file_name in zf.namelist():
            if is_africa(file_name) and is_shape(file_name):
                africa_data.extend(features_from_reader(open_reader(zf, file_name)))

    # The context manager closes the output file even if serialisation fails.
    with open("african_geo_data.json", "w", encoding="utf-8") as geojson:
        geojson.write(
            json.dumps({"type": "FeatureCollection", "features": africa_data}, indent=2)
            + "\n"
        )


if __name__ == "__main__":
    main()
从 JSON 文件中抽样:
{
"type": "FeatureCollection",
"features": [
{
"type": "Feature",
"geometry": {
"type": "MultiPolygon",
"coordinates": [
[
[
[
40.213226318000125,
-10.277393340999765
],
[
40.21355056800013,
-10.279667853999932
],
[
40.21699915800019,
-10.27847569599988
]
},
"properties": {
"CS": 4.0,
"HA0": 0.0
}
}
]
}
我第一次在 Python 中使用 zipfiles :-/
手头的任务如下(主要要求是不向磁盘写入任何内容)
鉴于此 url:http://shapefiles.fews.net.s3.amazonaws.com/ALL_HFIC.zip
- 获取 zip 文件
- 从 zip 归档(archive)中提取文件名包含 `Africa` 的 shapefile。
- 将所有文件合并为一个 shapefile(将所有文件读入 geopandas)。
- 转换为 geoJson。
这是我目前的代码结构 - 但我一直收到属性错误
AttributeError: 'ZipFile' object has no attribute 'seek'
import io
import zipfile
import pandas as pd
import geopandas as gpd
# util funcs
# True when the archive member name contains the substring "Africa".
is_africa = lambda string: "Africa" in string
# True when the archive member name ends with 'shp'.
is_shape = lambda string: string.endswith('shp')
# get_zip() defined in module
# NOTE(review): get_zip and URL are not shown in this snippet; the return value
# is presumably a requests-style response exposing .content -- confirm.
filebytes = io.BytesIO(get_zip(url=URL).content)
# get the zipfile object
myzipfile = zipfile.ZipFile(filebytes)
# instantiate empty list where to store the shapefiles of interest.
shapefiles = []
# below code adapted from:
# NOTE(review): zip_file is not defined anywhere in this snippet. If it stands
# for myzipfile (already a ZipFile object), wrapping it in ZipFile again would
# presumably explain the reported "'ZipFile' object has no attribute 'seek'"
# error -- confirm against the real script.
with zipfile.ZipFile(zip_file, 'r') as zf:
    for file_name in zf.namelist():
        if is_africa(file_name) and is_shape(file_name):
            # Raw bytes of the matching archive member only.
            data = zf.read(file_name)
            shapefiles.append(data)
# below code adapted from
# NOTE(review): read_file is handed the raw bytes of each matching member here;
# other members of the archive are never read. Verify that geopandas accepts
# this input.
gdf_africa = gpd.GeoDataFrame(pd.concat([gpd.read_file(i) for i in shapefiles],
                                        ignore_index=True),
                              crs=gpd.read_file(shapefiles[0]).crs)
# Writes the merged frame to "output.json" using the GeoJSON driver.
gdf_africa.to_file("output.json", driver="GeoJSON")
此代码从 URL 请求 ZipFile,将 ZipFile 读取到流中并提取非洲 ShapeFile 的名称。
import io
from zipfile import ZipFile

# Archive holding every FEWS NET shapefile; it is fetched into memory only.
ARCHIVE_URL = 'http://shapefiles.fews.net.s3.amazonaws.com/ALL_HFIC.zip'


# util funcs
def is_africa(string):
    """Return True when *string* (an archive member name) contains "Africa"."""
    return "Africa" in string


def is_shape(string):
    """Return True when *string* names a ``.shp`` archive member."""
    # Match the extension including the dot so a name that merely ends in the
    # letters "shp" is not mistaken for a shapefile.
    return string.endswith('.shp')


def africa_shapefile_names(zip_bytes):
    """Return the names of the Africa ``.shp`` members of an in-memory zip.

    Args:
        zip_bytes: The raw bytes of a zip archive.

    Returns:
        A list of member names, in archive order, for which both
        ``is_africa`` and ``is_shape`` are true.
    """
    with ZipFile(io.BytesIO(zip_bytes)) as zf:
        return [
            file_name
            for file_name in zf.namelist()
            if is_africa(file_name) and is_shape(file_name)
        ]


def main():
    """Download the archive and print every Africa shapefile member name."""
    # Imported here so the helpers above stay importable without requests.
    import requests

    response = requests.get(ARCHIVE_URL)
    # Fail loudly on an HTTP error instead of trying to unzip an error page.
    response.raise_for_status()
    for file_name in africa_shapefile_names(response.content):
        print(file_name)


if __name__ == "__main__":
    main()
# Output
ALL_HFIC/ALL_HFIC/East Africa/EA_200907_CS.shp
ALL_HFIC/ALL_HFIC/East Africa/EA_200910_CS.shp
ALL_HFIC/ALL_HFIC/East Africa/EA_201001_CS.shp
ALL_HFIC/ALL_HFIC/East Africa/EA_201004_CS.shp
我以前从未使用过 shapefile 或 geopandas,过去 4 个小时一直在研究它们的用法。我已经能够输出一个 JSON 文件,但不确定其中的数据是否满足您的需要。
import io
import json
import posixpath
from zipfile import ZipFile

# Archive holding every FEWS NET shapefile; it is fetched into memory only.
ARCHIVE_URL = 'http://shapefiles.fews.net.s3.amazonaws.com/ALL_HFIC.zip'


# util funcs
def is_africa(string):
    """Return True when *string* (an archive member name) contains "Africa"."""
    return "Africa" in string


def is_shape(string):
    """Return True when *string* names a ``.shp`` archive member."""
    return string.endswith('.shp')


def sibling_member(shp_name, suffix):
    """Return the member name sharing *shp_name*'s stem but ending in *suffix*.

    Zip member names always use forward slashes, hence ``posixpath``.
    """
    return posixpath.splitext(shp_name)[0] + suffix


def open_reader(zf, shp_name):
    """Build a pyshp ``Reader`` for *shp_name* straight from the open zip *zf*.

    The ``.shp`` and ``.dbf`` members (and ``.shx`` when the archive has one)
    are copied into ``BytesIO`` buffers, so nothing is extracted to disk.
    Passing the bare member name to ``Reader`` would instead look for a file
    on the local filesystem.

    Raises:
        KeyError: If the ``.shp`` or its ``.dbf`` sibling is not in the archive.
    """
    # pyshp; imported here so the pure helpers stay importable without it.
    import shapefile

    parts = {
        "shp": io.BytesIO(zf.read(shp_name)),
        "dbf": io.BytesIO(zf.read(sibling_member(shp_name, ".dbf"))),
    }
    shx_name = sibling_member(shp_name, ".shx")
    if shx_name in zf.namelist():
        parts["shx"] = io.BytesIO(zf.read(shx_name))
    return shapefile.Reader(**parts)


def features_from_reader(reader):
    """Return one GeoJSON ``Feature`` dict per shape record of *reader*."""
    # The first entry of reader.fields is skipped, as in the original script
    # (pyshp lists its DeletionFlag pseudo-field there).
    field_names = [field[0] for field in reader.fields[1:]]
    return [
        dict(
            type="Feature",
            geometry=sr.shape.__geo_interface__,
            properties=dict(zip(field_names, sr.record)),
        )
        for sr in reader.shapeRecords()
    ]


def main():
    """Download the archive and write all Africa features to one GeoJSON file."""
    # Imported here so the helpers above stay importable without requests.
    import requests

    response = requests.get(ARCHIVE_URL)
    # Fail loudly on an HTTP error instead of trying to unzip an error page.
    response.raise_for_status()

    # instantiate empty list where to store the features of interest.
    africa_data = []
    with ZipFile(io.BytesIO(response.content)) as zf:
        for file_name in zf.namelist():
            if is_africa(file_name) and is_shape(file_name):
                africa_data.extend(features_from_reader(open_reader(zf, file_name)))

    # The context manager closes the output file even if serialisation fails.
    with open("african_geo_data.json", "w", encoding="utf-8") as geojson:
        geojson.write(
            json.dumps({"type": "FeatureCollection", "features": africa_data}, indent=2)
            + "\n"
        )


if __name__ == "__main__":
    main()
从 JSON 文件中抽样:
{
"type": "FeatureCollection",
"features": [
{
"type": "Feature",
"geometry": {
"type": "MultiPolygon",
"coordinates": [
[
[
[
40.213226318000125,
-10.277393340999765
],
[
40.21355056800013,
-10.279667853999932
],
[
40.21699915800019,
-10.27847569599988
]
},
"properties": {
"CS": 4.0,
"HA0": 0.0
}
}
]
}