如何在 Python 中从 txt 文件读取数据集并生成 geoJSON
how to input dataset from txt file in python for geoJSON
我对 geoJSON 和 python 脚本编写经验很少。
我想读取一个 txt 文件,并把其中的数据转换成包含这些值的 geoJSON 数据集。
我参考了这篇文章并使用了这段代码:
import pandas as pd
import geojson
def data2geojson(df):
    """Write every row of ``df`` to ``map1.geojson`` as one GeoJSON feature.

    Each row is read through its ``long``, ``lat``, ``elev``, ``name`` and
    ``description`` entries. The collected features are wrapped in a
    FeatureCollection and dumped to ``map1.geojson`` (UTF-8, sorted keys,
    4-space indent). Nothing is returned.
    """
    features = []

    def _append_feature(row):
        # NOTE(review): the LineString receives one flat (long, lat, elev)
        # triple rather than a sequence of positions -- confirm this is the
        # intended geometry.
        geometry = geojson.LineString((row["long"], row["lat"], row["elev"]))
        properties = dict(name=row["name"], description=row["description"])
        features.append(geojson.Feature(geometry=geometry, properties=properties))

    # axis=1 hands each row to the callback; the apply result itself is unused.
    df.apply(_append_feature, axis=1)

    with open('map1.geojson', 'w', encoding='utf8') as fp:
        collection = geojson.FeatureCollection(features)
        geojson.dump(collection, fp, sort_keys=True, ensure_ascii=False, indent=4)
# Column labels, in the same order as the values inside every row of ``data``.
col = ['lat', 'long', 'elev', 'name', 'description']

# One inner list per sample row.
data = [
    [-29.9953, -70.5867, 760, 'A', 'Place MNO'],
    [-30.1217, -70.4933, 1250, 'C', 'Place PQR'],
    [-30.0953, -70.5008, 1185, 'C', 'Place STU'],
]

# Build the table and hand it to the exporter defined above.
df = pd.DataFrame(data=data, columns=col)
data2geojson(df)
但是在我的数据中,有些点位于同一条线上(因此我使用 LineString 而不是 Point),并且它们带有唯一的 ID,需要放在 geoJSON 的 'properties' 部分中。
这是我的 .txt 文件中的数据示例
lat=1.3218368 ,long=103.9364834 ,A, Place BC
lat=1.3218821 ,long=103.9364744 ,A, Place BC
lat=1.3219285 ,long=103.9367017 ,A, Place BC
lat=1.321643 ,long=103.9364707 ,A, Place BC
lat=1.3216271 ,long=103.9363887 ,A, Place BC
lat=1.3235089 ,long=103.9344606 ,A, Place BC
lat=1.3237205 ,long=103.9355026 ,A, Place BC
lat=1.3217046 ,long=103.934106 ,A, Place BC
lat=1.3203204 ,long=103.9366324 ,B, Place AC
lat=1.3206557 ,long=103.9373536 ,B, Place AC
lat=1.3206271 ,long=103.9374192 ,B, Place AC
lat=1.3205511 ,long=103.9371742 ,B, Place AC
lat=1.3206044 ,long=103.9375056 ,B, Place AC
lat=1.3207561 ,long=103.9371863 ,B, Place AC
lat=1.3204307 ,long=103.9368537 ,B, Place AC
lat=1.3204877 ,long=103.9368389 ,B, Place AC
lat=1.3205465 ,long=103.9368269 ,B, Place AC
lat=1.320612 ,long=103.9368246 ,B, Place AC
lat=1.3207378 ,long=103.9371016 ,B, Place AC
lat=1.3207702 ,long=103.9370846 ,B, Place AC
我应该如何把这个 txt 文件作为函数中 data 数组的输入,并按 name(例如 A 或 B)对这些点进行分组(不考虑 description),使每个分组成为一个包含多于 1 个点的 LineString?
如有任何帮助,我将不胜感激!!
我建议使用 geopandas,因为它是针对这样的问题量身定制的:
import pandas as pd
import geopandas as gpd
from shapely.geometry import LineString
import io
col = ['lat','long','name','description']
data = '''lat=1.3218368 ,long=103.9364834 ,A, Place BC
lat=1.3218821 ,long=103.9364744 ,A, Place BC
lat=1.3219285 ,long=103.9367017 ,A, Place BC
lat=1.321643 ,long=103.9364707 ,A, Place BC
lat=1.3216271 ,long=103.9363887 ,A, Place BC
lat=1.3235089 ,long=103.9344606 ,A, Place BC
lat=1.3237205 ,long=103.9355026 ,A, Place BC
lat=1.3217046 ,long=103.934106 ,A, Place BC
lat=1.3203204 ,long=103.9366324 ,B, Place AC
lat=1.3206557 ,long=103.9373536 ,B, Place AC
lat=1.3206271 ,long=103.9374192 ,B, Place AC
lat=1.3205511 ,long=103.9371742 ,B, Place AC
lat=1.3206044 ,long=103.9375056 ,B, Place AC
lat=1.3207561 ,long=103.9371863 ,B, Place AC
lat=1.3204307 ,long=103.9368537 ,B, Place AC
lat=1.3204877 ,long=103.9368389 ,B, Place AC
lat=1.3205465 ,long=103.9368269 ,B, Place AC
lat=1.320612 ,long=103.9368246 ,B, Place AC
lat=1.3207378 ,long=103.9371016 ,B, Place AC
lat=1.3207702 ,long=103.9370846 ,B, Place AC'''


def _strip_key(cell):
    """Return the number after ``=`` in a ``key=value`` cell (``lat=1.32`` -> 1.32)."""
    return float(cell.split('=', 1)[1])


# Load the text as a dataframe (replace io.StringIO(data) with the path of the
# txt file). The separator is a regex -- a comma plus any whitespace around it --
# written as a raw string so that ``\s`` is not an invalid string escape.
# The converters drop the ``lat=`` / ``long=`` prefixes while loading.
df = pd.read_csv(
    io.StringIO(data),
    names=col,
    sep=r'\s*,\s*',
    engine='python',
    converters={'lat': _strip_key, 'long': _strip_key},
)
# Turn the table into a GeoDataFrame whose geometry column holds one point per
# row, built with x = long and y = lat.
row_points = gpd.points_from_xy(df.long, df.lat)
gdf = gpd.GeoDataFrame(df, geometry=row_points)


def _points_to_line(points):
    """Join the point geometries of one group into a single LineString."""
    return LineString(points.tolist())


# Group on name and description, collapsing each group's points into a LineString.
grouped_geometry = gdf.groupby(['name', 'description'])['geometry']
gdf = grouped_geometry.apply(_points_to_line).reset_index()

# Serialise the grouped frame; the returned text is not stored or printed here.
gdf.to_json()
输出geojson:
{"type": "FeatureCollection", "features": [{"id": "0", "type": "Feature", "properties": {"description": "Place BC", "name": "A"}, "geometry": {"type": "LineString", "coordinates": [[103.9364834, 1.3218368], [103.9364744, 1.3218821], [103.9367017, 1.3219285], [103.9364707, 1.321643], [103.9363887, 1.3216271], [103.9344606, 1.3235089], [103.9355026, 1.3237205], [103.934106, 1.3217046]]}}, {"id": "1", "type": "Feature", "properties": {"description": "Place AC", "name": "B"}, "geometry": {"type": "LineString", "coordinates": [[103.9366324, 1.3203204], [103.9373536, 1.3206557], [103.9374192, 1.3206271], [103.9371742, 1.3205511], [103.9375056, 1.3206044], [103.9371863, 1.3207561], [103.9368537, 1.3204307], [103.9368389, 1.3204877], [103.9368269, 1.3205465], [103.9368246, 1.320612], [103.9371016, 1.3207378], [103.9370846, 1.3207702]]}}]}
我对 geoJSON 和 python 脚本编写经验很少。
我想读取一个 txt 文件,并把其中的数据转换成包含这些值的 geoJSON 数据集。
我参考了这篇文章并使用了这段代码:
import pandas as pd
import geojson
def data2geojson(df):
    """Write every row of ``df`` to ``map1.geojson`` as one GeoJSON feature.

    Each row is read through its ``long``, ``lat``, ``elev``, ``name`` and
    ``description`` entries. The collected features are wrapped in a
    FeatureCollection and dumped to ``map1.geojson`` (UTF-8, sorted keys,
    4-space indent). Nothing is returned.
    """
    features = []

    def _append_feature(row):
        # NOTE(review): the LineString receives one flat (long, lat, elev)
        # triple rather than a sequence of positions -- confirm this is the
        # intended geometry.
        geometry = geojson.LineString((row["long"], row["lat"], row["elev"]))
        properties = dict(name=row["name"], description=row["description"])
        features.append(geojson.Feature(geometry=geometry, properties=properties))

    # axis=1 hands each row to the callback; the apply result itself is unused.
    df.apply(_append_feature, axis=1)

    with open('map1.geojson', 'w', encoding='utf8') as fp:
        collection = geojson.FeatureCollection(features)
        geojson.dump(collection, fp, sort_keys=True, ensure_ascii=False, indent=4)
# Column labels, in the same order as the values inside every row of ``data``.
col = ['lat', 'long', 'elev', 'name', 'description']

# One inner list per sample row.
data = [
    [-29.9953, -70.5867, 760, 'A', 'Place MNO'],
    [-30.1217, -70.4933, 1250, 'C', 'Place PQR'],
    [-30.0953, -70.5008, 1185, 'C', 'Place STU'],
]

# Build the table and hand it to the exporter defined above.
df = pd.DataFrame(data=data, columns=col)
data2geojson(df)
但是在我的数据中,有些点位于同一条线上(因此我使用 LineString 而不是 Point),并且它们带有唯一的 ID,需要放在 geoJSON 的 'properties' 部分中。
这是我的 .txt 文件中的数据示例
lat=1.3218368 ,long=103.9364834 ,A, Place BC
lat=1.3218821 ,long=103.9364744 ,A, Place BC
lat=1.3219285 ,long=103.9367017 ,A, Place BC
lat=1.321643 ,long=103.9364707 ,A, Place BC
lat=1.3216271 ,long=103.9363887 ,A, Place BC
lat=1.3235089 ,long=103.9344606 ,A, Place BC
lat=1.3237205 ,long=103.9355026 ,A, Place BC
lat=1.3217046 ,long=103.934106 ,A, Place BC
lat=1.3203204 ,long=103.9366324 ,B, Place AC
lat=1.3206557 ,long=103.9373536 ,B, Place AC
lat=1.3206271 ,long=103.9374192 ,B, Place AC
lat=1.3205511 ,long=103.9371742 ,B, Place AC
lat=1.3206044 ,long=103.9375056 ,B, Place AC
lat=1.3207561 ,long=103.9371863 ,B, Place AC
lat=1.3204307 ,long=103.9368537 ,B, Place AC
lat=1.3204877 ,long=103.9368389 ,B, Place AC
lat=1.3205465 ,long=103.9368269 ,B, Place AC
lat=1.320612 ,long=103.9368246 ,B, Place AC
lat=1.3207378 ,long=103.9371016 ,B, Place AC
lat=1.3207702 ,long=103.9370846 ,B, Place AC
我应该如何把这个 txt 文件作为函数中 data 数组的输入,并按 name(例如 A 或 B)对这些点进行分组(不考虑 description),使每个分组成为一个包含多于 1 个点的 LineString?
如有任何帮助,我将不胜感激!!
我建议使用 geopandas,因为它是针对这样的问题量身定制的:
import pandas as pd
import geopandas as gpd
from shapely.geometry import LineString
import io
col = ['lat','long','name','description']
data = '''lat=1.3218368 ,long=103.9364834 ,A, Place BC
lat=1.3218821 ,long=103.9364744 ,A, Place BC
lat=1.3219285 ,long=103.9367017 ,A, Place BC
lat=1.321643 ,long=103.9364707 ,A, Place BC
lat=1.3216271 ,long=103.9363887 ,A, Place BC
lat=1.3235089 ,long=103.9344606 ,A, Place BC
lat=1.3237205 ,long=103.9355026 ,A, Place BC
lat=1.3217046 ,long=103.934106 ,A, Place BC
lat=1.3203204 ,long=103.9366324 ,B, Place AC
lat=1.3206557 ,long=103.9373536 ,B, Place AC
lat=1.3206271 ,long=103.9374192 ,B, Place AC
lat=1.3205511 ,long=103.9371742 ,B, Place AC
lat=1.3206044 ,long=103.9375056 ,B, Place AC
lat=1.3207561 ,long=103.9371863 ,B, Place AC
lat=1.3204307 ,long=103.9368537 ,B, Place AC
lat=1.3204877 ,long=103.9368389 ,B, Place AC
lat=1.3205465 ,long=103.9368269 ,B, Place AC
lat=1.320612 ,long=103.9368246 ,B, Place AC
lat=1.3207378 ,long=103.9371016 ,B, Place AC
lat=1.3207702 ,long=103.9370846 ,B, Place AC'''


def _strip_key(cell):
    """Return the number after ``=`` in a ``key=value`` cell (``lat=1.32`` -> 1.32)."""
    return float(cell.split('=', 1)[1])


# Load the text as a dataframe (replace io.StringIO(data) with the path of the
# txt file). The separator is a regex -- a comma plus any whitespace around it --
# written as a raw string so that ``\s`` is not an invalid string escape.
# The converters drop the ``lat=`` / ``long=`` prefixes while loading.
df = pd.read_csv(
    io.StringIO(data),
    names=col,
    sep=r'\s*,\s*',
    engine='python',
    converters={'lat': _strip_key, 'long': _strip_key},
)
# Turn the table into a GeoDataFrame whose geometry column holds one point per
# row, built with x = long and y = lat.
row_points = gpd.points_from_xy(df.long, df.lat)
gdf = gpd.GeoDataFrame(df, geometry=row_points)


def _points_to_line(points):
    """Join the point geometries of one group into a single LineString."""
    return LineString(points.tolist())


# Group on name and description, collapsing each group's points into a LineString.
grouped_geometry = gdf.groupby(['name', 'description'])['geometry']
gdf = grouped_geometry.apply(_points_to_line).reset_index()

# Serialise the grouped frame; the returned text is not stored or printed here.
gdf.to_json()
输出geojson:
{"type": "FeatureCollection", "features": [{"id": "0", "type": "Feature", "properties": {"description": "Place BC", "name": "A"}, "geometry": {"type": "LineString", "coordinates": [[103.9364834, 1.3218368], [103.9364744, 1.3218821], [103.9367017, 1.3219285], [103.9364707, 1.321643], [103.9363887, 1.3216271], [103.9344606, 1.3235089], [103.9355026, 1.3237205], [103.934106, 1.3217046]]}}, {"id": "1", "type": "Feature", "properties": {"description": "Place AC", "name": "B"}, "geometry": {"type": "LineString", "coordinates": [[103.9366324, 1.3203204], [103.9373536, 1.3206557], [103.9374192, 1.3206271], [103.9371742, 1.3205511], [103.9375056, 1.3206044], [103.9371863, 1.3207561], [103.9368537, 1.3204307], [103.9368389, 1.3204877], [103.9368269, 1.3205465], [103.9368246, 1.320612], [103.9371016, 1.3207378], [103.9370846, 1.3207702]]}}]}