使用 python 数据帧未将数据写入文件的问题

Question

我正在尝试使用我拥有的输入 txt 文件创建一个 geojson 文件。这是我的代码，但似乎有错误。

import pandas as pd
import geopandas as gpd
from shapely.geometry import LineString
import io

# Question script: parse "lat=...,long=...,pointID,WAYID,tags..." rows and build one LineString per WAYID.
# Column names for the header-less input; tag2..tag5 receive the free-form tag values that follow WAYID.
col = ['lat','long','pointID','WAYID','tag2','tag3','tag4','tag5']
data = '''lat=1.3218368,long=103.9364834,107244,190637,shelter,yes,highway,footway
lat=1.3208156,long=103.9365417,106940,190637,highway,footway
lat=1.3206226,long=103.9367689,107034,190637,highway,footway
lat=1.3202877,long=103.9345338,106640,190637,shelter,yes,highway,footway
lat=1.3235089,long=103.9344606,107148,190637,highway,footway,shelter,yes
lat=1.3207544,long=103.9370296,107041,190637,highway,footway
lat=1.3218821,long=103.9364744,107243,190637,shelter,yes,highway,footway
lat=1.3202255,long=103.9365788,106947,190888,shelter,yes,highway,footway
lat=1.3219285,long=103.9367017,107242,190637,shelter,yes,highway,footway
lat=1.3203222,long=103.936561,106946,190637,shelter,yes,highway,footway
lat=1.320661,long=103.936842,107036,190637,highway,footway
lat=1.3205415,long=103.9339101,106642,190888,shelter,yes,highway,footway
lat=1.3207378,long=103.9371016,107043,190637,shelter,yes,highway,footway
lat=1.3237604,long=103.933684,106563,190637,shelter,yes,highway,footway,random
lat=1.3237205,long=103.9355026,107115,190637,highway,footway,shelter,yes
lat=1.321643,long=103.9364707,107241,190637,shelter,yes,highway,footway
lat=1.3202778,long=103.9363223,106945,190888,shelter,yes,highway,footway
lat=1.3216271,long=103.9363887,107240,190637,shelter,yes,highway,footway'''

# Load the CSV text as a DataFrame (replace io.StringIO(data) with the CSV filename).
# The converters strip the "lat=" / "long=" prefixes and turn the remainder into floats.
df = pd.read_csv(io.StringIO(data), names=col, sep=',', engine='python', converters={'lat': lambda x: float(x.split('=')[1]), 'long': lambda x: float(x.split('=')[1])})
# Alternative: read the same layout from a text file instead of the inline string.
#df = pd.read_csv("latlong.txt", names=col, sep=',', engine='python', converters={'lat': lambda x: float(x.split('=')[1]), 'long': lambda x: float(x.split('=')[1])})
    
    
# Wrap the DataFrame as a GeoDataFrame with one point per row (x = long, y = lat).
gdf = gpd.GeoDataFrame(df, geometry=gpd.points_from_xy(df.long, df.lat))
# Group by WAYID and turn each group's points into a single LineString.
#gdf = gdf.groupby(['description'])['geometry'].apply(lambda p: LineString(zip(p.x, p.y)) if len(p) > 1 else Point(p.x, p.y))
gdf = gdf.groupby(['WAYID'])['geometry'].apply(lambda x: LineString(x.tolist())).reset_index()
#gdf.groupby(['description'])['geometry'].apply(LineString)
    
# Serialise the grouped result as a GeoJSON string.
jsonLoad = gdf.to_json()

如果上面的方法有效，那么这部分会生成我想要的文件格式。

import json
from geojson import Point, Feature, dump
# Parse the GeoJSON string produced above, show a pretty-printed copy,
# then write the parsed object to disk.
parsed = json.loads(jsonLoad)
pretty_text = json.dumps(parsed, indent=4, sort_keys=True)
print(pretty_text)
with open('myfile.geojson', 'w') as out_file:
    dump(parsed, out_file, indent=1)

我试图根据 WAYID 对这些点进行分组，但在生成的 GeoJSON 结果文件中，我没有看到 shelter,yes,highway,footway 这样的标签附加在要素上。我不明白为什么这些标签没有被存储到 GeoJSON 中？

例如这是我生成的文件，

{
    "features": [
        {
            "geometry": {
                "coordinates": [
                    [
                        103.9364834,
                        1.3218368
                    ],
                    [
                        103.9365417,
                        1.3208156
                    ],
                    [
                        103.9367689,
                        1.3206226
                    ],
                    [
                        103.9345338,
                        1.3202877
                    ],
                    [
                        103.9344606,
                        1.3235089
                    ],
                    [
                        103.9370296,
                        1.3207544
                    ],
                    [
                        103.9364744,
                        1.3218821
                    ],
                    [
                        103.9367017,
                        1.3219285
                    ],
                    [
                        103.936561,
                        1.3203222
                    ],
                    [
                        103.936842,
                        1.320661
                    ],
                    [
                        103.9371016,
                        1.3207378
                    ],
                    [
                        103.933684,
                        1.3237604
                    ],
                    [
                        103.9355026,
                        1.3237205
                    ],
                    [
                        103.9364707,
                        1.321643
                    ],
                    [
                        103.9363887,
                        1.3216271
                    ]
                ],
                "type": "LineString"
            },
            "id": "0",
            "properties": {
                "WAYID": 190637
            },
            "type": "Feature"
        },
        {
            "geometry": {
                "coordinates": [
                    [
                        103.9365788,
                        1.3202255
                    ],
                    [
                        103.9339101,
                        1.3205415
                    ],
                    [
                        103.9363223,
                        1.3202778
                    ]
                ],
                "type": "LineString"
            },
            "id": "1",
            "properties": {
                "WAYID": 190888
            },
            "type": "Feature"
        }
    ],
    "type": "FeatureCollection"
}

但在 properties 下，我希望看到其余的标签列，例如 'tag2','tag3','tag4','tag5'

我在这里错过了什么？如果有人能告诉我为什么会这样，我将不胜感激，谢谢！

编辑：

对于同样的问题，如果我更改几行数据以具有更多标签，例如：

import pandas as pd
import geopandas as gpd
from shapely.geometry import LineString
import io

# Question script (edit): same pipeline, but with tag columns up to tag20 and grouping on WAYID plus every tag column.
# Column names for the header-less input; tag2..tag20 receive the free-form tag values that follow WAYID.
col = ['lat','long','pointID','WAYID','tag2','tag3','tag4','tag5','tag6','tag7','tag8','tag9','tag10','tag11','tag12','tag13','tag14','tag15','tag16','tag17','tag18','tag19','tag20']
data = '''lat=1.3218368,long=103.9364834,107244,190637,shelter,yes,highway,footway
lat=1.3208156,long=103.9365417,106940,190637,highway,footway
lat=1.3206226,long=103.9367689,107034,190637,highway,footway
lat=1.3202877,long=103.9345338,106640,190637,shelter,yes,highway,footway
lat=1.3235089,long=103.9344606,107148,190637,highway,footway,shelter,yes
lat=1.3207544,long=103.9370296,107041,190637,highway,footway
lat=1.3218821,long=103.9364744,107243,190637,shelter,yes,highway,footway
lat=1.3202255,long=103.9365788,106947,190888,shelter,yes,highway,footway
lat=1.3219285,long=103.9367017,107242,190637,shelter,yes,highway,footway
lat=1.3203222,long=103.936561,106946,190637,shelter,yes,highway,footway
lat=1.320661,long=103.936842,107036,190637,highway,footway
lat=1.3205415,long=103.9339101,106642,190888,shelter,yes,highway,footway
lat=1.3207378,long=103.9371016,107043,190637,shelter,yes,highway,footway
lat=1.3237604,long=103.933684,106563,190637,shelter,yes,highway,footway
lat=1.3237205,long=103.9355026,107115,190637,highway,footway,shelter,yes
lat=1.321643,long=103.9364707,107241,190637,shelter,yes,highway,footway
lat=1.3224845,long=103.9332554,106525,116692201,addr:housenumber,4,residential,BLOCK,addr:country,AG,building:levels,14,footway,sidewalk,addr:street,Random South Avenue 10,addr:postcode,460004,building,residential,addr:city,Boo
lat=1.3217691,long=103.9348351,106119,190571,highway,footway
lat=1.323215,long=103.9330919,106524,116692204,addr:housenumber,23,residential,BLOCK,addr:country,AG,building:levels,14,addr:street,Random Street Name 1,addr:postcode,460011,building,residential,addr:city,Boo
lat=1.3202778,long=103.9363223,106945,190888,shelter,yes,highway,footway
lat=1.3216271,long=103.9363887,107240,190637,shelter,yes,highway,footway'''

# Load the CSV text as a DataFrame (replace io.StringIO(data) with the CSV filename).
# The converters strip the "lat=" / "long=" prefixes and turn the remainder into floats.
df = pd.read_csv(io.StringIO(data), names=col, sep=',', engine='python', converters={'lat': lambda x: float(x.split('=')[1]), 'long': lambda x: float(x.split('=')[1])})
# Alternative: read the same layout from a text file instead of the inline string.
#df = pd.read_csv("latlongRemoveComma.txt", names=col, sep=',', engine='python', converters={'lat': lambda x: float(x.split('=')[1]), 'long': lambda x: float(x.split('=')[1])})
    
    
# Wrap the DataFrame as a GeoDataFrame with one point per row (x = long, y = lat).
gdf = gpd.GeoDataFrame(df, geometry=gpd.points_from_xy(df.long, df.lat))
# Group by WAYID and every tag column, turning each group's points into a single LineString.
# NOTE(review): no row in `data` has enough tags to fill tag20, so that key is NaN everywhere;
# groupby drops NaN keys by default (dropna=True), which would explain the empty result - confirm.
#gdf = gdf.groupby(['description'])['geometry'].apply(lambda p: LineString(zip(p.x, p.y)) if len(p) > 1 else Point(p.x, p.y))
gdf = gdf.groupby(['WAYID','tag2','tag3','tag4','tag5','tag6','tag7','tag8','tag9','tag10','tag11','tag12','tag13','tag14','tag15','tag16','tag17','tag18','tag19','tag20'])['geometry'].apply(lambda x: LineString(x.tolist())).reset_index()
#gdf.groupby(['description'])['geometry'].apply(LineString)
    
# Serialise the grouped result as a GeoJSON string.
jsonLoad = gdf.to_json()

它返回给我的是一个空数据集，但我希望它主要按 WAYID 分组，并附加所有相应的标签：

{
    "features": [],
    "type": "FeatureCollection"
}

Answer 1

问题在于：当您只按 WAYID 分组并且只取 geometry 列时，其他所有列都会被丢弃，这就是所有标签消失的原因。请改为这样做：

import pandas as pd
import geopandas as gpd
from shapely.geometry import LineString
import io

# Answer script: same pipeline as the question, but the tag columns are added to the groupby keys so they survive into the output.
# Column names for the header-less input; tag2..tag5 receive the free-form tag values that follow WAYID.
col = ['lat','long','pointID','WAYID','tag2','tag3','tag4','tag5']
data = '''lat=1.3218368,long=103.9364834,107244,190637,shelter,yes,highway,footway
lat=1.3208156,long=103.9365417,106940,190637,highway,footway
lat=1.3206226,long=103.9367689,107034,190637,highway,footway
lat=1.3202877,long=103.9345338,106640,190637,shelter,yes,highway,footway
lat=1.3235089,long=103.9344606,107148,190637,highway,footway,shelter,yes
lat=1.3207544,long=103.9370296,107041,190637,highway,footway
lat=1.3218821,long=103.9364744,107243,190637,shelter,yes,highway,footway
lat=1.3202255,long=103.9365788,106947,190888,shelter,yes,highway,footway
lat=1.3219285,long=103.9367017,107242,190637,shelter,yes,highway,footway
lat=1.3203222,long=103.936561,106946,190637,shelter,yes,highway,footway
lat=1.320661,long=103.936842,107036,190637,highway,footway
lat=1.3205415,long=103.9339101,106642,190888,shelter,yes,highway,footway
lat=1.3207378,long=103.9371016,107043,190637,shelter,yes,highway,footway
lat=1.3237604,long=103.933684,106563,190637,shelter,yes,highway,footway
lat=1.3237205,long=103.9355026,107115,190637,highway,footway,shelter,yes
lat=1.321643,long=103.9364707,107241,190637,shelter,yes,highway,footway
lat=1.3202778,long=103.9363223,106945,190888,shelter,yes,highway,footway
lat=1.3216271,long=103.9363887,107240,190637,shelter,yes,highway,footway'''

# Load the CSV text as a DataFrame (replace io.StringIO(data) with the CSV filename).
# The converters strip the "lat=" / "long=" prefixes and turn the remainder into floats.
df = pd.read_csv(io.StringIO(data), names=col, sep=',', engine='python', converters={'lat': lambda x: float(x.split('=')[1]), 'long': lambda x: float(x.split('=')[1])})
# Alternative: read the same layout from a text file instead of the inline string.
#df = pd.read_csv("latlong.txt", names=col, sep=',', engine='python', converters={'lat': lambda x: float(x.split('=')[1]), 'long': lambda x: float(x.split('=')[1])})
    
    
# Wrap the DataFrame as a GeoDataFrame with one point per row (x = long, y = lat).
gdf = gpd.GeoDataFrame(df, geometry=gpd.points_from_xy(df.long, df.lat))
# Group by WAYID and the tag columns; the grouping keys come back as columns after reset_index(),
# so they appear as feature properties. Points of one WAYID with a different tag combination
# end up in separate LineStrings.
# NOTE(review): rows whose tag4/tag5 are NaN are presumably dropped by groupby's default dropna=True - confirm.
#gdf = gdf.groupby(['description'])['geometry'].apply(lambda p: LineString(zip(p.x, p.y)) if len(p) > 1 else Point(p.x, p.y))
gdf = gdf.groupby(['WAYID', 'tag2', 'tag3', 'tag4', 'tag5'])['geometry'].apply(lambda x: LineString(x.tolist())).reset_index()
#gdf.groupby(['description'])['geometry'].apply(LineString)
    
# Serialise the grouped result as a GeoJSON string.
jsonLoad = gdf.to_json()

然后再次运行你的代码。这将返回：

{
    "features": [
        {
            "geometry": {
                "coordinates": [
                    [
                        103.9344606,
                        1.3235089
                    ],
                    [
                        103.9355026,
                        1.3237205
                    ]
                ],
                "type": "LineString"
            },
            "id": "0",
            "properties": {
                "WAYID": 190637,
                "tag2": "highway",
                "tag3": "footway",
                "tag4": "shelter",
                "tag5": "yes"
            },
            "type": "Feature"
        },
        {
            "geometry": {
                "coordinates": [
                    [
                        103.9364834,
                        1.3218368
                    ],
                    [
                        103.9345338,
                        1.3202877
                    ],
                    [
                        103.9364744,
                        1.3218821
                    ],
                    [
                        103.9367017,
                        1.3219285
                    ],
                    [
                        103.936561,
                        1.3203222
                    ],
                    [
                        103.9371016,
                        1.3207378
                    ],
                    [
                        103.933684,
                        1.3237604
                    ],
                    [
                        103.9364707,
                        1.321643
                    ],
                    [
                        103.9363887,
                        1.3216271
                    ]
                ],
                "type": "LineString"
            },
            "id": "1",
            "properties": {
                "WAYID": 190637,
                "tag2": "shelter",
                "tag3": "yes",
                "tag4": "highway",
                "tag5": "footway"
            },
            "type": "Feature"
        },
        {
            "geometry": {
                "coordinates": [
                    [
                        103.9365788,
                        1.3202255
                    ],
                    [
                        103.9339101,
                        1.3205415
                    ],
                    [
                        103.9363223,
                        1.3202778
                    ]
                ],
                "type": "LineString"
            },
            "id": "2",
            "properties": {
                "WAYID": 190888,
                "tag2": "shelter",
                "tag3": "yes",
                "tag4": "highway",
                "tag5": "footway"
            },
            "type": "Feature"
        }
    ],
    "type": "FeatureCollection"
}

更新:

执行此操作以保留所有列：


# Build one point geometry per row from the long/lat columns.
points = gpd.GeoDataFrame(df, geometry=gpd.points_from_xy(df.long, df.lat))
# Order the rows by pointID, then collapse to one row per WAYID.
# first() keeps the first non-null value of every column, so all tag columns are retained.
ordered = points.sort_values("pointID")
gdf = ordered.groupby("WAYID", as_index=False).first()

# Serialise the result as a GeoJSON string.
jsonLoad = gdf.to_json()

然后再次运行您的代码，得到：

{
    "features": [
        {
            "geometry": {
                "coordinates": [
                    103.9348351,
                    1.3217691
                ],
                "type": "Point"
            },
            "id": "0",
            "properties": {
                "WAYID": 190571,
                "lat": 1.3217691,
                "long": 103.9348351,
                "pointID": 106119,
                "tag10": null,
                "tag11": null,
                "tag12": null,
                "tag13": null,
                "tag14": null,
                "tag15": null,
                "tag16": null,
                "tag17": null,
                "tag18": null,
                "tag19": null,
                "tag2": "highway",
                "tag20": null,
                "tag3": "footway",
                "tag4": null,
                "tag5": null,
                "tag6": null,
                "tag7": null,
                "tag8": null,
                "tag9": null
            },
            "type": "Feature"
        },
        {
            "geometry": {
                "coordinates": [
                    103.933684,
                    1.3237604
                ],
                "type": "Point"
            },
            "id": "1",
            "properties": {
                "WAYID": 190637,
                "lat": 1.3237604,
                "long": 103.933684,
                "pointID": 106563,
                "tag10": null,
                "tag11": null,
                "tag12": null,
                "tag13": null,
                "tag14": null,
                "tag15": null,
                "tag16": null,
                "tag17": null,
                "tag18": null,
                "tag19": null,
                "tag2": "shelter",
                "tag20": null,
                "tag3": "yes",
                "tag4": "highway",
                "tag5": "footway",
                "tag6": null,
                "tag7": null,
                "tag8": null,
                "tag9": null
            },
            "type": "Feature"
        },
        {
            "geometry": {
                "coordinates": [
                    103.9339101,
                    1.3205415
                ],
                "type": "Point"
            },
            "id": "2",
            "properties": {
                "WAYID": 190888,
                "lat": 1.3205415,
                "long": 103.9339101,
                "pointID": 106642,
                "tag10": null,
                "tag11": null,
                "tag12": null,
                "tag13": null,
                "tag14": null,
                "tag15": null,
                "tag16": null,
                "tag17": null,
                "tag18": null,
                "tag19": null,
                "tag2": "shelter",
                "tag20": null,
                "tag3": "yes",
                "tag4": "highway",
                "tag5": "footway",
                "tag6": null,
                "tag7": null,
                "tag8": null,
                "tag9": null
            },
            "type": "Feature"
        },
        {
            "geometry": {
                "coordinates": [
                    103.9332554,
                    1.3224845
                ],
                "type": "Point"
            },
            "id": "3",
            "properties": {
                "WAYID": 116692201,
                "lat": 1.3224845,
                "long": 103.9332554,
                "pointID": 106525,
                "tag10": "footway",
                "tag11": "sidewalk",
                "tag12": "addr:street",
                "tag13": "Random South Avenue 10",
                "tag14": "addr:postcode",
                "tag15": "460004",
                "tag16": "building",
                "tag17": "residential",
                "tag18": "addr:city",
                "tag19": "Boo",
                "tag2": "addr:housenumber",
                "tag20": null,
                "tag3": "4",
                "tag4": "residential",
                "tag5": "BLOCK",
                "tag6": "addr:country",
                "tag7": "AG",
                "tag8": "building:levels",
                "tag9": 14.0
            },
            "type": "Feature"
        },
        {
            "geometry": {
                "coordinates": [
                    103.9330919,
                    1.323215
                ],
                "type": "Point"
            },
            "id": "4",
            "properties": {
                "WAYID": 116692204,
                "lat": 1.323215,
                "long": 103.9330919,
                "pointID": 106524,
                "tag10": "addr:street",
                "tag11": "Random Street Name 1",
                "tag12": "addr:postcode",
                "tag13": "460011",
                "tag14": "building",
                "tag15": "residential",
                "tag16": "addr:city",
                "tag17": "Boo",
                "tag18": null,
                "tag19": null,
                "tag2": "addr:housenumber",
                "tag20": null,
                "tag3": "23",
                "tag4": "residential",
                "tag5": "BLOCK",
                "tag6": "addr:country",
                "tag7": "AG",
                "tag8": "building:levels",
                "tag9": 14.0
            },
            "type": "Feature"
        }
    ],
    "type": "FeatureCollection"
}

使用 python 数据帧未将数据写入文件的问题

issue with data not being written to file using python dataframe

python

geojson

pandas