使用 Python DataFrame 时数据未写入文件的问题

issue with data not being written to file using python dataframe

我正在尝试使用我拥有的输入 txt 文件创建一个 geojson 文件。这是我的代码,但似乎有错误。

import pandas as pd
import geopandas as gpd
from shapely.geometry import LineString
import io

col = ['lat','long','pointID','WAYID','tag2','tag3','tag4','tag5']
data = '''lat=1.3218368,long=103.9364834,107244,190637,shelter,yes,highway,footway
lat=1.3208156,long=103.9365417,106940,190637,highway,footway
lat=1.3206226,long=103.9367689,107034,190637,highway,footway
lat=1.3202877,long=103.9345338,106640,190637,shelter,yes,highway,footway
lat=1.3235089,long=103.9344606,107148,190637,highway,footway,shelter,yes
lat=1.3207544,long=103.9370296,107041,190637,highway,footway
lat=1.3218821,long=103.9364744,107243,190637,shelter,yes,highway,footway
lat=1.3202255,long=103.9365788,106947,190888,shelter,yes,highway,footway
lat=1.3219285,long=103.9367017,107242,190637,shelter,yes,highway,footway
lat=1.3203222,long=103.936561,106946,190637,shelter,yes,highway,footway
lat=1.320661,long=103.936842,107036,190637,highway,footway
lat=1.3205415,long=103.9339101,106642,190888,shelter,yes,highway,footway
lat=1.3207378,long=103.9371016,107043,190637,shelter,yes,highway,footway
lat=1.3237604,long=103.933684,106563,190637,shelter,yes,highway,footway,random
lat=1.3237205,long=103.9355026,107115,190637,highway,footway,shelter,yes
lat=1.321643,long=103.9364707,107241,190637,shelter,yes,highway,footway
lat=1.3202778,long=103.9363223,106945,190888,shelter,yes,highway,footway
lat=1.3216271,long=103.9363887,107240,190637,shelter,yes,highway,footway'''

# Load the comma-separated text into a DataFrame, naming the columns from `col`.
# (Replace io.StringIO(data) with the csv filename to read from disk.)
# The converters strip the "lat=" / "long=" prefixes by splitting on '=' and
# turn the remainder into floats while loading.
df = pd.read_csv(io.StringIO(data), names=col, sep=',', engine='python', converters={'lat': lambda x: float(x.split('=')[1]), 'long': lambda x: float(x.split('=')[1])})
# Alternative (not used): read the same data from a text file instead.
#df = pd.read_csv("latlong.txt", names=col, sep=',', engine='python', converters={'lat': lambda x: float(x.split('=')[1]), 'long': lambda x: float(x.split('=')[1])})


# Wrap the DataFrame in a GeoDataFrame with one point geometry per row,
# using x = long and y = lat.
gdf = gpd.GeoDataFrame(df, geometry=gpd.points_from_xy(df.long, df.lat))
# Group the points by WAYID and join each group's points into one LineString.
# Only the 'geometry' column is selected after grouping, so once reset_index()
# runs the result holds just WAYID and geometry -- the tag columns are not
# carried over.
# Alternative (not used): variant keyed on a 'description' column.
#gdf = gdf.groupby(['description'])['geometry'].apply(lambda p: LineString(zip(p.x, p.y)) if len(p) > 1 else Point(p.x, p.y))
gdf = gdf.groupby(['WAYID'])['geometry'].apply(lambda x: LineString(x.tolist())).reset_index()
#gdf.groupby(['description'])['geometry'].apply(LineString)

# Serialise the grouped result to a JSON string (parsed with json.loads in the
# saving step that follows).
jsonLoad = gdf.to_json()

如果上面的方法有效,那么这部分会生成我想要的文件格式。

import json
from geojson import Point, Feature, dump
# Save the data to a file.
# Parse the JSON string produced by gdf.to_json() back into Python objects.
parsed = json.loads(jsonLoad)
# Pretty-print to stdout; sort_keys=True makes the keys appear alphabetically.
print(json.dumps(parsed, indent=4, sort_keys=True))
#parsed = gdf.to_json()
# Write the parsed structure to 'myfile.geojson' using geojson's dump().
with open('myfile.geojson', 'w') as f:
   dump(parsed, f,indent=1)

我试图根据 WAYID 对这些点进行分组,但在生成的结果文件中,GeoJSON 里并没有附带 shelter,yes,highway,footway 这样的标签。我不明白为什么这些标签没有被存储到 GeoJSON 中?

例如这是我生成的文件,

{
    "features": [
        {
            "geometry": {
                "coordinates": [
                    [
                        103.9364834,
                        1.3218368
                    ],
                    [
                        103.9365417,
                        1.3208156
                    ],
                    [
                        103.9367689,
                        1.3206226
                    ],
                    [
                        103.9345338,
                        1.3202877
                    ],
                    [
                        103.9344606,
                        1.3235089
                    ],
                    [
                        103.9370296,
                        1.3207544
                    ],
                    [
                        103.9364744,
                        1.3218821
                    ],
                    [
                        103.9367017,
                        1.3219285
                    ],
                    [
                        103.936561,
                        1.3203222
                    ],
                    [
                        103.936842,
                        1.320661
                    ],
                    [
                        103.9371016,
                        1.3207378
                    ],
                    [
                        103.933684,
                        1.3237604
                    ],
                    [
                        103.9355026,
                        1.3237205
                    ],
                    [
                        103.9364707,
                        1.321643
                    ],
                    [
                        103.9363887,
                        1.3216271
                    ]
                ],
                "type": "LineString"
            },
            "id": "0",
            "properties": {
                "WAYID": 190637
            },
            "type": "Feature"
        },
        {
            "geometry": {
                "coordinates": [
                    [
                        103.9365788,
                        1.3202255
                    ],
                    [
                        103.9339101,
                        1.3205415
                    ],
                    [
                        103.9363223,
                        1.3202778
                    ]
                ],
                "type": "LineString"
            },
            "id": "1",
            "properties": {
                "WAYID": 190888
            },
            "type": "Feature"
        }
    ],
    "type": "FeatureCollection"
}

但在 properties 下,我希望看到其余的标签列,例如 'tag2','tag3','tag4','tag5'

我在这里错过了什么?如果有人能告诉我为什么会这样,我将不胜感激,谢谢!

编辑:

对于同样的问题,如果我更改几行数据以具有更多标签,例如:

import pandas as pd
import geopandas as gpd
from shapely.geometry import LineString
import io

col = ['lat','long','pointID','WAYID','tag2','tag3','tag4','tag5','tag6','tag7','tag8','tag9','tag10','tag11','tag12','tag13','tag14','tag15','tag16','tag17','tag18','tag19','tag20']
data = '''lat=1.3218368,long=103.9364834,107244,190637,shelter,yes,highway,footway
lat=1.3208156,long=103.9365417,106940,190637,highway,footway
lat=1.3206226,long=103.9367689,107034,190637,highway,footway
lat=1.3202877,long=103.9345338,106640,190637,shelter,yes,highway,footway
lat=1.3235089,long=103.9344606,107148,190637,highway,footway,shelter,yes
lat=1.3207544,long=103.9370296,107041,190637,highway,footway
lat=1.3218821,long=103.9364744,107243,190637,shelter,yes,highway,footway
lat=1.3202255,long=103.9365788,106947,190888,shelter,yes,highway,footway
lat=1.3219285,long=103.9367017,107242,190637,shelter,yes,highway,footway
lat=1.3203222,long=103.936561,106946,190637,shelter,yes,highway,footway
lat=1.320661,long=103.936842,107036,190637,highway,footway
lat=1.3205415,long=103.9339101,106642,190888,shelter,yes,highway,footway
lat=1.3207378,long=103.9371016,107043,190637,shelter,yes,highway,footway
lat=1.3237604,long=103.933684,106563,190637,shelter,yes,highway,footway
lat=1.3237205,long=103.9355026,107115,190637,highway,footway,shelter,yes
lat=1.321643,long=103.9364707,107241,190637,shelter,yes,highway,footway
lat=1.3224845,long=103.9332554,106525,116692201,addr:housenumber,4,residential,BLOCK,addr:country,AG,building:levels,14,footway,sidewalk,addr:street,Random South Avenue 10,addr:postcode,460004,building,residential,addr:city,Boo
lat=1.3217691,long=103.9348351,106119,190571,highway,footway
lat=1.323215,long=103.9330919,106524,116692204,addr:housenumber,23,residential,BLOCK,addr:country,AG,building:levels,14,addr:street,Random Street Name 1,addr:postcode,460011,building,residential,addr:city,Boo
lat=1.3202778,long=103.9363223,106945,190888,shelter,yes,highway,footway
lat=1.3216271,long=103.9363887,107240,190637,shelter,yes,highway,footway'''

# Load the comma-separated text into a DataFrame, naming the columns from `col`.
# (Replace io.StringIO(data) with the csv filename to read from disk.)
# The converters strip the "lat=" / "long=" prefixes by splitting on '=' and
# turn the remainder into floats while loading.
df = pd.read_csv(io.StringIO(data), names=col, sep=',', engine='python', converters={'lat': lambda x: float(x.split('=')[1]), 'long': lambda x: float(x.split('=')[1])})
# Alternative (not used): read the same data from a text file instead.
#df = pd.read_csv("latlongRemoveComma.txt", names=col, sep=',', engine='python', converters={'lat': lambda x: float(x.split('=')[1]), 'long': lambda x: float(x.split('=')[1])})


# Wrap the DataFrame in a GeoDataFrame with one point geometry per row,
# using x = long and y = lat.
gdf = gpd.GeoDataFrame(df, geometry=gpd.points_from_xy(df.long, df.lat))
# Group by WAYID together with every tag column (tag2..tag20) and join each
# group's points into one LineString.
# NOTE(review): pandas groupby excludes rows whose group key is NaN by default
# (dropna=True). No row in `data` fills all of tag2..tag20, so every row has at
# least one missing key -- this is presumably why the resulting feature list is
# empty. Confirm against the pandas groupby documentation.
# Alternative (not used): variant keyed on a 'description' column.
#gdf = gdf.groupby(['description'])['geometry'].apply(lambda p: LineString(zip(p.x, p.y)) if len(p) > 1 else Point(p.x, p.y))
gdf = gdf.groupby(['WAYID','tag2','tag3','tag4','tag5','tag6','tag7','tag8','tag9','tag10','tag11','tag12','tag13','tag14','tag15','tag16','tag17','tag18','tag19','tag20'])['geometry'].apply(lambda x: LineString(x.tolist())).reset_index()
#gdf.groupby(['description'])['geometry'].apply(LineString)

# Serialise the grouped result to a JSON string.
jsonLoad = gdf.to_json()

它返回给我的是一个空数据集,但我希望它主要按 WAYID 分组,并附加所有相应的标签:

{
    "features": [],
    "type": "FeatureCollection"
}

问题在于:当您只按 WAYID 分组并且只选取 geometry 列时,其他所有列都会被丢弃,这就是所有标签消失的原因。请改为这样做:

import pandas as pd
import geopandas as gpd
from shapely.geometry import LineString
import io

col = ['lat','long','pointID','WAYID','tag2','tag3','tag4','tag5']
data = '''lat=1.3218368,long=103.9364834,107244,190637,shelter,yes,highway,footway
lat=1.3208156,long=103.9365417,106940,190637,highway,footway
lat=1.3206226,long=103.9367689,107034,190637,highway,footway
lat=1.3202877,long=103.9345338,106640,190637,shelter,yes,highway,footway
lat=1.3235089,long=103.9344606,107148,190637,highway,footway,shelter,yes
lat=1.3207544,long=103.9370296,107041,190637,highway,footway
lat=1.3218821,long=103.9364744,107243,190637,shelter,yes,highway,footway
lat=1.3202255,long=103.9365788,106947,190888,shelter,yes,highway,footway
lat=1.3219285,long=103.9367017,107242,190637,shelter,yes,highway,footway
lat=1.3203222,long=103.936561,106946,190637,shelter,yes,highway,footway
lat=1.320661,long=103.936842,107036,190637,highway,footway
lat=1.3205415,long=103.9339101,106642,190888,shelter,yes,highway,footway
lat=1.3207378,long=103.9371016,107043,190637,shelter,yes,highway,footway
lat=1.3237604,long=103.933684,106563,190637,shelter,yes,highway,footway
lat=1.3237205,long=103.9355026,107115,190637,highway,footway,shelter,yes
lat=1.321643,long=103.9364707,107241,190637,shelter,yes,highway,footway
lat=1.3202778,long=103.9363223,106945,190888,shelter,yes,highway,footway
lat=1.3216271,long=103.9363887,107240,190637,shelter,yes,highway,footway'''

# Load the comma-separated text into a DataFrame, naming the columns from `col`.
# (Replace io.StringIO(data) with the csv filename to read from disk.)
# The converters strip the "lat=" / "long=" prefixes by splitting on '=' and
# turn the remainder into floats while loading.
df = pd.read_csv(io.StringIO(data), names=col, sep=',', engine='python', converters={'lat': lambda x: float(x.split('=')[1]), 'long': lambda x: float(x.split('=')[1])})
# Alternative (not used): read the same data from a text file instead.
#df = pd.read_csv("latlong.txt", names=col, sep=',', engine='python', converters={'lat': lambda x: float(x.split('=')[1]), 'long': lambda x: float(x.split('=')[1])})


# Wrap the DataFrame in a GeoDataFrame with one point geometry per row,
# using x = long and y = lat.
gdf = gpd.GeoDataFrame(df, geometry=gpd.points_from_xy(df.long, df.lat))
# Group by WAYID plus tag2..tag5 so the tags survive as columns after
# reset_index(), joining each group's points into one LineString.
# Consequences visible in the output that follows: a WAYID whose rows carry
# different tag combinations is split into one feature per combination, and
# rows that have only two tags (tag4/tag5 missing) do not appear in any feature.
# Alternative (not used): variant keyed on a 'description' column.
#gdf = gdf.groupby(['description'])['geometry'].apply(lambda p: LineString(zip(p.x, p.y)) if len(p) > 1 else Point(p.x, p.y))
gdf = gdf.groupby(['WAYID', 'tag2', 'tag3', 'tag4', 'tag5'])['geometry'].apply(lambda x: LineString(x.tolist())).reset_index()
#gdf.groupby(['description'])['geometry'].apply(LineString)

# Serialise the grouped result to a JSON string.
jsonLoad = gdf.to_json()

然后再次运行您的代码。这将返回:

{
    "features": [
        {
            "geometry": {
                "coordinates": [
                    [
                        103.9344606,
                        1.3235089
                    ],
                    [
                        103.9355026,
                        1.3237205
                    ]
                ],
                "type": "LineString"
            },
            "id": "0",
            "properties": {
                "WAYID": 190637,
                "tag2": "highway",
                "tag3": "footway",
                "tag4": "shelter",
                "tag5": "yes"
            },
            "type": "Feature"
        },
        {
            "geometry": {
                "coordinates": [
                    [
                        103.9364834,
                        1.3218368
                    ],
                    [
                        103.9345338,
                        1.3202877
                    ],
                    [
                        103.9364744,
                        1.3218821
                    ],
                    [
                        103.9367017,
                        1.3219285
                    ],
                    [
                        103.936561,
                        1.3203222
                    ],
                    [
                        103.9371016,
                        1.3207378
                    ],
                    [
                        103.933684,
                        1.3237604
                    ],
                    [
                        103.9364707,
                        1.321643
                    ],
                    [
                        103.9363887,
                        1.3216271
                    ]
                ],
                "type": "LineString"
            },
            "id": "1",
            "properties": {
                "WAYID": 190637,
                "tag2": "shelter",
                "tag3": "yes",
                "tag4": "highway",
                "tag5": "footway"
            },
            "type": "Feature"
        },
        {
            "geometry": {
                "coordinates": [
                    [
                        103.9365788,
                        1.3202255
                    ],
                    [
                        103.9339101,
                        1.3205415
                    ],
                    [
                        103.9363223,
                        1.3202778
                    ]
                ],
                "type": "LineString"
            },
            "id": "2",
            "properties": {
                "WAYID": 190888,
                "tag2": "shelter",
                "tag3": "yes",
                "tag4": "highway",
                "tag5": "footway"
            },
            "type": "Feature"
        }
    ],
    "type": "FeatureCollection"
}

更新:

执行此操作以保留所有列:


# Build one point geometry per row, using x = long and y = lat.
gdf = gpd.GeoDataFrame(df, geometry=gpd.points_from_xy(df.long, df.lat))
# Sort by pointID, then keep a single row per WAYID via first(); as_index=False
# keeps WAYID as a regular column, and all other columns are retained.
# NOTE: because only one row per WAYID is kept, each feature's geometry in the
# output that follows is a single Point, not a LineString of the whole way.
gdf = gdf.sort_values("pointID").groupby("WAYID", as_index=False).first()


# Serialise the result to a JSON string.
jsonLoad = gdf.to_json()

然后再次运行您的代码,结果为:

{
    "features": [
        {
            "geometry": {
                "coordinates": [
                    103.9348351,
                    1.3217691
                ],
                "type": "Point"
            },
            "id": "0",
            "properties": {
                "WAYID": 190571,
                "lat": 1.3217691,
                "long": 103.9348351,
                "pointID": 106119,
                "tag10": null,
                "tag11": null,
                "tag12": null,
                "tag13": null,
                "tag14": null,
                "tag15": null,
                "tag16": null,
                "tag17": null,
                "tag18": null,
                "tag19": null,
                "tag2": "highway",
                "tag20": null,
                "tag3": "footway",
                "tag4": null,
                "tag5": null,
                "tag6": null,
                "tag7": null,
                "tag8": null,
                "tag9": null
            },
            "type": "Feature"
        },
        {
            "geometry": {
                "coordinates": [
                    103.933684,
                    1.3237604
                ],
                "type": "Point"
            },
            "id": "1",
            "properties": {
                "WAYID": 190637,
                "lat": 1.3237604,
                "long": 103.933684,
                "pointID": 106563,
                "tag10": null,
                "tag11": null,
                "tag12": null,
                "tag13": null,
                "tag14": null,
                "tag15": null,
                "tag16": null,
                "tag17": null,
                "tag18": null,
                "tag19": null,
                "tag2": "shelter",
                "tag20": null,
                "tag3": "yes",
                "tag4": "highway",
                "tag5": "footway",
                "tag6": null,
                "tag7": null,
                "tag8": null,
                "tag9": null
            },
            "type": "Feature"
        },
        {
            "geometry": {
                "coordinates": [
                    103.9339101,
                    1.3205415
                ],
                "type": "Point"
            },
            "id": "2",
            "properties": {
                "WAYID": 190888,
                "lat": 1.3205415,
                "long": 103.9339101,
                "pointID": 106642,
                "tag10": null,
                "tag11": null,
                "tag12": null,
                "tag13": null,
                "tag14": null,
                "tag15": null,
                "tag16": null,
                "tag17": null,
                "tag18": null,
                "tag19": null,
                "tag2": "shelter",
                "tag20": null,
                "tag3": "yes",
                "tag4": "highway",
                "tag5": "footway",
                "tag6": null,
                "tag7": null,
                "tag8": null,
                "tag9": null
            },
            "type": "Feature"
        },
        {
            "geometry": {
                "coordinates": [
                    103.9332554,
                    1.3224845
                ],
                "type": "Point"
            },
            "id": "3",
            "properties": {
                "WAYID": 116692201,
                "lat": 1.3224845,
                "long": 103.9332554,
                "pointID": 106525,
                "tag10": "footway",
                "tag11": "sidewalk",
                "tag12": "addr:street",
                "tag13": "Random South Avenue 10",
                "tag14": "addr:postcode",
                "tag15": "460004",
                "tag16": "building",
                "tag17": "residential",
                "tag18": "addr:city",
                "tag19": "Boo",
                "tag2": "addr:housenumber",
                "tag20": null,
                "tag3": "4",
                "tag4": "residential",
                "tag5": "BLOCK",
                "tag6": "addr:country",
                "tag7": "AG",
                "tag8": "building:levels",
                "tag9": 14.0
            },
            "type": "Feature"
        },
        {
            "geometry": {
                "coordinates": [
                    103.9330919,
                    1.323215
                ],
                "type": "Point"
            },
            "id": "4",
            "properties": {
                "WAYID": 116692204,
                "lat": 1.323215,
                "long": 103.9330919,
                "pointID": 106524,
                "tag10": "addr:street",
                "tag11": "Random Street Name 1",
                "tag12": "addr:postcode",
                "tag13": "460011",
                "tag14": "building",
                "tag15": "residential",
                "tag16": "addr:city",
                "tag17": "Boo",
                "tag18": null,
                "tag19": null,
                "tag2": "addr:housenumber",
                "tag20": null,
                "tag3": "23",
                "tag4": "residential",
                "tag5": "BLOCK",
                "tag6": "addr:country",
                "tag7": "AG",
                "tag8": "building:levels",
                "tag9": 14.0
            },
            "type": "Feature"
        }
    ],
    "type": "FeatureCollection"
}