在 python 中将数据框转换为地理数据框

Turning a dataframe into a geodata frame in python

我正在尝试将数据框转换为用于空间分析的地理数据框,但由于每个条目的开头都有单词 'POLYGON',我的坐标('geometry')列是字符串类型。

如何编辑我的数据,使我的坐标列只有数字?

ptal_lsoas['geometry']= ptal_lsoas['geometry'].apply(Point)
geo_ptal_lsoas = gpd.GeoDataFrame(ptal_lsoas, geometry='geometry')

这是我收到的错误消息

---------------------------------------------------------------------------
TypeError                                 Traceback (most recent call last)
<ipython-input-20-d175e91ad85f> in <module>
----> 1 ptal_lsoas['geometry']= ptal_lsoas['geometry'].apply(Point)
      2 geo_ptal_lsoas = gpd.GeoDataFrame(ptal_lsoas, geometry='geometry')

/opt/conda/lib/python3.8/site-packages/pandas/core/series.py in apply(self, func, convert_dtype, args, **kwargs)
   4354         dtype: float64
   4355         """
-> 4356         return SeriesApply(self, func, convert_dtype, args, kwargs).apply()
   4357 
   4358     def _reduce(

/opt/conda/lib/python3.8/site-packages/pandas/core/apply.py in apply(self)
   1034             return self.apply_str()
   1035 
-> 1036         return self.apply_standard()
   1037 
   1038     def agg(self):

/opt/conda/lib/python3.8/site-packages/pandas/core/apply.py in apply_standard(self)
   1090                 # List[Union[Callable[..., Any], str]]]]]"; expected
   1091                 # "Callable[[Any], Any]"
-> 1092                 mapped = lib.map_infer(
   1093                     values,
   1094                     f,  # type: ignore[arg-type]

/opt/conda/lib/python3.8/site-packages/pandas/_libs/lib.pyx in pandas._libs.lib.map_infer()

/opt/conda/lib/python3.8/site-packages/shapely/geometry/point.py in __init__(self, *args)
     46         BaseGeometry.__init__(self)
     47         if len(args) > 0:
---> 48             self._set_coords(*args)
     49 
     50     # Coordinate getters and setters

/opt/conda/lib/python3.8/site-packages/shapely/geometry/point.py in _set_coords(self, *args)
    131         self.empty()
    132         if len(args) == 1:
--> 133             self._geom, self._ndim = geos_point_from_py(args[0])
    134         elif len(args) > 3:
    135             raise TypeError("Point() takes at most 3 arguments ({} given)".format(len(args)))

/opt/conda/lib/python3.8/site-packages/shapely/geometry/point.py in geos_point_from_py(ob, update_geom, update_ndim)
    212         coords = ob
    213     n = len(coords)
--> 214     dx = c_double(coords[0])
    215     dy = c_double(coords[1])
    216     dz = None

TypeError: must be real number, not str

提前致谢!

编辑:

按照下面回答中的建议,我的坐标中现在已经没有 'POLYGON' 这个词了,但是当我尝试将它转换为地理数据框时,仍然出现下面的错误:

geo_ptal_lsoas = gpd.GeoDataFrame(ptal_lsoas, geometry='geometry')

---------------------------------------------------------------------------
TypeError                                 Traceback (most recent call last)
<ipython-input-27-c55e804329dc> in <module>
----> 1 geo_ptal_lsoas = gpd.GeoDataFrame(ptal_lsoas, geometry='geometry')

/opt/conda/lib/python3.8/site-packages/geopandas/geodataframe.py in __init__(self, geometry, crs, *args, **kwargs)
    165                 )
    166                 # TODO: raise error in 0.9 or 0.10.
--> 167             self.set_geometry(geometry, inplace=True)
    168 
    169         if geometry is None and crs:

/opt/conda/lib/python3.8/site-packages/geopandas/geodataframe.py in set_geometry(self, col, drop, inplace, crs)
    293 
    294         # Check that we are using a listlike of geometries
--> 295         level = _ensure_geometry(level, crs=crs)
    296         index = frame.index
    297         frame[geo_column_name] = level

/opt/conda/lib/python3.8/site-packages/geopandas/geodataframe.py in _ensure_geometry(data, crs)
     41             return GeoSeries(out, index=data.index, name=data.name)
     42         else:
---> 43             out = from_shapely(data, crs=crs)
     44             return out
     45 

/opt/conda/lib/python3.8/site-packages/geopandas/array.py in from_shapely(data, crs)
    166 
    167     """
--> 168     return GeometryArray(vectorized.from_shapely(data), crs=crs)
    169 
    170 

/opt/conda/lib/python3.8/site-packages/geopandas/_vectorized.py in from_shapely(data)
    131             out.append(None)
    132         else:
--> 133             raise TypeError("Input must be valid geometry objects: {0}".format(geom))
    134 
    135     if compat.USE_PYGEOS:

TypeError: Input must be valid geometry objects:  ((532105.0919998939 182011.23, 532162.4910001159 181867.7629999601, 532248.3160000765 181895.3249998323, 532282.6300000258 181906.4960000554, 532308.6079999561 181915.5200001549, 532303.4919998596 181814.1099998362, 532251.1259998521 181720.0000001175, 532267.7280001113 181643.780999956, 532213.063999875 181477.2030001849, 532282.2499999444 181460.4999999297, 532248.2490000051 181332.0360001744, 532227.687999856 181278.8749999276, 532173.1249999898 181263.453, 532074.3749998672 181338.2970001521, 532080.2499999747 181456.7499999724, 531948.313000044 181471.96900013, 531951.8749998901 181548.0000001803, 531985.8750000516 181583.4999998761, 532052.4999999914 181600.3909998685, 532064.3750001056 181561.5940001571, 532095.5629998141 181577.3510000679, 532077.7050000296 181797.669000078, 532069.8130000822 181825.9059999275, 532021.1880001619 181803.8910001202, 532022.3739999083 181893.4690001195, 532082.8759998722 181911.7809999972, 532105.0919998939 182011.23))

这是我的单元格当前的样子(显然有更多行)

geometry LSOA11CD AvPTAl2015
((532105.0919998939 182011.23, 532162.4910001... E01000001 69.8233
((532746.8130000263 181786.891000028, 532671.... E01000002 83.7820

谢谢!

第二次编辑:


print(ptal_lsoas.geometry.head())
0    POLYGON ((532105.092 182011.230, 532162.491 18...
1    POLYGON ((532746.813 181786.891, 532671.688 18...
2    POLYGON ((532135.145 182198.119, 532158.250 18...
3    POLYGON ((533807.946 180767.770, 533649.063 18...
4    POLYGON ((545122.049 184314.931, 545271.917 18...
Name: geometry, dtype: geometry

import matplotlib as mpl
mpl.use('TkAgg')

%matplotlib inline
import matplotlib.pyplot as plt
import rasterio 
import rasterio.plot
import rasterstats as rs
import pysal as ps
import mapclassify
import numpy as np
import pandas as pd
import geopandas as gpd
import seaborn as sns
import matplotlib.cm as cm
import urllib
import zipfile
import re
import os
import shapely
from shapely.geometry import Point

import random 
random.seed(123456789) 

pd.set_option('display.float_format', lambda x: '{:,.4f}'.format(x))

import warnings
warnings.simplefilter('ignore')

import zipfile

if os.path.isdir('data') is not True:
    print("Creating 'data' directory...")
    os.mkdir('data')

url  = 'https://github.com/cusp-london/Spatial-Data-Analysis/blob/master/LDN-LSOAs.zip?raw=true'
path = os.path.join("data","LDN-LSOAs.zip")

r    = urllib.request.urlretrieve(url, path)

z    = zipfile.ZipFile(path)
m    = z.extractall("data")

url  = 'https://github.com/cusp-london/Spatial-Data-Analysis/blob/master/NSSHRP_UNIT_URESPOP.zip?raw=true'
path = os.path.join("data","NSSHRP_UNIT_URESPOP.zip")

r    = urllib.request.urlretrieve(url, path)

url  = 'https://data.london.gov.uk/download/public-transport-accessibility-levels/77d9b319-931e-4090-bf8e-f578938bd352/LSOA2011%20AvPTAI2015.csv'
path = os.path.join("data","LSOA_PTAL.csv")

r    = urllib.request.urlretrieve(url, path)

london = pd.read_csv('LSOA_Data.csv')
ptal = pd.read_csv('LSOA_PTAL.csv')

lsoa_shapes = london [['geometry', 'LSOA11CD']]
lsoa_names = london [['LSOA11NM', 'LSOA11CD']]

lsoa_shapes.head()


geometry    LSOA11CD
0   POLYGON ((532105.0919998939 182011.23, 532162....   E01000001
1   POLYGON ((532746.8130000263 181786.891000028, ...   E01000002
2   POLYGON ((532135.1449999654 182198.1190000199,...   E01000003
3   POLYGON ((533807.9460001207 180767.7700000888,...   E01000005
4   POLYGON ((545122.048999952 184314.931000118, 5...   E01000006


lsoas = london [['geometry','LSOA11NM']]
lsoas = ptal.rename(columns={'LSOA11NM':'LSOA11CD'})
​
lsoas.head()
Out[10]:
LSOA11CD    AvPTAI2015  PTAL    PTAIHigh    PTAILow
0   E01000001   69.8233 6b  97.4435 35.9190
1   E01000002   83.7820 6b  117.9120    66.3503
2   E01000003   41.7417 6b  49.5318 37.3635
3   E01000005   85.8893 6b  120.8470    45.9168
4   E01000006   22.4558 5   34.1054 0.0000


Out[11]:
LSOA11CD    AvPTAI2015  PTAL    PTAIHigh    PTAILow
0   E01000001   69.8233 6b  97.4435 35.9190
1   E01000002   83.7820 6b  117.9120    66.3503
2   E01000003   41.7417 6b  49.5318 37.3635
3   E01000005   85.8893 6b  120.8470    45.9168
4   E01000006   22.4558 5   34.1054 0.0000

ptal_lsoas = pd.merge (lsoa_shapes, ptal, left_on = ['LSOA11CD'],
                  right_on = ['LSOA11CD'],
                  how = 'inner')

ptal_lsoas

geometry    LSOA11CD    AvPTAI2015  PTAL    PTAIHigh    PTAILow
0   POLYGON ((532105.0919998939 182011.23, 532162....   E01000001   69.8233 6b  97.4435 35.9190
1   POLYGON ((532746.8130000263 181786.891000028, ...   E01000002   83.7820 6b  117.9120    66.3503
2   POLYGON ((532135.1449999654 182198.1190000199,...   E01000003   41.7417 6b  49.5318 37.3635
3   POLYGON ((533807.9460001207 180767.7700000888,...   E01000005   85.8893 6b  120.8470    45.9168
4   POLYGON ((545122.048999952 184314.931000118, 5...   E01000006   22.4558 5   34.1054 0.0000
... ... ... ... ... ... ...
4830    POLYGON ((544642.6800000862 179824.6740001431,...   E01033742   3.9532  1b  5.7986  3.0585
4831    POLYGON ((546579.1949997952 181097.8129996927,...   E01033743   3.8174  1b  14.3944 0.0000
4832    POLYGON ((544536.4859999884 179447.1149999507,...   E01033744   10.1709 3   23.5689 3.5750
4833    POLYGON ((546415.7449998577 180152.2700002448,...   E01033745   4.6838  1b  11.4851 0.0000
4834    POLYGON ((538140.0000000072 177313, 538129.176...   E01033746   18.9882 4   24.1235 14.5436
4835 rows × 6 columns

ptal_lsoas

geometry    LSOA11CD    AvPTAI2015  PTAL    PTAIHigh    PTAILow
0   POLYGON ((532105.092 182011.230, 532162.491 18...   E01000001   69.8233 6b  97.4435 35.9190
1   POLYGON ((532746.813 181786.891, 532671.688 18...   E01000002   83.7820 6b  117.9120    66.3503
2   POLYGON ((532135.145 182198.119, 532158.250 18...   E01000003   41.7417 6b  49.5318 37.3635
3   POLYGON ((533807.946 180767.770, 533649.063 18...   E01000005   85.8893 6b  120.8470    45.9168
4   POLYGON ((545122.049 184314.931, 545271.917 18...   E01000006   22.4558 5   34.1054 0.0000
... ... ... ... ... ... ...
4830    POLYGON ((544642.680 179824.674, 544766.313 17...   E01033742   3.9532  1b  5.7986  3.0585
4831    POLYGON ((546579.195 181097.813, 546687.036 18...   E01033743   3.8174  1b  14.3944 0.0000
4832    POLYGON ((544536.486 179447.115, 544602.630 17...   E01033744   10.1709 3   23.5689 3.5750
4833    POLYGON ((546415.745 180152.270, 546320.715 18...   E01033745   4.6838  1b  11.4851 0.0000
4834    POLYGON ((538140.000 177313.000, 538129.177 17...   E01033746   18.9882 4   24.1235 14.5436
4835 rows × 6 columns

ptal_lsoas['geometry']= gpd.GeoSeries.from_wkt(ptal_lsoas['geometry'])

geo_ptal_lsoas = gpd.GeoDataFrame(ptal_lsoas, geometry='geometry')

---------------------------------------------------------------------------
TypeError                                 Traceback (most recent call last)
<ipython-input-17-2d79f4c9cee1> in <module>
----> 1 ptal_lsoas['geometry']= gpd.GeoSeries.from_wkt(ptal_lsoas['geometry'])
      2 
      3 geo_ptal_lsoas = gpd.GeoDataFrame(ptal_lsoas, geometry='geometry')

/opt/conda/lib/python3.8/site-packages/geopandas/geoseries.py in from_wkt(cls, data, index, crs, **kwargs)
    444         dtype: geometry
    445         """
--> 446         return cls._from_wkb_or_wkb(from_wkt, data, index=index, crs=crs, **kwargs)
    447 
    448     @classmethod

/opt/conda/lib/python3.8/site-packages/geopandas/geoseries.py in _from_wkb_or_wkb(cls, from_wkb_or_wkt_function, data, index, crs, **kwargs)
    457                 index = data.index
    458             data = data.values
--> 459         return cls(from_wkb_or_wkt_function(data, crs=crs), index=index, **kwargs)
    460 
    461     @property

/opt/conda/lib/python3.8/site-packages/geopandas/array.py in from_wkt(data, crs)
    218 
    219     """
--> 220     return GeometryArray(vectorized.from_wkt(data), crs=crs)
    221 
    222 

/opt/conda/lib/python3.8/site-packages/geopandas/_vectorized.py in from_wkt(data)
    194     """
    195     if compat.USE_PYGEOS:
--> 196         return pygeos.from_wkt(data)
    197 
    198     import shapely.wkt

/opt/conda/lib/python3.8/site-packages/pygeos/io.py in from_wkt(geometry, **kwargs)
    158     <pygeos.Geometry POINT (0 0)>
    159     """
--> 160     return lib.from_wkt(geometry, **kwargs)
    161 
    162 

TypeError: Expected bytes, got Polygon

print(ptal_lsoas.geometry.head())

0    POLYGON ((532105.092 182011.230, 532162.491 18...
1    POLYGON ((532746.813 181786.891, 532671.688 18...
2    POLYGON ((532135.145 182198.119, 532158.250 18...
3    POLYGON ((533807.946 180767.770, 533649.063 18...
4    POLYGON ((545122.049 184314.931, 545271.917 18...
Name: geometry, dtype: geometry

你试过下面这种简单的方法吗?

ptal_lsoas['geometry'] = ptal_lsoas['geometry'].str.replace('POLYGON',"")

我只是猜测:您的 geometry 列中保存的是以逗号分隔的经度和纬度坐标,因此不应再进行其他 dtype 转换。如果该列只包含数字,请添加以下行:

ptal_lsoas['geometry'] = ptal_lsoas['geometry'].astype(int)

听起来你的形状数据是 "well known text"(又称 WKT)格式。你可以使用 geopandas.GeoSeries.from_wkt 将 WKT 列转换为几何列:

# replace string geometry representations with shapely geometries
ptal_lsoas['geometry'] = gpd.GeoSeries.from_wkt(ptal_lsoas['geometry'])

# initialize GeoDataFrame with the result
# ('geometry' is the default geometry column name)
geo_ptal_lsoas = gpd.GeoDataFrame(ptal_lsoas)

请注意,必须保留完整的字符串(例如 POLYGON ((30 10, 40 40, 20 40, 10 20, 30 10)))才能作为 WKT 解析。不要尝试做任何预处理。如果您的列是有效的 WKT 格式,那么整个字符串可以直接被 from_wkt 解析。如果去掉了字符串开头的类型前缀,geopandas/shapely 将无法知道数据的几何类型。