从包含字典的 Pandas 列中提取信息以创建新的 Pandas 列
Extracting info from a Pandas column containing a dict to create a new Pandas column
数据框df2中的一列如下:
from pandas import DataFrame, Series
df = DataFrame(data)
df2 = df.ix[0:, [17, 18, 19, 13]]
df2.columns = ['a', 'b', 'c', 'd']
df2 = df2.rename(columns={'a':'name', 'b':'stuff', 'c':'scanner', 'd': 'geo'})
print df2['geo']
{u'type': u'Point', u'coordinates': [40.70477818, -74.18183193]}
{u'type': u'Point', u'coordinates': [25.78915569, -80.1845325]}
{u'type': u'Point', u'coordinates': [35.20042459, -101.91661173]}
{u'type': u'Point', u'coordinates': [32.51591725, -92.15884093]}
{u'type': u'Point', u'coordinates': [43.35457272, -79.78553736]}
{u'type': u'Point', u'coordinates': [43.35460763, -79.78536878]}
{u'type': u'Point', u'coordinates': [47.80446395, 16.16058828]}
我想提取第一个数字作为纬度(新列),第二个数字作为经度。已经尝试了很长时间,但始终找不出正确的语法。下面是我无法正常运行的代码:
df2['latitude'] = df2['geo']['coordinates'][0]
KeyError: 'coordinates'
编辑:下面是我把 unicode 字符串转换为 dict 的可行方案,但仍然想知道是否有更快的单行 Python 代码可以做同样的事情...
def uni_to_dict_lat(row):
    """Return the first coordinate (latitude) from a row's 'geo' value.

    ``row['geo']`` may be the string form of a dict, for example
    ``"{u'type': u'Point', u'coordinates': [40.7, -74.1]}"``, or a dict
    that has already been parsed. Either way the value must contain a
    'coordinates' sequence.

    Raises KeyError if 'geo' or 'coordinates' is missing, and
    ValueError/SyntaxError if the string is not a valid Python literal.
    """
    # Local import: no `import ast` is visible alongside this snippet.
    import ast

    geo = row['geo']
    if not isinstance(geo, dict):
        # literal_eval only evaluates Python literals, so it is safe to
        # use on text, unlike eval().
        geo = ast.literal_eval(geo)
    return geo['coordinates'][0]
def uni_to_dict_lon(row):
    """Return the second coordinate (longitude) from a row's 'geo' value.

    ``row['geo']`` may be the string form of a dict, for example
    ``"{u'type': u'Point', u'coordinates': [40.7, -74.1]}"``, or a dict
    that has already been parsed. Either way the value must contain a
    'coordinates' sequence with at least two items.

    Raises KeyError if 'geo' or 'coordinates' is missing, IndexError if
    there is no second coordinate, and ValueError/SyntaxError if the
    string is not a valid Python literal.
    """
    # Local import: no `import ast` is visible alongside this snippet.
    import ast

    geo = row['geo']
    if not isinstance(geo, dict):
        # literal_eval only evaluates Python literals, so it is safe to
        # use on text, unlike eval().
        geo = ast.literal_eval(geo)
    return geo['coordinates'][1]
df2['lat'] = df2.apply(uni_to_dict_lat, axis=1)
df2['lon'] = df2.apply(uni_to_dict_lon, axis=1)
您可以使用
df2['latitude'] = df2['geo'].apply(lambda x: x['coordinates'][0])
你能试试这个吗?
import pandas as pd
import numpy as np
# Build a stand-in for the frame in the question: three integer columns
# plus one column whose cells are point dicts holding a coordinate pair.
a = np.arange(7)
b = np.arange(7)
c = np.arange(7)
_coordinate_pairs = [
    (40.70477818, -74.18183193),
    (25.78915569, -80.1845325),
    (35.20042459, -101.91661173),
    (32.51591725, -92.15884093),
    (43.35457272, -79.78553736),
    (43.35460763, -79.78536878),
    (47.80446395, 16.16058828),
]
d = [{u'type': u'Point', u'coordinates': [first, second]}
     for first, second in _coordinate_pairs]
df2 = pd.DataFrame({'a': a, 'b': b, 'c': c, 'd': d})
# Give the columns the same names the question uses.
df2.columns = ['name', 'stuff', 'scanner', 'geo']
Out[54]:
name stuff scanner geo
0 0 0 0 {u'type': u'Point', u'coordinates': [40.70477818, -74.18183193]}
1 1 1 1 {u'type': u'Point', u'coordinates': [25.78915569, -80.1845325]}
2 2 2 2 {u'type': u'Point', u'coordinates': [35.20042459, -101.91661173]}
3 3 3 3 {u'type': u'Point', u'coordinates': [32.51591725, -92.15884093]}
4 4 4 4 {u'type': u'Point', u'coordinates': [43.35457272, -79.78553736]}
5 5 5 5 {u'type': u'Point', u'coordinates': [43.35460763, -79.78536878]}
6 6 6 6 {u'type': u'Point', u'coordinates': [47.80446395, 16.16058828]}
# Pull the coordinate pair out of every 'geo' cell once, then split it
# into the two new columns.
coordinates = [point['coordinates'] for point in df2['geo'].values]
df2['latitude'] = [pair[0] for pair in coordinates]
df2['longitude'] = [pair[1] for pair in coordinates]
# The result is not assigned back; this line only shows the frame
# without the 'geo' column.
df2.drop('geo', axis=1)
Out[57]:
name stuff scanner latitude longitude
0 0 0 0 40.7048 -74.1818
1 1 1 1 25.7892 -80.1845
2 2 2 2 35.2004 -101.9166
3 3 3 3 32.5159 -92.1588
4 4 4 4 43.3546 -79.7855
5 5 5 5 43.3546 -79.7854
6 6 6 6 47.8045 16.1606
数据框df2中的一列如下:
from pandas import DataFrame, Series
df = DataFrame(data)
df2 = df.ix[0:, [17, 18, 19, 13]]
df2.columns = ['a', 'b', 'c', 'd']
df2 = df2.rename(columns={'a':'name', 'b':'stuff', 'c':'scanner', 'd': 'geo'})
print df2['geo']
{u'type': u'Point', u'coordinates': [40.70477818, -74.18183193]}
{u'type': u'Point', u'coordinates': [25.78915569, -80.1845325]}
{u'type': u'Point', u'coordinates': [35.20042459, -101.91661173]}
{u'type': u'Point', u'coordinates': [32.51591725, -92.15884093]}
{u'type': u'Point', u'coordinates': [43.35457272, -79.78553736]}
{u'type': u'Point', u'coordinates': [43.35460763, -79.78536878]}
{u'type': u'Point', u'coordinates': [47.80446395, 16.16058828]}
我想提取第一个数字作为纬度(新列),第二个数字作为经度。已经尝试了很长时间,但始终找不出正确的语法。下面是我无法正常运行的代码:
df2['latitude'] = df2['geo']['coordinates'][0]
KeyError: 'coordinates'
编辑:下面是我把 unicode 字符串转换为 dict 的可行方案,但仍然想知道是否有更快的单行 Python 代码可以做同样的事情...
def uni_to_dict_lat(row):
    """Return the first coordinate (latitude) from a row's 'geo' value.

    ``row['geo']`` may be the string form of a dict, for example
    ``"{u'type': u'Point', u'coordinates': [40.7, -74.1]}"``, or a dict
    that has already been parsed. Either way the value must contain a
    'coordinates' sequence.

    Raises KeyError if 'geo' or 'coordinates' is missing, and
    ValueError/SyntaxError if the string is not a valid Python literal.
    """
    # Local import: no `import ast` is visible alongside this snippet.
    import ast

    geo = row['geo']
    if not isinstance(geo, dict):
        # literal_eval only evaluates Python literals, so it is safe to
        # use on text, unlike eval().
        geo = ast.literal_eval(geo)
    return geo['coordinates'][0]
def uni_to_dict_lon(row):
    """Return the second coordinate (longitude) from a row's 'geo' value.

    ``row['geo']`` may be the string form of a dict, for example
    ``"{u'type': u'Point', u'coordinates': [40.7, -74.1]}"``, or a dict
    that has already been parsed. Either way the value must contain a
    'coordinates' sequence with at least two items.

    Raises KeyError if 'geo' or 'coordinates' is missing, IndexError if
    there is no second coordinate, and ValueError/SyntaxError if the
    string is not a valid Python literal.
    """
    # Local import: no `import ast` is visible alongside this snippet.
    import ast

    geo = row['geo']
    if not isinstance(geo, dict):
        # literal_eval only evaluates Python literals, so it is safe to
        # use on text, unlike eval().
        geo = ast.literal_eval(geo)
    return geo['coordinates'][1]
df2['lat'] = df2.apply(uni_to_dict_lat, axis=1)
df2['lon'] = df2.apply(uni_to_dict_lon, axis=1)
您可以使用
df2['latitude'] = df2['geo'].apply(lambda x: x['coordinates'][0])
你能试试这个吗?
import pandas as pd
import numpy as np
# Build a stand-in for the frame in the question: three integer columns
# plus one column whose cells are point dicts holding a coordinate pair.
a = np.arange(7)
b = np.arange(7)
c = np.arange(7)
_coordinate_pairs = [
    (40.70477818, -74.18183193),
    (25.78915569, -80.1845325),
    (35.20042459, -101.91661173),
    (32.51591725, -92.15884093),
    (43.35457272, -79.78553736),
    (43.35460763, -79.78536878),
    (47.80446395, 16.16058828),
]
d = [{u'type': u'Point', u'coordinates': [first, second]}
     for first, second in _coordinate_pairs]
df2 = pd.DataFrame({'a': a, 'b': b, 'c': c, 'd': d})
# Give the columns the same names the question uses.
df2.columns = ['name', 'stuff', 'scanner', 'geo']
Out[54]:
name stuff scanner geo
0 0 0 0 {u'type': u'Point', u'coordinates': [40.70477818, -74.18183193]}
1 1 1 1 {u'type': u'Point', u'coordinates': [25.78915569, -80.1845325]}
2 2 2 2 {u'type': u'Point', u'coordinates': [35.20042459, -101.91661173]}
3 3 3 3 {u'type': u'Point', u'coordinates': [32.51591725, -92.15884093]}
4 4 4 4 {u'type': u'Point', u'coordinates': [43.35457272, -79.78553736]}
5 5 5 5 {u'type': u'Point', u'coordinates': [43.35460763, -79.78536878]}
6 6 6 6 {u'type': u'Point', u'coordinates': [47.80446395, 16.16058828]}
# Pull the coordinate pair out of every 'geo' cell once, then split it
# into the two new columns.
coordinates = [point['coordinates'] for point in df2['geo'].values]
df2['latitude'] = [pair[0] for pair in coordinates]
df2['longitude'] = [pair[1] for pair in coordinates]
# The result is not assigned back; this line only shows the frame
# without the 'geo' column.
df2.drop('geo', axis=1)
Out[57]:
name stuff scanner latitude longitude
0 0 0 0 40.7048 -74.1818
1 1 1 1 25.7892 -80.1845
2 2 2 2 35.2004 -101.9166
3 3 3 3 32.5159 -92.1588
4 4 4 4 43.3546 -79.7855
5 5 5 5 43.3546 -79.7854
6 6 6 6 47.8045 16.1606