使用 python 计算 CSV 文件中的条目
Counting entries in a CSV file with python
我的 CSV 包含有关个人及其死亡日期和死亡地区的信息。我也在使用 geopy 来定位这个区域。这部分工作 但我真的不想有个人,但 我想要每天和每个地区每个事件的总人数。 所以它给了我一个 CSV有关死亡人数、地理位置、日期和 area/province 发生的信息。这是我的 csv 的片段:
rowid,Name,Status,Sex,Province,Area,Date_of_Death,Cause_of_Death
1,Hasan Ali Saad al-Hareeri ,Civilian,Adult - Male,Daraa,Sawara,2015-12-10,Warplane shelling
2,Riham Adnan Swais ,Civilian,Adult - Female,Homs,Telbeiseh,2015-12-10,Shelling
3,Ammar Ahmad al-Njoam ,Civilian,Adult - Male,Aleppo,Jarablus,2015-12-10,Explosion
4,Yaseen Slaiman al-Salkhadi ,Civilian,Adult - Male,Daraa,Jassim,2015-12-10,Warplane shelling
5,Ayman Noman Qanatra ,Civilian,Adult - Male,Daraa,Jassim,2015-12-10,Warplane shelling
6,Abdul Kareem Mohammad al-Tamki ,Civilian,Adult - Male,Daraa,Sheikh Miskeen,2015-12-10,Warplane shelling
7,Ahmad Mohammad al-Tamki ,Civilian,Child - Male,Daraa,Sheikh Miskeen,2015-12-10,Warplane shelling
8,Hamze Mohammad al-Tamki ,Civilian,Adult - Male,Daraa,Sheikh Miskeen,2015-12-10,Warplane shelling
12,Ahmad Ibraheem al-Sayed Omar ,Civilian,Adult - Male,Aleppo,,2015-12-10,Warplane shelling
13,Unidentified ,Civilian,Child - Male,Aleppo,Kafrnaya,2015-12-10,Warplane shelling
14,Unidentified 1 ,Civilian,Adult - Female,Aleppo,Bazzaa,2015-12-10,Warplane shelling
15,Unidentified 2 ,Civilian,Child - Male,Aleppo,Bazzaa,2015-12-10,Warplane shelling
16,Unidentified 3 ,Civilian,Child - Male,Aleppo,Bazzaa,2015-12-10,Warplane shelling
17,Mohammad Ahmad al-Farhoud ,Civilian,Adult - Male,Idlib,Telminis,2015-12-10,Shooting
23,wife of Faisal al-Ahmad al-Khaleefe ,Civilian,Adult - Female,Deir Ezzor,Qourieh,2015-12-10,Warplane shelling
24,Unidentified ,Civilian,Adult - Male,Deir Ezzor,Takaya,2015-12-10,Kidnapping - Execution
25,Unidentified 1 ,Civilian,Adult - Male,Deir Ezzor,Tabya Jazera village,2015-12-10,Kidnapping - Execution
26,Unidentified 2 ,Civilian,Adult - Male,Deir Ezzor,Tabya Jazera village,2015-12-10,Kidnapping - Execution
27,Unidentified 3 ,Civilian,Adult - Male,Deir Ezzor,Tabya Jazera village,2015-12-10,Kidnapping - Execution
28,Unidentified 4 ,Civilian,Adult - Male,Deir Ezzor,Tabya Jazera village,2015-12-10,Kidnapping - Execution
29,Unidentified 5 ,Civilian,Adult - Male,Deir Ezzor,Tabya Jazera village,2015-12-10,Kidnapping - Execution
32,Amoun Murad ,Civilian,Adult - Female,Idlib,Jisr Shagour: Mraand,2015-12-09,Warplane shelling
33,Kafa Mezweq ,Civilian,Adult - Female,Idlib,Jisr Shagour: Mraand,2015-12-09,Warplane shelling
34,Hala Mohammad ,Civilian,Adult - Female,Idlib,Jisr Shagour: Mraand,2015-12-09,Warplane shelling
35,Ahmad Mezweq ,Civilian,Child - Male,Idlib,Jisr Shagour: Mraand,2015-12-09,Warplane shelling
36,Ahlam Mezweq ,Civilian,Child - Female,Idlib,Jisr Shagour: Mraand,2015-12-09,Warplane shelling
37,Adnan Hussain Haj Kheder ,Civilian,Child - Male,Idlib,Jisr Shagour: Mraand,2015-12-09,Warplane shelling
38,Wife of Hussain Haj Kheder ,Civilian,Adult - Female,Idlib,Jisr Shagour: Mraand,2015-12-09,Warplane shelling
39,daughter of Hussain Haj Kheder 1 ,Civilian,Child - Female,Idlib,Jisr Shagour: Mraand,2015-12-09,Warplane shelling
40,Muhammad Muhammad al-Aoutani ,Civilian,Adult - Male,Damascus Suburbs,Hamouria,2015-12-09,Warplane shelling
41,Ibrahim al-Shami ,Civilian,Adult - Male,Damascus Suburbs,Hamouria,2015-12-09,Warplane shelling
这是我的代码
import csv
from geopy.geocoders import GoogleV3
geolocator = GoogleV3() #here some parameters are needed
lookups = {}
valueCounter = 0
init= True
with open('151213_Martyrs_filtered_civilian_2015_with_country.csv', 'rb') as csvinput:
with open('151213_output_martyrs_final.csv', 'w') as csvoutput:
output_fieldnames = ['Value','Country','Province','Area','Date_of_Death','Cause_of_Death','Latitude','Longitude']
writer = csv.DictWriter(csvoutput, delimiter=',', fieldnames=output_fieldnames)
reader = csv.DictReader(csvinput)
for row in reader:
if init == True :
prevProvince = row['Province']
prevArea = row['Area']
prevDate = row['Date_of_Death']
prevCause = row['Cause_of_Death']
init = False
if prevArea == row['Area'] and prevCause == row['Cause_of_Death'] and prevDate['Date_of_Death']:
valueCounter +=1
if row['Province'].split(' ', 1) == 'Damascus':
rowProv = "Damascus"
else:
rowProv = row['Province']
#here you have to replace the dict item by your csv column names
query = ','.join(str(x) for x in (rowProv, row['Area']))
try:
address, (latitude, longitude) = lookups[query]
except KeyError: # new lookup
try:
query = ','.join(str(x) for x in (rowProv, rowProv))
address, (latitude, longitude) = lookups[query]
# print('lookup')
except KeyError: # search
try:
query = ','.join(str(x) for x in (rowProv, row['Area']))
address, (latitude, longitude) = geolocator.geocode(query)
except: # error
query = ','.join(str(x) for x in (rowProv, rowProv))
address, (latitude, longitude) = geolocator.geocode(query)
lookups[query] = address, (latitude, longitude)
print(query)
else: #write down and save previous Row data
output_row = {}
output_row['Value'] = valueCounter
output_row['Province'] = prevProvince
output_row['Area'] = prevArea
output_row['Date_of_Death'] = prevDate
output_row['Cause_of_Death'] = prevCause
output_row['Latitude'] = latitude
output_row['Longitude'] = longitude
writer.writerow(output_row)
prevProvince = row['Province']
prevArea = row['Area']
prevDate = row['Date_of_Death']
prevCause = row['Cause_of_Death']
valueCounter = 1
if row['Province'].split(' ', 1) == 'Damascus':
rowProv = "Damascus"
else:
rowProv = row['Province']
#here you have to replace the dict item by your csv column names
query = ','.join(str(x) for x in (rowProv, row['Area']))
try:
address, (latitude, longitude) = lookups[query]
except KeyError: # new lookup
try:
query = ','.join(str(x) for x in (rowProv, rowProv))
address, (latitude, longitude) = lookups[query]
# print('lookup')
except KeyError: # search
try:
query = ','.join(str(x) for x in (rowProv, row['Area']))
address, (latitude, longitude) = geolocator.geocode(query)
except: # error
query = ','.join(str(x) for x in (rowProv, rowProv))
address, (latitude, longitude) = geolocator.geocode(query)
lookups[query] = address, (latitude, longitude)
print(query)
我没有看到什么是因为它不起作用?
终端给我这条信息
Test_Whosebug_cached Ian$ python geopy_script_bodycount.py
Traceback (most recent call last):
File "geopy_script_bodycount.py", line 22, in <module>
if prevArea == row['Area'] and prevCause == row['Cause_of_Death'] and prevDate['Date_of_Death']:
TypeError: string indices must be integers, not str
我不确定你在那一行试图做什么,但问题出在跟踪中提到的那一行:
if prevArea == row['Area'] and prevCause == row['Cause_of_Death'] and prevDate['Date_of_Death']:
这里的prevDate
是一个字符串,而不是像row
变量那样的字典。你可能想比较它的东西,就像在其他条款中一样。
使用 pandas
、python 数据处理和分析库,做这类事情会变得容易得多。
这是一个很好的介绍:http://youtube.com/watch?v=otCriSKVV_8
这会让你开始解决你的问题:
import pandas as pd
data = pd.read_csv(
'data.csv',
index_col='rowid',
parse_dates=['Date_of_Death'],
)
# How many died at each day?
print('Deaths by date')
print(data.groupby('Date_of_Death').size())
# How many per area:
print('\nDeaths by Area')
print(data.groupby('Area').size())
# Or both:
print('\nPer date and Area')
print(data.groupby(['Date_of_Death', 'Area']).size())
我的 CSV 包含有关个人及其死亡日期和死亡地区的信息。我也在使用 geopy 来定位这个区域。这部分工作
rowid,Name,Status,Sex,Province,Area,Date_of_Death,Cause_of_Death
1,Hasan Ali Saad al-Hareeri ,Civilian,Adult - Male,Daraa,Sawara,2015-12-10,Warplane shelling
2,Riham Adnan Swais ,Civilian,Adult - Female,Homs,Telbeiseh,2015-12-10,Shelling
3,Ammar Ahmad al-Njoam ,Civilian,Adult - Male,Aleppo,Jarablus,2015-12-10,Explosion
4,Yaseen Slaiman al-Salkhadi ,Civilian,Adult - Male,Daraa,Jassim,2015-12-10,Warplane shelling
5,Ayman Noman Qanatra ,Civilian,Adult - Male,Daraa,Jassim,2015-12-10,Warplane shelling
6,Abdul Kareem Mohammad al-Tamki ,Civilian,Adult - Male,Daraa,Sheikh Miskeen,2015-12-10,Warplane shelling
7,Ahmad Mohammad al-Tamki ,Civilian,Child - Male,Daraa,Sheikh Miskeen,2015-12-10,Warplane shelling
8,Hamze Mohammad al-Tamki ,Civilian,Adult - Male,Daraa,Sheikh Miskeen,2015-12-10,Warplane shelling
12,Ahmad Ibraheem al-Sayed Omar ,Civilian,Adult - Male,Aleppo,,2015-12-10,Warplane shelling
13,Unidentified ,Civilian,Child - Male,Aleppo,Kafrnaya,2015-12-10,Warplane shelling
14,Unidentified 1 ,Civilian,Adult - Female,Aleppo,Bazzaa,2015-12-10,Warplane shelling
15,Unidentified 2 ,Civilian,Child - Male,Aleppo,Bazzaa,2015-12-10,Warplane shelling
16,Unidentified 3 ,Civilian,Child - Male,Aleppo,Bazzaa,2015-12-10,Warplane shelling
17,Mohammad Ahmad al-Farhoud ,Civilian,Adult - Male,Idlib,Telminis,2015-12-10,Shooting
23,wife of Faisal al-Ahmad al-Khaleefe ,Civilian,Adult - Female,Deir Ezzor,Qourieh,2015-12-10,Warplane shelling
24,Unidentified ,Civilian,Adult - Male,Deir Ezzor,Takaya,2015-12-10,Kidnapping - Execution
25,Unidentified 1 ,Civilian,Adult - Male,Deir Ezzor,Tabya Jazera village,2015-12-10,Kidnapping - Execution
26,Unidentified 2 ,Civilian,Adult - Male,Deir Ezzor,Tabya Jazera village,2015-12-10,Kidnapping - Execution
27,Unidentified 3 ,Civilian,Adult - Male,Deir Ezzor,Tabya Jazera village,2015-12-10,Kidnapping - Execution
28,Unidentified 4 ,Civilian,Adult - Male,Deir Ezzor,Tabya Jazera village,2015-12-10,Kidnapping - Execution
29,Unidentified 5 ,Civilian,Adult - Male,Deir Ezzor,Tabya Jazera village,2015-12-10,Kidnapping - Execution
32,Amoun Murad ,Civilian,Adult - Female,Idlib,Jisr Shagour: Mraand,2015-12-09,Warplane shelling
33,Kafa Mezweq ,Civilian,Adult - Female,Idlib,Jisr Shagour: Mraand,2015-12-09,Warplane shelling
34,Hala Mohammad ,Civilian,Adult - Female,Idlib,Jisr Shagour: Mraand,2015-12-09,Warplane shelling
35,Ahmad Mezweq ,Civilian,Child - Male,Idlib,Jisr Shagour: Mraand,2015-12-09,Warplane shelling
36,Ahlam Mezweq ,Civilian,Child - Female,Idlib,Jisr Shagour: Mraand,2015-12-09,Warplane shelling
37,Adnan Hussain Haj Kheder ,Civilian,Child - Male,Idlib,Jisr Shagour: Mraand,2015-12-09,Warplane shelling
38,Wife of Hussain Haj Kheder ,Civilian,Adult - Female,Idlib,Jisr Shagour: Mraand,2015-12-09,Warplane shelling
39,daughter of Hussain Haj Kheder 1 ,Civilian,Child - Female,Idlib,Jisr Shagour: Mraand,2015-12-09,Warplane shelling
40,Muhammad Muhammad al-Aoutani ,Civilian,Adult - Male,Damascus Suburbs,Hamouria,2015-12-09,Warplane shelling
41,Ibrahim al-Shami ,Civilian,Adult - Male,Damascus Suburbs,Hamouria,2015-12-09,Warplane shelling
这是我的代码
import csv
from geopy.geocoders import GoogleV3
geolocator = GoogleV3() #here some parameters are needed
lookups = {}
valueCounter = 0
init= True
with open('151213_Martyrs_filtered_civilian_2015_with_country.csv', 'rb') as csvinput:
with open('151213_output_martyrs_final.csv', 'w') as csvoutput:
output_fieldnames = ['Value','Country','Province','Area','Date_of_Death','Cause_of_Death','Latitude','Longitude']
writer = csv.DictWriter(csvoutput, delimiter=',', fieldnames=output_fieldnames)
reader = csv.DictReader(csvinput)
for row in reader:
if init == True :
prevProvince = row['Province']
prevArea = row['Area']
prevDate = row['Date_of_Death']
prevCause = row['Cause_of_Death']
init = False
if prevArea == row['Area'] and prevCause == row['Cause_of_Death'] and prevDate['Date_of_Death']:
valueCounter +=1
if row['Province'].split(' ', 1) == 'Damascus':
rowProv = "Damascus"
else:
rowProv = row['Province']
#here you have to replace the dict item by your csv column names
query = ','.join(str(x) for x in (rowProv, row['Area']))
try:
address, (latitude, longitude) = lookups[query]
except KeyError: # new lookup
try:
query = ','.join(str(x) for x in (rowProv, rowProv))
address, (latitude, longitude) = lookups[query]
# print('lookup')
except KeyError: # search
try:
query = ','.join(str(x) for x in (rowProv, row['Area']))
address, (latitude, longitude) = geolocator.geocode(query)
except: # error
query = ','.join(str(x) for x in (rowProv, rowProv))
address, (latitude, longitude) = geolocator.geocode(query)
lookups[query] = address, (latitude, longitude)
print(query)
else: #write down and save previous Row data
output_row = {}
output_row['Value'] = valueCounter
output_row['Province'] = prevProvince
output_row['Area'] = prevArea
output_row['Date_of_Death'] = prevDate
output_row['Cause_of_Death'] = prevCause
output_row['Latitude'] = latitude
output_row['Longitude'] = longitude
writer.writerow(output_row)
prevProvince = row['Province']
prevArea = row['Area']
prevDate = row['Date_of_Death']
prevCause = row['Cause_of_Death']
valueCounter = 1
if row['Province'].split(' ', 1) == 'Damascus':
rowProv = "Damascus"
else:
rowProv = row['Province']
#here you have to replace the dict item by your csv column names
query = ','.join(str(x) for x in (rowProv, row['Area']))
try:
address, (latitude, longitude) = lookups[query]
except KeyError: # new lookup
try:
query = ','.join(str(x) for x in (rowProv, rowProv))
address, (latitude, longitude) = lookups[query]
# print('lookup')
except KeyError: # search
try:
query = ','.join(str(x) for x in (rowProv, row['Area']))
address, (latitude, longitude) = geolocator.geocode(query)
except: # error
query = ','.join(str(x) for x in (rowProv, rowProv))
address, (latitude, longitude) = geolocator.geocode(query)
lookups[query] = address, (latitude, longitude)
print(query)
我没有看到什么是因为它不起作用?
终端给我这条信息
Test_Whosebug_cached Ian$ python geopy_script_bodycount.py
Traceback (most recent call last):
File "geopy_script_bodycount.py", line 22, in <module>
if prevArea == row['Area'] and prevCause == row['Cause_of_Death'] and prevDate['Date_of_Death']:
TypeError: string indices must be integers, not str
我不确定你在那一行试图做什么,但问题出在跟踪中提到的那一行:
if prevArea == row['Area'] and prevCause == row['Cause_of_Death'] and prevDate['Date_of_Death']:
这里的prevDate
是一个字符串,而不是像row
变量那样的字典。你可能想比较它的东西,就像在其他条款中一样。
使用 pandas
、python 数据处理和分析库,做这类事情会变得容易得多。
这是一个很好的介绍:http://youtube.com/watch?v=otCriSKVV_8
这会让你开始解决你的问题:
import pandas as pd
data = pd.read_csv(
'data.csv',
index_col='rowid',
parse_dates=['Date_of_Death'],
)
# How many died at each day?
print('Deaths by date')
print(data.groupby('Date_of_Death').size())
# How many per area:
print('\nDeaths by Area')
print(data.groupby('Area').size())
# Or both:
print('\nPer date and Area')
print(data.groupby(['Date_of_Death', 'Area']).size())