使用 python 计算 csv 文件中的列
Counting columns in a CSV file with Python
我想分别统计男性和女性的电子邮件帐户。我写的代码不能正常工作,有谁能帮我解决这个问题吗?拜托了。
这是我的代码,提前谢谢你。
import csv


def count_domains_by_gender(rows, gender_col=5, email_col=6):
    """Count e-mail domains separately for male and female records.

    Args:
        rows: Iterable of CSV records (sequences of strings), for example a
            ``csv.reader``.
        gender_col: Zero-based index of the column holding the gender code.
        email_col: Zero-based index of the column holding the e-mail address.

    Returns:
        A ``(male_counts, female_counts)`` tuple of plain dicts, each mapping
        a domain to the number of accounts seen for that gender. Records
        whose gender code is neither M nor F are not counted.
    """
    male_counts = {}
    female_counts = {}
    needed = max(gender_col, email_col) + 1
    for row in rows:
        # csv.reader yields [] for blank lines; skip rows too short to index.
        if len(row) < needed:
            continue
        domain = row[email_col].split('@')[-1]
        # Compare case-insensitively so 'm'/'M' and 'f'/'F' are treated alike.
        gender = row[gender_col].strip().upper()
        # Pick the dictionary by gender first, then count; a domain must only
        # appear in a dictionary once a record of that gender has used it.
        if gender == 'M':
            male_counts[domain] = male_counts.get(domain, 0) + 1
        elif gender == 'F':
            female_counts[domain] = female_counts.get(domain, 0) + 1
    return male_counts, female_counts


if __name__ == '__main__':
    # newline='' is the mode the csv module expects for files it reads.
    with open('1000 Records.csv', 'r', newline='') as csv_file:
        mailAcc, femailAcc = count_domains_by_gender(csv.reader(csv_file))

    print('Mail Email accounts: ', mailAcc)
    print('Femail Email Accounts: ', femailAcc)
使用pandas
import pandas as pd


def domain_counts_by_gender(df):
    """Count accounts per e-mail domain for each gender.

    Adds a ``'domain'`` column to ``df`` in place (the text after the first
    ``'@'`` of the ``'email'`` column), then counts rows per domain for the
    gender codes ``'M'`` and ``'F'`` found in the ``'gender'`` column.

    Args:
        df: DataFrame with ``'email'`` and ``'gender'`` columns.

    Returns:
        A ``(male, female)`` tuple of dicts. Every domain present in the
        frame is a key in both dicts; the value is 0 when that gender has no
        account on the domain.
    """
    # The .str accessor propagates missing e-mails as NaN instead of raising,
    # and an address without '@' is kept whole rather than causing an error.
    df['domain'] = df['email'].str.split('@', n=1).str[-1]  # column with just domain
    domains = df['domain'].dropna().unique()

    # One pass per gender instead of re-filtering the frame for every domain.
    male_counts = df.loc[df['gender'] == 'M', 'domain'].value_counts()
    female_counts = df.loc[df['gender'] == 'F', 'domain'].value_counts()

    male = {domain: int(male_counts.get(domain, 0)) for domain in domains}
    female = {domain: int(female_counts.get(domain, 0)) for domain in domains}
    return male, female


if __name__ == '__main__':
    df = pd.read_csv('your_csv_file.csv')  # read in csv
    male, female = domain_counts_by_gender(df)
这可以用 pandas 完成。由于您的列未命名,因此在读取 csv 时使用 header=None,并按编号访问列:
import pandas as pd


def add_mailhosts(df):
    """Add a ``'mailhosts'`` column holding the host part of column 6.

    Args:
        df: DataFrame read with ``header=None``; column ``6`` is used as the
            e-mail address column.

    Returns:
        The same DataFrame, modified in place, for convenient chaining.
    """
    df['mailhosts'] = df[6].str.split('@').str[-1]
    return df


def mailhost_counts(df, gender):
    """Return the number of accounts per mail host for one gender code.

    Args:
        df: DataFrame that already has the ``'mailhosts'`` column; column
            ``5`` is compared against ``gender``.
        gender: Gender code to select, e.g. ``'F'`` or ``'M'``.

    Returns:
        A Series of counts indexed by mail host. It is empty when no row has
        the requested gender, so a missing gender does not raise.
    """
    # A boolean mask yields an empty selection for an absent gender, whereas
    # groupby(...).get_group(...) raises KeyError in that case.
    return df.loc[df[5] == gender, 'mailhosts'].value_counts()


if __name__ == '__main__':
    df = add_mailhosts(pd.read_csv('1000 Records.csv', header=None))

    # count e-mail accounts per gender:
    print('Female Email Accounts:', mailhost_counts(df, 'F'))
    print('Male Email Accounts:', mailhost_counts(df, 'M'))
这是一个仅使用标准 Python 模块按域计算男性和女性帐户的解决方案:
import csv
from collections import Counter


def count_accounts(records):
    """Count e-mail accounts per domain, split by gender.

    Column 5 of each record is read as the gender code and column 6 as the
    e-mail address. Domains and gender codes are compared case-insensitively.

    Args:
        records: Iterable of CSV records (sequences of strings), for example
            a ``csv.reader``.

    Returns:
        A ``(males, females)`` tuple of ``Counter`` objects keyed by
        lower-cased domain. Records that are too short, have no domain after
        an ``'@'``, or carry a gender code other than m/f are skipped.
    """
    males = Counter()
    females = Counter()
    for record in records:
        # csv.reader yields [] for blank lines; skip rows too short to index.
        if len(record) < 7:
            continue
        # rpartition never raises, unlike unpacking split('@'), which needs
        # exactly one '@' in the address.
        _, at_sign, domain = record[6].rpartition('@')
        if not at_sign or not domain:
            continue
        domain = domain.lower()
        gender = record[5].strip().lower()
        if gender == 'm':
            males[domain] += 1
        elif gender == 'f':
            # Test for 'f' explicitly so blank or unknown codes are not
            # silently counted as female.
            females[domain] += 1
    return males, females


if __name__ == '__main__':
    # newline='' is the mode the csv module expects for files it reads.
    with open('1000 Records.csv', newline='') as f:
        males, females = count_accounts(csv.reader(f))

    print('Total male accounts:', sum(males.values()))
    print('Total male accounts by domain')
    for k, v in males.items():
        print(k, v)

    print('Total female accounts:', sum(females.values()))
    print('Total female accounts by domain')
    for k, v in females.items():
        print(k, v)
我想分别统计男性和女性的电子邮件帐户。我写的代码不能正常工作,有谁能帮我解决这个问题吗?拜托了。这是我的代码,提前谢谢你。
import csv


def count_domains_by_gender(rows, gender_col=5, email_col=6):
    """Count e-mail domains separately for male and female records.

    Args:
        rows: Iterable of CSV records (sequences of strings), for example a
            ``csv.reader``.
        gender_col: Zero-based index of the column holding the gender code.
        email_col: Zero-based index of the column holding the e-mail address.

    Returns:
        A ``(male_counts, female_counts)`` tuple of plain dicts, each mapping
        a domain to the number of accounts seen for that gender. Records
        whose gender code is neither M nor F are not counted.
    """
    male_counts = {}
    female_counts = {}
    needed = max(gender_col, email_col) + 1
    for row in rows:
        # csv.reader yields [] for blank lines; skip rows too short to index.
        if len(row) < needed:
            continue
        domain = row[email_col].split('@')[-1]
        # Compare case-insensitively so 'm'/'M' and 'f'/'F' are treated alike.
        gender = row[gender_col].strip().upper()
        # Pick the dictionary by gender first, then count; a domain must only
        # appear in a dictionary once a record of that gender has used it.
        if gender == 'M':
            male_counts[domain] = male_counts.get(domain, 0) + 1
        elif gender == 'F':
            female_counts[domain] = female_counts.get(domain, 0) + 1
    return male_counts, female_counts


if __name__ == '__main__':
    # newline='' is the mode the csv module expects for files it reads.
    with open('1000 Records.csv', 'r', newline='') as csv_file:
        mailAcc, femailAcc = count_domains_by_gender(csv.reader(csv_file))

    print('Mail Email accounts: ', mailAcc)
    print('Femail Email Accounts: ', femailAcc)
使用pandas
import pandas as pd


def domain_counts_by_gender(df):
    """Count accounts per e-mail domain for each gender.

    Adds a ``'domain'`` column to ``df`` in place (the text after the first
    ``'@'`` of the ``'email'`` column), then counts rows per domain for the
    gender codes ``'M'`` and ``'F'`` found in the ``'gender'`` column.

    Args:
        df: DataFrame with ``'email'`` and ``'gender'`` columns.

    Returns:
        A ``(male, female)`` tuple of dicts. Every domain present in the
        frame is a key in both dicts; the value is 0 when that gender has no
        account on the domain.
    """
    # The .str accessor propagates missing e-mails as NaN instead of raising,
    # and an address without '@' is kept whole rather than causing an error.
    df['domain'] = df['email'].str.split('@', n=1).str[-1]  # column with just domain
    domains = df['domain'].dropna().unique()

    # One pass per gender instead of re-filtering the frame for every domain.
    male_counts = df.loc[df['gender'] == 'M', 'domain'].value_counts()
    female_counts = df.loc[df['gender'] == 'F', 'domain'].value_counts()

    male = {domain: int(male_counts.get(domain, 0)) for domain in domains}
    female = {domain: int(female_counts.get(domain, 0)) for domain in domains}
    return male, female


if __name__ == '__main__':
    df = pd.read_csv('your_csv_file.csv')  # read in csv
    male, female = domain_counts_by_gender(df)
这可以用 pandas 完成。由于您的列未命名,因此在读取 csv 时使用 header=None,并按编号访问列:
import pandas as pd


def add_mailhosts(df):
    """Add a ``'mailhosts'`` column holding the host part of column 6.

    Args:
        df: DataFrame read with ``header=None``; column ``6`` is used as the
            e-mail address column.

    Returns:
        The same DataFrame, modified in place, for convenient chaining.
    """
    df['mailhosts'] = df[6].str.split('@').str[-1]
    return df


def mailhost_counts(df, gender):
    """Return the number of accounts per mail host for one gender code.

    Args:
        df: DataFrame that already has the ``'mailhosts'`` column; column
            ``5`` is compared against ``gender``.
        gender: Gender code to select, e.g. ``'F'`` or ``'M'``.

    Returns:
        A Series of counts indexed by mail host. It is empty when no row has
        the requested gender, so a missing gender does not raise.
    """
    # A boolean mask yields an empty selection for an absent gender, whereas
    # groupby(...).get_group(...) raises KeyError in that case.
    return df.loc[df[5] == gender, 'mailhosts'].value_counts()


if __name__ == '__main__':
    df = add_mailhosts(pd.read_csv('1000 Records.csv', header=None))

    # count e-mail accounts per gender:
    print('Female Email Accounts:', mailhost_counts(df, 'F'))
    print('Male Email Accounts:', mailhost_counts(df, 'M'))
这是一个仅使用标准 Python 模块按域计算男性和女性帐户的解决方案:
import csv
from collections import Counter


def count_accounts(records):
    """Count e-mail accounts per domain, split by gender.

    Column 5 of each record is read as the gender code and column 6 as the
    e-mail address. Domains and gender codes are compared case-insensitively.

    Args:
        records: Iterable of CSV records (sequences of strings), for example
            a ``csv.reader``.

    Returns:
        A ``(males, females)`` tuple of ``Counter`` objects keyed by
        lower-cased domain. Records that are too short, have no domain after
        an ``'@'``, or carry a gender code other than m/f are skipped.
    """
    males = Counter()
    females = Counter()
    for record in records:
        # csv.reader yields [] for blank lines; skip rows too short to index.
        if len(record) < 7:
            continue
        # rpartition never raises, unlike unpacking split('@'), which needs
        # exactly one '@' in the address.
        _, at_sign, domain = record[6].rpartition('@')
        if not at_sign or not domain:
            continue
        domain = domain.lower()
        gender = record[5].strip().lower()
        if gender == 'm':
            males[domain] += 1
        elif gender == 'f':
            # Test for 'f' explicitly so blank or unknown codes are not
            # silently counted as female.
            females[domain] += 1
    return males, females


if __name__ == '__main__':
    # newline='' is the mode the csv module expects for files it reads.
    with open('1000 Records.csv', newline='') as f:
        males, females = count_accounts(csv.reader(f))

    print('Total male accounts:', sum(males.values()))
    print('Total male accounts by domain')
    for k, v in males.items():
        print(k, v)

    print('Total female accounts:', sum(females.values()))
    print('Total female accounts by domain')
    for k, v in females.items():
        print(k, v)