使用 python 计算 csv 文件中的列

Counting columns in a CSV file with Python

我想分别统计男性和女性的电子邮件帐户。我写的代码不能正常工作,所以谁能帮我解决这个问题?拜托了。这是我的代码,提前谢谢你。

    import csv

def tally_domains(rows, email_col=6, gender_col=5):
    """Count e-mail accounts per domain, separately for men and women.

    Args:
        rows: Iterable of CSV records, each a sequence of strings.
        email_col: Index of the field holding the e-mail address.
        gender_col: Index of the field holding the gender marker.

    Returns:
        A ``(male, female)`` tuple of dicts mapping domain -> number of rows.
        Rows whose gender is neither ``M`` nor ``F`` (any letter case), such
        as a header row, are counted in neither dict.
    """
    male = {}
    female = {}
    needed = max(email_col, gender_col)
    for row in rows:
        # csv.reader yields [] for blank lines; skip anything too short
        # instead of failing with IndexError.
        if len(row) <= needed:
            continue
        domain = row[email_col].split('@')[-1]
        # Normalise the marker so 'm'/'M' and 'f'/'F' are treated alike.
        gender = row[gender_col].strip().upper()
        # Pick the dict by gender FIRST, then count; a domain must only be
        # created in the dict of the gender that actually uses it.
        if gender == 'M':
            male[domain] = male.get(domain, 0) + 1
        elif gender == 'F':
            female[domain] = female.get(domain, 0) + 1
    return male, female


mailAcc = {}
femailAcc = {}

if __name__ == '__main__':
    # newline='' lets the csv module handle line endings inside quoted fields.
    with open('1000 Records.csv', 'r', newline='') as csv_file:
        mailAcc, femailAcc = tally_domains(csv.reader(csv_file))

    print('Mail Email accounts: ', mailAcc)
    print('Femail Email Accounts: ', femailAcc)

使用pandas

import pandas as pd

def _email_domain(address):
    """Return the part of *address* after its first '@', or None if absent."""
    if not isinstance(address, str):
        return None  # missing cells arrive as NaN/None rather than text
    _, at_sign, domain = address.partition('@')
    return domain if at_sign else None


def count_domains_by_gender(df):
    """Count rows per e-mail domain for gender 'M' and gender 'F'.

    Adds a ``'domain'`` column to *df* in place (the text after the first
    '@' of the ``'email'`` column; missing when the address is empty or has
    no '@').

    Args:
        df: DataFrame with ``'email'`` and ``'gender'`` columns.

    Returns:
        A ``(male, female)`` tuple of dicts. Every domain found in *df* is a
        key in both dicts, with 0 when that gender has no such account.
    """
    df['domain'] = df['email'].apply(_email_domain)  # column with just domain
    domains = df['domain']
    known = domains.dropna().unique()

    def tally(gender):
        # One value_counts pass per gender instead of re-filtering the whole
        # frame once for every unique domain.
        counts = domains[df['gender'] == gender].value_counts()
        return {name: int(counts.get(name, 0)) for name in known}

    return tally('M'), tally('F')


male = {}  # male counts per domain
female = {}  # female counts per domain

if __name__ == '__main__':
    df = pd.read_csv('your_csv_file.csv')  # read in csv
    male, female = count_domains_by_gender(df)

这可以在 pandas 中完成。由于您的列未命名,因此在阅读 csv 时使用 header=None 并按编号访问列:

import pandas as pd

# Load the file without treating the first line as a header, so every
# column is addressed by its integer position.
df = pd.read_csv('1000 Records.csv', header=None)

# Column 6 is split on '@' and the last piece kept as the mail host.
addresses = df[6]
df['mailhosts'] = addresses.str.split('@').str[-1]

# Group the rows by the value in column 5 (compared against 'F' / 'M' below).
gp = df.groupby(5)

# Count e-mail accounts per gender:
for label, key in (('Female Email Accounts:', 'F'), ('Male Email Accounts:', 'M')):
    host_counts = gp.get_group(key)['mailhosts'].value_counts()
    print(label, host_counts)

这是一个仅使用标准 Python 模块按域计算男性和女性帐户的解决方案:

import csv
from collections import Counter

def count_accounts(records, email_col=6, gender_col=5):
    """Count e-mail accounts per lower-cased domain for each gender.

    Args:
        records: Iterable of CSV records, each a sequence of strings.
        email_col: Index of the field holding the e-mail address.
        gender_col: Index of the field holding the gender marker.

    Returns:
        A ``(males, females)`` tuple of ``Counter`` objects keyed by domain.
        Records that are too short, whose address contains no '@', or whose
        gender is neither 'm' nor 'f' (any letter case) are ignored.
    """
    males = Counter()
    females = Counter()
    needed = max(email_col, gender_col)
    for record in records:
        if len(record) <= needed:
            continue  # blank line or truncated record
        # rpartition never raises: a header cell or malformed address simply
        # yields an empty separator, where tuple-unpacking split('@') would
        # fail with ValueError.
        _, at_sign, domain = record[email_col].rpartition('@')
        if not at_sign:
            continue
        gender = record[gender_col].strip().lower()
        # Match both genders explicitly so unknown values are not silently
        # counted as female.
        if gender == 'm':
            males[domain.lower()] += 1
        elif gender == 'f':
            females[domain.lower()] += 1
    return males, females


males = Counter()
females = Counter()

if __name__ == '__main__':
    # newline='' lets the csv module handle line endings inside quoted fields.
    with open('1000 Records.csv', newline='') as f:
        males, females = count_accounts(csv.reader(f))

    print('Total male accounts:', sum(males.values()))
    print('Total male accounts by domain')
    for k, v in males.items():
        print(k, v)

    print('Total female accounts:', sum(females.values()))
    print('Total female accounts by domain')
    for k, v in females.items():
        print(k, v)