Python - 查找提取列表中的相同值并计算连续值之间的差异
Python - extracting the same values from a list and calculating the differences between consecutive values
我正在通过 Dataquest 学习 Python,并尝试解决这个问题。
编写一个函数,提取不同年份的相同值并计算连续值之间的差异,以显示出生人数是增加还是减少。
例如,从 1994 年到 2003 年,每年星期六出生的人数有何变化?
我正在尝试在 Jupyter 中解决这个问题。我是 python 的新手,我不确定如何开始解决这个问题。
此处输入的数据为 CSV 格式:US births
# coding: utf-8
# In[1]:
# Read the whole CSV as one string; the context manager closes the file
# handle instead of leaving it open for the rest of the session.
with open("US_births_1994-2003_CDC_NCHS.csv", "r") as csv_file:
    text_file = csv_file.read()
line_split = text_file.split("\n")
# Notebook preview: display the raw lines (header included).
line_split
# In[2]:
def read_csv(filename):
    """Parse a headered, comma-separated file of integers into rows.

    Args:
        filename: Path of the file to read. Its first line is treated as a
            header and skipped; every other field must be accepted by ``int``.

    Returns:
        A list holding one list of ints per non-blank data line, in file order.

    Raises:
        OSError: If the file cannot be opened.
        ValueError: If a field cannot be converted to ``int``.
    """
    final_list = []
    # The context manager guarantees the handle is closed, even on a parse error.
    with open(filename, "r") as handle:
        next(handle, None)  # skip the header; an empty file yields no rows
        for line in handle:
            row = line.strip()
            # A trailing newline produces a blank last line; int("") would raise.
            if not row:
                continue
            final_list.append([int(item) for item in row.split(",")])
    return final_list
# Parse the CDC births CSV into a list of integer rows using read_csv.
cdc_list = read_csv("US_births_1994-2003_CDC_NCHS.csv")
# Notebook preview: show the first ten parsed rows.
cdc_list[0:10]
# In[3]:
def months_births(filename):
    """Total the births (row index 4) for each month (row index 1).

    Args:
        filename: Despite the name, an iterable of already-parsed rows, not a
            path. Each row must be indexable, with int-convertible values at
            positions 1 and 4.

    Returns:
        A dict mapping each month value (as ``int``) to the summed births.
    """
    births_per_month = {}
    for row in filename:
        month = int(row[1])
        # dict.get supplies 0 the first time a month is seen.
        births_per_month[month] = births_per_month.get(month, 0) + int(row[4])
    return births_per_month
# Sum births per month (row index 1) across every parsed row.
cdc_month_births = months_births(cdc_list)
# Notebook preview of the resulting dict.
cdc_month_births
# In[4]:
def dow_births(filename):
    """Total the births (row index 4) for each day of the week (row index 3).

    Args:
        filename: Despite the name, an iterable of already-parsed rows, not a
            path. Each row must be indexable, with int-convertible values at
            positions 3 and 4.

    Returns:
        A dict mapping each day-of-week value (as ``int``) to the summed births.
    """
    sum_births = {}
    for row in filename:
        day_week = int(row[3])
        # dict.get supplies 0 the first time a weekday is seen.
        sum_births[day_week] = sum_births.get(day_week, 0) + int(row[4])
    return sum_births
# Sum births per day of the week (row index 3) across every parsed row.
cdc_day_births = dow_births(cdc_list)
# Notebook preview of the resulting dict.
cdc_day_births
# In[30]:
def calc_counts(data, column):
    """Total the births (row index 4) grouped by the value in one column.

    Args:
        data: Iterable of indexable rows with an int-convertible value at
            index 4.
        column: Zero-based index of the column whose values become the keys.

    Returns:
        A dict mapping each distinct value found at ``column`` (unconverted)
        to the summed births of the rows carrying it.
    """
    sum_dict = {}
    for row in data:
        key = row[column]
        # dict.get supplies 0 the first time a key is seen.
        sum_dict[key] = sum_dict.get(key, 0) + int(row[4])
    return sum_dict
# Build one births-total dict per grouping column. The column argument is the
# zero-based row index: 0, 1, 2 and 3 (named year, month, dom, dow here).
cdc_year_births = calc_counts(cdc_list, 0)
cdc_month_births = calc_counts(cdc_list, 1)
cdc_dom_births = calc_counts(cdc_list, 2)
cdc_dow_births = calc_counts(cdc_list, 3)
# In[31]:
# Notebook preview of each aggregate, one per cell.
cdc_year_births
# In[32]:
cdc_month_births
# In[33]:
cdc_dom_births
# In[34]:
cdc_dow_births
# In[6]:
def min_max_dict(filename, request):
    """Return the largest or smallest value stored in a dict.

    Args:
        filename: Despite the name, a dict whose values are mutually
            comparable (for example the births totals built by calc_counts).
        request: ``"max"`` selects the largest value; any other argument
            selects the smallest.

    Returns:
        The selected value (not its key).

    Raises:
        ValueError: If the dict is empty.
    """
    # Only the value is returned, so compare the values directly instead of
    # finding the winning key and looking it up again.
    if request == "max":
        return max(filename.values())
    return min(filename.values())
# Largest births total among the per-year totals in cdc_year_births.
max_value = min_max_dict(cdc_year_births, "max")
print("max: ",max_value)
# Smallest births total among the per-year totals.
min_value = min_max_dict(cdc_year_births, "min")
print("min: ",min_value)
# In[36]:
def diff_in_values(filename):
    """Collect the distinct birth counts (row index 4) found in the rows.

    NOTE(review): the name suggests a difference computation, but this
    function only returns the set of distinct counts; no differences are
    calculated.

    Args:
        filename: Despite the name, an iterable of already-parsed rows with
            an int-convertible value at index 4.

    Returns:
        A set of the distinct birth counts, as ints.
    """
    # The previous per-row year/weekday locals and the parallel list were
    # never used in the result, so only the set is built.
    return {int(row[4]) for row in filename}
# Collect the distinct per-row birth counts (diff_in_values returns a set).
result = diff_in_values(cdc_list)
# Notebook preview of that set.
result
我也在做同一个项目。我已经分享了您需要的部分代码。我在 GitHub 上有项目的 .ipynb 文件。您可能还想查看我的函数结果。干杯!
def read_csv(birth_data_file):
    """Parse a headered, comma-separated file of integers into rows.

    Args:
        birth_data_file: Path of the file to read. Its first line is treated
            as a header and skipped; every other field must be accepted by
            ``int``.

    Returns:
        A list holding one list of ints per non-blank data line, in file order.

    Raises:
        OSError: If the file cannot be opened.
        ValueError: If a field cannot be converted to ``int``.
    """
    final_list = []
    # The context manager guarantees the handle is closed, even on a parse error.
    with open(birth_data_file, "r") as handle:
        next(handle, None)  # skip the header; an empty file yields no rows
        for line in handle:
            data = line.strip()
            # A trailing newline produces a blank last line; int("") would raise.
            if not data:
                continue
            final_list.append([int(field) for field in data.split(",")])
    return final_list
def calc_counts(data, column):
    """Total the births (row index 4) grouped by a one-based column number.

    Args:
        data: Iterable of indexable rows whose value at index 4 is a number.
        column: One-based column number, 1 through 4; the grouping key is
            read from ``row[column - 1]``.

    Returns:
        A dict mapping each distinct key to its summed births, or the error
        message string ``"'column' must be either 1, 2, 3, or 4"`` when
        ``column`` is out of range (no exception is raised).
    """
    # `not column > 0 and column <= 4` parses as `(not column > 0) and ...`,
    # which lets column >= 5 through; the chained comparison states the range.
    if not 1 <= column <= 4:
        return "'column' must be either 1, 2, 3, or 4"
    births_counts = {}
    for instance in data:
        field = instance[column - 1]
        births_counts[field] = births_counts.get(field, 0) + instance[4]
    return births_counts
# Write a function that extracts the same values across years and calculates the
# differences between consecutive values to show if number of
# births is increasing or decreasing.
def check_birth_growth(birth_data_file):
    """Print, year by year, whether total births rose, fell or stayed level.

    The file is parsed with read_csv and totalled per year with calc_counts.
    Years are visited in the order their totals were inserted (first
    appearance in the file), and each is compared with the year visited
    before it.

    Args:
        birth_data_file: Path of the births CSV file.

    Returns:
        None. One status line per year is written to standard output.
    """
    cdc_list = read_csv(birth_data_file)
    # Column 1 selects row index 0 under the one-based calc_counts convention.
    cdc_year_births = calc_counts(cdc_list, 1)
    # None (not 0) marks "no previous year", so a genuine total of zero is
    # still compared against the year that follows it.
    previous_year_birth = None
    for year, total_births in cdc_year_births.items():
        current_year_birth = int(total_births)
        if previous_year_birth is None:
            growth_status = "Growth of births in {} not available.".format(year)
        elif current_year_birth > previous_year_birth:
            growth_status = "Births increased in {}.".format(year)
        elif current_year_birth < previous_year_birth:
            growth_status = "Births decreased in {}.".format(year)
        else:
            growth_status = "Births in {} was same as previous year.".format(year)
        print(growth_status)
        previous_year_birth = current_year_birth
def calc_diff(filename, date_one, date_two, column, column_value):
    """Report the change in births between successive matching rows.

    A row matches when its year (index 0) satisfies
    ``date_one <= year < date_two`` and ``row[column] == column_value``.
    Each matching row is compared with the previous matching row; the first
    one is compared with 0, so it is always reported relative to zero.

    Args:
        filename: Despite the name, an iterable of parsed rows, not a path.
        date_one: First year to include.
        date_two: End of the year range, exclusive. Passing the same value as
            ``date_one`` selects nothing.
        column: Zero-based index of the column to filter on.
        column_value: Value that ``row[column]`` must equal.

    Returns:
        A list of ``[difference, status, year]`` entries in row order, where
        status is ``"increased"``, ``"decreased"`` or ``"static"``.
    """
    birth_rate_result = []
    previous_birth_rate = 0
    for row in filename:
        year = row[0]
        if not date_one <= year < date_two:
            continue
        # Compare by value: `is` tests object identity, which is only
        # accidentally true for small cached integers.
        if row[column] != column_value:
            continue
        current_birth_rate = row[4]
        birth_rate_diff = current_birth_rate - previous_birth_rate
        if birth_rate_diff > 0:
            growth_status = "increased"
        elif birth_rate_diff < 0:
            growth_status = "decreased"
        else:
            growth_status = "static"
        previous_birth_rate = current_birth_rate
        birth_rate_result.append([birth_rate_diff, growth_status, year])
    return birth_rate_result
我刚开始学习 Python。你能告诉我如何运行这些代码示例吗?我执行了 birth_diffrence = calc_diff(cdc_list, 4,4,0,4),但没有显示任何结果。
我相信上面的代码只统计了连续年份、时期之间的出生差异,没有给不同参数的机会——例如,一月或星期日的出生差异
在下面的函数中,您可以执行此操作,例如查看星期一出生的年份差异,输入:列 = 3 和 值 = 1
def delta_year(input_list, column, value):
    """Compute year-over-year birth differences for one attribute value.

    Births (row index 4) are first totalled per year (row index 0) over the
    rows where ``row[column] == value``. Each year that directly follows
    another year present in those totals is then mapped to the change from
    that preceding year.

    Args:
        input_list: Iterable of parsed rows with numeric year and births.
        column: Zero-based index of the column to filter on.
        value: Value that ``row[column]`` must equal.

    Returns:
        A dict mapping ``year`` to ``births[year] - births[year - 1]`` for
        every pair of consecutive years found. Years without a direct
        predecessor get no entry.
    """
    column_year_count = {}
    for each in input_list:
        if each[column] == value:
            year = each[0]
            column_year_count[year] = column_year_count.get(year, 0) + each[4]

    column_year_delta = {}
    # Look the following year up directly instead of scanning every pair of
    # years; this also avoids shadowing the function name and `value`.
    for year, births in column_year_count.items():
        following_year = year + 1
        if following_year in column_year_count:
            column_year_delta[following_year] = (
                column_year_count[following_year] - births
            )
    return column_year_delta
我正在通过 Dataquest 学习 Python,并尝试解决这个问题。
编写一个函数,提取不同年份的相同值并计算连续值之间的差异,以显示出生人数是增加还是减少。 例如,从 1994 年到 2003 年,每年星期六出生的人数有何变化?
我正在尝试在 Jupyter 中解决这个问题。我是 python 的新手,我不确定如何开始解决这个问题。
此处输入的数据为 CSV 格式:US births
# coding: utf-8
# In[1]:
# Read the whole CSV as one string; the context manager closes the file
# handle instead of leaving it open for the rest of the session.
with open("US_births_1994-2003_CDC_NCHS.csv", "r") as csv_file:
    text_file = csv_file.read()
line_split = text_file.split("\n")
# Notebook preview: display the raw lines (header included).
line_split
# In[2]:
def read_csv(filename):
    """Parse a headered, comma-separated file of integers into rows.

    Args:
        filename: Path of the file to read. Its first line is treated as a
            header and skipped; every other field must be accepted by ``int``.

    Returns:
        A list holding one list of ints per non-blank data line, in file order.

    Raises:
        OSError: If the file cannot be opened.
        ValueError: If a field cannot be converted to ``int``.
    """
    final_list = []
    # The context manager guarantees the handle is closed, even on a parse error.
    with open(filename, "r") as handle:
        next(handle, None)  # skip the header; an empty file yields no rows
        for line in handle:
            row = line.strip()
            # A trailing newline produces a blank last line; int("") would raise.
            if not row:
                continue
            final_list.append([int(item) for item in row.split(",")])
    return final_list
# Parse the CDC births CSV into a list of integer rows using read_csv.
cdc_list = read_csv("US_births_1994-2003_CDC_NCHS.csv")
# Notebook preview: show the first ten parsed rows.
cdc_list[0:10]
# In[3]:
def months_births(filename):
    """Total the births (row index 4) for each month (row index 1).

    Args:
        filename: Despite the name, an iterable of already-parsed rows, not a
            path. Each row must be indexable, with int-convertible values at
            positions 1 and 4.

    Returns:
        A dict mapping each month value (as ``int``) to the summed births.
    """
    births_per_month = {}
    for row in filename:
        month = int(row[1])
        # dict.get supplies 0 the first time a month is seen.
        births_per_month[month] = births_per_month.get(month, 0) + int(row[4])
    return births_per_month
# Sum births per month (row index 1) across every parsed row.
cdc_month_births = months_births(cdc_list)
# Notebook preview of the resulting dict.
cdc_month_births
# In[4]:
def dow_births(filename):
    """Total the births (row index 4) for each day of the week (row index 3).

    Args:
        filename: Despite the name, an iterable of already-parsed rows, not a
            path. Each row must be indexable, with int-convertible values at
            positions 3 and 4.

    Returns:
        A dict mapping each day-of-week value (as ``int``) to the summed births.
    """
    sum_births = {}
    for row in filename:
        day_week = int(row[3])
        # dict.get supplies 0 the first time a weekday is seen.
        sum_births[day_week] = sum_births.get(day_week, 0) + int(row[4])
    return sum_births
# Sum births per day of the week (row index 3) across every parsed row.
cdc_day_births = dow_births(cdc_list)
# Notebook preview of the resulting dict.
cdc_day_births
# In[30]:
def calc_counts(data, column):
    """Total the births (row index 4) grouped by the value in one column.

    Args:
        data: Iterable of indexable rows with an int-convertible value at
            index 4.
        column: Zero-based index of the column whose values become the keys.

    Returns:
        A dict mapping each distinct value found at ``column`` (unconverted)
        to the summed births of the rows carrying it.
    """
    sum_dict = {}
    for row in data:
        key = row[column]
        # dict.get supplies 0 the first time a key is seen.
        sum_dict[key] = sum_dict.get(key, 0) + int(row[4])
    return sum_dict
# Build one births-total dict per grouping column. The column argument is the
# zero-based row index: 0, 1, 2 and 3 (named year, month, dom, dow here).
cdc_year_births = calc_counts(cdc_list, 0)
cdc_month_births = calc_counts(cdc_list, 1)
cdc_dom_births = calc_counts(cdc_list, 2)
cdc_dow_births = calc_counts(cdc_list, 3)
# In[31]:
# Notebook preview of each aggregate, one per cell.
cdc_year_births
# In[32]:
cdc_month_births
# In[33]:
cdc_dom_births
# In[34]:
cdc_dow_births
# In[6]:
def min_max_dict(filename, request):
    """Return the largest or smallest value stored in a dict.

    Args:
        filename: Despite the name, a dict whose values are mutually
            comparable (for example the births totals built by calc_counts).
        request: ``"max"`` selects the largest value; any other argument
            selects the smallest.

    Returns:
        The selected value (not its key).

    Raises:
        ValueError: If the dict is empty.
    """
    # Only the value is returned, so compare the values directly instead of
    # finding the winning key and looking it up again.
    if request == "max":
        return max(filename.values())
    return min(filename.values())
# Largest births total among the per-year totals in cdc_year_births.
max_value = min_max_dict(cdc_year_births, "max")
print("max: ",max_value)
# Smallest births total among the per-year totals.
min_value = min_max_dict(cdc_year_births, "min")
print("min: ",min_value)
# In[36]:
def diff_in_values(filename):
    """Collect the distinct birth counts (row index 4) found in the rows.

    NOTE(review): the name suggests a difference computation, but this
    function only returns the set of distinct counts; no differences are
    calculated.

    Args:
        filename: Despite the name, an iterable of already-parsed rows with
            an int-convertible value at index 4.

    Returns:
        A set of the distinct birth counts, as ints.
    """
    # The previous per-row year/weekday locals and the parallel list were
    # never used in the result, so only the set is built.
    return {int(row[4]) for row in filename}
# Collect the distinct per-row birth counts (diff_in_values returns a set).
result = diff_in_values(cdc_list)
# Notebook preview of that set.
result
我也在做同一个项目。我已经分享了您需要的部分代码。我在 GitHub 上有项目的 .ipynb 文件。您可能还想查看我的函数结果。干杯!
def read_csv(birth_data_file):
    """Parse a headered, comma-separated file of integers into rows.

    Args:
        birth_data_file: Path of the file to read. Its first line is treated
            as a header and skipped; every other field must be accepted by
            ``int``.

    Returns:
        A list holding one list of ints per non-blank data line, in file order.

    Raises:
        OSError: If the file cannot be opened.
        ValueError: If a field cannot be converted to ``int``.
    """
    final_list = []
    # The context manager guarantees the handle is closed, even on a parse error.
    with open(birth_data_file, "r") as handle:
        next(handle, None)  # skip the header; an empty file yields no rows
        for line in handle:
            data = line.strip()
            # A trailing newline produces a blank last line; int("") would raise.
            if not data:
                continue
            final_list.append([int(field) for field in data.split(",")])
    return final_list
def calc_counts(data, column):
    """Total the births (row index 4) grouped by a one-based column number.

    Args:
        data: Iterable of indexable rows whose value at index 4 is a number.
        column: One-based column number, 1 through 4; the grouping key is
            read from ``row[column - 1]``.

    Returns:
        A dict mapping each distinct key to its summed births, or the error
        message string ``"'column' must be either 1, 2, 3, or 4"`` when
        ``column`` is out of range (no exception is raised).
    """
    # `not column > 0 and column <= 4` parses as `(not column > 0) and ...`,
    # which lets column >= 5 through; the chained comparison states the range.
    if not 1 <= column <= 4:
        return "'column' must be either 1, 2, 3, or 4"
    births_counts = {}
    for instance in data:
        field = instance[column - 1]
        births_counts[field] = births_counts.get(field, 0) + instance[4]
    return births_counts
# Write a function that extracts the same values across years and calculates the
# differences between consecutive values to show if number of
# births is increasing or decreasing.
def check_birth_growth(birth_data_file):
    """Print, year by year, whether total births rose, fell or stayed level.

    The file is parsed with read_csv and totalled per year with calc_counts.
    Years are visited in the order their totals were inserted (first
    appearance in the file), and each is compared with the year visited
    before it.

    Args:
        birth_data_file: Path of the births CSV file.

    Returns:
        None. One status line per year is written to standard output.
    """
    cdc_list = read_csv(birth_data_file)
    # Column 1 selects row index 0 under the one-based calc_counts convention.
    cdc_year_births = calc_counts(cdc_list, 1)
    # None (not 0) marks "no previous year", so a genuine total of zero is
    # still compared against the year that follows it.
    previous_year_birth = None
    for year, total_births in cdc_year_births.items():
        current_year_birth = int(total_births)
        if previous_year_birth is None:
            growth_status = "Growth of births in {} not available.".format(year)
        elif current_year_birth > previous_year_birth:
            growth_status = "Births increased in {}.".format(year)
        elif current_year_birth < previous_year_birth:
            growth_status = "Births decreased in {}.".format(year)
        else:
            growth_status = "Births in {} was same as previous year.".format(year)
        print(growth_status)
        previous_year_birth = current_year_birth
def calc_diff(filename, date_one, date_two, column, column_value):
    """Report the change in births between successive matching rows.

    A row matches when its year (index 0) satisfies
    ``date_one <= year < date_two`` and ``row[column] == column_value``.
    Each matching row is compared with the previous matching row; the first
    one is compared with 0, so it is always reported relative to zero.

    Args:
        filename: Despite the name, an iterable of parsed rows, not a path.
        date_one: First year to include.
        date_two: End of the year range, exclusive. Passing the same value as
            ``date_one`` selects nothing.
        column: Zero-based index of the column to filter on.
        column_value: Value that ``row[column]`` must equal.

    Returns:
        A list of ``[difference, status, year]`` entries in row order, where
        status is ``"increased"``, ``"decreased"`` or ``"static"``.
    """
    birth_rate_result = []
    previous_birth_rate = 0
    for row in filename:
        year = row[0]
        if not date_one <= year < date_two:
            continue
        # Compare by value: `is` tests object identity, which is only
        # accidentally true for small cached integers.
        if row[column] != column_value:
            continue
        current_birth_rate = row[4]
        birth_rate_diff = current_birth_rate - previous_birth_rate
        if birth_rate_diff > 0:
            growth_status = "increased"
        elif birth_rate_diff < 0:
            growth_status = "decreased"
        else:
            growth_status = "static"
        previous_birth_rate = current_birth_rate
        birth_rate_result.append([birth_rate_diff, growth_status, year])
    return birth_rate_result
我刚开始学习 Python。你能告诉我如何运行这些代码示例吗?我执行了 birth_diffrence = calc_diff(cdc_list, 4,4,0,4),但没有显示任何结果。
我相信上面的代码只统计了连续年份、时期之间的出生差异,没有给不同参数的机会——例如,一月或星期日的出生差异
在下面的函数中,您可以执行此操作,例如查看星期一出生的年份差异,输入:列 = 3 和 值 = 1
def delta_year(input_list, column, value):
    """Compute year-over-year birth differences for one attribute value.

    Births (row index 4) are first totalled per year (row index 0) over the
    rows where ``row[column] == value``. Each year that directly follows
    another year present in those totals is then mapped to the change from
    that preceding year.

    Args:
        input_list: Iterable of parsed rows with numeric year and births.
        column: Zero-based index of the column to filter on.
        value: Value that ``row[column]`` must equal.

    Returns:
        A dict mapping ``year`` to ``births[year] - births[year - 1]`` for
        every pair of consecutive years found. Years without a direct
        predecessor get no entry.
    """
    column_year_count = {}
    for each in input_list:
        if each[column] == value:
            year = each[0]
            column_year_count[year] = column_year_count.get(year, 0) + each[4]

    column_year_delta = {}
    # Look the following year up directly instead of scanning every pair of
    # years; this also avoids shadowing the function name and `value`.
    for year, births in column_year_count.items():
        following_year = year + 1
        if following_year in column_year_count:
            column_year_delta[following_year] = (
                column_year_count[following_year] - births
            )
    return column_year_delta