将 pandas df 写入 xlsx 时如何使此循环正常工作?
How do I get this loop to work correctly when writing pandas df to xlsx?
我用过this code,还算可以。现在在较小的 'rep_list' 中,当它执行列表中的第一个代表时,即 CP 被添加,但是当它进入 AM 时,它会覆盖 CP。所以现在当我 运行 这段代码时,它实际上只保存了循环中的最后一个人。如果我 运行 代码只有 "CP" 然后只是 "AM" 它会按原样附加它。 for循环有问题吗?还是工作簿本身的问题?
import pandas as pd
import datetime
from openpyxl import load_workbook
now = datetime.datetime.now()
currentDate = now.strftime("%Y-%m-%d")
call_report = pd.read_excel("Ending 2016-07-30.xlsx", "raw_data")
#rep_list = ["CP", "AM", "JB", "TT", "KE"]
rep_list = ["CP", "AM"]
def call_log_reader(rep_name):
rep_log = currentDate + "-" + rep_name + ".csv"
df = pd.read_csv(rep_log)
df = df.drop(['From Name', 'From Number', 'To Name / Reference', 'To Number', 'Billing Code', 'Original Dialed Number',
'First Hunt Group', 'Last Hunt Group'], axis=1)
df['rep'] = rep_name
book = load_workbook('Ending 2016-07-30.xlsx')
writer = pd.ExcelWriter('Ending 2016-07-30.xlsx', engine='openpyxl')
writer.book = book
writer.sheets = dict((ws.title, ws) for ws in book.worksheets)
df.to_excel(writer, "raw_data", index=False)
writer.save()
## I tried adding this : writer.close() hoping it would close the book and then force it to reopen for the next rep in the loop but it doesn't seem to work.
for rep in rep_list:
call_log_reader(rep)
非常感谢!
编辑:
Gaurav Dhama 给出了一个非常有效的答案。他指出 Pandas excelwriter (refer to this link) 有一些局限性,并提出了一个解决方案,每个代表最终都有自己的 sheet。这行得通,但是在我考虑之后我选择不使用额外的 sheets 并在知道存在限制的情况下提出了这个解决方案。基本上,我附加了一个 CSV 而不是实际的 XLSX 文件,然后在最后打开该 CSV 并将一个大列表附加到 XLSX 文件中。任何一个都有效,仅取决于您的最终产品的外观。
import pandas as pd
import datetime
from openpyxl import load_workbook
now = datetime.datetime.now()
currentDate = now.strftime("%Y-%m-%d")
call_report = "Ending 2016-07-30.xlsx"
#rep_list = ["CP", "AM", "JB", "TT", "KE"]
rep_list = ["CP", "AM"]
csv_to_xl_files = []
merged_csv = currentDate + "-master.csv"
def call_log_reader(rep_name):
rep_log = currentDate + "-" + rep_name + ".csv"
df = pd.read_csv(rep_log)
df = df.drop(['TimestampDetail', 'Billing Code', 'From Name', 'From Number', 'To Name / Reference', 'To Number',
'Original Dialed Number', 'First Hunt Group', 'Last Hunt Group'], axis=1)
df['rep'] = rep_name
#print (df.head(3))
df.to_csv(merged_csv, mode='a', index=False, header=False)
csv_to_xl_files.append(rep_log)
book = load_workbook(call_report)
writer = pd.ExcelWriter(call_report, engine='openpyxl')
writer.book = book
writer.sheets = dict((ws.title, ws) for ws in book.worksheets)
for rep in rep_list:
call_log_reader(rep)
master_df = pd.read_csv(merged_csv)
master_df.to_excel(writer, "raw_data", index=False)
writer.save()
#this csv_to_xl_files list isn't finished yet, basically I'm going to use it to delete the files from the directory as I don't need them once the script is run.
print (csv_to_xl_files)
尝试使用以下方法:
import pandas as pd
import datetime
from openpyxl import load_workbook
now = datetime.datetime.now()
currentDate = now.strftime("%Y-%m-%d")
call_report = pd.read_excel("Ending 2016-07-30.xlsx", "raw_data")
#rep_list = ["CP", "AM", "JB", "TT", "KE"]
rep_list = ["CP", "AM"]
def call_log_reader(rep_name):
rep_log = currentDate + "-" + rep_name + ".csv"
df = pd.read_csv(rep_log)
df = df.drop(['From Name', 'From Number', 'To Name / Reference', 'To Number', 'Billing Code', 'Original Dialed Number',
'First Hunt Group', 'Last Hunt Group'], axis=1)
df['rep'] = rep_name
df.to_excel(writer, "raw_data"+rep, index=False)
return df
book = load_workbook('Ending 2016-07-30.xlsx')
writer = pd.ExcelWriter('Ending 2016-07-30.xlsx', engine='openpyxl')
writer.book = book
writer.sheets = dict((ws.title, ws) for ws in book.worksheets)
for rep in rep_list:
call_log_reader(rep)
writer.save()
如果你使用 openpyxl 2.4 那么你可以 work with Pandas dataframes in directly in openpyxl.
我用过this code,还算可以。现在在较小的 'rep_list' 中,当它执行列表中的第一个代表时,即 CP 被添加,但是当它进入 AM 时,它会覆盖 CP。所以现在当我 运行 这段代码时,它实际上只保存了循环中的最后一个人。如果我 运行 代码只有 "CP" 然后只是 "AM" 它会按原样附加它。 for循环有问题吗?还是工作簿本身的问题?
import pandas as pd
import datetime
from openpyxl import load_workbook
now = datetime.datetime.now()
currentDate = now.strftime("%Y-%m-%d")
call_report = pd.read_excel("Ending 2016-07-30.xlsx", "raw_data")
#rep_list = ["CP", "AM", "JB", "TT", "KE"]
rep_list = ["CP", "AM"]
def call_log_reader(rep_name):
rep_log = currentDate + "-" + rep_name + ".csv"
df = pd.read_csv(rep_log)
df = df.drop(['From Name', 'From Number', 'To Name / Reference', 'To Number', 'Billing Code', 'Original Dialed Number',
'First Hunt Group', 'Last Hunt Group'], axis=1)
df['rep'] = rep_name
book = load_workbook('Ending 2016-07-30.xlsx')
writer = pd.ExcelWriter('Ending 2016-07-30.xlsx', engine='openpyxl')
writer.book = book
writer.sheets = dict((ws.title, ws) for ws in book.worksheets)
df.to_excel(writer, "raw_data", index=False)
writer.save()
## I tried adding this : writer.close() hoping it would close the book and then force it to reopen for the next rep in the loop but it doesn't seem to work.
for rep in rep_list:
call_log_reader(rep)
非常感谢!
编辑:
Gaurav Dhama 给出了一个非常有效的答案。他指出 Pandas excelwriter (refer to this link) 有一些局限性,并提出了一个解决方案,每个代表最终都有自己的 sheet。这行得通,但是在我考虑之后我选择不使用额外的 sheets 并在知道存在限制的情况下提出了这个解决方案。基本上,我附加了一个 CSV 而不是实际的 XLSX 文件,然后在最后打开该 CSV 并将一个大列表附加到 XLSX 文件中。任何一个都有效,仅取决于您的最终产品的外观。
import pandas as pd
import datetime
from openpyxl import load_workbook
now = datetime.datetime.now()
currentDate = now.strftime("%Y-%m-%d")
call_report = "Ending 2016-07-30.xlsx"
#rep_list = ["CP", "AM", "JB", "TT", "KE"]
rep_list = ["CP", "AM"]
csv_to_xl_files = []
merged_csv = currentDate + "-master.csv"
def call_log_reader(rep_name):
rep_log = currentDate + "-" + rep_name + ".csv"
df = pd.read_csv(rep_log)
df = df.drop(['TimestampDetail', 'Billing Code', 'From Name', 'From Number', 'To Name / Reference', 'To Number',
'Original Dialed Number', 'First Hunt Group', 'Last Hunt Group'], axis=1)
df['rep'] = rep_name
#print (df.head(3))
df.to_csv(merged_csv, mode='a', index=False, header=False)
csv_to_xl_files.append(rep_log)
book = load_workbook(call_report)
writer = pd.ExcelWriter(call_report, engine='openpyxl')
writer.book = book
writer.sheets = dict((ws.title, ws) for ws in book.worksheets)
for rep in rep_list:
call_log_reader(rep)
master_df = pd.read_csv(merged_csv)
master_df.to_excel(writer, "raw_data", index=False)
writer.save()
#this csv_to_xl_files list isn't finished yet, basically I'm going to use it to delete the files from the directory as I don't need them once the script is run.
print (csv_to_xl_files)
尝试使用以下方法:
import pandas as pd
import datetime
from openpyxl import load_workbook
now = datetime.datetime.now()
currentDate = now.strftime("%Y-%m-%d")
call_report = pd.read_excel("Ending 2016-07-30.xlsx", "raw_data")
#rep_list = ["CP", "AM", "JB", "TT", "KE"]
rep_list = ["CP", "AM"]
def call_log_reader(rep_name):
rep_log = currentDate + "-" + rep_name + ".csv"
df = pd.read_csv(rep_log)
df = df.drop(['From Name', 'From Number', 'To Name / Reference', 'To Number', 'Billing Code', 'Original Dialed Number',
'First Hunt Group', 'Last Hunt Group'], axis=1)
df['rep'] = rep_name
df.to_excel(writer, "raw_data"+rep, index=False)
return df
book = load_workbook('Ending 2016-07-30.xlsx')
writer = pd.ExcelWriter('Ending 2016-07-30.xlsx', engine='openpyxl')
writer.book = book
writer.sheets = dict((ws.title, ws) for ws in book.worksheets)
for rep in rep_list:
call_log_reader(rep)
writer.save()
如果你使用 openpyxl 2.4 那么你可以 work with Pandas dataframes in directly in openpyxl.