Python 从不同目录读取数据并合并到一个 excel 文件中
Python reading data from different directories and merging into one excel file
我只是 Python 的初学者。现在尝试使用 Python 合并来自不同目录的多个 Excel 文件。
我有一段代码,如下所示。然而,执行时只有一个文件被读取,其他 2 个文件没有合并到最终的 Excel 文件中。我应该修改代码的哪个部分?
在此先感谢您的帮助。
此致
凯雷姆
import glob
import pandas as pd
import os
#path = r'E:\Users\KEREMH\.spyder-py3210326GecikmedekiMusteriler'
# Collect the .xlsx workbooks found under <cwd>/<sube>/<klasor_adi>/gonderilen and write
# the combined rows to a single workbook (Konsolide.xlsx).
# NOTE(review): this listing has lost its indentation, so which statements belong to which
# loop cannot be read from it — restore the intended nesting before running.
subelistesi = ['1140','1150','1190']
# Folder name typed by the user; it becomes the path component right after `sube`.
klasor_adi= input('Klasör Adı: ')
gonderilen ='gonderilen'
for sube in subelistesi:
path = os.path.join(os.getcwd(), sube, klasor_adi, gonderilen )
# NOTE(review): presumably inside the `sube` loop, so `filenames` is overwritten on every
# pass and the matches of earlier folders are not kept anywhere — confirm.
filenames = glob.glob(path + "/*.xlsx")
print(filenames)
for file in filenames:
# NOTE(review): if this assignment sits inside the file loop, the accumulator is reset
# to an empty frame for every file — confirm the intended placement.
concat_all_sheets_all_files = pd.DataFrame()
# sheet_name=None requests every sheet of the workbook; header=None treats the first
# row as data rather than as column names.
df = pd.read_excel(file, sheet_name=None, skiprows=None,nrows=None,usecols=None,header=None,index_col=None)
# Stack the sheets of this workbook into one frame.
concat_all_sheets_single_file = pd.concat(df,sort=False)
# Record which workbook each row came from.
concat_all_sheets_single_file['filename']=os.path.basename(file)
# NOTE(review): DataFrame.append is not available in pandas >= 2.0 — verify the pandas version in use.
concat_all_sheets_all_files = concat_all_sheets_all_files.append(concat_all_sheets_single_file)
writer = pd.ExcelWriter(r'E:\Users\KEREMH\.spyder-py3\Konsolide.xlsx')
concat_all_sheets_all_files.to_excel(writer)
# NOTE(review): ExcelWriter.save is not available in pandas >= 2.0 — verify the pandas version in use.
writer.save()
print(concat_all_sheets_all_files)
当您遍历 `subelistesi` 时,只有 `filenames` 的最后一个值会被保留。
更好的方法是在任何循环之前创建一个列表,并将所有数据帧添加到其中,然后在完成循环后连接。
import glob
import pandas as pd
import os
#path = r'E:\Users\KEREMH\.spyder-py3210326GecikmedekiMusteriler'
# Merge every .xlsx workbook found under <cwd>/<sube>/<klasor_adi>/gonderilen into one
# workbook, keeping track of the file each row came from.
subelistesi = ['1140', '1150', '1190']
klasor_adi = input('Klasör Adı: ')
gonderilen = 'gonderilen'
all_dfs = []  # new list here: one DataFrame per workbook, across ALL folders
for sube in subelistesi:
    path = os.path.join(os.getcwd(), sube, klasor_adi, gonderilen)
    filenames = glob.glob(os.path.join(path, "*.xlsx"))
    print(filenames)
    # Read the files while still inside the folder loop: `filenames` is
    # overwritten on the next pass, so a loop placed after this one would
    # only ever see the last folder.
    for file in filenames:
        # sheet_name=None returns a dict {sheet name: DataFrame}, not a single
        # DataFrame; header=None keeps the first row as data.
        sheets = pd.read_excel(file, sheet_name=None, header=None)
        # Stack all sheets first; the sheet name becomes the outer index level.
        df = pd.concat(sheets, sort=False)
        df['filename'] = os.path.basename(file)
        all_dfs.append(df)
# pd.concat raises ValueError on an empty list, so fall back to an empty frame
# when no workbook was found in any folder.
if all_dfs:
    concat_all_sheets_all_files = pd.concat(all_dfs, axis=0)
else:
    concat_all_sheets_all_files = pd.DataFrame()
# The context manager saves and closes the workbook, even if to_excel fails.
with pd.ExcelWriter(r'E:\Users\KEREMH\.spyder-py3\Konsolide.xlsx') as writer:
    concat_all_sheets_all_files.to_excel(writer)
print(concat_all_sheets_all_files)
我只是 Python 的初学者。现在尝试使用 Python 合并来自不同目录的多个 Excel 文件。
我有一段代码,如下所示。然而,执行时只有一个文件被读取,其他 2 个文件没有合并到最终的 Excel 文件中。我应该修改代码的哪个部分?
在此先感谢您的帮助。
此致 凯雷姆
import glob
import pandas as pd
import os
#path = r'E:\Users\KEREMH\.spyder-py3210326GecikmedekiMusteriler'
# Collect the .xlsx workbooks found under <cwd>/<sube>/<klasor_adi>/gonderilen and write
# the combined rows to a single workbook (Konsolide.xlsx).
# NOTE(review): this listing has lost its indentation, so which statements belong to which
# loop cannot be read from it — restore the intended nesting before running.
subelistesi = ['1140','1150','1190']
# Folder name typed by the user; it becomes the path component right after `sube`.
klasor_adi= input('Klasör Adı: ')
gonderilen ='gonderilen'
for sube in subelistesi:
path = os.path.join(os.getcwd(), sube, klasor_adi, gonderilen )
# NOTE(review): presumably inside the `sube` loop, so `filenames` is overwritten on every
# pass and the matches of earlier folders are not kept anywhere — confirm.
filenames = glob.glob(path + "/*.xlsx")
print(filenames)
for file in filenames:
# NOTE(review): if this assignment sits inside the file loop, the accumulator is reset
# to an empty frame for every file — confirm the intended placement.
concat_all_sheets_all_files = pd.DataFrame()
# sheet_name=None requests every sheet of the workbook; header=None treats the first
# row as data rather than as column names.
df = pd.read_excel(file, sheet_name=None, skiprows=None,nrows=None,usecols=None,header=None,index_col=None)
# Stack the sheets of this workbook into one frame.
concat_all_sheets_single_file = pd.concat(df,sort=False)
# Record which workbook each row came from.
concat_all_sheets_single_file['filename']=os.path.basename(file)
# NOTE(review): DataFrame.append is not available in pandas >= 2.0 — verify the pandas version in use.
concat_all_sheets_all_files = concat_all_sheets_all_files.append(concat_all_sheets_single_file)
writer = pd.ExcelWriter(r'E:\Users\KEREMH\.spyder-py3\Konsolide.xlsx')
concat_all_sheets_all_files.to_excel(writer)
# NOTE(review): ExcelWriter.save is not available in pandas >= 2.0 — verify the pandas version in use.
writer.save()
print(concat_all_sheets_all_files)
当您遍历 `subelistesi` 时,只有 `filenames` 的最后一个值会被保留。
更好的方法是在任何循环之前创建一个列表,并将所有数据帧添加到其中,然后在完成循环后连接。
import glob
import pandas as pd
import os
#path = r'E:\Users\KEREMH\.spyder-py3210326GecikmedekiMusteriler'
# Merge every .xlsx workbook found under <cwd>/<sube>/<klasor_adi>/gonderilen into one
# workbook, keeping track of the file each row came from.
subelistesi = ['1140', '1150', '1190']
klasor_adi = input('Klasör Adı: ')
gonderilen = 'gonderilen'
all_dfs = []  # new list here: one DataFrame per workbook, across ALL folders
for sube in subelistesi:
    path = os.path.join(os.getcwd(), sube, klasor_adi, gonderilen)
    filenames = glob.glob(os.path.join(path, "*.xlsx"))
    print(filenames)
    # Read the files while still inside the folder loop: `filenames` is
    # overwritten on the next pass, so a loop placed after this one would
    # only ever see the last folder.
    for file in filenames:
        # sheet_name=None returns a dict {sheet name: DataFrame}, not a single
        # DataFrame; header=None keeps the first row as data.
        sheets = pd.read_excel(file, sheet_name=None, header=None)
        # Stack all sheets first; the sheet name becomes the outer index level.
        df = pd.concat(sheets, sort=False)
        df['filename'] = os.path.basename(file)
        all_dfs.append(df)
# pd.concat raises ValueError on an empty list, so fall back to an empty frame
# when no workbook was found in any folder.
if all_dfs:
    concat_all_sheets_all_files = pd.concat(all_dfs, axis=0)
else:
    concat_all_sheets_all_files = pd.DataFrame()
# The context manager saves and closes the workbook, even if to_excel fails.
with pd.ExcelWriter(r'E:\Users\KEREMH\.spyder-py3\Konsolide.xlsx') as writer:
    concat_all_sheets_all_files.to_excel(writer)
print(concat_all_sheets_all_files)