每当有新文件添加到文件夹时,将 XLSX 文件转换为 CSV 文件
XLSX conversion to CSV whenever a new file is added to the folder
每当有新文件添加到 InputFolder 时,我想把该 .xlsx 文件转换为 .csv 文件,并将转换后的 .csv 文件放入 OutputFolder。
import glob
import time
import os
import pandas as pd
# Timestamp string (YYYYmmdd_HHMMSS). NOTE: it is evaluated once, when this
# line runs, not each time a file is converted.
timestr = time.strftime("%Y%m%d_%H%M%S")
# Folder that is scanned for incoming files.
input_filepath = 'C:/Documents/InputFile'
# Running total of file sizes (bytes) in the input folder; 0 before the first scan.
folderSize = 0
def format_csv(latest_file):
    """Convert one Excel workbook to a timestamp-named CSV file.

    The CSV is written to 'C:/Documents/OutputFile/' and named after the
    moment of the call (YYYYmmdd_HHMMSS). The row index is not written; the
    header row is.

    Args:
        latest_file: Path of the .xlsx workbook to convert.
    """
    # Take the timestamp per call. Reusing one timestamp computed at start-up
    # made every conversion target the same file name, so each new CSV
    # silently overwrote the previous one.
    stamp = time.strftime("%Y%m%d_%H%M%S")
    filenamepath = 'C:/Documents/OutputFile/' + stamp + '.csv'
    read_Excelfile = pd.read_excel(latest_file)
    read_Excelfile.to_csv(filenamepath, index=False, header=True)
# Poll the input folder forever; whenever its total size has grown since the
# previous poll, convert the most recently created .xlsx workbook.
while True:
    checkFolder = folderSize
    folderSize = 0
    # Sum the size of every file under the input folder (sub-folders included).
    for path, dirs, files in os.walk(input_filepath):
        for f in files:
            fp = os.path.join(path, f)
            folderSize += os.path.getsize(fp)
    print(folderSize)
    # Create a new .csv file if the input folder has grown.
    if folderSize > checkFolder:
        list_of_files = glob.glob(os.path.join(input_filepath, '*.xlsx'))
        # The folder can grow without containing any .xlsx file (e.g. another
        # file type was dropped in); max() on an empty list raises ValueError.
        if list_of_files:
            latest_file = max(list_of_files, key=os.path.getctime)
            format_csv(latest_file)
            print(latest_file)
    time.sleep(15)
现在程序只会转换第一个 .xlsx 文件。如果我将一个新的 .xlsx 文件添加到 InputFolder 中,该文件不会被转换。
如果您想把所有 .xlsx 文件都转换为 .csv,可以尝试读取文件夹中的所有 .xlsx 文件。
这里我们读取目录中的所有 xlsx 文件,通过创建 csv 版本的副本来转换它们,然后删除原始 xlsx 版本(注意:下面的代码只创建 csv 副本并跳过已转换的文件,并未实际删除原始 xlsx 文件)。
import pandas as pd
import os
# Convert every .xlsx workbook in the folder to a .csv placed next to it,
# skipping workbooks whose .csv already exists.
path = 'C:/Documents/InputFile'
files = os.listdir(path)
for file in files:
    # Test the extension itself: a substring test would also accept names such
    # as 'report.xlsx.bak', for which the [:-5] slice below would be wrong.
    if not file.endswith('.xlsx'):
        continue
    filename = file[:-5]
    if filename + ".csv" in files:
        # Already converted on an earlier run.
        continue
    new_filename = path + "/" + filename + ".csv"
    # os.listdir returns bare names, so join with the folder; otherwise the
    # read only works when the current working directory is that folder.
    df = pd.read_excel(os.path.join(path, file))
    df.to_csv(new_filename)
我改进了我的原始代码。现在,每当我将新的 Excel 文件放入 InputFolder 时,程序会将该文件转换为 .csv 格式,并将转换后的文件放入 OutputFolder。
import glob
import time
import os
import pandas as pd
from watchdog.observers import Observer
from watchdog.events import FileSystemEventHandler
def on_created(event):
    """Convert a newly created .xlsx workbook to CSV.

    Acts on the file named by the event itself instead of searching the
    folder for the most recently created workbook. This way a non-.xlsx
    file no longer triggers a conversion (or a ValueError from max() on an
    empty list), and files created in sub-folders are handled too.

    Args:
        event: File-system event passed by the observer; its `is_directory`
            and `src_path` attributes are read.
    """
    if event.is_directory:
        return
    new_file = os.fsdecode(event.src_path)
    name = os.path.basename(new_file)
    # '~$name.xlsx' is the lock file Excel creates beside an open workbook;
    # it is not a readable workbook.
    if name.startswith('~$') or not name.lower().endswith('.xlsx'):
        return
    # NOTE(review): the event can fire before a large file has been fully
    # copied into the folder; confirm whether a short retry is needed.
    format_csv(new_file)
def format_csv(latest_file):
    """Write the workbook at *latest_file* out as a timestamp-named CSV.

    The output lands in 'C:/Users/Documents/OutputFolder/' and is named after
    the time of the call (ddmmYYYY_HHMMSS). The resulting path is printed.
    """
    # Build the destination path first, then read and write in one chain.
    csv_path = (
        'C:/Users/Documents/OutputFolder/'
        + time.strftime("%d%m%Y_%H%M%S")
        + '.csv'
    )
    pd.read_excel(latest_file).to_csv(csv_path, index=None, header=True)
    print(csv_path)
if __name__ == "__main__":
    # Folder to watch for incoming workbooks.
    watch_dir = 'C:/Users/Documents/InputFolder'

    # Generic handler whose creation hook is replaced by on_created.
    handler = FileSystemEventHandler()
    handler.on_created = on_created

    # Watch the folder (and its sub-folders) in the observer's own thread.
    watcher = Observer()
    watcher.schedule(handler, watch_dir, recursive=True)
    watcher.start()

    try:
        # Keep the main thread alive, waking once per second.
        while True:
            time.sleep(1)
    except KeyboardInterrupt:
        # Ctrl+C: ask the observer to stop.
        watcher.stop()
    watcher.join()
每当有新文件添加到 InputFolder 时,我想把该 .xlsx 文件转换为 .csv 文件,并将转换后的 .csv 文件放入 OutputFolder。
import glob
import time
import os
import pandas as pd
# Timestamp string (YYYYmmdd_HHMMSS). NOTE: it is evaluated once, when this
# line runs, not each time a file is converted.
timestr = time.strftime("%Y%m%d_%H%M%S")
# Folder that is scanned for incoming files.
input_filepath = 'C:/Documents/InputFile'
# Running total of file sizes (bytes) in the input folder; 0 before the first scan.
folderSize = 0
def format_csv(latest_file):
    """Convert one Excel workbook to a timestamp-named CSV file.

    The CSV is written to 'C:/Documents/OutputFile/' and named after the
    moment of the call (YYYYmmdd_HHMMSS). The row index is not written; the
    header row is.

    Args:
        latest_file: Path of the .xlsx workbook to convert.
    """
    # Take the timestamp per call. Reusing one timestamp computed at start-up
    # made every conversion target the same file name, so each new CSV
    # silently overwrote the previous one.
    stamp = time.strftime("%Y%m%d_%H%M%S")
    filenamepath = 'C:/Documents/OutputFile/' + stamp + '.csv'
    read_Excelfile = pd.read_excel(latest_file)
    read_Excelfile.to_csv(filenamepath, index=False, header=True)
# Poll the input folder forever; whenever its total size has grown since the
# previous poll, convert the most recently created .xlsx workbook.
while True:
    checkFolder = folderSize
    folderSize = 0
    # Sum the size of every file under the input folder (sub-folders included).
    for path, dirs, files in os.walk(input_filepath):
        for f in files:
            fp = os.path.join(path, f)
            folderSize += os.path.getsize(fp)
    print(folderSize)
    # Create a new .csv file if the input folder has grown.
    if folderSize > checkFolder:
        list_of_files = glob.glob(os.path.join(input_filepath, '*.xlsx'))
        # The folder can grow without containing any .xlsx file (e.g. another
        # file type was dropped in); max() on an empty list raises ValueError.
        if list_of_files:
            latest_file = max(list_of_files, key=os.path.getctime)
            format_csv(latest_file)
            print(latest_file)
    time.sleep(15)
现在程序只会转换第一个 .xlsx 文件。如果我将一个新的 .xlsx 文件添加到 InputFolder 中,该文件不会被转换。
如果您想把所有 .xlsx 文件都转换为 .csv,可以尝试读取文件夹中的所有 .xlsx 文件。
这里我们读取目录中的所有 xlsx 文件,通过创建 csv 版本的副本来转换它们,然后删除原始 xlsx 版本(注意:下面的代码只创建 csv 副本并跳过已转换的文件,并未实际删除原始 xlsx 文件)。
import pandas as pd
import os
# Convert every .xlsx workbook in the folder to a .csv placed next to it,
# skipping workbooks whose .csv already exists.
path = 'C:/Documents/InputFile'
files = os.listdir(path)
for file in files:
    # Test the extension itself: a substring test would also accept names such
    # as 'report.xlsx.bak', for which the [:-5] slice below would be wrong.
    if not file.endswith('.xlsx'):
        continue
    filename = file[:-5]
    if filename + ".csv" in files:
        # Already converted on an earlier run.
        continue
    new_filename = path + "/" + filename + ".csv"
    # os.listdir returns bare names, so join with the folder; otherwise the
    # read only works when the current working directory is that folder.
    df = pd.read_excel(os.path.join(path, file))
    df.to_csv(new_filename)
我改进了我的原始代码。现在,每当我将新的 Excel 文件放入 InputFolder 时,程序会将该文件转换为 .csv 格式,并将转换后的文件放入 OutputFolder。
import glob
import time
import os
import pandas as pd
from watchdog.observers import Observer
from watchdog.events import FileSystemEventHandler
def on_created(event):
    """Convert a newly created .xlsx workbook to CSV.

    Acts on the file named by the event itself instead of searching the
    folder for the most recently created workbook. This way a non-.xlsx
    file no longer triggers a conversion (or a ValueError from max() on an
    empty list), and files created in sub-folders are handled too.

    Args:
        event: File-system event passed by the observer; its `is_directory`
            and `src_path` attributes are read.
    """
    if event.is_directory:
        return
    new_file = os.fsdecode(event.src_path)
    name = os.path.basename(new_file)
    # '~$name.xlsx' is the lock file Excel creates beside an open workbook;
    # it is not a readable workbook.
    if name.startswith('~$') or not name.lower().endswith('.xlsx'):
        return
    # NOTE(review): the event can fire before a large file has been fully
    # copied into the folder; confirm whether a short retry is needed.
    format_csv(new_file)
def format_csv(latest_file):
    """Write the workbook at *latest_file* out as a timestamp-named CSV.

    The output lands in 'C:/Users/Documents/OutputFolder/' and is named after
    the time of the call (ddmmYYYY_HHMMSS). The resulting path is printed.
    """
    # Build the destination path first, then read and write in one chain.
    csv_path = (
        'C:/Users/Documents/OutputFolder/'
        + time.strftime("%d%m%Y_%H%M%S")
        + '.csv'
    )
    pd.read_excel(latest_file).to_csv(csv_path, index=None, header=True)
    print(csv_path)
if __name__ == "__main__":
    # Folder to watch for incoming workbooks.
    watch_dir = 'C:/Users/Documents/InputFolder'

    # Generic handler whose creation hook is replaced by on_created.
    handler = FileSystemEventHandler()
    handler.on_created = on_created

    # Watch the folder (and its sub-folders) in the observer's own thread.
    watcher = Observer()
    watcher.schedule(handler, watch_dir, recursive=True)
    watcher.start()

    try:
        # Keep the main thread alive, waking once per second.
        while True:
            time.sleep(1)
    except KeyboardInterrupt:
        # Ctrl+C: ask the observer to stop.
        watcher.stop()
    watcher.join()