每当有新文件添加到文件夹中时,将 XLSX 文件转换为 CSV 文件

XLSX conversion to CSV whenever a new file is added to the folder

我想在每次有新文件添加到 InputFolder 时,将该 .xlsx 文件转换为 .csv 文件,并把转换后的 .csv 文件放入 OutputFolder。

import glob
import time
import os
import pandas as pd

# Folder that holds the incoming .xlsx files
input_filepath = 'C:/Documents/InputFile'

# Byte counter for the contents of the input folder; starts at zero
folderSize = 0

# Timestamp captured once, when the script starts (YYYYMMDD_HHMMSS)
timestr = time.strftime("%Y%m%d_%H%M%S")


#Function to convert file
def format_csv(latest_file):
    """Convert one Excel workbook to a timestamped CSV file in the output folder.

    The timestamp is taken at call time rather than once at start-up, so each
    conversion gets its own output name instead of overwriting the previous
    one. If the name is already taken, a numeric suffix is appended.

    Args:
        latest_file: Path of the .xlsx file to convert.
    """
    # Output file path
    output_prefix = 'C:/Documents/OutputFile/' + time.strftime("%Y%m%d_%H%M%S")
    filenamepath = output_prefix + '.csv'

    # The timestamp only has one-second resolution, so two conversions in the
    # same second would otherwise write to the same file.
    duplicate = 0
    while os.path.exists(filenamepath):
        duplicate += 1
        filenamepath = output_prefix + '_' + str(duplicate) + '.csv'

    read_Excelfile = pd.read_excel(latest_file)
    read_Excelfile.to_csv(filenamepath, index=None, header=True)

# Paths of the .xlsx files that have already been handed to format_csv
converted_files = set()

while True:
    # Track the individual files instead of the total folder size: a size
    # comparison misses a new file whenever the total does not grow (for
    # example when another file was removed in the same interval).
    current_files = set(glob.glob(input_filepath + '/*.xlsx'))

    # Forget files that have disappeared so that re-adding them converts again
    converted_files &= current_files

    # Convert every file that has not been seen yet, oldest first. On the first
    # pass this covers all .xlsx files already present in the folder. An empty
    # folder simply yields nothing to do.
    for new_file in sorted(current_files - converted_files, key=os.path.getctime):
        format_csv(new_file)
        print(new_file)
        converted_files.add(new_file)

    time.sleep(15)

现在程序只会转换第一个 .xlsx 文件。如果我将一个新的 .xlsx 文件添加到 InputFolder 中,该文件不会被转换。

如果想把所有 .xlsx 文件都转换为 .csv,可以尝试读取文件夹中的所有 .xlsx 文件。

这里我们读取目录中的所有 xlsx 文件,为每个文件创建一个 csv 版本的副本来完成转换,然后可以删除原始的 xlsx 文件(下面的代码只创建 csv 副本,并不会删除原文件)。

import pandas as pd
import os

# Folder whose .xlsx files are converted in place (CSV written next to the source)
path = 'C:/Documents/InputFile'

files = os.listdir(path)
for file in files:
    # Test the extension itself: the slice below assumes the name ends in
    # ".xlsx", which a plain substring check does not guarantee.
    if not file.endswith('.xlsx'):
        continue

    filename = file[:-5]
    new_filename = path + "/" + filename + ".csv"

    # Skip workbooks that already have a CSV with the same base name
    if filename + ".csv" in files:
        continue

    # os.listdir returns bare names, so join the folder back on; otherwise the
    # file would be looked up in the current working directory.
    df = pd.read_excel(os.path.join(path, file))
    df.to_csv(new_filename)
        

我改进了我的原始代码。现在,每当我将新的 Excel 文件放入 InputFolder 时,程序会将该文件转换为 .csv 格式,并将转换后的文件放入 OutputFolder。

import glob
import time
import os
import pandas as pd
from watchdog.observers import Observer
from watchdog.events import FileSystemEventHandler

#Function if new file is created in the folder
def on_created(event):
    """Convert the file named by a creation event, if it is an Excel workbook.

    The path comes from the event itself rather than from a search for the
    newest file in the folder, so the file that triggered the event is the
    one converted, and events for other file types no longer fail when no
    .xlsx file exists.

    Args:
        event: Creation event; its ``is_directory`` and ``src_path``
            attributes are read.
    """
    # A new sub-folder is not something to convert
    if event.is_directory:
        return

    created_path = event.src_path
    file_name = os.path.basename(created_path)

    # Only .xlsx workbooks are converted
    if not file_name.lower().endswith('.xlsx'):
        return

    # Excel writes a "~$<name>.xlsx" lock file next to an open workbook; it is
    # not a real workbook and must not be converted.
    if file_name.startswith('~$'):
        return

    format_csv(created_path)

#Function to convert .xlsx to .csv
def format_csv(latest_file):
    """Convert one Excel workbook to a timestamped CSV file in the output folder.

    The output name is the current time (DDMMYYYY_HHMMSS). If a file with that
    name already exists, a numeric suffix is appended instead of overwriting
    it. The final output path is printed.

    Args:
        latest_file: Path of the .xlsx file to convert.
    """
    # Get timestamp
    timestr = time.strftime("%d%m%Y_%H%M%S")

    # Output file path
    output_prefix = 'C:/Users/Documents/OutputFolder/' + timestr
    filenamepath = output_prefix + '.csv'

    # The timestamp only has one-second resolution, so two conversions in the
    # same second would otherwise write to the same file.
    duplicate = 0
    while os.path.exists(filenamepath):
        duplicate += 1
        filenamepath = output_prefix + '_' + str(duplicate) + '.csv'

    read_Excelfile = pd.read_excel(latest_file)
    read_Excelfile.to_csv(filenamepath, index=None, header=True)
    print(filenamepath)

if __name__ == "__main__":
    # Folder to watch for new files
    watch_dir = 'C:/Users/Documents/InputFolder'

    # Generic handler whose creation hook is replaced by on_created
    handler = FileSystemEventHandler()
    handler.on_created = on_created

    # Watch the folder, including its sub-folders
    watcher = Observer()
    watcher.schedule(handler, watch_dir, recursive=True)
    watcher.start()

    try:
        # Idle in one-second sleeps while the observer runs
        while True:
            time.sleep(1)
    except KeyboardInterrupt:
        # Ctrl+C asks the observer to stop
        watcher.stop()

    # Wait for the observer to finish
    watcher.join()