在从一个文件夹传输到另一个文件夹的过程中修改多个文件 Python

Modifying multiple files during a transportation from one folder to another Python

现在我有一个程序可以将文件从 SOURCE 文件夹的子目录移动到 DESTINATION 文件夹的子目录。这些文件包含如下信息: content of file before the move.

现在,在从 SOURCE 移动到 DESTINATION 的过程中,我想在 2 个地方修改移动文件。

因此,在文件从 SOURCE 移动到 DESTINATION 后,它必须如下所示:

content of file after the move.

这是我目前用于移动文件的代码,移动本身一切正常。我只是不知道该从何下手去修改文件内容:

import os, os.path
import time

# Root folders: where files are picked up, where the newest file of each
# sub-directory may go, and where everything else is archived.
source = r'c:\data\AS\Desktop\Source'
destination = r'c:\data\AS\Desktop\Destination'
archive = r'c:\data\AS\Desktop\Archive'

for subdir in os.listdir(source):
    subdir_path = os.path.join(source, subdir)

    # Only directories directly under source are processed.
    if os.path.isdir(subdir_path):
        # Full paths of the regular files inside this sub-directory.
        candidates = (os.path.join(subdir_path, name) for name in os.listdir(subdir_path))
        all_file_paths = [path for path in candidates if os.path.isfile(path)]

        # Sub-directories without any file are left untouched.
        if all_file_paths:
            # The single file with the highest ctime in this sub-directory.
            newest_file_paths = max(all_file_paths, key=os.path.getctime)

            for file_path in all_file_paths:
                is_newest = file_path == newest_file_paths
                # The newest file goes to destination only when its ctime is
                # more than 120 seconds in the past; every other case is archived.
                if is_newest and os.path.getctime(newest_file_paths) < time.time() - 120:
                    dst_root = destination
                else:
                    dst_root = archive

                # Keep the sub-directory name and the file name under the chosen root.
                dst_path = os.path.join(dst_root, subdir, os.path.basename(file_path))
                os.rename(file_path, dst_path)

文件在移动过程中无法被修改:必须先完成移动,然后再对其进行处理。为此,你可以把每个文件的最终目标路径(路径中包含子目录)保存在一个列表中,之后再遍历该列表,逐个打开文件并完成修改。

这是一个最小的例子

def changeFile(fileName):
    """Placeholder hook: put the edits for the already-moved file here.

    Args:
        fileName: Path of the file to change.

    Returns:
        None. The stub currently does nothing.
    """
    return None

# Source paths of the files to move (relative paths, sub-directory included).
files = ["dir/subdir1/file1", "dir/file"]
# Example target root; replace it with the real destination folder.
destinationRoot = "destination"

for file in files:
    # newPath was never defined in the original sketch (NameError); derive it
    # by re-creating the source's relative path under the target root.
    newPath = os.path.join(destinationRoot, file)
    # os.rename does not create missing parent directories.
    os.makedirs(os.path.dirname(newPath), exist_ok=True)
    # Move first, then edit the file at its final location.
    os.rename(file, newPath)
    changeFile(newPath)

如果文件很小,那么您可以简单地执行以下操作而不是移动文件:

  1. 读取所有文件的信息
  2. 找到您要替换的数据
  3. 将新数据写入目标目录中的文件
  4. 删除旧文件

类似

def move_file(file_path, dst_path, should_modify=None, modify_line=None, additional_data=()):
    """Copy ``file_path`` to ``dst_path`` line by line, then delete the source.

    Args:
        file_path: Path of the existing text file to read.
        dst_path: Path of the file to write (created or overwritten).
        should_modify: Optional callable taking a line (without its trailing
            newline) and returning a truthy value when the line must change.
        modify_line: Optional callable taking such a line and returning its
            replacement text. Only used together with ``should_modify``.
        additional_data: Iterable of extra items; each is written on its own
            line after the copied content.

    Returns:
        None.

    Raises:
        OSError: If a file cannot be opened or the source cannot be removed.
    """
    with open(file_path, "r") as input_file, open(dst_path, "w") as output_file:
        for line in input_file:
            # Lines read from a file keep their "\n"; drop it so that print()
            # does not emit a second newline after every line.
            line = line.rstrip("\n")
            if should_modify is not None and modify_line is not None and should_modify(line):
                line = modify_line(line)
            print(line, file=output_file)
        for data in additional_data:
            print(data, file=output_file)

    # remove the old file only after both files have been closed
    os.remove(file_path)

然后在您的原始代码中调用 move_file 函数而不是 os.rename

#Choose a destination root for every file of the current sub-directory:
#the newest file goes to destination only when its ctime is more than
#120 seconds in the past; every other case goes to archive.
    for file_path in all_file_paths:
        if file_path == newest_file_paths and os.path.getctime(newest_file_paths) < time.time() - 120:
            dst_root = destination
        else:
            dst_root = archive
#Build the full target path and hand the file to move_file, which is
#called here in place of os.rename.
        dst_path = os.path.join(dst_root, subdir, os.path.basename(file_path))
        move_file(file_path, dst_path)

你可以这样实现

import os
import time
from datetime import datetime

# Root folder whose sub-directories are scanned for files (see main).
SOURCE = r'c:\data\AS\Desktop\Source'
# Root folder that may receive the newest file of a sub-directory (see process_files).
DESTINATION = r'c:\data\AS\Desktop\Destination'
# Root folder that receives every file not sent to DESTINATION.
ARCHIVE = r'c:\data\AS\Desktop\Archive'

def get_time_difference(date, time_string, now=None):
    """Return the time elapsed since ``date`` ``time_string`` as ``"H:M"``.

    You may want to modify this logic to change the way the time difference
    is calculated.

    Args:
        date: Day in ``%d-%m-%Y`` form, e.g. ``"31-12-2020"``.
        time_string: Time of day in ``%H:%M`` form, e.g. ``"23:59"``.
        now: Naive ``datetime`` to measure against. Defaults to
            ``datetime.now()``; pass a fixed value for reproducible results.

    Returns:
        Whole hours and whole remaining minutes joined by a colon. Neither
        part is zero-padded, and hours are not capped at 24
        (``"48:5"`` means 48 h 5 min).

    Raises:
        ValueError: If ``date``/``time_string`` do not match the formats above.
    """
    if now is None:
        now = datetime.now()
    moment = datetime.strptime(f"{date} {time_string}", "%d-%m-%Y %H:%M")
    # divmod splits the elapsed seconds into whole hours and the rest.
    hours, remainder = divmod((now - moment).total_seconds(), 3600)
    return f"{int(hours)}:{int(remainder // 60)}"

def move_and_transform_file(file_path, dst_path, delimiter="\t"):
    """Write a transformed copy of ``file_path`` to ``dst_path``, then delete the source.

    Every line made of exactly four whitespace-separated fields
    ``(line_id, item, line_type, value)`` is re-written with ``delimiter``
    between the fields. Two changes are applied on the way:

    * the value of each ``Power`` line is multiplied by 10;
    * as soon as both a ``Date`` and a ``Time`` value have been read, one
      extra ``TimeDif`` line is written just before the line that completed
      the pair, reusing that line's id and type.

    Lines that do not have exactly four fields (blank lines, for example)
    are copied unchanged instead of aborting the whole move.

    Args:
        file_path: Path of the existing text file to read and delete.
        dst_path: Path of the file to write (created or overwritten).
        delimiter: Separator placed between the fields of re-written lines.

    Returns:
        None.

    Raises:
        ValueError: If a ``Power`` value is not an integer, or the
            ``Date``/``Time`` values cannot be parsed by get_time_difference.
        OSError: If a file cannot be opened or the source cannot be removed.
    """
    with open(file_path, "r") as input_file, open(dst_path, "w") as output_file:
        # Most recent value seen for each item of interest.
        data = {
            "Date": None,
            "Time": None,
            "Power": None,
        }
        time_difference_seen = False
        for line in input_file:
            fields = line.split()
            if len(fields) != 4:
                # Cannot be unpacked into four fields: pass it through as-is.
                print(line.rstrip("\n"), file=output_file)
                continue

            line_id, item, line_type, value = fields
            if item in data:
                data[item] = value
                # Emit the TimeDif line only once per file.
                if not time_difference_seen and data["Date"] is not None and data["Time"] is not None:
                    time_difference = get_time_difference(data["Date"], data["Time"])
                    time_difference_seen = True
                    print(delimiter.join([line_id, "TimeDif", line_type, time_difference]), file=output_file)
                if item == "Power":
                    value = str(int(value) * 10)
            print(delimiter.join((line_id, item, line_type, value)), file=output_file)

    # Both files are closed at this point, so the source can be deleted.
    os.remove(file_path)

def process_files(all_file_paths, newest_file_path, subdir):
    """Route each file to DESTINATION or ARCHIVE and transform it on the way.

    Args:
        all_file_paths: Paths of the files to process.
        newest_file_path: The one path among them treated as the newest file.
        subdir: Sub-directory name re-created under the chosen root.

    Returns:
        None.
    """
    for file_path in all_file_paths:
        is_newest = file_path == newest_file_path
        # The newest file qualifies for DESTINATION only when its ctime is
        # more than 120 seconds in the past; getctime is not consulted for
        # any other file.
        goes_to_destination = is_newest and os.path.getctime(newest_file_path) < time.time() - 120
        dst_root = DESTINATION if goes_to_destination else ARCHIVE

        target = os.path.join(dst_root, subdir, os.path.basename(file_path))
        move_and_transform_file(file_path, target)

def main():
    """Walk the sub-directories of SOURCE and process the files in each one."""
    for subdir in os.listdir(SOURCE):
        subdir_path = os.path.join(SOURCE, subdir)

        # Entries of SOURCE that are not directories are ignored.
        if os.path.isdir(subdir_path):
            candidates = [os.path.join(subdir_path, name) for name in os.listdir(subdir_path)]
            all_file_paths = [path for path in candidates if os.path.isfile(path)]

            # Nothing to do for sub-directories that contain no files.
            if all_file_paths:
                newest_path = max(all_file_paths, key=os.path.getctime)
                process_files(all_file_paths, newest_path, subdir)


# Run only when executed as a script, not when imported.
if __name__ == "__main__":
    main()

@MindOfMetalAndWheels 你的代码是不是只修改了文件,而没有移动它们?我既想移动文件,也想修改它们。另外,当我尝试把你的代码插入到我的代码中时,会出现 invalid syntax(语法无效)错误。

import os
import time
from datetime import datetime

# Root folder whose sub-directories are scanned for files (see main).
SOURCE = r'c:\data\AS\Desktop\Source'
# Root folder that may receive the newest file of a sub-directory (see process_files).
DESTINATION = r'c:\data\AS\Desktop\Destination'
# Root folder that receives every file not sent to DESTINATION.
ARCHIVE = r'c:\data\AS\Desktop\Archive'

def get_time_difference(date, time_string, now=None):
    """Return the time elapsed since ``date`` ``time_string`` as ``"H:M"``.

    You may want to modify this logic to change the way the time difference
    is calculated.

    Args:
        date: Day in ``%d-%m-%Y`` form, e.g. ``"31-12-2020"``.
        time_string: Time of day in ``%H:%M`` form, e.g. ``"23:59"``.
        now: Naive ``datetime`` to measure against. Defaults to
            ``datetime.now()``; pass a fixed value for reproducible results.

    Returns:
        Whole hours and whole remaining minutes joined by a colon. Neither
        part is zero-padded, and hours are not capped at 24
        (``"48:5"`` means 48 h 5 min).

    Raises:
        ValueError: If ``date``/``time_string`` do not match the formats above.
    """
    if now is None:
        now = datetime.now()
    moment = datetime.strptime(f"{date} {time_string}", "%d-%m-%Y %H:%M")
    # divmod splits the elapsed seconds into whole hours and the rest.
    hours, remainder = divmod((now - moment).total_seconds(), 3600)
    return f"{int(hours)}:{int(remainder // 60)}"

def move_and_transform_file(file_path, dst_path, delimiter="\t"):
    """Write a transformed copy of ``file_path`` to ``dst_path``, then delete the source.

    Every line made of exactly four whitespace-separated fields
    ``(line_id, item, line_type, value)`` is re-written with ``delimiter``
    between the fields. Two changes are applied on the way:

    * the value of each ``Power`` line is multiplied by 10;
    * as soon as both a ``Date`` and a ``Time`` value have been read, one
      extra ``TimeDif`` line is written just before the line that completed
      the pair, reusing that line's id and type.

    Lines that do not have exactly four fields (blank lines, for example)
    are copied unchanged instead of aborting the whole move.

    Args:
        file_path: Path of the existing text file to read and delete.
        dst_path: Path of the file to write (created or overwritten).
        delimiter: Separator placed between the fields of re-written lines.

    Returns:
        None.

    Raises:
        ValueError: If a ``Power`` value is not an integer, or the
            ``Date``/``Time`` values cannot be parsed by get_time_difference.
        OSError: If a file cannot be opened or the source cannot be removed.
    """
    with open(file_path, "r") as input_file, open(dst_path, "w") as output_file:
        # Most recent value seen for each item of interest.
        data = {
            "Date": None,
            "Time": None,
            "Power": None,
        }
        time_difference_seen = False
        for line in input_file:
            fields = line.split()
            if len(fields) != 4:
                # Cannot be unpacked into four fields: pass it through as-is.
                print(line.rstrip("\n"), file=output_file)
                continue

            line_id, item, line_type, value = fields
            if item in data:
                data[item] = value
                # Emit the TimeDif line only once per file.
                if not time_difference_seen and data["Date"] is not None and data["Time"] is not None:
                    time_difference = get_time_difference(data["Date"], data["Time"])
                    time_difference_seen = True
                    print(delimiter.join([line_id, "TimeDif", line_type, time_difference]), file=output_file)
                if item == "Power":
                    value = str(int(value) * 10)
            print(delimiter.join((line_id, item, line_type, value)), file=output_file)

    # Both files are closed at this point, so the source can be deleted.
    os.remove(file_path)

def process_files(all_file_paths, newest_file_path, subdir):
    """Route each file to DESTINATION or ARCHIVE and transform it on the way.

    Args:
        all_file_paths: Paths of the files to process.
        newest_file_path: The one path among them treated as the newest file.
        subdir: Sub-directory name re-created under the chosen root.

    Returns:
        None.
    """
    for file_path in all_file_paths:
        is_newest = file_path == newest_file_path
        # The newest file qualifies for DESTINATION only when its ctime is
        # more than 120 seconds in the past; getctime is not consulted for
        # any other file.
        goes_to_destination = is_newest and os.path.getctime(newest_file_path) < time.time() - 120
        dst_root = DESTINATION if goes_to_destination else ARCHIVE

        target = os.path.join(dst_root, subdir, os.path.basename(file_path))
        move_and_transform_file(file_path, target)

def main():
    """Walk the sub-directories of SOURCE and process the files in each one."""
    for subdir in os.listdir(SOURCE):
        subdir_path = os.path.join(SOURCE, subdir)

        # Entries of SOURCE that are not directories are ignored.
        if os.path.isdir(subdir_path):
            candidates = [os.path.join(subdir_path, name) for name in os.listdir(subdir_path)]
            all_file_paths = [path for path in candidates if os.path.isfile(path)]

            # Nothing to do for sub-directories that contain no files.
            if all_file_paths:
                newest_path = max(all_file_paths, key=os.path.getctime)
                process_files(all_file_paths, newest_path, subdir)


# Run only when executed as a script, not when imported.
if __name__ == "__main__":
    main()