我想根据文件名的一部分,将文件移动到以该部分命名的文件夹中

I want to move a file based on part of the name to a folder with that name

我有一个包含大量文件的目录,我想根据部分文件名将这些文件移动到文件夹中。我的文件列表如下所示:

001-020-012B-B.nc
001-022-151-A.nc
001-023-022-PY-T1.nc.nc
001-096-016B-A.nc

我想根据文件名的第一部分(001-096-016B、001-023-022、001-022-151)将我拥有的文件移动到单独的文件夹中。文件名的第一部分始终具有相同数量的数字,并且始终由连字符 '-' 分隔为 3 个部分。

文件夹名称是这样命名的:\oe-xxxx\xxxx\xxxx\001-Disc-PED\020-Rotor-parts-1200

因此,例如,根据文件夹名称(数字),此文件应放在上述文件夹中:

001-020-012B-B.nc

文件路径分为几列以显示上述文件必须移动到的位置:
(001)-Disc-PED\(020)-Rotor-parts-1200.

因此:

(001)-Disc-PED\(020)-Rotor-parts-1200 (001)-(020)-012B-B.nc

这是我在网上试过的方法,但没有用: 我的想法是我想遍历文件夹并查找匹配项。

import os
import glob
import itertools
import re

# Asker's first attempt (reported as not working); code is kept as posted.
# Source directory that is walked for .nc files
sourcefile = r'C:\Users\cah\Desktop[=13=]0Turning'
# Root of the folder tree that is searched for a matching destination folder
destinationPath = r'C:\Users\cah\Desktop-CAM'

# Separation
# NOTE(review): glob.glob() here is relative to the current working directory,
# and `dirs` is rebound by the os.walk() loop below, so this result is never used.
dirs = glob.glob('*-*')

# Every file with file extension .nc
# NOTE(review): `files` is likewise rebound by the os.walk() loop below.
files = glob.glob('*.nc')

for root, dirs, files in os.walk(sourcefile):
    for file in files:
        if file.endswith(".nc"):

            # Characters 0-2 and 4-6 of the file name,
            # e.g. "001" and "020" for "001-020-012B-B.nc".
            first3Char = str(file[0:3])
            last3Char = str(file[4:7])

            # NOTE(review): os.walk() yields (dirpath, dirnames, filenames) tuples,
            # so `root` is a 3-tuple here, not a path string; it also shadows the
            # outer loop's `root`.
            for root in os.walk(destinationPath):

                # Slices the tuple (not a string) and is never used afterwards.
                first33CharsOfRoot = str(root[0:33])

                # Strip list/tuple punctuation from the tuple's string representation.
                cleanRoot1 = str(root).replace("[", "")
                cleanRoot2 = str(cleanRoot1).replace("]", "")
                cleanRoot3 = str(cleanRoot2).replace(")", "")
                cleanRoot4 = str(cleanRoot3).replace("'", "")
                cleanRoot5 = str(cleanRoot4).replace(",", "")

                # Captures the last 3 characters of the cleaned string;
                # re.findall() returns a list of matches.
                firstCharOfRoot = re.findall(r'(.{3})\s*$', str(cleanRoot5))

                # NOTE(review): a list is compared with a str, so this is always False.
                print(firstCharOfRoot==first3Char)

                if(firstCharOfRoot == first3Char):
                    print("Hello")
                    for root in os.walk(destinationPath):
                        # NOTE(review): `root` is a tuple; os.path.basename() would
                        # raise TypeError if this line were ever reached.
                        print(os.path.basename(root))

                     #   if(os.path)

我意识到我不应该查找路径中的最后 3 个字符,因为数字 (001) 等位于文件夹名称的开头。我需要在文件夹名称的开头查找数字,才能找到我需要进入的第一级路径。

编辑:

    import os
import glob
import itertools
import re

# Asker's edited attempt (still not working); code is kept as posted.
# Source directory that is walked for .nc files
sourcefile = r'C:\Users\cah\Desktop[=14=]0Turning'
# Root of the folder tree that is searched for a matching destination folder
destinationPath = r'C:\Users\cah\Desktop-CAM'

# Separation
# NOTE(review): `dirs` is rebound by the os.walk() loop below, so this result is never used.
dirs = glob.glob('*-*')

# Every file with file extension .nc
# NOTE(review): `files` is likewise rebound by the os.walk() loop below.
files = glob.glob('*.nc')



for root, dirs, files in os.walk(sourcefile):
    for file in files:
        if file.endswith(".nc"):

            # Characters 0-2 and 4-6 of the file name,
            # e.g. "001" and "020" for "001-020-012B-B.nc".
            first3Char = str(file[0:3])
            last3Char = str(file[4:7])


            # NOTE(review): os.walk() yields (dirpath, dirnames, filenames) tuples,
            # so `root` is a 3-tuple here, not a path string.
            for root in os.walk(destinationPath):




                # Strip list/tuple punctuation from the tuple's string representation.
                cleanRoot1 = str(root).replace("[", "")
                cleanRoot2 = str(cleanRoot1).replace("]", "")
                cleanRoot3 = str(cleanRoot2).replace(")", "")
                cleanRoot4 = str(cleanRoot3).replace("'", "")
                cleanRoot5 = str(cleanRoot4).replace(",", "")

                # NOTE(review): as written, `[^\]` escapes the closing bracket, so the
                # character class is never terminated and re raises re.error.
                # The intended class was presumably `[^\\]` (any non-backslash) - confirm.
                firstCharOfRoot = re.findall(r'^(?:[^\]+\\){5}(\d+).*$', str(cleanRoot5))
                secondCharOfRoot = re.findall(r'^(?:[^\]+\\){6}(\d+).*$', str(cleanRoot5))


                # re.findall() returns a list; join it into a single string for comparison.
                firstCharOfRootCleaned = ''.join(firstCharOfRoot)
                secondCharOfRoot = ''.join(secondCharOfRoot)


                cleanRoot6 = str(cleanRoot5).replace("(", "")

                # NOTE(review): `&` binds tighter than `==`, so this evaluates
                # `str(first3Char) & secondCharOfRoot` (TypeError for two strings);
                # a logical `and` was presumably intended.
                if(firstCharOfRootCleaned == str(first3Char) & secondCharOfRoot == str(last3Char)):

                    print("BINGOf")
                  #  for root1 in os.walk(cleanRoot6):
                   

解决方案

下一节有一个改进的解决方案。不过先分解一下直截了当的解法吧

首先,获取子文件夹的完整列表。

# One (parent_path, folder_name) pair for every directory returned by the
# recursive glob under destinationPath.
all_folders_splitted = [
    os.path.split(entry)
    for entry in glob.iglob(os.path.join(destinationPath, "**"), recursive=True)
    if os.path.isdir(entry)
]

然后,对每个文件使用一个函数来查找其匹配的文件夹,如果不存在则使用新的文件路径。我在脚本的其余部分包含了这个名为 find_folder() 的函数:

import os
import glob
import shutil

# Source directory containing the .nc files
sourcefile = r'C:\Users\cah\Desktop[=11=]0Turning'
# Root of the folder tree the files are sorted into
destinationPath = r'C:\Users\cah\Desktop-CAM'

# One (parent_path, folder_name) pair for every directory returned by the
# recursive glob under destinationPath.
all_folders_splitted = [
    os.path.split(entry)
    for entry in glob.iglob(os.path.join(destinationPath, "**"), recursive=True)
    if os.path.isdir(entry)
]

def find_folder(part1, part2):
    """Return the destination folder for a file whose name starts with part1-part2.

    Looks through the module-level ``all_folders_splitted`` list of
    (parent_path, folder_name) pairs for a folder whose parent's name starts
    with ``part1`` and whose own name starts with ``part2``.

    If no such folder exists, one is created and returned:
    * inside an existing folder whose name starts with ``part1`` when there is
      one (the last candidate found is used), or
    * as a new ``destinationPath/part1/part2`` tree otherwise.
    """
    candidates = [folder for folder in all_folders_splitted
                  if os.path.basename(folder[0]).startswith(part1)]

    for parent, name in candidates:
        if name.startswith(part2):
            return os.path.join(parent, name)

    if candidates:
        # Inside the already existing "part1" folder
        dest = os.path.join(candidates[-1][0], part2)
    else:
        # Whole new folder tree
        dest = os.path.join(destinationPath, part1, part2)

    # all_folders_splitted is computed once, so a folder created for an earlier
    # file is not listed there; exist_ok avoids FileExistsError on the next file
    # that maps to the same folder.
    os.makedirs(dest, exist_ok=True)
    return dest

# All the files you want to move.
# The source directory is stored in `sourcefile`. glob.escape() keeps glob
# metacharacters in that path ("[", "]", "*", "?") from being read as a pattern.
files_gen = glob.iglob(
    os.path.join(glob.escape(sourcefile), "**", "*-*-*.nc"), recursive=True
)

for file in files_gen:
    # Split on the first two "-" only; the glob pattern guarantees both exist.
    basename = os.path.basename(file)
    splitted = basename.split("-", 2)

    # Look up the destination folder.
    # find_folder() creates it if necessary.
    destination_folder = find_folder(splitted[0], splitted[1])

    # Copying the file (copy2 leaves the original in place)
    shutil.copy2(file, os.path.join(destination_folder, basename))

改进的解决方案

如果您有大量文件,在每次迭代时对每个文件夹进行“拆分和匹配”可能会严重影响性能。

我们可以将根据给定模式找到的文件夹存储在字典中。如果给出新模式,字典将被更新,否则它将返回以前找到的文件夹。

import os
import glob
import shutil

# Source directory containing the .nc files
sourcefile = r'C:\Users\cah\Desktop[=12=]0Turning'
# Root of the folder tree the files are sorted into
destinationPath = r'C:\Users\cah\Desktop-CAM'

# Global cache: maps a filename pattern to the folder path found for it
found_pattern = {}

# One (parent_path, folder_name) pair for every directory returned by the
# recursive glob under destinationPath.
all_folders_splitted = [
    os.path.split(entry)
    for entry in glob.iglob(os.path.join(destinationPath, "**"), recursive=True)
    if os.path.isdir(entry)
]


def find_folder(part1, part2):
    """Return the destination folder for a file whose name starts with part1-part2.

    Results are cached in the module-level ``found_pattern`` dictionary, keyed
    by ``(part1, part2)``, so the folder list is only searched once per key.

    On a cache miss, the module-level ``all_folders_splitted`` list of
    (parent_path, folder_name) pairs is searched for a folder whose parent's
    name starts with ``part1`` and whose own name starts with ``part2``.
    If none exists, a folder is created: inside an existing folder whose name
    starts with ``part1`` when there is one (the last candidate found is
    used), otherwise as ``destinationPath/part1/part2``.
    """
    current_key = (part1, part2)

    # Already found previously: return the folder path stored as the value.
    # The cache is the `found_pattern` dict defined at module level.
    if current_key in found_pattern:
        return found_pattern[current_key]

    candidates = [folder for folder in all_folders_splitted
                  if os.path.basename(folder[0]).startswith(part1)]

    for parent, name in candidates:
        if name.startswith(part2):
            dest = os.path.join(parent, name)
            # Update the dictionary
            found_pattern[current_key] = dest
            return dest

    if candidates:
        # Inside the already existing "part1" folder
        dest = os.path.join(candidates[-1][0], part2)
    else:
        # Whole new folder tree
        dest = os.path.join(destinationPath, part1, part2)

    # Update the dictionary
    found_pattern[current_key] = dest

    os.makedirs(dest, exist_ok=True)
    return dest

# All the files you want to move.
# The source directory is stored in `sourcefile`. glob.escape() keeps glob
# metacharacters in that path ("[", "]", "*", "?") from being read as a pattern.
files_gen = glob.iglob(
    os.path.join(glob.escape(sourcefile), "**", "*-*-*.nc"), recursive=True
)

for file in files_gen:
    # Split on the first two "-" only; the glob pattern guarantees both exist.
    basename = os.path.basename(file)
    splitted = basename.split("-", 2)

    # Look up the destination folder.
    # find_folder() creates it if necessary.
    destination_folder = find_folder(splitted[0], splitted[1])

    # Copying the file (copy2 leaves the original in place)
    shutil.copy2(file, os.path.join(destination_folder, basename))

这个更新的解决方案使它更有效(尤其是当许多文件应该共享同一个文件夹时),如果您保存它,您以后也可以使用字典。