我想将基于部分名称的文件移动到具有该名称的文件夹中
I want to move a file based on part of the name to a folder with that name
我有一个包含大量文件的目录,我想根据部分文件名将这些文件移动到文件夹中。我的文件列表如下所示:
001-020-012B-B.nc
001-022-151-A.nc
001-023-022-PY-T1.nc.nc
001-096-016B-A.nc
我想根据文件名的第一部分(001-096-016B、001-023-022、001-022-151)将这些文件移动到各自的文件夹中。文件名的第一部分始终具有相同数量的数字,并且始终由连字符 '-' 分隔为 3 个部分。
文件夹名称是这样命名的:\oe-xxxx\xxxx\xxxx\001-Disc-PED\020-Rotor-parts-1200。
因此,例如,根据文件夹名称(数字),此文件应放在上述文件夹中:
001-020-012B-B.nc
文件路径分为几列以显示上述文件必须移动到的位置:
(001)-Disc-PED\(020)-Rotor-parts-1200
.
因此:
(001)-Disc-PED\(020)-Rotor-parts-1200 (001)-(020)-012B-B.nc
这是我在网上试过的方法,但没有用:
我的想法是我想遍历文件夹并查找匹配项。
import os
import glob
import itertools
import re
# Source folder holding the .nc files, and the root of the destination tree.
# NOTE(review): the "[=13=]" fragment in the source path looks like
# text-extraction damage rather than a real folder name - confirm the path.
sourcefile = r'C:\Users\cah\Desktop[=13=]0Turning'
destinationPath = r'C:\Users\cah\Desktop-CAM'
# Separation
# These two glob calls run against the current working directory; both names
# are rebound by the os.walk loop below on every iteration.
dirs = glob.glob('*-*')
# Every file with file extension .nc
files = glob.glob('*.nc')
# NOTE(review): indentation was lost in this listing; the nesting below is a
# reconstruction - confirm against the original script.
for root, dirs, files in os.walk(sourcefile):
    for file in files:
        if file.endswith(".nc"):
            # Characters 0-2 and 4-6 of the file name,
            # e.g. "001" and "020" for "001-020-012B-B.nc".
            first3Char = str(file[0:3])
            last3Char = str(file[4:7])
            # os.walk yields (dirpath, dirnames, filenames) tuples, so `root`
            # is a tuple here, not a path string.
            for root in os.walk(destinationPath):
                first33CharsOfRoot = str(root[0:33])
                # Strip list/tuple punctuation from the tuple's text form.
                cleanRoot1 = str(root).replace("[", "")
                cleanRoot2 = str(cleanRoot1).replace("]", "")
                cleanRoot3 = str(cleanRoot2).replace(")", "")
                cleanRoot4 = str(cleanRoot3).replace("'", "")
                cleanRoot5 = str(cleanRoot4).replace(",", "")
                # Captures the last three characters of the cleaned text.
                # re.findall returns a list, so the comparisons below compare
                # a list with a str and are always False.
                firstCharOfRoot = re.findall(r'(.{3})\s*$', str(cleanRoot5))
                print(firstCharOfRoot==first3Char)
                if(firstCharOfRoot == first3Char):
                    print("Hello")
                    # `root` is again an os.walk tuple here; os.path.basename
                    # expects a path, not a tuple.
                    for root in os.walk(destinationPath):
                        print(os.path.basename(root))
                        # if(os.path)
我意识到我不应该查找路径中的最后 3 个字符,因为编号(001 等)位于文件夹名称的开头。我需要查找开头的数字,才能找到目标路径的第一级文件夹。
编辑:
import os
import glob
import itertools
import re
# Source folder holding the .nc files, and the root of the destination tree.
# NOTE(review): the "[=14=]" fragment in the source path looks like
# text-extraction damage rather than a real folder name - confirm the path.
sourcefile = r'C:\Users\cah\Desktop[=14=]0Turning'
destinationPath = r'C:\Users\cah\Desktop-CAM'
# Separation
# These two glob calls run against the current working directory; both names
# are rebound by the os.walk loop below on every iteration.
dirs = glob.glob('*-*')
# Every file with file extension .nc
files = glob.glob('*.nc')
# NOTE(review): indentation was lost in this listing; the nesting below is a
# reconstruction - confirm against the original script.
for root, dirs, files in os.walk(sourcefile):
    for file in files:
        if file.endswith(".nc"):
            # Characters 0-2 and 4-6 of the file name,
            # e.g. "001" and "020" for "001-020-012B-B.nc".
            first3Char = str(file[0:3])
            last3Char = str(file[4:7])
            # os.walk yields (dirpath, dirnames, filenames) tuples, so `root`
            # is a tuple here, not a path string.
            for root in os.walk(destinationPath):
                # Strip list/tuple punctuation from the tuple's text form.
                cleanRoot1 = str(root).replace("[", "")
                cleanRoot2 = str(cleanRoot1).replace("]", "")
                cleanRoot3 = str(cleanRoot2).replace(")", "")
                cleanRoot4 = str(cleanRoot3).replace("'", "")
                cleanRoot5 = str(cleanRoot4).replace(",", "")
                # NOTE(review): as written, `[^\]` never closes the character
                # class, so `re` raises "unterminated character set";
                # `[^\\]` (any character except a backslash) was presumably
                # intended - confirm against the original post.
                firstCharOfRoot = re.findall(r'^(?:[^\]+\\){5}(\d+).*$', str(cleanRoot5))
                secondCharOfRoot = re.findall(r'^(?:[^\]+\\){6}(\d+).*$', str(cleanRoot5))
                # findall returns a list; join it into one plain string.
                firstCharOfRootCleaned = ''.join(firstCharOfRoot)
                secondCharOfRoot = ''.join(secondCharOfRoot)
                cleanRoot6 = str(cleanRoot5).replace("(", "")
                # NOTE: `&` binds tighter than `==`, so this evaluates
                # `str(first3Char) & secondCharOfRoot` first, which raises
                # TypeError for two strings; `and` is the boolean operator.
                if(firstCharOfRootCleaned == str(first3Char) & secondCharOfRoot == str(last3Char)):
                    print("BINGOf")
                    # for root1 in os.walk(cleanRoot6):
解决方案
下一节有一个改进的解决方案。不过先分解一下直截了当的解法吧
首先,获取子文件夹的完整列表。
# Every directory matched by the recursive "**" glob under destinationPath,
# stored as the (parent_path, folder_name) pair returned by os.path.split.
all_folders_splitted = list(
    map(
        os.path.split,
        filter(
            os.path.isdir,
            glob.iglob(os.path.join(destinationPath, "**"), recursive=True),
        ),
    )
)
然后,对每个文件使用一个函数来查找其匹配的文件夹,如果不存在则创建并返回新的文件夹路径。这个名为 find_folder() 的函数包含在下面的完整脚本中:
import os
import glob
import shutil
# Folder that holds the .nc files, and the root of the destination tree.
# NOTE(review): the "[=11=]" fragment looks like text-extraction damage in
# the path - confirm the real folder name before running.
sourcefile = r'C:\Users\cah\Desktop[=11=]0Turning'
destinationPath = r'C:\Users\cah\Desktop-CAM'

# Every directory matched by the recursive "**" glob under destinationPath,
# stored as the (parent_path, folder_name) pair returned by os.path.split.
all_folders_splitted = list(
    map(
        os.path.split,
        filter(
            os.path.isdir,
            glob.iglob(os.path.join(destinationPath, "**"), recursive=True),
        ),
    )
)
def find_folder(part1, part2):
    """Return the destination folder for a file named ``part1-part2-...``.

    Searches ``all_folders_splitted`` for a folder whose name starts with
    ``part2`` and whose parent folder's name starts with ``part1``.  When no
    such folder exists, one is created and returned:

    * inside an already existing ``part1`` parent if at least one was found,
    * otherwise as ``destinationPath/part1/part2``.
    """
    # Index entries whose *parent* folder name starts with part1.
    parent_matches = [
        folder for folder in all_folders_splitted
        if os.path.basename(folder[0]).startswith(part1)
    ]
    for parent, name in parent_matches:
        if name.startswith(part2):
            return os.path.join(parent, name)

    if parent_matches:
        # A part1 folder exists: create the part2 folder inside the last match.
        dest = os.path.join(parent_matches[-1][0], part2)
    else:
        # Nothing matches part1 either: build a whole new folder tree.
        dest = os.path.join(destinationPath, part1, part2)
    # The folder index is built once and never refreshed, so a folder created
    # for an earlier file is not in it; exist_ok avoids FileExistsError when a
    # later file maps to that same folder.
    os.makedirs(dest, exist_ok=True)
    return dest
# All the files you want to move: every "*-*-*.nc" file below the source
# folder. The source folder variable is `sourcefile`.
files_gen = glob.iglob(os.path.join(sourcefile, "**", "*-*-*.nc"), recursive=True)
for file in files_gen:
    basename = os.path.basename(file)
    # Split on the first two "-" only,
    # e.g. "001-020-012B-B.nc" -> "001", "020", "012B-B.nc".
    part1, part2, _ = basename.split("-", 2)
    # Look up the destination folder; find_folder creates it if necessary.
    destination_folder = find_folder(part1, part2)
    # Copy (not move): the original file stays in the source folder.
    shutil.copy2(file, os.path.join(destination_folder, basename))
改进的解决方案
如果您有大量文件,在每次迭代时对每个文件夹都重新“拆分和匹配”可能会很慢。
我们可以将根据给定模式找到的文件夹存储在字典中。如果遇到新模式,字典将被更新;否则直接返回以前找到的文件夹。
import os
import glob
import shutil
# Folder that holds the .nc files, and the root of the destination tree.
# NOTE(review): the "[=12=]" fragment looks like text-extraction damage in
# the path - confirm the real folder name before running.
sourcefile = r'C:\Users\cah\Desktop[=12=]0Turning'
destinationPath = r'C:\Users\cah\Desktop-CAM'

# Global cache: (part1, part2) pattern -> folder path resolved for it.
found_pattern = {}

# Every directory matched by the recursive "**" glob under destinationPath,
# stored as the (parent_path, folder_name) pair returned by os.path.split.
all_folders_splitted = list(
    map(
        os.path.split,
        filter(
            os.path.isdir,
            glob.iglob(os.path.join(destinationPath, "**"), recursive=True),
        ),
    )
)
def find_folder(part1, part2):
    """Return the destination folder for a file named ``part1-part2-...``.

    Results are cached in the module-level ``found_pattern`` dictionary under
    the key ``(part1, part2)``, so the folder index is scanned only once per
    pattern.  On a cache miss, ``all_folders_splitted`` is searched for a
    folder whose name starts with ``part2`` and whose parent folder's name
    starts with ``part1``.  When none exists, one is created:

    * inside an already existing ``part1`` parent if at least one was found,
    * otherwise as ``destinationPath/part1/part2``.
    """
    current_key = (part1, part2)
    cached = found_pattern.get(current_key)
    if cached is not None:
        # Already resolved previously: return the stored folder path.
        return cached

    # Index entries whose *parent* folder name starts with part1.
    parent_matches = [
        folder for folder in all_folders_splitted
        if os.path.basename(folder[0]).startswith(part1)
    ]
    for parent, name in parent_matches:
        if name.startswith(part2):
            dest = os.path.join(parent, name)
            found_pattern[current_key] = dest
            return dest

    if parent_matches:
        # A part1 folder exists: create the part2 folder inside the last match.
        dest = os.path.join(parent_matches[-1][0], part2)
    else:
        # Nothing matches part1 either: build a whole new folder tree.
        dest = os.path.join(destinationPath, part1, part2)
    os.makedirs(dest, exist_ok=True)
    # Cache only after the folder really exists.
    found_pattern[current_key] = dest
    return dest
# All the files you want to move: every "*-*-*.nc" file below the source
# folder. The source folder variable is `sourcefile`.
files_gen = glob.iglob(os.path.join(sourcefile, "**", "*-*-*.nc"), recursive=True)
for file in files_gen:
    basename = os.path.basename(file)
    # Split on the first two "-" only,
    # e.g. "001-020-012B-B.nc" -> "001", "020", "012B-B.nc".
    part1, part2, _ = basename.split("-", 2)
    # Look up the destination folder; find_folder creates it if necessary.
    destination_folder = find_folder(part1, part2)
    # Copy (not move): the original file stays in the source folder.
    shutil.copy2(file, os.path.join(destination_folder, basename))
这个更新的解决方案使它更有效(尤其是当许多文件应该共享同一个文件夹时),如果您保存它,您以后也可以使用字典。
我有一个包含大量文件的目录,我想根据部分文件名将这些文件移动到文件夹中。我的文件列表如下所示:
001-020-012B-B.nc
001-022-151-A.nc
001-023-022-PY-T1.nc.nc
001-096-016B-A.nc
我想根据文件名的第一部分(001-096-016B、001-023-022、001-022-151)将这些文件移动到各自的文件夹中。文件名的第一部分始终具有相同数量的数字,并且始终由连字符 '-' 分隔为 3 个部分。
文件夹名称是这样命名的:\oe-xxxx\xxxx\xxxx\001-Disc-PED\020-Rotor-parts-1200。
因此,例如,根据文件夹名称(数字),此文件应放在上述文件夹中:
001-020-012B-B.nc
文件路径分为几列以显示上述文件必须移动到的位置:
(001)-Disc-PED\(020)-Rotor-parts-1200
.
因此:
(001)-Disc-PED\(020)-Rotor-parts-1200 (001)-(020)-012B-B.nc
这是我在网上试过的方法,但没有用: 我的想法是我想遍历文件夹并查找匹配项。
import os
import glob
import itertools
import re
# Source folder holding the .nc files, and the root of the destination tree.
# NOTE(review): the "[=13=]" fragment in the source path looks like
# text-extraction damage rather than a real folder name - confirm the path.
sourcefile = r'C:\Users\cah\Desktop[=13=]0Turning'
destinationPath = r'C:\Users\cah\Desktop-CAM'
# Separation
# These two glob calls run against the current working directory; both names
# are rebound by the os.walk loop below on every iteration.
dirs = glob.glob('*-*')
# Every file with file extension .nc
files = glob.glob('*.nc')
# NOTE(review): indentation was lost in this listing; the nesting below is a
# reconstruction - confirm against the original script.
for root, dirs, files in os.walk(sourcefile):
    for file in files:
        if file.endswith(".nc"):
            # Characters 0-2 and 4-6 of the file name,
            # e.g. "001" and "020" for "001-020-012B-B.nc".
            first3Char = str(file[0:3])
            last3Char = str(file[4:7])
            # os.walk yields (dirpath, dirnames, filenames) tuples, so `root`
            # is a tuple here, not a path string.
            for root in os.walk(destinationPath):
                first33CharsOfRoot = str(root[0:33])
                # Strip list/tuple punctuation from the tuple's text form.
                cleanRoot1 = str(root).replace("[", "")
                cleanRoot2 = str(cleanRoot1).replace("]", "")
                cleanRoot3 = str(cleanRoot2).replace(")", "")
                cleanRoot4 = str(cleanRoot3).replace("'", "")
                cleanRoot5 = str(cleanRoot4).replace(",", "")
                # Captures the last three characters of the cleaned text.
                # re.findall returns a list, so the comparisons below compare
                # a list with a str and are always False.
                firstCharOfRoot = re.findall(r'(.{3})\s*$', str(cleanRoot5))
                print(firstCharOfRoot==first3Char)
                if(firstCharOfRoot == first3Char):
                    print("Hello")
                    # `root` is again an os.walk tuple here; os.path.basename
                    # expects a path, not a tuple.
                    for root in os.walk(destinationPath):
                        print(os.path.basename(root))
                        # if(os.path)
我意识到我不应该查找路径中的最后 3 个字符,因为编号(001 等)位于文件夹名称的开头。我需要查找开头的数字,才能找到目标路径的第一级文件夹。
编辑:
import os
import glob
import itertools
import re
# Source folder holding the .nc files, and the root of the destination tree.
# NOTE(review): the "[=14=]" fragment in the source path looks like
# text-extraction damage rather than a real folder name - confirm the path.
sourcefile = r'C:\Users\cah\Desktop[=14=]0Turning'
destinationPath = r'C:\Users\cah\Desktop-CAM'
# Separation
# These two glob calls run against the current working directory; both names
# are rebound by the os.walk loop below on every iteration.
dirs = glob.glob('*-*')
# Every file with file extension .nc
files = glob.glob('*.nc')
# NOTE(review): indentation was lost in this listing; the nesting below is a
# reconstruction - confirm against the original script.
for root, dirs, files in os.walk(sourcefile):
    for file in files:
        if file.endswith(".nc"):
            # Characters 0-2 and 4-6 of the file name,
            # e.g. "001" and "020" for "001-020-012B-B.nc".
            first3Char = str(file[0:3])
            last3Char = str(file[4:7])
            # os.walk yields (dirpath, dirnames, filenames) tuples, so `root`
            # is a tuple here, not a path string.
            for root in os.walk(destinationPath):
                # Strip list/tuple punctuation from the tuple's text form.
                cleanRoot1 = str(root).replace("[", "")
                cleanRoot2 = str(cleanRoot1).replace("]", "")
                cleanRoot3 = str(cleanRoot2).replace(")", "")
                cleanRoot4 = str(cleanRoot3).replace("'", "")
                cleanRoot5 = str(cleanRoot4).replace(",", "")
                # NOTE(review): as written, `[^\]` never closes the character
                # class, so `re` raises "unterminated character set";
                # `[^\\]` (any character except a backslash) was presumably
                # intended - confirm against the original post.
                firstCharOfRoot = re.findall(r'^(?:[^\]+\\){5}(\d+).*$', str(cleanRoot5))
                secondCharOfRoot = re.findall(r'^(?:[^\]+\\){6}(\d+).*$', str(cleanRoot5))
                # findall returns a list; join it into one plain string.
                firstCharOfRootCleaned = ''.join(firstCharOfRoot)
                secondCharOfRoot = ''.join(secondCharOfRoot)
                cleanRoot6 = str(cleanRoot5).replace("(", "")
                # NOTE: `&` binds tighter than `==`, so this evaluates
                # `str(first3Char) & secondCharOfRoot` first, which raises
                # TypeError for two strings; `and` is the boolean operator.
                if(firstCharOfRootCleaned == str(first3Char) & secondCharOfRoot == str(last3Char)):
                    print("BINGOf")
                    # for root1 in os.walk(cleanRoot6):
解决方案
下一节有一个改进的解决方案。不过先分解一下直截了当的解法吧
首先,获取子文件夹的完整列表。
# Every directory matched by the recursive "**" glob under destinationPath,
# stored as the (parent_path, folder_name) pair returned by os.path.split.
all_folders_splitted = list(
    map(
        os.path.split,
        filter(
            os.path.isdir,
            glob.iglob(os.path.join(destinationPath, "**"), recursive=True),
        ),
    )
)
然后,对每个文件使用一个函数来查找其匹配的文件夹,如果不存在则创建并返回新的文件夹路径。这个名为 find_folder() 的函数包含在下面的完整脚本中:
import os
import glob
import shutil
# Folder that holds the .nc files, and the root of the destination tree.
# NOTE(review): the "[=11=]" fragment looks like text-extraction damage in
# the path - confirm the real folder name before running.
sourcefile = r'C:\Users\cah\Desktop[=11=]0Turning'
destinationPath = r'C:\Users\cah\Desktop-CAM'

# Every directory matched by the recursive "**" glob under destinationPath,
# stored as the (parent_path, folder_name) pair returned by os.path.split.
all_folders_splitted = list(
    map(
        os.path.split,
        filter(
            os.path.isdir,
            glob.iglob(os.path.join(destinationPath, "**"), recursive=True),
        ),
    )
)
def find_folder(part1, part2):
    """Return the destination folder for a file named ``part1-part2-...``.

    Searches ``all_folders_splitted`` for a folder whose name starts with
    ``part2`` and whose parent folder's name starts with ``part1``.  When no
    such folder exists, one is created and returned:

    * inside an already existing ``part1`` parent if at least one was found,
    * otherwise as ``destinationPath/part1/part2``.
    """
    # Index entries whose *parent* folder name starts with part1.
    parent_matches = [
        folder for folder in all_folders_splitted
        if os.path.basename(folder[0]).startswith(part1)
    ]
    for parent, name in parent_matches:
        if name.startswith(part2):
            return os.path.join(parent, name)

    if parent_matches:
        # A part1 folder exists: create the part2 folder inside the last match.
        dest = os.path.join(parent_matches[-1][0], part2)
    else:
        # Nothing matches part1 either: build a whole new folder tree.
        dest = os.path.join(destinationPath, part1, part2)
    # The folder index is built once and never refreshed, so a folder created
    # for an earlier file is not in it; exist_ok avoids FileExistsError when a
    # later file maps to that same folder.
    os.makedirs(dest, exist_ok=True)
    return dest
# All the files you want to move: every "*-*-*.nc" file below the source
# folder. The source folder variable is `sourcefile`.
files_gen = glob.iglob(os.path.join(sourcefile, "**", "*-*-*.nc"), recursive=True)
for file in files_gen:
    basename = os.path.basename(file)
    # Split on the first two "-" only,
    # e.g. "001-020-012B-B.nc" -> "001", "020", "012B-B.nc".
    part1, part2, _ = basename.split("-", 2)
    # Look up the destination folder; find_folder creates it if necessary.
    destination_folder = find_folder(part1, part2)
    # Copy (not move): the original file stays in the source folder.
    shutil.copy2(file, os.path.join(destination_folder, basename))
改进的解决方案
如果您有大量文件,在每次迭代时对每个文件夹都重新“拆分和匹配”可能会很慢。
我们可以将根据给定模式找到的文件夹存储在字典中。如果遇到新模式,字典将被更新;否则直接返回以前找到的文件夹。
import os
import glob
import shutil
# Folder that holds the .nc files, and the root of the destination tree.
# NOTE(review): the "[=12=]" fragment looks like text-extraction damage in
# the path - confirm the real folder name before running.
sourcefile = r'C:\Users\cah\Desktop[=12=]0Turning'
destinationPath = r'C:\Users\cah\Desktop-CAM'

# Global cache: (part1, part2) pattern -> folder path resolved for it.
found_pattern = {}

# Every directory matched by the recursive "**" glob under destinationPath,
# stored as the (parent_path, folder_name) pair returned by os.path.split.
all_folders_splitted = list(
    map(
        os.path.split,
        filter(
            os.path.isdir,
            glob.iglob(os.path.join(destinationPath, "**"), recursive=True),
        ),
    )
)
def find_folder(part1, part2):
    """Return the destination folder for a file named ``part1-part2-...``.

    Results are cached in the module-level ``found_pattern`` dictionary under
    the key ``(part1, part2)``, so the folder index is scanned only once per
    pattern.  On a cache miss, ``all_folders_splitted`` is searched for a
    folder whose name starts with ``part2`` and whose parent folder's name
    starts with ``part1``.  When none exists, one is created:

    * inside an already existing ``part1`` parent if at least one was found,
    * otherwise as ``destinationPath/part1/part2``.
    """
    current_key = (part1, part2)
    cached = found_pattern.get(current_key)
    if cached is not None:
        # Already resolved previously: return the stored folder path.
        return cached

    # Index entries whose *parent* folder name starts with part1.
    parent_matches = [
        folder for folder in all_folders_splitted
        if os.path.basename(folder[0]).startswith(part1)
    ]
    for parent, name in parent_matches:
        if name.startswith(part2):
            dest = os.path.join(parent, name)
            found_pattern[current_key] = dest
            return dest

    if parent_matches:
        # A part1 folder exists: create the part2 folder inside the last match.
        dest = os.path.join(parent_matches[-1][0], part2)
    else:
        # Nothing matches part1 either: build a whole new folder tree.
        dest = os.path.join(destinationPath, part1, part2)
    os.makedirs(dest, exist_ok=True)
    # Cache only after the folder really exists.
    found_pattern[current_key] = dest
    return dest
# All the files you want to move: every "*-*-*.nc" file below the source
# folder. The source folder variable is `sourcefile`.
files_gen = glob.iglob(os.path.join(sourcefile, "**", "*-*-*.nc"), recursive=True)
for file in files_gen:
    basename = os.path.basename(file)
    # Split on the first two "-" only,
    # e.g. "001-020-012B-B.nc" -> "001", "020", "012B-B.nc".
    part1, part2, _ = basename.split("-", 2)
    # Look up the destination folder; find_folder creates it if necessary.
    destination_folder = find_folder(part1, part2)
    # Copy (not move): the original file stays in the source folder.
    shutil.copy2(file, os.path.join(destination_folder, basename))
这个更新的解决方案使它更有效(尤其是当许多文件应该共享同一个文件夹时),如果您保存它,您以后也可以使用字典。