Python 从目录中读取的自定义排序文件
Python Custom sort files read from directory
我有一个结构如下的目录:
Main directory:
|--2001
|--200101
|--feed_013_01.zip
|--feed_restr_013_01.zip
|--feed_013_04.zip
|--feed_restr_013_04.zip
...
|--feed_013_30.zip
|--feed_restr_013_30.zip
...
|--2021
|--202101
|--feed_013_01.zip
|--feed_restr_013_01.zip
|--feed_013_04.zip
|--feed_restr_013_04.zip
...
|--feed_013_30.zip
|--feed_restr_013_30.zip
我需要按以下顺序读取并排序 zip 文件:
feed_restr_013_30.zip, feed_013_30.zip.....feed_restr_013_01.zip, feed_013_01.zip
我目前正在做这样的事情:
def atoi(text):
    """Convert *text* to ``int`` when it consists solely of decimal digits.

    Any other string, including the empty string, is returned unchanged.
    ``str.isdecimal`` is used instead of ``str.isdigit`` because the latter
    also accepts characters such as superscript digits, which ``int()``
    rejects with ``ValueError``.
    """
    return int(text) if text.isdecimal() else text
def natural_keys(text):
    """Return a natural-sort key for *text*.

    The string is split into alternating non-digit and digit runs (the
    capturing group keeps the digit runs in the result), and each run is
    passed through ``atoi`` so digit runs compare as integers.
    """
    return [atoi(c) for c in re.split(r'(\d+)', text)]
for path, subdirs, files in os.walk(directory):
    # os.walk is top-down by default, so reordering ``subdirs`` in place
    # also decides the order in which the sub-directories are visited.
    subdirs.sort(key=natural_keys)
    subdirs.reverse()
    # Natural sort of the file names, then flipped to descending order.
    files.sort(key=natural_keys)
    files.reverse()
它首先获取所有“restr”文件,我得到的列表为:
feed_restr_013_30.zip,feed_restr_013_01.zip.....feed_013_30.zip, feed_013_01.zip
更新
我能够使用 buran 和 SCKU 的答案以及我现有的逻辑来解决这个问题
def atoi(text):
    """Convert *text* to ``int`` when it consists solely of decimal digits.

    Any other string, including the empty string, is returned unchanged.
    ``str.isdecimal`` is used instead of ``str.isdigit`` because the latter
    also accepts characters such as superscript digits, which ``int()``
    rejects with ``ValueError``.
    """
    return int(text) if text.isdecimal() else text
def parse(fname):
    """Build the sort key for a name shaped like ``prefix[_middle...]_n1_n2``.

    Returns ``(prefix, n1, n2_key, middle)``: ``prefix`` is the text before
    the first underscore, ``n1`` the second-to-last field (kept as a string),
    ``n2_key`` the last field split into text and integer runs, and
    ``middle`` every field in between, concatenated.

    Names with only two fields have no ``n2``; names without any underscore
    use the whole name as ``prefix``. The field count is checked explicitly
    instead of relying on a bare ``except`` around the unpacking.
    """
    parts = fname.split('_')
    if len(parts) >= 3:
        prefix, *middle, n1, n2 = parts
    elif len(parts) == 2:
        # Two fields only: there is no trailing n2 component.
        prefix, n1 = parts
        middle, n2 = [], ''
    else:
        # No underscore at all; nothing to split into numbered fields.
        prefix, middle, n1, n2 = fname, [], '', ''
    return (prefix, n1, [atoi(c) for c in re.split(r'(\d+)', n2)], ''.join(middle))
def get_Files(self, directory, source, keywords):
    """Return the full path of every file found under *directory*.

    The tree is traversed with ``os.walk`` and each file name is joined to
    the directory that contains it. ``self``, ``source`` and ``keywords``
    are accepted but not used by this function.
    """
    file_paths = []
    for path, _subdirs, files in os.walk(directory):
        file_paths.extend(os.path.join(path, name) for name in files)
    return file_paths
# get_Files is declared with a leading ``self`` parameter that its body never
# uses, so when it is called as a plain function that slot needs a
# placeholder; without it the arguments shift left and the call raises
# TypeError for the missing ``keywords``.
files = get_Files(None, directory, source, keywords)
files.sort(key=parse, reverse=True)
# Sample file names used to demonstrate the sort.
files = [
    'feed_013_01.zip',
    'feed_restr_013_01.zip',
    'feed_013_04.zip',
    'feed_restr_013_04.zip',
    'feed_013_30.zip',
    'feed_restr_013_30.zip',
]
def parse(fname):
    """Return the sort key ``(prefix, n1, n2, middle)`` for *fname*.

    *fname* is split on underscores: ``prefix`` is the first field, ``n1``
    and ``n2`` are the last two fields converted to ``int``, and ``middle``
    is whatever lies between, concatenated (empty when nothing does).

    Raises ``ValueError`` if the name has fewer than three fields or the
    numeric fields are not integers.
    """
    prefix, *middle, n1, n2 = fname.split('_')
    # The last field still carries the extension (e.g. "01.zip"); keep only
    # the part before the first dot so int() receives pure digits.
    number = n2.split('.', 1)[0]
    return (prefix, int(n1), int(number), ''.join(middle))
# Reorder the list in place, descending on the tuple returned by parse(),
# then show the result.
files.sort(reverse=True, key=parse)
print(files)
输出
['feed_restr_013_30.zip', 'feed_013_30.zip', 'feed_restr_013_04.zip', 'feed_013_04.zip',
'feed_restr_013_01.zip', 'feed_013_01.zip']
如果你的目录结构规整而且不太大,我建议先获取所有文件路径,然后一次性对它们进行排序:
# Collect the full path of every file found anywhere under `directory`.
all_files_path = []
for path, subdirs, files in os.walk(directory):
    for f in files:
        all_files_path.append(os.path.join(path, f))
# define custom sort key function
def which_items_you_want_to_compare(fpath):
    """Return the sort key for a path shaped ``.../<year>/<yearmonth>/<file>``.

    The key is ``(year, month, prefix, n1, n2, middle)``: ``year`` and
    ``month`` are integers taken from the two directories directly above the
    file, and the remaining items come from splitting the file name on
    underscores. ``n1`` and ``n2`` stay strings, so they compare
    lexicographically.

    Raises ``IndexError`` if the path has fewer than three components and
    ``ValueError`` if the directory names are not numeric or the file name
    has fewer than three underscore-separated fields.
    """
    # from buran's answer for sorting the part of file name
    def parse(fname):
        prefix, *middle, n1, n2 = fname.split('_')
        return (prefix, n1, n2, ''.join(middle))

    fpath_split = fpath.split(os.path.sep)
    fn = fpath_split[-1]  # file name, e.g. 'feed_restr_013_01.zip'
    sort_key_fn = parse(fn)
    d_ym = fpath_split[-2]  # year-month directory, e.g. '202101'
    d_y = fpath_split[-3]  # year directory, e.g. '2021'
    # Compare by year first, then by month (the characters of d_ym after the
    # first four), then by the file-name key.
    return (int(d_y), int(d_ym[4:])) + sort_key_fn
# Build a new list ordered descending by the custom key; the input list is
# left untouched.
sorted_res = sorted(
    all_files_path,
    reverse=True,
    key=which_items_you_want_to_compare,
)
如果不想按年份倒序,可以在 key 函数中使用 -int(d_y) 之类的写法,单独反转该字段的排序方向。
我有一个结构如下的目录:
Main directory:
|--2001
|--200101
|--feed_013_01.zip
|--feed_restr_013_01.zip
|--feed_013_04.zip
|--feed_restr_013_04.zip
...
|--feed_013_30.zip
|--feed_restr_013_30.zip
...
|--2021
|--202101
|--feed_013_01.zip
|--feed_restr_013_01.zip
|--feed_013_04.zip
|--feed_restr_013_04.zip
...
|--feed_013_30.zip
|--feed_restr_013_30.zip
我需要按以下顺序读取并排序 zip 文件:
feed_restr_013_30.zip, feed_013_30.zip.....feed_restr_013_01.zip, feed_013_01.zip
我目前正在做这样的事情:
def atoi(text):
    """Convert *text* to ``int`` when it consists solely of decimal digits.

    Any other string, including the empty string, is returned unchanged.
    ``str.isdecimal`` is used instead of ``str.isdigit`` because the latter
    also accepts characters such as superscript digits, which ``int()``
    rejects with ``ValueError``.
    """
    return int(text) if text.isdecimal() else text
def natural_keys(text):
    """Return a natural-sort key for *text*.

    The string is split into alternating non-digit and digit runs (the
    capturing group keeps the digit runs in the result), and each run is
    passed through ``atoi`` so digit runs compare as integers.
    """
    return [atoi(c) for c in re.split(r'(\d+)', text)]
for path, subdirs, files in os.walk(directory):
    # os.walk is top-down by default, so reordering ``subdirs`` in place
    # also decides the order in which the sub-directories are visited.
    subdirs.sort(key=natural_keys)
    subdirs.reverse()
    # Natural sort of the file names, then flipped to descending order.
    files.sort(key=natural_keys)
    files.reverse()
它首先获取所有“restr”文件,我得到的列表为:
feed_restr_013_30.zip,feed_restr_013_01.zip.....feed_013_30.zip, feed_013_01.zip
更新
我能够使用 buran 和 SCKU 的答案以及我现有的逻辑来解决这个问题
def atoi(text):
    """Convert *text* to ``int`` when it consists solely of decimal digits.

    Any other string, including the empty string, is returned unchanged.
    ``str.isdecimal`` is used instead of ``str.isdigit`` because the latter
    also accepts characters such as superscript digits, which ``int()``
    rejects with ``ValueError``.
    """
    return int(text) if text.isdecimal() else text
def parse(fname):
    """Build the sort key for a name shaped like ``prefix[_middle...]_n1_n2``.

    Returns ``(prefix, n1, n2_key, middle)``: ``prefix`` is the text before
    the first underscore, ``n1`` the second-to-last field (kept as a string),
    ``n2_key`` the last field split into text and integer runs, and
    ``middle`` every field in between, concatenated.

    Names with only two fields have no ``n2``; names without any underscore
    use the whole name as ``prefix``. The field count is checked explicitly
    instead of relying on a bare ``except`` around the unpacking.
    """
    parts = fname.split('_')
    if len(parts) >= 3:
        prefix, *middle, n1, n2 = parts
    elif len(parts) == 2:
        # Two fields only: there is no trailing n2 component.
        prefix, n1 = parts
        middle, n2 = [], ''
    else:
        # No underscore at all; nothing to split into numbered fields.
        prefix, middle, n1, n2 = fname, [], '', ''
    return (prefix, n1, [atoi(c) for c in re.split(r'(\d+)', n2)], ''.join(middle))
def get_Files(self, directory, source, keywords):
    """Return the full path of every file found under *directory*.

    The tree is traversed with ``os.walk`` and each file name is joined to
    the directory that contains it. ``self``, ``source`` and ``keywords``
    are accepted but not used by this function.
    """
    file_paths = []
    for path, _subdirs, files in os.walk(directory):
        file_paths.extend(os.path.join(path, name) for name in files)
    return file_paths
# get_Files is declared with a leading ``self`` parameter that its body never
# uses, so when it is called as a plain function that slot needs a
# placeholder; without it the arguments shift left and the call raises
# TypeError for the missing ``keywords``.
files = get_Files(None, directory, source, keywords)
files.sort(key=parse, reverse=True)
# Sample file names used to demonstrate the sort.
files = [
    'feed_013_01.zip',
    'feed_restr_013_01.zip',
    'feed_013_04.zip',
    'feed_restr_013_04.zip',
    'feed_013_30.zip',
    'feed_restr_013_30.zip',
]
def parse(fname):
    """Return the sort key ``(prefix, n1, n2, middle)`` for *fname*.

    *fname* is split on underscores: ``prefix`` is the first field, ``n1``
    and ``n2`` are the last two fields converted to ``int``, and ``middle``
    is whatever lies between, concatenated (empty when nothing does).

    Raises ``ValueError`` if the name has fewer than three fields or the
    numeric fields are not integers.
    """
    prefix, *middle, n1, n2 = fname.split('_')
    # The last field still carries the extension (e.g. "01.zip"); keep only
    # the part before the first dot so int() receives pure digits.
    number = n2.split('.', 1)[0]
    return (prefix, int(n1), int(number), ''.join(middle))
# Reorder the list in place, descending on the tuple returned by parse(),
# then show the result.
files.sort(reverse=True, key=parse)
print(files)
输出
['feed_restr_013_30.zip', 'feed_013_30.zip', 'feed_restr_013_04.zip', 'feed_013_04.zip',
'feed_restr_013_01.zip', 'feed_013_01.zip']
如果你的目录结构规整而且不太大,我建议先获取所有文件路径,然后一次性对它们进行排序:
# Collect the full path of every file found anywhere under `directory`.
all_files_path = []
for path, subdirs, files in os.walk(directory):
    for f in files:
        all_files_path.append(os.path.join(path, f))
# define custom sort key function
def which_items_you_want_to_compare(fpath):
    """Return the sort key for a path shaped ``.../<year>/<yearmonth>/<file>``.

    The key is ``(year, month, prefix, n1, n2, middle)``: ``year`` and
    ``month`` are integers taken from the two directories directly above the
    file, and the remaining items come from splitting the file name on
    underscores. ``n1`` and ``n2`` stay strings, so they compare
    lexicographically.

    Raises ``IndexError`` if the path has fewer than three components and
    ``ValueError`` if the directory names are not numeric or the file name
    has fewer than three underscore-separated fields.
    """
    # from buran's answer for sorting the part of file name
    def parse(fname):
        prefix, *middle, n1, n2 = fname.split('_')
        return (prefix, n1, n2, ''.join(middle))

    fpath_split = fpath.split(os.path.sep)
    fn = fpath_split[-1]  # file name, e.g. 'feed_restr_013_01.zip'
    sort_key_fn = parse(fn)
    d_ym = fpath_split[-2]  # year-month directory, e.g. '202101'
    d_y = fpath_split[-3]  # year directory, e.g. '2021'
    # Compare by year first, then by month (the characters of d_ym after the
    # first four), then by the file-name key.
    return (int(d_y), int(d_ym[4:])) + sort_key_fn
# Build a new list ordered descending by the custom key; the input list is
# left untouched.
sorted_res = sorted(
    all_files_path,
    reverse=True,
    key=which_items_you_want_to_compare,
)
如果不想按年份倒序,可以在 key 函数中使用 -int(d_y) 之类的写法,单独反转该字段的排序方向。