Python:对从目录中读取的文件进行自定义排序

Python Custom sort files read from directory

我有一个结构如下的目录:

Main directory:
|--2001
   |--200101
      |--feed_013_01.zip
      |--feed_restr_013_01.zip
      |--feed_013_04.zip
      |--feed_restr_013_04.zip
      ...
      |--feed_013_30.zip
      |--feed_restr_013_30.zip
...
|--2021
   |--202101
      |--feed_013_01.zip
      |--feed_restr_013_01.zip
      |--feed_013_04.zip
      |--feed_restr_013_04.zip
      ...
      |--feed_013_30.zip
      |--feed_restr_013_30.zip

我需要按以下顺序读取并排序这些 zip 文件:

feed_restr_013_30.zip, feed_013_30.zip.....feed_restr_013_01.zip, feed_013_01.zip

我目前正在做这样的事情:

def atoi(text):
    """Convert a run of decimal digits to ``int``; return other text as is.

    ``str.isdecimal`` is used rather than ``str.isdigit`` because the latter
    is also true for characters such as superscript digits, which ``int()``
    rejects with ``ValueError``.

    Args:
        text: A string fragment, typically one piece of a file name.

    Returns:
        ``int(text)`` when ``text`` consists only of decimal digits,
        otherwise ``text`` unchanged (including the empty string).
    """
    return int(text) if text.isdecimal() else text

def natural_keys(text):
    """Return a natural-sort key for ``text``.

    The string is cut at every run of digits (the capturing group keeps the
    digit runs in the result) and each piece is passed through ``atoi``.
    """
    pieces = re.split(r'(\d+)', text)
    return list(map(atoi, pieces))

# Walk the tree and put each level into descending natural order.
for path, subdirs, files in os.walk(directory):
    # Slice assignment mutates the same list objects os.walk handed out, so
    # the top-down walk descends into the sub-directories in this order.
    subdirs[:] = sorted(subdirs, key=natural_keys)[::-1]
    files[:] = sorted(files, key=natural_keys)[::-1]

这样会先把所有“restr”文件排在前面,我得到的列表是:

feed_restr_013_30.zip,feed_restr_013_01.zip.....feed_013_30.zip, feed_013_01.zip

更新

我能够使用 buran 和 SCKU 的答案以及我现有的逻辑来解决这个问题

def atoi(text):
    """Convert a run of decimal digits to ``int``; return other text as is.

    ``str.isdecimal`` is used rather than ``str.isdigit`` because the latter
    is also true for characters such as superscript digits, which ``int()``
    rejects with ``ValueError``.

    Args:
        text: A string fragment, typically one piece of a file name.

    Returns:
        ``int(text)`` when ``text`` consists only of decimal digits,
        otherwise ``text`` unchanged (including the empty string).
    """
    return int(text) if text.isdecimal() else text

def parse(fname):
    """Build a sort key from an underscore-separated file name.

    The name is split on ``'_'`` into ``prefix``, optional middle fields,
    ``n1`` and ``n2``. When there are too few fields for that layout, the
    last field is taken as ``n1`` and ``n2`` becomes the empty string.

    Args:
        fname: File name (or path) such as ``'feed_restr_013_01.zip'``.

    Returns:
        A tuple ``(prefix, n1, n2_key, middle)`` where ``n2_key`` is ``n2``
        split at digit runs with each piece passed through ``atoi``, and
        ``middle`` is the middle fields joined without a separator.

    Raises:
        ValueError: If ``fname`` contains no underscore at all.
    """
    fields = fname.split('_')
    try:
        prefix, *middle, n1, n2 = fields
    except ValueError:
        # Only an unpacking failure (too few fields) is expected here; a bare
        # ``except`` would also hide KeyboardInterrupt and real bugs.
        prefix, *middle, n1 = fields
        n2 = ''
    return (prefix, n1, [atoi(c) for c in re.split(r'(\d+)', n2)], ''.join(middle))

def get_Files(self, directory, source, keywords):
    """Recursively collect the path of every file below ``directory``.

    ``self``, ``source`` and ``keywords`` are accepted but not used by this
    body.

    Returns:
        A list of paths, each built by joining the containing directory
        reported by ``os.walk`` with the file name, in walk order.
    """
    return [
        os.path.join(root, name)
        for root, _dirs, names in os.walk(directory)
        for name in names
    ]

# Collect every file path, then order them by the parse() key, largest first.
# NOTE(review): get_Files is declared with a leading `self` parameter, so this
# three-argument call presumably comes from a method context - confirm.
files = sorted(get_Files(directory, source, keywords), key=parse, reverse=True)
# Sample file names used to demonstrate the sort key below.
files = [
    'feed_013_01.zip',
    'feed_restr_013_01.zip',
    'feed_013_04.zip',
    'feed_restr_013_04.zip',
    'feed_013_30.zip',
    'feed_restr_013_30.zip',
]

def parse(fname):
    """Build a numeric sort key from a name like ``'feed_restr_013_01.zip'``.

    The name is split on ``'_'`` into ``prefix``, optional middle fields,
    ``n1`` and ``n2``.

    Args:
        fname: File name with at least three underscore-separated fields,
            the last two being numeric (the last may carry an extension).

    Returns:
        ``(prefix, int(n1), int(n2), middle)`` where ``middle`` is the middle
        fields joined without a separator (``''`` when there are none).

    Raises:
        ValueError: If there are fewer than three fields, or if ``n1`` / the
            extension-less ``n2`` is not an integer literal.
    """
    prefix, *middle, n1, n2 = fname.split('_')
    # The last field still carries the extension ('01.zip'); int() would
    # reject it, so keep only the part before the first dot.
    n2 = n2.partition('.')[0]
    return (prefix, int(n1), int(n2), ''.join(middle))

# Reorder the existing list in place by the parse() key, largest key first,
# then show the result.
files[:] = sorted(files, key=parse, reverse=True)
print(files)

输出

['feed_restr_013_30.zip', 'feed_013_30.zip', 'feed_restr_013_04.zip', 'feed_013_04.zip',
 'feed_restr_013_01.zip', 'feed_013_01.zip']

如果你的目录结构规整且规模不太大,我建议先获取所有文件路径,再一次性对它们进行排序:

# Gather the full path of every file found under `directory`.
all_files_path = []
for path, subdirs, files in os.walk(directory):
    all_files_path.extend(os.path.join(path, f) for f in files)

# Custom sort key: orders paths by year, month, then file-name fields.
def which_items_you_want_to_compare(fpath):
    """Return the sort key for one file path.

    The path is split on ``os.path.sep``; the last three components are
    read as ``<year dir>/<year-month dir>/<file name>`` (for example
    ``2021/202101/feed_restr_013_01.zip``).

    Returns:
        ``(year, month, prefix, n1, n2, middle)`` where ``year`` is the year
        directory as ``int``, ``month`` is the year-month directory from its
        fifth character on as ``int``, and the remaining items are the
        underscore-separated fields of the file name kept as strings
        (``middle`` is the fields between ``prefix`` and ``n1`` joined
        together).
    """
    components = fpath.split(os.path.sep)

    # File-name part first (same field layout as buran's answer).
    file_name = components[-1]  # e.g. 'feed_restr_013_01.zip'
    prefix, *middle, n1, n2 = file_name.split('_')
    middle_joined = ''.join(middle)

    year_month_dir = components[-2]  # e.g. '202101'
    year_dir = components[-3]  # e.g. '2021'

    # Year first, then month (characters after the 4-digit year), then the
    # file-name fields.
    return (int(year_dir), int(year_month_dir[4:]), prefix, n1, n2, middle_joined)


# Sort a copy so all_files_path itself keeps its original order.
sorted_res = list(all_files_path)
sorted_res.sort(key=which_items_you_want_to_compare, reverse=True)

如果不希望某一项(例如年份)按倒序排列,可以在 key 函数中对该项取负(例如使用 -int(d_y)),这样它在 reverse=True 时就会变回升序。