我们如何从所有子目录中读取所有 json 文件?
How can we read all JSON files from all subdirectories?
我想一步从多个子目录读取大量 json 文件。我该怎么做?
import glob
# The pattern contains no "**" and recursive matching is not enabled, so this
# only matches *.json files located directly in the "/." directory;
# subdirectories are not searched.
hit = glob.glob("/./*.json")
您可以使用以下逻辑来检索文件路径列表。
This answer 列举了非常多种可以做到这一点的方法。
一旦有了文件列表,加载它们就非常简单了...
import os
import json
def get_files_from_path(path: str = '.', extension: str = None) -> list:
    """Return the paths of all files under ``path``, including subdirectories.

    Args:
        path: Directory to start walking from. Defaults to ``'.'``.
        extension: If given, only files whose name ends with this suffix
            are returned; the comparison is case-insensitive. If ``None``,
            every file is returned.

    Returns:
        A list of file paths, each built by joining the directory reported
        by ``os.walk`` with the file name.
    """
    # Normalise the suffix once instead of once per file.
    suffix = None if extension is None else extension.lower()

    result = []
    for subdir, _dirs, files in os.walk(path):
        for filename in files:
            if suffix is None or filename.lower().endswith(suffix):
                # os.path.join avoids a doubled separator when subdir
                # already ends with one (e.g. path given with a trailing
                # separator).
                result.append(os.path.join(subdir, filename))
    return result
# Collect every .json file under the current directory, subdirectories included.
filelist = get_files_from_path(extension='.json')

# Parse each file; jsonlist ends up in the same order as filelist.
jsonlist = []
for filepath in filelist:
    # Be explicit about the encoding: JSON files are normally UTF-8, while
    # open() would otherwise fall back to the platform's default encoding.
    with open(filepath, encoding='utf-8') as infile:
        jsonlist.append(json.load(infile))

from pprint import pprint
pprint(jsonlist)
您可以通过在您的原始代码中进行少量修改来获取所有文件路径。
If recursive is true, the pattern “**” will match any files and zero or more directories, subdirectories and symbolic links to directories.
from glob import glob
# With recursive=True, "**" matches zero or more directory levels, so this
# collects *.json files at any depth. The pattern is relative, so the search
# starts from the current working directory.
hits = glob("**/*.json", recursive=True)
"""
You will get a list of paths like this
hits = [
#....
'<path>/delta/job-3/33ce4079-c8db-11eb-8683-f30d80ef99b2.json',
'<path>/delta/pair-1/cf81188b-c8d7-11eb-8367-5fb2efe63fc6.json',
'<path>/sub/b1d91522-cab4-11eb-a5cb-113c64720fe0.json',
'<path>/sub/979a7c2b-cab4-11eb-a5cb-b91d90206530.json',
'<path>/sub/977e4199-cab4-11eb-a5cb-33b60824fb94.json',
'<path>/sub/a5fb35cd-cab4-11eb-a5cb-5f6cd57276ff.json',
'<path>/sub/a60520de-cab4-11eb-a5cb-2723d138a03b.json',
'<path>/sub/9805e82c-cab4-11eb-a5cb-3987a3853fca.json'
]
"""
我想一步从多个子目录读取大量 json 文件。我该怎么做?
import glob
# The pattern contains no "**" and recursive matching is not enabled, so this
# only matches *.json files located directly in the "/." directory;
# subdirectories are not searched.
hit = glob.glob("/./*.json")
您可以使用以下逻辑来检索文件路径列表。
This answer 列举了非常多种可以做到这一点的方法。
一旦有了文件列表,加载它们就非常简单了...
import os
import json
def get_files_from_path(path: str = '.', extension: str = None) -> list:
    """Return the paths of all files under ``path``, including subdirectories.

    Args:
        path: Directory to start walking from. Defaults to ``'.'``.
        extension: If given, only files whose name ends with this suffix
            are returned; the comparison is case-insensitive. If ``None``,
            every file is returned.

    Returns:
        A list of file paths, each built by joining the directory reported
        by ``os.walk`` with the file name.
    """
    # Normalise the suffix once instead of once per file.
    suffix = None if extension is None else extension.lower()

    result = []
    for subdir, _dirs, files in os.walk(path):
        for filename in files:
            if suffix is None or filename.lower().endswith(suffix):
                # os.path.join avoids a doubled separator when subdir
                # already ends with one (e.g. path given with a trailing
                # separator).
                result.append(os.path.join(subdir, filename))
    return result
# Collect every .json file under the current directory, subdirectories included.
filelist = get_files_from_path(extension='.json')

# Parse each file; jsonlist ends up in the same order as filelist.
jsonlist = []
for filepath in filelist:
    # Be explicit about the encoding: JSON files are normally UTF-8, while
    # open() would otherwise fall back to the platform's default encoding.
    with open(filepath, encoding='utf-8') as infile:
        jsonlist.append(json.load(infile))

from pprint import pprint
pprint(jsonlist)
您可以通过在您的原始代码中进行少量修改来获取所有文件路径。
If recursive is true, the pattern “**” will match any files and zero or more directories, subdirectories and symbolic links to directories.
from glob import glob
# With recursive=True, "**" matches zero or more directory levels, so this
# collects *.json files at any depth. The pattern is relative, so the search
# starts from the current working directory.
hits = glob("**/*.json", recursive=True)
"""
You will get a list of paths like this
hits = [
#....
'<path>/delta/job-3/33ce4079-c8db-11eb-8683-f30d80ef99b2.json',
'<path>/delta/pair-1/cf81188b-c8d7-11eb-8367-5fb2efe63fc6.json',
'<path>/sub/b1d91522-cab4-11eb-a5cb-113c64720fe0.json',
'<path>/sub/979a7c2b-cab4-11eb-a5cb-b91d90206530.json',
'<path>/sub/977e4199-cab4-11eb-a5cb-33b60824fb94.json',
'<path>/sub/a5fb35cd-cab4-11eb-a5cb-5f6cd57276ff.json',
'<path>/sub/a60520de-cab4-11eb-a5cb-2723d138a03b.json',
'<path>/sub/9805e82c-cab4-11eb-a5cb-3987a3853fca.json'
]
"""