如何使用 pymongo 将集合转储到 json 文件
How to dump a collection to json file using pymongo
我正在尝试将一个集合转储到 .json 文件,但在查看 pymongo 教程后我找不到任何与之相关的东西。
只需获取所有文档并将它们保存到文件中,例如:
from bson.json_util import dumps
from pymongo import MongoClient
if __name__ == '__main__':
client = MongoClient()
db = client.db_name
collection = db.collection_name
cursor = collection.find({})
with open('collection.json', 'w') as file:
file.write('[')
for document in cursor:
file.write(dumps(document))
file.write(',')
file.write(']')
补充@kamilitw 我使用游标的长度来正确制作JSON 文件。我使用 count()
和 if-else
:
def writeToJSONFile(collection):
cursor = collection.find({})
file = open("collection.json", "w")
file.write('[')
qnt_cursor = 0
for document in cursor:
qnt_cursor += 1
num_max = cursor.count()
if (num_max == 1):
file.write(json.dumps(document, indent=4, default=json_util.default))
elif (num_max >= 1 and qnt_cursor <= num_max-1):
file.write(json.dumps(document, indent=4, default=json_util.default))
file.write(',')
elif (qnt_cursor == num_max):
file.write(json.dumps(document, indent=4, default=json_util.default))
file.write(']')
return file
所以JSON文件在and中是正确的,因为以前是这样写的:[{"test": "test"},]
,现在是这样写的:[{"test":"test1"},{"test":"test2"}]
这是另一种不在右方括号前保存 ,
的方法。还使用 with open
保存一些 space.
filter = {"type": "something"}
type_documents = db['cluster'].find(filter)
type_documents_count = db['cluster'].count_documents(filter)
with open("type_documents.json", "w") as file:
file.write('[')
# Start from one as type_documents_count also starts from 1.
for i, document in enumerate(type_documents, 1):
file.write(json.dumps(document, default=str))
if i != type_documents_count:
file.write(',')
file.write(']')
如果迭代次数等于文档数(这是它保存的最后一个文档),它基本上不会写逗号。
"""
@Author: Aseem Jain
@profile: https://www.linkedin.com/in/premaseem/
"""
import os
import pymongo
# configure credentials / db name
db_user = os.environ["MONGO_ATLAS_USER"]
db_pass = os.environ["MONGO_ATLAS_PASSWORD"]
db_name = "sample_mflix"
connection_string = f"mongodb+srv://{db_user}:{db_pass}@sharedcluster.lv3wx.mongodb.net/{db_name}?retryWrites=true&w=majority"
client = pymongo.MongoClient(connection_string)
db = client[db_name]
# create database back directory with db_name
os.makedirs(db_name, exist_ok=True)
# list all tables in database
tables = db.list_collection_names()
# dump all tables in db
for table in tables:
print("exporting data for table", table )
data = list(db[table].find())
# write data in json file
with open(f"{db.name}/{table}.json","w") as writer:
writer.write(str(data))
exit(0)
已接受的解决方案产生了无效 JSON。它导致在右方括号 ]
之前出现尾随逗号 ,
。 JSON 规范不允许尾随逗号。看到这个 answer and this reference.
为了构建公认的解决方案,我使用了以下内容:
from bson.json_util import dumps
from pymongo import MongoClient
import json
if __name__ == '__main__':
client = MongoClient()
db = client.db_name
collection = db.collection_name
cursor = collection.find({})
with open('collection.json', 'w') as file:
json.dump(json.loads(dumps(cursor)), file)
我正在尝试将一个集合转储到 .json 文件,但在查看 pymongo 教程后我找不到任何与之相关的东西。
只需获取所有文档并将它们保存到文件中,例如:
from bson.json_util import dumps
from pymongo import MongoClient
if __name__ == '__main__':
client = MongoClient()
db = client.db_name
collection = db.collection_name
cursor = collection.find({})
with open('collection.json', 'w') as file:
file.write('[')
for document in cursor:
file.write(dumps(document))
file.write(',')
file.write(']')
补充@kamilitw 我使用游标的长度来正确制作JSON 文件。我使用 count()
和 if-else
:
def writeToJSONFile(collection):
cursor = collection.find({})
file = open("collection.json", "w")
file.write('[')
qnt_cursor = 0
for document in cursor:
qnt_cursor += 1
num_max = cursor.count()
if (num_max == 1):
file.write(json.dumps(document, indent=4, default=json_util.default))
elif (num_max >= 1 and qnt_cursor <= num_max-1):
file.write(json.dumps(document, indent=4, default=json_util.default))
file.write(',')
elif (qnt_cursor == num_max):
file.write(json.dumps(document, indent=4, default=json_util.default))
file.write(']')
return file
所以JSON文件在and中是正确的,因为以前是这样写的:[{"test": "test"},]
,现在是这样写的:[{"test":"test1"},{"test":"test2"}]
这是另一种不在右方括号前保存 ,
的方法。还使用 with open
保存一些 space.
filter = {"type": "something"}
type_documents = db['cluster'].find(filter)
type_documents_count = db['cluster'].count_documents(filter)
with open("type_documents.json", "w") as file:
file.write('[')
# Start from one as type_documents_count also starts from 1.
for i, document in enumerate(type_documents, 1):
file.write(json.dumps(document, default=str))
if i != type_documents_count:
file.write(',')
file.write(']')
如果迭代次数等于文档数(这是它保存的最后一个文档),它基本上不会写逗号。
"""
@Author: Aseem Jain
@profile: https://www.linkedin.com/in/premaseem/
"""
import os
import pymongo
# configure credentials / db name
db_user = os.environ["MONGO_ATLAS_USER"]
db_pass = os.environ["MONGO_ATLAS_PASSWORD"]
db_name = "sample_mflix"
connection_string = f"mongodb+srv://{db_user}:{db_pass}@sharedcluster.lv3wx.mongodb.net/{db_name}?retryWrites=true&w=majority"
client = pymongo.MongoClient(connection_string)
db = client[db_name]
# create database back directory with db_name
os.makedirs(db_name, exist_ok=True)
# list all tables in database
tables = db.list_collection_names()
# dump all tables in db
for table in tables:
print("exporting data for table", table )
data = list(db[table].find())
# write data in json file
with open(f"{db.name}/{table}.json","w") as writer:
writer.write(str(data))
exit(0)
已接受的解决方案产生了无效 JSON。它导致在右方括号 ]
之前出现尾随逗号 ,
。 JSON 规范不允许尾随逗号。看到这个 answer and this reference.
为了构建公认的解决方案,我使用了以下内容:
from bson.json_util import dumps
from pymongo import MongoClient
import json
if __name__ == '__main__':
client = MongoClient()
db = client.db_name
collection = db.collection_name
cursor = collection.find({})
with open('collection.json', 'w') as file:
json.dump(json.loads(dumps(cursor)), file)