递归生成器函数 Python 嵌套 JSON 数据
Recursive Generator Function Python Nested JSON Data
我正在尝试编写一个递归生成器函数,用来展平由混合类型、列表和字典组成的嵌套 JSON 对象。我这样做部分是为了自己学习,所以刻意没有照搬网上的示例,以便更好地理解其中的原理;但我想我是卡在了 yield 语句在函数中相对于循环应该放在什么位置这个问题上。
传递给生成器函数的数据源是迭代 mongo 集合的外循环的输出。
当我在与 Yield 语句相同的位置使用 print 语句时,我得到了我期望的结果,但是当我将其切换为 yield 语句时,生成器似乎每次外循环迭代只产生一个项目。
希望有人能告诉我哪里出错了。
# Key names whose scalar values are passed to dic_recurse as `fields`.
columns = [
    '_id',
    'name',
    'personId',
    'status',
    'explorerProgress',
    'isSelectedForReview',
]
# NOTE(review): MongoClient is not imported in the visible code
# (presumably pymongo's MongoClient -- confirm).
db = MongoClient().abcDatabase
# Collection whose items are iterated and handed to dic_recurse below.
coll = db.abcCollection
def dic_recurse(data, fields, counter, source_field):
    """First attempt from the question: only top-level matches are yielded.

    Meant to walk nested dicts/lists and yield a ``(name, value)`` tuple for
    each non-list, non-dict value whose key is in ``fields``.  As written,
    the recursive calls only create generator objects that are never
    iterated, so nothing from nested containers is produced.

    Args:
        data: Dict or list to walk; any other type yields nothing.
        fields: Container of key names to report (tested with ``in``).
        counter: Incremented once per call; not used in the yielded values.
        source_field: Name prefix, extended with ``_<key>`` for each nested
            list or dict encountered.
    """
    counter += 1
    if isinstance(data, dict):
        for k, v in data.items():
            if k in fields and isinstance(v, list) is False and isinstance(v, dict) is False:
                # The print variant the question compares against:
                # print "{0}{1}".format(source_field, k)[1:], v
                # [1:] drops the first character of the combined name
                # (presumably the prefix's leading underscore; with an empty
                # prefix it drops the key's own first character instead).
                yield "{0}{1}".format(source_field, k)[1:], v
            elif isinstance(v, list):
                # The prefix is never reset, so it keeps growing across
                # sibling keys of the same dict.
                source_field += "_{0}".format(k)
                # NOTE: calling a generator function only builds a generator
                # object.  These objects are collected into a throwaway list
                # and never iterated, so their bodies never run.
                [dic_recurse(l, fields, counter, source_field) for l in data.get(k)]
            elif isinstance(v, dict):
                source_field += "_{0}".format(k)
                # NOTE: same problem -- the generator is created and dropped.
                dic_recurse(v, fields, counter, source_field)
    elif isinstance(data, list):
        # NOTE: same problem -- generators are created but never consumed.
        [dic_recurse(l, fields, counter, '') for l in data]
# Walk every item returned by coll.find() and print each tuple that
# dic_recurse yields for it (Python 2 print statement).
for item in coll.find():
    for d in dic_recurse(item, columns, 0, ''):
        print d
下面是它所迭代的数据示例,不过实际数据的嵌套层级比这里显示的还要更深。
{
"_id" : ObjectId("5478464ee4b0a44213e36eb0"),
"consultationId" : "54784388e4b0a44213e36d5f",
"modules" : [
{
"_id" : "FF",
"name" : "Foundations",
"strategyHeaders" : [
{
"_id" : "FF_Money",
"description" : "Let's see where you're spending your money.",
"name" : "Managing money day to day",
"statuses" : [
{
"pid" : "54784388e4b0a44213e36d5d",
"status" : "selected",
"whenUpdated" : NumberLong(1425017616062)
},
{
"pid" : "54783da8e4b09cf5d82d4e11",
"status" : "selected",
"whenUpdated" : NumberLong(1425017616062)
}
],
"strategies" : [
{
"_id" : "FF_Money_CF",
"description" : "This option helps you get a picture of how much you're spending",
"name" : "Your spending and savings.",
"relatedGoals" : [
{
"_id" : ObjectId("54784581e4b0a44213e36e2f")
},
{
"_id" : ObjectId("5478458ee4b0a44213e36e33")
},
{
"_id" : ObjectId("547845a5e4b0a44213e36e37")
},
{
"_id" : ObjectId("54784577e4b0a44213e36e2b")
},
{
"_id" : ObjectId("5478456ee4b0a44213e36e27")
}
],
"soaTrashWarning" : "Understanding what you are spending and saving is crucial to helping you achieve your goals. Without this in place, you may be spending more than you can afford. ",
"statuses" : [
{
"personId" : "54784388e4b0a44213e36d5d",
"status" : "selected",
"whenUpdated" : NumberLong(1425017616062)
},
{
"personId" : "54783da8e4b09cf5d82d4e11",
"status" : "selected",
"whenUpdated" : NumberLong(1425017616062)
}
],
"trashWarning" : "This option helps you get a picture of how much you're spending and how much you could save.\nAre you sure you don't want to take up this option now?\n\n",
"weight" : NumberInt(1)
},
更新
我对生成器函数做了一些更改,尽管我不确定这些更改是否真的起了作用;我还在调试器中逐行对比了 print 版本和 yield 版本。新代码如下。
def dic_recurse(data, fields, counter, source_field):
    """Second attempt from the question: nested values are still not yielded.

    Same walk as before with the branch order changed.  The recursive calls
    still only create generator objects that nothing iterates, so only
    matches in the outermost dict are produced.

    Args:
        data: Dict or list to walk; any other type yields nothing.
        fields: Container of key names to report (tested with ``in``).
        counter: Incremented for each value yielded by this call.
        source_field: Name prefix, extended with ``_<key>`` for each nested
            list or dict encountered.
    """
    # Runs only when the generator is first advanced, not when dic_recurse()
    # is called -- which is why it never prints for the discarded recursive
    # calls below.
    print 'Called'
    if isinstance(data, dict):
        for k, v in data.items():
            if isinstance(v, list):
                source_field += "_{0}".format(k)
                # NOTE: builds a list of generator objects that are never
                # iterated, so their bodies never run.
                [dic_recurse(l, fields, counter, source_field) for l in v]
            elif isinstance(v, dict):
                source_field += "_{0}".format(k)
                # NOTE: generator created and immediately discarded.
                dic_recurse(v, fields, counter, source_field)
            elif k in fields and isinstance(v, list) is False and isinstance(v, dict) is False:
                counter += 1
                # The formatted name always starts with "L"; [1:] removes it.
                yield "L{0}_{1}_{2}".format(counter, source_field, k.replace('_', ''))[1:], v
    elif isinstance(data, list):
        for l in data:
            # NOTE: generator created and immediately discarded.
            dic_recurse(l, fields, counter, '')
两个版本在调试时的主要区别似乎是在命中这段代码时。
elif isinstance(data, list):
    for l in data:
        # In a generator function this call only returns a generator object;
        # the function body does not start until that object is iterated.
        dic_recurse(l, fields, counter, '')
在调试 yield 版本时,执行会到达 dic_recurse(l, fields, counter, '') 这一行,但函数似乎并没有真正被调用,因为我放在函数开头的打印语句一条都没有执行;而如果换成 print 版本做同样的事情,代码运行到同一位置时会正常调用该函数,并把整个函数执行完。
我确定我可能误解了有关生成器和 yield 语句使用的一些基本知识。
我只是想把更新后的解决方案贴出来,以防对其他人有用。
我需要在函数中添加额外的 yield 语句,这样每一次递归调用生成器函数所产生的结果才能逐层传递给外层的调用者,至少我是这样理解的。如有不对,欢迎指正。
def dic_recurse(data, fields, counter, source_field):
    """Lazily flatten nested dicts/lists, yielding the selected scalar fields.

    Walks ``data`` depth-first.  For every dict key that is in ``fields``
    and whose value is neither a list nor a dict, yields
    ``(level, {"<key>_L<level>": value})``, where ``level`` is the counter
    value of the dict that holds the key.

    Args:
        data: Dict or list to walk.  Any other type produces no output.
        fields: Container of key names to report (tested with ``in``).
        counter: Level of the enclosing container.  Every dict adds 1, and
            so does a list passed as ``data``; a list found as a dict value
            does not add a level of its own.
        source_field: Not used in the output; kept so that existing
            four-argument callers keep working.

    Yields:
        tuple: ``(level, {key_with_level_suffix: value})`` for each match.
    """
    if isinstance(data, dict):
        counter += 1
        for k, v in data.items():
            if isinstance(v, list):
                # Calling a generator function only creates a generator;
                # its items must be iterated and re-yielded to reach the
                # caller.  (Explicit loops instead of ``yield from`` keep
                # the function usable from Python 2 code.)
                for field_data in v:
                    for list_field in dic_recurse(field_data, fields, counter, source_field):
                        yield list_field
            elif isinstance(v, dict):
                for dic_field in dic_recurse(v, fields, counter, source_field):
                    yield dic_field
            elif k in fields:
                # Lists and dicts were handled above, so v is a plain value.
                yield counter, {"{0}_L{1}".format(k, counter): v}
    elif isinstance(data, list):
        counter += 1
        for list_item in data:
            for li2 in dic_recurse(list_item, fields, counter, ''):
                yield li2
我正在尝试编写一个递归生成器函数,用来展平由混合类型、列表和字典组成的嵌套 JSON 对象。我这样做部分是为了自己学习,所以刻意没有照搬网上的示例,以便更好地理解其中的原理;但我想我是卡在了 yield 语句在函数中相对于循环应该放在什么位置这个问题上。
传递给生成器函数的数据源是迭代 mongo 集合的外循环的输出。
当我在与 Yield 语句相同的位置使用 print 语句时,我得到了我期望的结果,但是当我将其切换为 yield 语句时,生成器似乎每次外循环迭代只产生一个项目。
希望有人能告诉我哪里出错了。
# Key names whose scalar values are passed to dic_recurse as `fields`.
columns = [
    '_id',
    'name',
    'personId',
    'status',
    'explorerProgress',
    'isSelectedForReview',
]
# NOTE(review): MongoClient is not imported in the visible code
# (presumably pymongo's MongoClient -- confirm).
db = MongoClient().abcDatabase
# Collection whose items are iterated and handed to dic_recurse below.
coll = db.abcCollection
def dic_recurse(data, fields, counter, source_field):
    """First attempt from the question: only top-level matches are yielded.

    Meant to walk nested dicts/lists and yield a ``(name, value)`` tuple for
    each non-list, non-dict value whose key is in ``fields``.  As written,
    the recursive calls only create generator objects that are never
    iterated, so nothing from nested containers is produced.

    Args:
        data: Dict or list to walk; any other type yields nothing.
        fields: Container of key names to report (tested with ``in``).
        counter: Incremented once per call; not used in the yielded values.
        source_field: Name prefix, extended with ``_<key>`` for each nested
            list or dict encountered.
    """
    counter += 1
    if isinstance(data, dict):
        for k, v in data.items():
            if k in fields and isinstance(v, list) is False and isinstance(v, dict) is False:
                # The print variant the question compares against:
                # print "{0}{1}".format(source_field, k)[1:], v
                # [1:] drops the first character of the combined name
                # (presumably the prefix's leading underscore; with an empty
                # prefix it drops the key's own first character instead).
                yield "{0}{1}".format(source_field, k)[1:], v
            elif isinstance(v, list):
                # The prefix is never reset, so it keeps growing across
                # sibling keys of the same dict.
                source_field += "_{0}".format(k)
                # NOTE: calling a generator function only builds a generator
                # object.  These objects are collected into a throwaway list
                # and never iterated, so their bodies never run.
                [dic_recurse(l, fields, counter, source_field) for l in data.get(k)]
            elif isinstance(v, dict):
                source_field += "_{0}".format(k)
                # NOTE: same problem -- the generator is created and dropped.
                dic_recurse(v, fields, counter, source_field)
    elif isinstance(data, list):
        # NOTE: same problem -- generators are created but never consumed.
        [dic_recurse(l, fields, counter, '') for l in data]
# Walk every item returned by coll.find() and print each tuple that
# dic_recurse yields for it (Python 2 print statement).
for item in coll.find():
    for d in dic_recurse(item, columns, 0, ''):
        print d
下面是它所迭代的数据示例,不过实际数据的嵌套层级比这里显示的还要更深。
{
"_id" : ObjectId("5478464ee4b0a44213e36eb0"),
"consultationId" : "54784388e4b0a44213e36d5f",
"modules" : [
{
"_id" : "FF",
"name" : "Foundations",
"strategyHeaders" : [
{
"_id" : "FF_Money",
"description" : "Let's see where you're spending your money.",
"name" : "Managing money day to day",
"statuses" : [
{
"pid" : "54784388e4b0a44213e36d5d",
"status" : "selected",
"whenUpdated" : NumberLong(1425017616062)
},
{
"pid" : "54783da8e4b09cf5d82d4e11",
"status" : "selected",
"whenUpdated" : NumberLong(1425017616062)
}
],
"strategies" : [
{
"_id" : "FF_Money_CF",
"description" : "This option helps you get a picture of how much you're spending",
"name" : "Your spending and savings.",
"relatedGoals" : [
{
"_id" : ObjectId("54784581e4b0a44213e36e2f")
},
{
"_id" : ObjectId("5478458ee4b0a44213e36e33")
},
{
"_id" : ObjectId("547845a5e4b0a44213e36e37")
},
{
"_id" : ObjectId("54784577e4b0a44213e36e2b")
},
{
"_id" : ObjectId("5478456ee4b0a44213e36e27")
}
],
"soaTrashWarning" : "Understanding what you are spending and saving is crucial to helping you achieve your goals. Without this in place, you may be spending more than you can afford. ",
"statuses" : [
{
"personId" : "54784388e4b0a44213e36d5d",
"status" : "selected",
"whenUpdated" : NumberLong(1425017616062)
},
{
"personId" : "54783da8e4b09cf5d82d4e11",
"status" : "selected",
"whenUpdated" : NumberLong(1425017616062)
}
],
"trashWarning" : "This option helps you get a picture of how much you're spending and how much you could save.\nAre you sure you don't want to take up this option now?\n\n",
"weight" : NumberInt(1)
},
更新:我对生成器函数做了一些更改,尽管我不确定这些更改是否真的起了作用;我还在调试器中逐行对比了 print 版本和 yield 版本。新代码如下。
def dic_recurse(data, fields, counter, source_field):
    """Second attempt from the question: nested values are still not yielded.

    Same walk as before with the branch order changed.  The recursive calls
    still only create generator objects that nothing iterates, so only
    matches in the outermost dict are produced.

    Args:
        data: Dict or list to walk; any other type yields nothing.
        fields: Container of key names to report (tested with ``in``).
        counter: Incremented for each value yielded by this call.
        source_field: Name prefix, extended with ``_<key>`` for each nested
            list or dict encountered.
    """
    # Runs only when the generator is first advanced, not when dic_recurse()
    # is called -- which is why it never prints for the discarded recursive
    # calls below.
    print 'Called'
    if isinstance(data, dict):
        for k, v in data.items():
            if isinstance(v, list):
                source_field += "_{0}".format(k)
                # NOTE: builds a list of generator objects that are never
                # iterated, so their bodies never run.
                [dic_recurse(l, fields, counter, source_field) for l in v]
            elif isinstance(v, dict):
                source_field += "_{0}".format(k)
                # NOTE: generator created and immediately discarded.
                dic_recurse(v, fields, counter, source_field)
            elif k in fields and isinstance(v, list) is False and isinstance(v, dict) is False:
                counter += 1
                # The formatted name always starts with "L"; [1:] removes it.
                yield "L{0}_{1}_{2}".format(counter, source_field, k.replace('_', ''))[1:], v
    elif isinstance(data, list):
        for l in data:
            # NOTE: generator created and immediately discarded.
            dic_recurse(l, fields, counter, '')
两个版本在调试时的主要区别似乎是在命中这段代码时。
elif isinstance(data, list):
    for l in data:
        # In a generator function this call only returns a generator object;
        # the function body does not start until that object is iterated.
        dic_recurse(l, fields, counter, '')
在调试 yield 版本时,执行会到达 dic_recurse(l, fields, counter, '') 这一行,但函数似乎并没有真正被调用,因为我放在函数开头的打印语句一条都没有执行;而如果换成 print 版本做同样的事情,代码运行到同一位置时会正常调用该函数,并把整个函数执行完。
我确定我可能误解了有关生成器和 yield 语句使用的一些基本知识。
我只是想把更新后的解决方案贴出来,以防对其他人有用。
我需要在函数中添加额外的 yield 语句,这样每一次递归调用生成器函数所产生的结果才能逐层传递给外层的调用者,至少我是这样理解的。如有不对,欢迎指正。
def dic_recurse(data, fields, counter, source_field):
    """Lazily flatten nested dicts/lists, yielding the selected scalar fields.

    Walks ``data`` depth-first.  For every dict key that is in ``fields``
    and whose value is neither a list nor a dict, yields
    ``(level, {"<key>_L<level>": value})``, where ``level`` is the counter
    value of the dict that holds the key.

    Args:
        data: Dict or list to walk.  Any other type produces no output.
        fields: Container of key names to report (tested with ``in``).
        counter: Level of the enclosing container.  Every dict adds 1, and
            so does a list passed as ``data``; a list found as a dict value
            does not add a level of its own.
        source_field: Not used in the output; kept so that existing
            four-argument callers keep working.

    Yields:
        tuple: ``(level, {key_with_level_suffix: value})`` for each match.
    """
    if isinstance(data, dict):
        counter += 1
        for k, v in data.items():
            if isinstance(v, list):
                # Calling a generator function only creates a generator;
                # its items must be iterated and re-yielded to reach the
                # caller.  (Explicit loops instead of ``yield from`` keep
                # the function usable from Python 2 code.)
                for field_data in v:
                    for list_field in dic_recurse(field_data, fields, counter, source_field):
                        yield list_field
            elif isinstance(v, dict):
                for dic_field in dic_recurse(v, fields, counter, source_field):
                    yield dic_field
            elif k in fields:
                # Lists and dicts were handled above, so v is a plain value.
                yield counter, {"{0}_L{1}".format(k, counter): v}
    elif isinstance(data, list):
        counter += 1
        for list_item in data:
            for li2 in dic_recurse(list_item, fields, counter, ''):
                yield li2