在 Python 中将多层嵌套 JSON 转换为扁平 JSON

Multi Nested Json to Flat Json in Python

我正在尝试把多层嵌套的 JSON 展平。我试过 pandas 的内置函数,也试过 Stack Overflow 上的一些现成方案,但它们要么只能规范化到第一层,要么展平后产生了不需要的结果;对这些方案做修改之后,也仍然得不到所需的输出。欢迎提供任何见解。

样本JSON:


{
  "Records": [
    {
      "Name": "Student1",
      "Result": "Pass",
      "Marks": [
        {
          "Sub1": "50",
          "Sub2": "40",
          "YOP": [
            {
              "prim": "2010",
              "sch": "abc"
            },
            {
              "prim": "2010",
              "sch": "abc"
            }
          ]
        }
      ]
    },
    {
      "Name": "Stu2",
      "Result": "Pass",
      "Marks": [
        {
          "Sub1": "33",
          "Sub2": "33",
          "YOP": [
            {
              "prim": "2010",
              "sch": "def"
            },
            {
              "high": "2010",
              "sch": "abc"
            }
          ]
        }
      ]
    }
  ]
}

当前代码

from itertools import chain, starmap
import json
from itertools import islice
from pandas.io.json import json_normalize



# The ABC aliases were removed from ``collections`` in Python 3.10; they live
# in ``collections.abc``.
from collections.abc import MutableMapping

# Debug switch: when truthy, flatten() prints a breadcrumb for every key it
# visits and every key/value pair it emits.
crumbs = True


def flatten(dictionary, parent_key=False, separator='.'):
    """
    Turn a nested dictionary into a flattened dictionary.

    Keys of nested mappings are joined to their parent's key with
    ``separator``; list elements contribute their index as a key segment.
    An empty mapping or an empty list is kept as a single entry whose value
    is ``None``. Values that are neither a mapping nor a ``list`` are copied
    through unchanged.

    :param dictionary: The dictionary to flatten
    :param parent_key: The string to prepend to dictionary's keys; any falsy
        value (such as the default ``False``) means "no prefix"
    :param separator: The string used to separate flattened keys
    :return: A flattened dictionary
    """
    flat = {}
    for key, value in dictionary.items():
        if crumbs:
            print('checking:', key)
        # str(key) keeps non-string keys of nested mappings from breaking the
        # concatenation; un-prefixed keys are passed through untouched.
        if parent_key:
            new_key = str(parent_key) + separator + str(key)
        else:
            new_key = key

        if isinstance(value, MutableMapping):
            if crumbs:
                print(new_key, ': dict found')
            children = value
        elif isinstance(value, list):
            if crumbs:
                print(new_key, ': list found')
            # View the list as a mapping from stringified index to element so
            # both container kinds share the recursion below.
            children = {str(index): item for index, item in enumerate(value)}
        else:
            if crumbs:
                print('Adding key-value pair:', new_key, value)
            flat[new_key] = value
            continue

        if children:
            # Always forward the caller's separator; otherwise everything
            # nested under a list would fall back to the default '.'.
            flat.update(flatten(children, new_key, separator))
        else:
            if crumbs:
                print('Adding key-value pair:', new_key, None)
            flat[new_key] = None
    return flat

    
def main(path="aaa.json"):
    """
    Load a JSON document from disk, flatten it and print the result.

    The top-level JSON value is handed straight to ``flatten``, which calls
    ``.items()`` on it, so the file is expected to contain a JSON object.

    :param path: Path of the JSON file to read (defaults to ``aaa.json``)
    """
    # Be explicit about the encoding so reading does not depend on the
    # platform's locale default.
    with open(path, "r", encoding="utf-8") as f:
        data = json.load(f)
    print(type(data))
    flat = flatten(data)
    print(flat)


if __name__ == '__main__':
    main()

输出

{
  "Records.0.Name": "Student1",
  "Records.0.Result": "Pass",
  "Records.0.Marks.0.Sub1": "50",
  "Records.0.Marks.0.Sub2": "40",
  "Records.0.Marks.0.YOP.0.prim": "2010",
  "Records.0.Marks.0.YOP.0.sch": "abc",
  "Records.0.Marks.0.YOP.1.high": "2012",
  "Records.0.Marks.0.YOP.1.sch": "abc",
  "Records.1.Name": "Stu2",
  "Records.1.Result": "Pass",
  "Records.1.Marks.0.Sub1": "33",
  "Records.1.Marks.0.Sub2": "33",
  "Records.1.Marks.0.YOP.0.prim": "210",
  "Records.1.Marks.0.YOP.0.sch": "def",
  "Records.1.Marks.0.YOP.1.high": "999",
  "Records.1.Marks.0.YOP.1.sch": "abc"
}

基于这段代码,有没有办法去掉键名中的数字索引,并把结果按记录拆分成各自独立的字典?

期望的最终结果

{
  "Records.Name": "Student1",
  "Records.Result": "Pass",
  "Records.Marks.Sub1": "50",
  "Records.Marks.Sub2": "40",
  "Records.Marks.YOP.prim": "2010",
  "Records.Marks.YOP.sch": "abc",
  "Records.Marks.YOP.high": "2012",
  "Records.Marks.YOP.sch": "abc",
},

{
  "Records.Name": "Stu2",
  "Records.Result": "Pass",
  "Records.Marks.Sub1": "33",
  "Records.Marks.Sub2": "33",
  "Records.Marks.YOP.prim": "210",
  "Records.Marks.YOP.sch": "def",
  "Records.Marks.YOP.high": "999",
  "Records.Marks.YOP.sch": "abc"
}

from flatten_json import flatten

# Flatten the first element of the `json` list shown under the input heading
# below; the output listed after it shows the resulting keys joined with "_".
records = flatten(json[0])

输入:

# Sample input: a one-element list whose only item is the "Records" document.
# NOTE(review): the variable name `json` shadows the standard-library `json`
# module if that module is imported in the same scope.
json = [{
  "Records": [
    {
      "Name": "Student1",
      "Result": "Pass",
      "Marks": [
        {
          "Sub1": "50",
          "Sub2": "40",
          "YOP": [
            {
              "prim": "2010",
              "sch": "abc"
            },
            {
              "prim": "2010",
              "sch": "abc"
            }
          ]
        }
      ]
    },
    {
      "Name": "Stu2",
      "Result": "Pass",
      "Marks": [
        {
          "Sub1": "33",
          "Sub2": "33",
          "YOP": [
            {
              "prim": "2010",
              "sch": "def"
            },
            {
              "high": "2010",
              "sch": "abc"
            }
          ]
        }
      ]
    }
  ]
}]

输出:

{'Records_0_Name': 'Student1',
 'Records_0_Result': 'Pass',
 'Records_0_Marks_0_Sub1': '50',
 'Records_0_Marks_0_Sub2': '40',
 'Records_0_Marks_0_YOP_0_prim': '2010',
 'Records_0_Marks_0_YOP_0_sch': 'abc',
 'Records_0_Marks_0_YOP_1_prim': '2010',
 'Records_0_Marks_0_YOP_1_sch': 'abc',
 'Records_1_Name': 'Stu2',
 'Records_1_Result': 'Pass',
 'Records_1_Marks_0_Sub1': '33',
 'Records_1_Marks_0_Sub2': '33',
 'Records_1_Marks_0_YOP_0_prim': '2010',
 'Records_1_Marks_0_YOP_0_sch': 'def',
 'Records_1_Marks_0_YOP_1_high': '2010',
 'Records_1_Marks_0_YOP_1_sch': 'abc'}

更新:

您要查找的结果:

# Flatten each entry of "Records" on its own so every record yields a separate
# flat dict; the second argument is the key separator passed to flatten().
records = [flatten(record, ".") for record in json[0]['Records']]

输出:

[{'Name': 'Student1',
  'Result': 'Pass',
  'Marks_0_Sub1': '50',
  'Marks_0_Sub2': '40',
  'Marks_0_YOP_0_prim': '2010',
  'Marks_0_YOP_0_sch': 'abc',
  'Marks_0_YOP_1_prim': '2010',
  'Marks_0_YOP_1_sch': 'abc'},
 {'Name': 'Stu2',
  'Result': 'Pass',
  'Marks_0_Sub1': '33',
  'Marks_0_Sub2': '33',
  'Marks_0_YOP_0_prim': '2010',
  'Marks_0_YOP_0_sch': 'def',
  'Marks_0_YOP_1_high': '2010',
  'Marks_0_YOP_1_sch': 'abc'}]