如何使用 Intake 打开 json 文件?
How to open a json file with Intake?
我正在尝试使用 intake 为 JSON 文件创建数据目录。#197 提到:“本质上,如果您的每个文件都是单个 JSON 块,且其计算结果为对象列表,那么您需要提供 reader 函数 json.loads。”
我创建了一个test.json
{
"test": "test"
}
并(复制 Data Engineering with Intake)尝试了
import json
import intake
source = intake.open_textfiles("test.json", decoder=json.loads)
print(source.yaml())
将输出保存到 source.yaml
sources:
textfiles:
args:
decoder: !!python/name:json.loads ''
urlpath: test.json
description: ''
driver: intake.source.textfiles.TextFilesSource
metadata: {}
并尝试打开它
cat = intake.open_catalog('source.yaml')
产生了:
---------------------------------------------------------------------------
ConstructorError Traceback (most recent call last)
<ipython-input-55-9b8e3a51ebc2> in <module>()
----> 1 cat = intake.open_catalog('source.yaml')
/home/wsl-rowanm/miniconda3/envs/ireland-smartmeterdata/lib/python3.7/site-packages/intake/__init__.py in open_catalog(uri, **kwargs)
160 raise ValueError('Unknown catalog driver (%s), supply one of: %s'
161 % (driver, list(sorted(registry))))
--> 162 return registry[driver](uri, **kwargs)
163
164
/home/wsl-rowanm/miniconda3/envs/ireland-smartmeterdata/lib/python3.7/site-packages/intake/catalog/local.py in __init__(self, path, autoreload, **kwargs)
550 self.autoreload = autoreload # set this to False if don't want reloads
551 self.filesystem = kwargs.pop('fs', None)
--> 552 super(YAMLFileCatalog, self).__init__(**kwargs)
553
554 def _load(self, reload=False):
/home/wsl-rowanm/miniconda3/envs/ireland-smartmeterdata/lib/python3.7/site-packages/intake/catalog/base.py in __init__(self, name, description, metadata, auth, ttl, getenv, getshell, persist_mode, storage_options, *args)
111 self.updated = time.time()
112 self._entries = self._make_entries_container()
--> 113 self.force_reload()
114
115 @classmethod
/home/wsl-rowanm/miniconda3/envs/ireland-smartmeterdata/lib/python3.7/site-packages/intake/catalog/base.py in force_reload(self)
168 def force_reload(self):
169 """Imperative reload data now"""
--> 170 self._load()
171 self.updated = time.time()
172
/home/wsl-rowanm/miniconda3/envs/ireland-smartmeterdata/lib/python3.7/site-packages/intake/catalog/local.py in _load(self, reload)
580 logger.warning("Use of '!template' deprecated - fixing")
581 text = text.replace('!template ', '')
--> 582 self.parse(text)
583
584 def add(self, source, name=None, path=None, storage_options=None):
/home/wsl-rowanm/miniconda3/envs/ireland-smartmeterdata/lib/python3.7/site-packages/intake/catalog/local.py in parse(self, text)
649 """
650 self.text = text
--> 651 data = yaml_load(self.text)
652
653 if data is None:
/home/wsl-rowanm/miniconda3/envs/ireland-smartmeterdata/lib/python3.7/site-packages/intake/utils.py in yaml_load(stream)
73 """Parse YAML in a context where duplicate keys raise exception"""
74 with no_duplicate_yaml():
---> 75 return yaml.safe_load(stream)
76
77
/home/wsl-rowanm/miniconda3/envs/ireland-smartmeterdata/lib/python3.7/site-packages/yaml/__init__.py in safe_load(stream)
160 to be safe for untrusted input.
161 """
--> 162 return load(stream, SafeLoader)
163
164 def safe_load_all(stream):
/home/wsl-rowanm/miniconda3/envs/ireland-smartmeterdata/lib/python3.7/site-packages/yaml/__init__.py in load(stream, Loader)
112 loader = Loader(stream)
113 try:
--> 114 return loader.get_single_data()
115 finally:
116 loader.dispose()
/home/wsl-rowanm/miniconda3/envs/ireland-smartmeterdata/lib/python3.7/site-packages/yaml/constructor.py in get_single_data(self)
49 node = self.get_single_node()
50 if node is not None:
---> 51 return self.construct_document(node)
52 return None
53
/home/wsl-rowanm/miniconda3/envs/ireland-smartmeterdata/lib/python3.7/site-packages/yaml/constructor.py in construct_document(self, node)
53
54 def construct_document(self, node):
---> 55 data = self.construct_object(node)
56 while self.state_generators:
57 state_generators = self.state_generators
/home/wsl-rowanm/miniconda3/envs/ireland-smartmeterdata/lib/python3.7/site-packages/yaml/constructor.py in construct_object(self, node, deep)
98 constructor = self.__class__.construct_mapping
99 if tag_suffix is None:
--> 100 data = constructor(self, node)
101 else:
102 data = constructor(self, tag_suffix, node)
/home/wsl-rowanm/miniconda3/envs/ireland-smartmeterdata/lib/python3.7/site-packages/intake/utils.py in no_duplicates_constructor(loader, node, deep)
30 for key_node, value_node in node.value:
31 key = loader.construct_object(key_node, deep=deep)
---> 32 value = loader.construct_object(value_node, deep=deep)
33 if key in mapping:
34 from intake.catalog.exceptions import DuplicateKeyError
/home/wsl-rowanm/miniconda3/envs/ireland-smartmeterdata/lib/python3.7/site-packages/yaml/constructor.py in construct_object(self, node, deep)
98 constructor = self.__class__.construct_mapping
99 if tag_suffix is None:
--> 100 data = constructor(self, node)
101 else:
102 data = constructor(self, tag_suffix, node)
/home/wsl-rowanm/miniconda3/envs/ireland-smartmeterdata/lib/python3.7/site-packages/intake/utils.py in no_duplicates_constructor(loader, node, deep)
30 for key_node, value_node in node.value:
31 key = loader.construct_object(key_node, deep=deep)
---> 32 value = loader.construct_object(value_node, deep=deep)
33 if key in mapping:
34 from intake.catalog.exceptions import DuplicateKeyError
/home/wsl-rowanm/miniconda3/envs/ireland-smartmeterdata/lib/python3.7/site-packages/yaml/constructor.py in construct_object(self, node, deep)
98 constructor = self.__class__.construct_mapping
99 if tag_suffix is None:
--> 100 data = constructor(self, node)
101 else:
102 data = constructor(self, tag_suffix, node)
/home/wsl-rowanm/miniconda3/envs/ireland-smartmeterdata/lib/python3.7/site-packages/intake/utils.py in no_duplicates_constructor(loader, node, deep)
30 for key_node, value_node in node.value:
31 key = loader.construct_object(key_node, deep=deep)
---> 32 value = loader.construct_object(value_node, deep=deep)
33 if key in mapping:
34 from intake.catalog.exceptions import DuplicateKeyError
/home/wsl-rowanm/miniconda3/envs/ireland-smartmeterdata/lib/python3.7/site-packages/yaml/constructor.py in construct_object(self, node, deep)
98 constructor = self.__class__.construct_mapping
99 if tag_suffix is None:
--> 100 data = constructor(self, node)
101 else:
102 data = constructor(self, tag_suffix, node)
/home/wsl-rowanm/miniconda3/envs/ireland-smartmeterdata/lib/python3.7/site-packages/intake/utils.py in no_duplicates_constructor(loader, node, deep)
30 for key_node, value_node in node.value:
31 key = loader.construct_object(key_node, deep=deep)
---> 32 value = loader.construct_object(value_node, deep=deep)
33 if key in mapping:
34 from intake.catalog.exceptions import DuplicateKeyError
/home/wsl-rowanm/miniconda3/envs/ireland-smartmeterdata/lib/python3.7/site-packages/yaml/constructor.py in construct_object(self, node, deep)
98 constructor = self.__class__.construct_mapping
99 if tag_suffix is None:
--> 100 data = constructor(self, node)
101 else:
102 data = constructor(self, tag_suffix, node)
/home/wsl-rowanm/miniconda3/envs/ireland-smartmeterdata/lib/python3.7/site-packages/yaml/constructor.py in construct_undefined(self, node)
427 raise ConstructorError(None, None,
428 "could not determine a constructor for the tag %r" % node.tag,
--> 429 node.start_mark)
430
431 SafeConstructor.add_constructor(
ConstructorError: could not determine a constructor for the tag 'tag:yaml.org,2002:python/name:json.loads'
in "<unicode string>", line 4, column 16:
decoder: !!python/name:json.loads ''
我不确定如何解决这个问题,非常感谢任何提示!
不幸的是,该示例已过时。原因是,我们决定应该以安全模式加载 YAML 文件,这样文件中对 Python 对象的引用就不会被执行。这意味着您始终可以先加载未知目录并对其进行检查,然后再决定是否要访问其中的内容并可能执行代码。
就目前情况而言,您的解决方法是将数据存入内存,然后手动解码,例如,
cat = intake.open_catalog('source.yaml')
cat.textfiles.to_dask.map(json.loads). # via dask
[json.loads(obj) for obj in cat.textfiles.read()] # straight python
您可以向 Intake 提交一个 issue,建议在 textfiles 数据源中允许使用函数的完全限定名称(在本例中为 “json.loads”)而不是函数对象来定义加载后(post-load)的处理步骤。
此外,我们计划在 Intake 中引入“派生数据源”,它可以更通用地将函数和参数应用于任何其他数据源,也适用于您的情况 - 但目前尚未实现。
我正在尝试使用 intake 为 JSON 文件创建数据目录。#197 提到:“本质上,如果您的每个文件都是单个 JSON 块,且其计算结果为对象列表,那么您需要提供 reader 函数 json.loads。”
我创建了一个test.json
{
"test": "test"
}
并(复制 Data Engineering with Intake)尝试了
import json
import intake
source = intake.open_textfiles("test.json", decoder=json.loads)
print(source.yaml())
将输出保存到 source.yaml
sources:
textfiles:
args:
decoder: !!python/name:json.loads ''
urlpath: test.json
description: ''
driver: intake.source.textfiles.TextFilesSource
metadata: {}
并尝试打开它
cat = intake.open_catalog('source.yaml')
产生了:
---------------------------------------------------------------------------
ConstructorError Traceback (most recent call last)
<ipython-input-55-9b8e3a51ebc2> in <module>()
----> 1 cat = intake.open_catalog('source.yaml')
/home/wsl-rowanm/miniconda3/envs/ireland-smartmeterdata/lib/python3.7/site-packages/intake/__init__.py in open_catalog(uri, **kwargs)
160 raise ValueError('Unknown catalog driver (%s), supply one of: %s'
161 % (driver, list(sorted(registry))))
--> 162 return registry[driver](uri, **kwargs)
163
164
/home/wsl-rowanm/miniconda3/envs/ireland-smartmeterdata/lib/python3.7/site-packages/intake/catalog/local.py in __init__(self, path, autoreload, **kwargs)
550 self.autoreload = autoreload # set this to False if don't want reloads
551 self.filesystem = kwargs.pop('fs', None)
--> 552 super(YAMLFileCatalog, self).__init__(**kwargs)
553
554 def _load(self, reload=False):
/home/wsl-rowanm/miniconda3/envs/ireland-smartmeterdata/lib/python3.7/site-packages/intake/catalog/base.py in __init__(self, name, description, metadata, auth, ttl, getenv, getshell, persist_mode, storage_options, *args)
111 self.updated = time.time()
112 self._entries = self._make_entries_container()
--> 113 self.force_reload()
114
115 @classmethod
/home/wsl-rowanm/miniconda3/envs/ireland-smartmeterdata/lib/python3.7/site-packages/intake/catalog/base.py in force_reload(self)
168 def force_reload(self):
169 """Imperative reload data now"""
--> 170 self._load()
171 self.updated = time.time()
172
/home/wsl-rowanm/miniconda3/envs/ireland-smartmeterdata/lib/python3.7/site-packages/intake/catalog/local.py in _load(self, reload)
580 logger.warning("Use of '!template' deprecated - fixing")
581 text = text.replace('!template ', '')
--> 582 self.parse(text)
583
584 def add(self, source, name=None, path=None, storage_options=None):
/home/wsl-rowanm/miniconda3/envs/ireland-smartmeterdata/lib/python3.7/site-packages/intake/catalog/local.py in parse(self, text)
649 """
650 self.text = text
--> 651 data = yaml_load(self.text)
652
653 if data is None:
/home/wsl-rowanm/miniconda3/envs/ireland-smartmeterdata/lib/python3.7/site-packages/intake/utils.py in yaml_load(stream)
73 """Parse YAML in a context where duplicate keys raise exception"""
74 with no_duplicate_yaml():
---> 75 return yaml.safe_load(stream)
76
77
/home/wsl-rowanm/miniconda3/envs/ireland-smartmeterdata/lib/python3.7/site-packages/yaml/__init__.py in safe_load(stream)
160 to be safe for untrusted input.
161 """
--> 162 return load(stream, SafeLoader)
163
164 def safe_load_all(stream):
/home/wsl-rowanm/miniconda3/envs/ireland-smartmeterdata/lib/python3.7/site-packages/yaml/__init__.py in load(stream, Loader)
112 loader = Loader(stream)
113 try:
--> 114 return loader.get_single_data()
115 finally:
116 loader.dispose()
/home/wsl-rowanm/miniconda3/envs/ireland-smartmeterdata/lib/python3.7/site-packages/yaml/constructor.py in get_single_data(self)
49 node = self.get_single_node()
50 if node is not None:
---> 51 return self.construct_document(node)
52 return None
53
/home/wsl-rowanm/miniconda3/envs/ireland-smartmeterdata/lib/python3.7/site-packages/yaml/constructor.py in construct_document(self, node)
53
54 def construct_document(self, node):
---> 55 data = self.construct_object(node)
56 while self.state_generators:
57 state_generators = self.state_generators
/home/wsl-rowanm/miniconda3/envs/ireland-smartmeterdata/lib/python3.7/site-packages/yaml/constructor.py in construct_object(self, node, deep)
98 constructor = self.__class__.construct_mapping
99 if tag_suffix is None:
--> 100 data = constructor(self, node)
101 else:
102 data = constructor(self, tag_suffix, node)
/home/wsl-rowanm/miniconda3/envs/ireland-smartmeterdata/lib/python3.7/site-packages/intake/utils.py in no_duplicates_constructor(loader, node, deep)
30 for key_node, value_node in node.value:
31 key = loader.construct_object(key_node, deep=deep)
---> 32 value = loader.construct_object(value_node, deep=deep)
33 if key in mapping:
34 from intake.catalog.exceptions import DuplicateKeyError
/home/wsl-rowanm/miniconda3/envs/ireland-smartmeterdata/lib/python3.7/site-packages/yaml/constructor.py in construct_object(self, node, deep)
98 constructor = self.__class__.construct_mapping
99 if tag_suffix is None:
--> 100 data = constructor(self, node)
101 else:
102 data = constructor(self, tag_suffix, node)
/home/wsl-rowanm/miniconda3/envs/ireland-smartmeterdata/lib/python3.7/site-packages/intake/utils.py in no_duplicates_constructor(loader, node, deep)
30 for key_node, value_node in node.value:
31 key = loader.construct_object(key_node, deep=deep)
---> 32 value = loader.construct_object(value_node, deep=deep)
33 if key in mapping:
34 from intake.catalog.exceptions import DuplicateKeyError
/home/wsl-rowanm/miniconda3/envs/ireland-smartmeterdata/lib/python3.7/site-packages/yaml/constructor.py in construct_object(self, node, deep)
98 constructor = self.__class__.construct_mapping
99 if tag_suffix is None:
--> 100 data = constructor(self, node)
101 else:
102 data = constructor(self, tag_suffix, node)
/home/wsl-rowanm/miniconda3/envs/ireland-smartmeterdata/lib/python3.7/site-packages/intake/utils.py in no_duplicates_constructor(loader, node, deep)
30 for key_node, value_node in node.value:
31 key = loader.construct_object(key_node, deep=deep)
---> 32 value = loader.construct_object(value_node, deep=deep)
33 if key in mapping:
34 from intake.catalog.exceptions import DuplicateKeyError
/home/wsl-rowanm/miniconda3/envs/ireland-smartmeterdata/lib/python3.7/site-packages/yaml/constructor.py in construct_object(self, node, deep)
98 constructor = self.__class__.construct_mapping
99 if tag_suffix is None:
--> 100 data = constructor(self, node)
101 else:
102 data = constructor(self, tag_suffix, node)
/home/wsl-rowanm/miniconda3/envs/ireland-smartmeterdata/lib/python3.7/site-packages/intake/utils.py in no_duplicates_constructor(loader, node, deep)
30 for key_node, value_node in node.value:
31 key = loader.construct_object(key_node, deep=deep)
---> 32 value = loader.construct_object(value_node, deep=deep)
33 if key in mapping:
34 from intake.catalog.exceptions import DuplicateKeyError
/home/wsl-rowanm/miniconda3/envs/ireland-smartmeterdata/lib/python3.7/site-packages/yaml/constructor.py in construct_object(self, node, deep)
98 constructor = self.__class__.construct_mapping
99 if tag_suffix is None:
--> 100 data = constructor(self, node)
101 else:
102 data = constructor(self, tag_suffix, node)
/home/wsl-rowanm/miniconda3/envs/ireland-smartmeterdata/lib/python3.7/site-packages/yaml/constructor.py in construct_undefined(self, node)
427 raise ConstructorError(None, None,
428 "could not determine a constructor for the tag %r" % node.tag,
--> 429 node.start_mark)
430
431 SafeConstructor.add_constructor(
ConstructorError: could not determine a constructor for the tag 'tag:yaml.org,2002:python/name:json.loads'
in "<unicode string>", line 4, column 16:
decoder: !!python/name:json.loads ''
我不确定如何解决这个问题,非常感谢任何提示!
不幸的是,该示例已过时。原因是,我们决定应该以安全模式加载 YAML 文件,这样文件中对 Python 对象的引用就不会被执行。这意味着您始终可以先加载未知目录并对其进行检查,然后再决定是否要访问其中的内容并可能执行代码。
就目前情况而言,您的解决方法是将数据存入内存,然后手动解码,例如,
cat = intake.open_catalog('source.yaml')
cat.textfiles.to_dask.map(json.loads). # via dask
[json.loads(obj) for obj in cat.textfiles.read()] # straight python
您可以向 Intake 提交一个 issue,建议在 textfiles 数据源中允许使用函数的完全限定名称(在本例中为 “json.loads”)而不是函数对象来定义加载后(post-load)的处理步骤。
此外,我们计划在 Intake 中引入“派生数据源”,它可以更通用地将函数和参数应用于任何其他数据源,也适用于您的情况 - 但目前尚未实现。