在 elasticsearch-py 中索引特殊 JSON 文件?
Index special JSON file in elasticsearch-py?
我正在按照这个答案操作,但是我的 JSON 文件仍然只被识别为一个文档。
Elasticsearch 为每个条目的每个键都创建了单独的字段,例如在 Kibana 中看到的“0.created_at”。
我的 JSON 文件看起来有点不同。这可能是个问题吗?
我对此很陌生...有人可以帮忙吗?
我的代码:
import requests, json, os, sys
from elasticsearch import Elasticsearch, helpers
# Client for the Elasticsearch node at localhost:9200.
es = Elasticsearch(hosts=[{"host": "localhost", "port": "9200"}])
# "." is the current working directory.
directory = "."
def load_json(directory):
    """Yield the parsed content of every ``*mini.json`` file in ``directory``.

    A generator is used so that only one file is held in memory at a time.

    Note that each file is yielded as ONE object. A file shaped like
    ``{"0": {...}, "1": {...}}`` therefore reaches the caller as a single
    dict, which is why it ends up indexed as a single document with fields
    such as ``0.created_at``.

    Args:
        directory: Path of the directory to scan (not recursive).

    Yields:
        The deserialized JSON content of each matching file.
    """
    for filename in os.listdir(directory):
        if filename.endswith('mini.json'):
            # os.listdir returns bare names; join them with the directory so
            # the file is found even when `directory` is not the working dir.
            path = os.path.join(directory, filename)
            with open(path, 'r', encoding='utf-8') as open_file:
                yield json.load(open_file)
# Bulk-index whatever load_json yields into 'my-index' / 'my-type'.
documents = load_json('.')
helpers.bulk(
    es,
    documents,
    index='my-index',
    doc_type='my-type',
)
# Query the node's root endpoint and print the raw response body.
res = requests.get('http://localhost:9200')
print(res.content)
我的 JSON 文件
{
"0": {
"created_at": "Sat May 09 23:57:13 +0000 2020",
"id_str": "1259271234912522240",
"text": "@VitalVegas Not sure if you\u2019ve seen this, but seems relevant\u2014",
"in_reply_to_status_id_str": "1259203897538654208",
"in_reply_to_user_id_str": "514487309",
"in_reply_to_screen_name": "VitalVegas",
"retweet_count": 0,
"favorite_count": 0,
"lang": "en",
"quoted_status_id_str": "1259178497320185856",
"hashtags": [],
"user_mentions": [
"514487309"
],
"user_id": "23156407",
"screen_name": "azzabazazz",
"followers_count": 321,
"friends_count": 846,
"favourites_count": 6292,
"statuses_count": 2895
},
"1": {
"created_at": "Sat May 09 23:57:14 +0000 2020",
"id_str": "1259271241430695937",
"text": "RT @GABI6here: #SaturdayThoughts WAKE-UP REMEMBER: Spread word on HouseBill to allow entering our house to test for #coronavirus >SO ban\ud83d\uded1 \ud83d\udc41\u2026",
"in_reply_to_status_id_str": null,
"in_reply_to_user_id_str": null,
"in_reply_to_screen_name": null,
"retweet_count": 30,
"favorite_count": 0,
"lang": "en",
"quoted_status_id_str": "1259242130062618624",
"hashtags": [
"SaturdayThoughts",
"coronavirus"
],
"user_mentions": [
"1229162888478150657"
],
"user_id": "756894930270904320",
"screen_name": "Lily4ever3",
"followers_count": 23862,
"friends_count": 23293,
"favourites_count": 12527,
"statuses_count": 36183
}
}
import requests, json, os, sys
from elasticsearch import Elasticsearch, helpers
# Client for the Elasticsearch node at localhost:9200.
es = Elasticsearch(hosts=[{"host": "localhost", "port": "9200"}])
# "." is the current working directory.
directory = "."
def load_json(directory):
    """Build one bulk action per record from the ``*mini.json`` files.

    Each matching file is expected to hold a single JSON object whose values
    are the individual records, e.g. ``{"0": {...}, "1": {...}}``. Every
    value becomes its own action so that it is indexed as its own document.

    Args:
        directory: Path of the directory to scan (not recursive).

    Returns:
        A list of action dicts with the keys ``_index``, ``_type``, ``_id``
        and ``_source``. ``_id`` is a running integer starting at 0.
    """
    data = []
    # Sort the names so ids are assigned in a reproducible order
    # (os.listdir makes no ordering promise).
    for filename in sorted(os.listdir(directory)):
        if not filename.endswith('mini.json'):
            continue
        # os.listdir returns bare names; join them with the directory so the
        # file is found even when `directory` is not the working dir.
        path = os.path.join(directory, filename)
        with open(path, 'r', encoding='utf-8') as open_file:
            json_data = json.load(open_file)
        # Continue numbering where the previous file stopped; restarting at
        # 0 would make records from different files share the same _id.
        for i, record in enumerate(json_data.values(), start=len(data)):
            data.append({
                "_index": "my-index",
                "_type": "my-type",
                "_id": i,
                "_source": record,
            })
    return data
# Send the actions returned by load_json to Elasticsearch in one bulk call.
actions = load_json('.')
helpers.bulk(es, actions)
# Query the node's root endpoint and print the raw response body.
res = requests.get('http://localhost:9200')
print(res.content)
我正在按照这个答案操作
我的 JSON 文件看起来有点不同。这可能是个问题吗? 我对此很陌生...有人可以帮忙吗?
我的代码:
import requests, json, os, sys
from elasticsearch import Elasticsearch, helpers
# Client for the Elasticsearch node at localhost:9200.
es = Elasticsearch(hosts=[{"host": "localhost", "port": "9200"}])
# "." is the current working directory.
directory = "."
def load_json(directory):
    """Yield the parsed content of every ``*mini.json`` file in ``directory``.

    A generator is used so that only one file is held in memory at a time.

    Note that each file is yielded as ONE object. A file shaped like
    ``{"0": {...}, "1": {...}}`` therefore reaches the caller as a single
    dict, which is why it ends up indexed as a single document with fields
    such as ``0.created_at``.

    Args:
        directory: Path of the directory to scan (not recursive).

    Yields:
        The deserialized JSON content of each matching file.
    """
    for filename in os.listdir(directory):
        if filename.endswith('mini.json'):
            # os.listdir returns bare names; join them with the directory so
            # the file is found even when `directory` is not the working dir.
            path = os.path.join(directory, filename)
            with open(path, 'r', encoding='utf-8') as open_file:
                yield json.load(open_file)
# Bulk-index whatever load_json yields into 'my-index' / 'my-type'.
documents = load_json('.')
helpers.bulk(
    es,
    documents,
    index='my-index',
    doc_type='my-type',
)
# Query the node's root endpoint and print the raw response body.
res = requests.get('http://localhost:9200')
print(res.content)
我的 JSON 文件
{
"0": {
"created_at": "Sat May 09 23:57:13 +0000 2020",
"id_str": "1259271234912522240",
"text": "@VitalVegas Not sure if you\u2019ve seen this, but seems relevant\u2014",
"in_reply_to_status_id_str": "1259203897538654208",
"in_reply_to_user_id_str": "514487309",
"in_reply_to_screen_name": "VitalVegas",
"retweet_count": 0,
"favorite_count": 0,
"lang": "en",
"quoted_status_id_str": "1259178497320185856",
"hashtags": [],
"user_mentions": [
"514487309"
],
"user_id": "23156407",
"screen_name": "azzabazazz",
"followers_count": 321,
"friends_count": 846,
"favourites_count": 6292,
"statuses_count": 2895
},
"1": {
"created_at": "Sat May 09 23:57:14 +0000 2020",
"id_str": "1259271241430695937",
"text": "RT @GABI6here: #SaturdayThoughts WAKE-UP REMEMBER: Spread word on HouseBill to allow entering our house to test for #coronavirus >SO ban\ud83d\uded1 \ud83d\udc41\u2026",
"in_reply_to_status_id_str": null,
"in_reply_to_user_id_str": null,
"in_reply_to_screen_name": null,
"retweet_count": 30,
"favorite_count": 0,
"lang": "en",
"quoted_status_id_str": "1259242130062618624",
"hashtags": [
"SaturdayThoughts",
"coronavirus"
],
"user_mentions": [
"1229162888478150657"
],
"user_id": "756894930270904320",
"screen_name": "Lily4ever3",
"followers_count": 23862,
"friends_count": 23293,
"favourites_count": 12527,
"statuses_count": 36183
}
}
import requests, json, os, sys
from elasticsearch import Elasticsearch, helpers
# Client for the Elasticsearch node at localhost:9200.
es = Elasticsearch(hosts=[{"host": "localhost", "port": "9200"}])
# "." is the current working directory.
directory = "."
def load_json(directory):
    """Build one bulk action per record from the ``*mini.json`` files.

    Each matching file is expected to hold a single JSON object whose values
    are the individual records, e.g. ``{"0": {...}, "1": {...}}``. Every
    value becomes its own action so that it is indexed as its own document.

    Args:
        directory: Path of the directory to scan (not recursive).

    Returns:
        A list of action dicts with the keys ``_index``, ``_type``, ``_id``
        and ``_source``. ``_id`` is a running integer starting at 0.
    """
    data = []
    # Sort the names so ids are assigned in a reproducible order
    # (os.listdir makes no ordering promise).
    for filename in sorted(os.listdir(directory)):
        if not filename.endswith('mini.json'):
            continue
        # os.listdir returns bare names; join them with the directory so the
        # file is found even when `directory` is not the working dir.
        path = os.path.join(directory, filename)
        with open(path, 'r', encoding='utf-8') as open_file:
            json_data = json.load(open_file)
        # Continue numbering where the previous file stopped; restarting at
        # 0 would make records from different files share the same _id.
        for i, record in enumerate(json_data.values(), start=len(data)):
            data.append({
                "_index": "my-index",
                "_type": "my-type",
                "_id": i,
                "_source": record,
            })
    return data
# Send the actions returned by load_json to Elasticsearch in one bulk call.
actions = load_json('.')
helpers.bulk(es, actions)
# Query the node's root endpoint and print the raw response body.
res = requests.get('http://localhost:9200')
print(res.content)