Python Cassandra 无法完成对任何主机的操作
Python Cassandra: Unable to complete the operation against any hosts
我正在尝试使用 Python 在 Apache Cassandra 上运行一些插入查询。我想从 json 文件中插入数据,这是我的代码:
import logging
from cassandra.cluster import Cluster
import json
logging.basicConfig(level=logging.INFO)
def connect_db():
    """Open a session on the local Cassandra node.

    Builds a ``Cluster`` for the contact point ``127.0.0.1`` on port 9042
    and returns the session produced by ``Cluster.connect()``.
    """
    node_addresses = ['127.0.0.1']
    local_cluster = Cluster(node_addresses, port=9042)
    db_session = local_cluster.connect()
    # To reset the schema, drop the tables here before returning, e.g.:
    #   db_session.execute("DROP TABLE player_session.events")
    #   db_session.execute("DROP TABLE player_session.startevents ")
    #   db_session.execute("DROP TABLE player_session.endevents ")
    return db_session
def execute_query():
    """Create the ``player_session`` keyspace and its three event tables.

    Connects through ``connect_db()`` and issues ``CREATE ... IF NOT EXISTS``
    statements for the keyspace and for the ``events``, ``startevents`` and
    ``endevents`` tables, printing a progress message before each one.

    Returns:
        The session obtained from ``connect_db()``.
    """
    session = connect_db()

    print("Creating KEYSPACE")
    # With NetworkTopologyStrategy each key of the replication map must be
    # the name of an existing data centre. The node used here reports itself
    # as "datacenter1"; a key that matches no data centre places zero
    # replicas, and every write then fails with "Cannot achieve consistency
    # level LOCAL_ONE ... alive_replicas: 0".
    # NOTE: IF NOT EXISTS leaves an already-created keyspace untouched, so a
    # keyspace created earlier with a wrong data-centre name has to be fixed
    # with ALTER KEYSPACE (or dropped) before this statement has any effect.
    session.execute("""
        CREATE KEYSPACE IF NOT EXISTS player_session
        WITH REPLICATION =
        { 'class' : 'NetworkTopologyStrategy', 'datacenter1' : 1 }
        """)

    # The three tables share one schema, so a single DDL template is reused.
    table_ddl = """
        CREATE TABLE IF NOT EXISTS
        player_session.{table}(player_id text, country text, event text, session_id text,ts timestamp,
        PRIMARY KEY(player_id, ts)) WITH CLUSTERING ORDER BY ("ts" DESC)
        """
    tables = (
        ("Creating player_session table", "events"),
        ("Creating start session table", "startevents"),
        ("Creating end session table", "endevents"),
    )
    for message, table in tables:
        print(message)
        session.execute(table_ddl.format(table=table))

    return session
def insert_data(session):
    """Load events from ``my_json.jsonl`` into the start/end event tables.

    Each non-blank line of the file is parsed as one JSON object. Rows whose
    ``event`` is ``"start"`` are inserted into ``player_session.startevents``
    and rows whose ``event`` is ``"end"`` into ``player_session.endevents``;
    rows with any other event value are ignored.

    Args:
        session: Object whose ``execute(query, parameters)`` method runs a
            CQL statement (here, the session returned by ``connect_db()``).

    Raises:
        OSError: If ``my_json.jsonl`` cannot be opened.
        json.JSONDecodeError: If a non-blank line is not valid JSON.
        KeyError: If a row lacks a field needed by its insert statement.
    """
    start_insert = (
        "INSERT INTO player_session.startevents (player_id, event, country, session_id, ts) VALUES (%s,%s,%s,%s,%s) "
    )
    end_insert = (
        "INSERT INTO player_session.endevents (player_id, event, session_id, ts) VALUES (%s,%s,%s,%s) "
    )

    # The context manager closes the file; no explicit close() is needed.
    with open('my_json.jsonl', encoding='utf-8') as f:
        for line in f:
            # Tolerate blank lines (e.g. a trailing newline at end of file)
            # instead of letting json.loads fail on an empty string.
            if not line.strip():
                continue
            row = json.loads(line)
            event = row['event']
            if event == "start":
                session.execute(
                    start_insert,
                    [row['player_id'], row['event'], row['country'], row['session_id'], row['ts']]
                )
            elif event == "end":
                session.execute(
                    end_insert,
                    [row['player_id'], row['event'], row['session_id'], row['ts']]
                )
    print("data import complete")
if __name__ == "__main__":
    # Script entry point: open a session, then load the JSONL events.
    db_session = connect_db()
    insert_data(db_session)
我的表是在 Cassandra 中创建的,但我总是会收到此错误:
Traceback (most recent call last):
line 64, in insert_data
session.execute(
File "cassandra/cluster.py", line 2618, in cassandra.cluster.Session.execute
File "cassandra/cluster.py", line 4894, in cassandra.cluster.ResponseFuture.result
cassandra.cluster.NoHostAvailable: ('Unable to complete the operation against any hosts', {<Host: 127.0.0.1:9042 datacenter1>: Unavailable('Error from server: code=1000 [Unavailable exception] message="Cannot achieve consistency level LOCAL_ONE" info={\'consistency\': \'LOCAL_ONE\', \'required_replicas\': 1, \'alive_replicas\': 0}')})
错误消息提示了两种可能性:
Cassandra 没有运行。您可以使用 nodetool status 来验证这一点。
您用于连接的数据中心名称在键空间定义或集群配置中不正确。数据中心的默认名称是 dc1。无论将其设置为什么,它都必须与 nodetool status、describe keyspace player_session 中显示的数据中心名称以及连接属性中指定的数据中心名称(可选)相匹配。(本例的回溯信息显示该节点所属的数据中心是 datacenter1,因此键空间的复制设置必须使用这个名称。)
我正在尝试使用 Python 在 Apache Cassandra 上运行一些插入查询。我想从 json 文件中插入数据,这是我的代码:
import logging
from cassandra.cluster import Cluster
import json
logging.basicConfig(level=logging.INFO)
def connect_db():
    """Open a session on the local Cassandra node.

    Builds a ``Cluster`` for the contact point ``127.0.0.1`` on port 9042
    and returns the session produced by ``Cluster.connect()``.
    """
    node_addresses = ['127.0.0.1']
    local_cluster = Cluster(node_addresses, port=9042)
    db_session = local_cluster.connect()
    # To reset the schema, drop the tables here before returning, e.g.:
    #   db_session.execute("DROP TABLE player_session.events")
    #   db_session.execute("DROP TABLE player_session.startevents ")
    #   db_session.execute("DROP TABLE player_session.endevents ")
    return db_session
def execute_query():
    """Create the ``player_session`` keyspace and its three event tables.

    Connects through ``connect_db()`` and issues ``CREATE ... IF NOT EXISTS``
    statements for the keyspace and for the ``events``, ``startevents`` and
    ``endevents`` tables, printing a progress message before each one.

    Returns:
        The session obtained from ``connect_db()``.
    """
    session = connect_db()

    print("Creating KEYSPACE")
    # With NetworkTopologyStrategy each key of the replication map must be
    # the name of an existing data centre. The node used here reports itself
    # as "datacenter1"; a key that matches no data centre places zero
    # replicas, and every write then fails with "Cannot achieve consistency
    # level LOCAL_ONE ... alive_replicas: 0".
    # NOTE: IF NOT EXISTS leaves an already-created keyspace untouched, so a
    # keyspace created earlier with a wrong data-centre name has to be fixed
    # with ALTER KEYSPACE (or dropped) before this statement has any effect.
    session.execute("""
        CREATE KEYSPACE IF NOT EXISTS player_session
        WITH REPLICATION =
        { 'class' : 'NetworkTopologyStrategy', 'datacenter1' : 1 }
        """)

    # The three tables share one schema, so a single DDL template is reused.
    table_ddl = """
        CREATE TABLE IF NOT EXISTS
        player_session.{table}(player_id text, country text, event text, session_id text,ts timestamp,
        PRIMARY KEY(player_id, ts)) WITH CLUSTERING ORDER BY ("ts" DESC)
        """
    tables = (
        ("Creating player_session table", "events"),
        ("Creating start session table", "startevents"),
        ("Creating end session table", "endevents"),
    )
    for message, table in tables:
        print(message)
        session.execute(table_ddl.format(table=table))

    return session
def insert_data(session):
    """Load events from ``my_json.jsonl`` into the start/end event tables.

    Each non-blank line of the file is parsed as one JSON object. Rows whose
    ``event`` is ``"start"`` are inserted into ``player_session.startevents``
    and rows whose ``event`` is ``"end"`` into ``player_session.endevents``;
    rows with any other event value are ignored.

    Args:
        session: Object whose ``execute(query, parameters)`` method runs a
            CQL statement (here, the session returned by ``connect_db()``).

    Raises:
        OSError: If ``my_json.jsonl`` cannot be opened.
        json.JSONDecodeError: If a non-blank line is not valid JSON.
        KeyError: If a row lacks a field needed by its insert statement.
    """
    start_insert = (
        "INSERT INTO player_session.startevents (player_id, event, country, session_id, ts) VALUES (%s,%s,%s,%s,%s) "
    )
    end_insert = (
        "INSERT INTO player_session.endevents (player_id, event, session_id, ts) VALUES (%s,%s,%s,%s) "
    )

    # The context manager closes the file; no explicit close() is needed.
    with open('my_json.jsonl', encoding='utf-8') as f:
        for line in f:
            # Tolerate blank lines (e.g. a trailing newline at end of file)
            # instead of letting json.loads fail on an empty string.
            if not line.strip():
                continue
            row = json.loads(line)
            event = row['event']
            if event == "start":
                session.execute(
                    start_insert,
                    [row['player_id'], row['event'], row['country'], row['session_id'], row['ts']]
                )
            elif event == "end":
                session.execute(
                    end_insert,
                    [row['player_id'], row['event'], row['session_id'], row['ts']]
                )
    print("data import complete")
if __name__ == "__main__":
    # Script entry point: open a session, then load the JSONL events.
    db_session = connect_db()
    insert_data(db_session)
我的表是在 Cassandra 中创建的,但我总是会收到此错误:
Traceback (most recent call last):
line 64, in insert_data
session.execute(
File "cassandra/cluster.py", line 2618, in cassandra.cluster.Session.execute
File "cassandra/cluster.py", line 4894, in cassandra.cluster.ResponseFuture.result
cassandra.cluster.NoHostAvailable: ('Unable to complete the operation against any hosts', {<Host: 127.0.0.1:9042 datacenter1>: Unavailable('Error from server: code=1000 [Unavailable exception] message="Cannot achieve consistency level LOCAL_ONE" info={\'consistency\': \'LOCAL_ONE\', \'required_replicas\': 1, \'alive_replicas\': 0}')})
错误消息提示了两种可能性:
Cassandra 没有运行。您可以使用 nodetool status 来验证这一点。
您用于连接的数据中心名称在键空间定义或集群配置中不正确。数据中心的默认名称是 dc1。无论将其设置为什么,它都必须与 nodetool status、describe keyspace player_session 中显示的数据中心名称以及连接属性中指定的数据中心名称(可选)相匹配。(本例的回溯信息显示该节点所属的数据中心是 datacenter1,因此键空间的复制设置必须使用这个名称。)