Python JDBC 连接抛出 JVM 问题
Python JDBC Connection throwing JVM Issue
我正在尝试使用 Python jaydebeapi 连接到 Impala 数据库。当我调用连接 class 两次时,我遇到了 JVM 问题。请在下面找到我的连接 class 和 Sql_Query
Class.
Connection_Class:
import jaydebeapi
import jpype
import datetime
import ConfigParser
from fileinput import close
config = ConfigParser.RawConfigParser ( )
config.read ( 'ConfigFile.properties' )
def Impala_Connection(sql_query):
conn_impala = None
try:
jars_location = config.get ( 'Jars_info' , 'Jars_Location' )
args = "-Djava.class.path=%s" % jars_location
jvm_path = jpype.getDefaultJVMPath ( )
jpype.startJVM ( jvm_path , args )
except IOError as err:
print('An error occurred trying to read the file:{}".format(e)')
sql_query.close ( )
else:
try:
print "Start executing: " + sql_query + " at " + str (
datetime.datetime.now ( ).strftime ( "%Y-%m-%d %H:%M" ) ) + "\n"
url = config.get ( 'Jars_info' , 'Jdbc_Url' )
Jdbc_Driver_Class = config.get ( 'Jars_info' , 'Jdbc_Driver_Class' )
username = config.get ( 'Jars_info' , 'username' )
password = config.get ( 'Jars_info' , 'password' )
jdbc_jar_location = config.get ( 'Jars_info' , 'Jdbc_Jar_Location' )
conn_impala = jaydebeapi.connect ( Jdbc_Driver_Class , url , {username , password} , jdbc_jar_location )
curs = conn_impala.cursor ( )
sql_execution = curs.execute ( sql_query )
data = curs.fetchall ( sql_execution )
curs.close()
return (data)
except Exception, err:
print("Something went wrong with Impala Connection: {}".format(err))
finally:
close(conn_impala)
jpype.shutdownJVM()
Sql_Query_Class:
from pyspark import SparkConf, SparkContext
from com.my.common_funcitons.Impala_Query_Executor import Impala_Connection
import sys
conf = SparkConf().setAppName("pyspark")
sc = SparkContext(conf=conf)
tbl_name = sys.argv[1]
refid = sys.argv[2]
metadata_Query="SELECT * from Metadata_Table TABLE_NAME='%s' and TEMP.unique_id=%s" %(tbl_name,refid)
metadata_info=Impala_Connection(metadata_Query)
if len(metadata_info) == 0:
new_tbl_name = tbl_name+"_%"
metadata_Query="SELECT * from Metadata_Table TABLE_NAME='%s' and TEMP.unique_id=%s"" (new_tbl_name,refid)
metadata_info=Impala_Connection(metadata_Query)
for row in metadata_info:
metadata_no_of_columns=row[0]
metadata_table_id=row[1]
else:
for row in metadata_info:
metadata_no_of_columns=row[0]
metadata_table_id=row[1]
我有两种类型的 table 名称,如 table 和 table_000,所以如果我得到空数据,我需要添加 table_% 并要求 same.when 我正在从同一个 class 调用 Impala 连接 我遇到了 JVM 问题请在下面找到错误堆栈:
File "/usr/lib64/python2.7/site-packages/jpype/_core.py", line 50, in startJVM
_jpype.startup(jvm, tuple(args), True)
RuntimeError: Unable to start JVM at native/common/jp_env.cpp:78
我尝试在最终 class 中添加 JVMshutdown 但仍然面临 issue.Please 建议我一个解决方案。
使用 jpype.isJVMStarted() 函数我可以检查 jvm 是否启动我解决了我的问题。
当我试图解决这个问题时,我检查了在这个 jaydebeapi 连接之前我正在做一个到 HDFS 系统的 pyarrow 连接。我意识到这个问题中的顺序很重要,如果你以相反的方式进行连接(Jaydebeapi 然后是 pyarrow 连接)它就有效。
不知道这是否与您的问题有关,但这可以帮助缓解其他人的问题。
我正在尝试使用 Python jaydebeapi 连接到 Impala 数据库。当我调用连接 class 两次时,我遇到了 JVM 问题。请在下面找到我的连接 class 和 Sql_Query
Class.
Connection_Class:
import jaydebeapi
import jpype
import datetime
import ConfigParser
from fileinput import close
config = ConfigParser.RawConfigParser ( )
config.read ( 'ConfigFile.properties' )
def Impala_Connection(sql_query):
conn_impala = None
try:
jars_location = config.get ( 'Jars_info' , 'Jars_Location' )
args = "-Djava.class.path=%s" % jars_location
jvm_path = jpype.getDefaultJVMPath ( )
jpype.startJVM ( jvm_path , args )
except IOError as err:
print('An error occurred trying to read the file:{}".format(e)')
sql_query.close ( )
else:
try:
print "Start executing: " + sql_query + " at " + str (
datetime.datetime.now ( ).strftime ( "%Y-%m-%d %H:%M" ) ) + "\n"
url = config.get ( 'Jars_info' , 'Jdbc_Url' )
Jdbc_Driver_Class = config.get ( 'Jars_info' , 'Jdbc_Driver_Class' )
username = config.get ( 'Jars_info' , 'username' )
password = config.get ( 'Jars_info' , 'password' )
jdbc_jar_location = config.get ( 'Jars_info' , 'Jdbc_Jar_Location' )
conn_impala = jaydebeapi.connect ( Jdbc_Driver_Class , url , {username , password} , jdbc_jar_location )
curs = conn_impala.cursor ( )
sql_execution = curs.execute ( sql_query )
data = curs.fetchall ( sql_execution )
curs.close()
return (data)
except Exception, err:
print("Something went wrong with Impala Connection: {}".format(err))
finally:
close(conn_impala)
jpype.shutdownJVM()
Sql_Query_Class:
from pyspark import SparkConf, SparkContext
from com.my.common_funcitons.Impala_Query_Executor import Impala_Connection
import sys
conf = SparkConf().setAppName("pyspark")
sc = SparkContext(conf=conf)
tbl_name = sys.argv[1]
refid = sys.argv[2]
metadata_Query="SELECT * from Metadata_Table TABLE_NAME='%s' and TEMP.unique_id=%s" %(tbl_name,refid)
metadata_info=Impala_Connection(metadata_Query)
if len(metadata_info) == 0:
new_tbl_name = tbl_name+"_%"
metadata_Query="SELECT * from Metadata_Table TABLE_NAME='%s' and TEMP.unique_id=%s"" (new_tbl_name,refid)
metadata_info=Impala_Connection(metadata_Query)
for row in metadata_info:
metadata_no_of_columns=row[0]
metadata_table_id=row[1]
else:
for row in metadata_info:
metadata_no_of_columns=row[0]
metadata_table_id=row[1]
我有两种类型的 table 名称,如 table 和 table_000,所以如果我得到空数据,我需要添加 table_% 并要求 same.when 我正在从同一个 class 调用 Impala 连接 我遇到了 JVM 问题请在下面找到错误堆栈:
File "/usr/lib64/python2.7/site-packages/jpype/_core.py", line 50, in startJVM
_jpype.startup(jvm, tuple(args), True)
RuntimeError: Unable to start JVM at native/common/jp_env.cpp:78
我尝试在最终 class 中添加 JVMshutdown 但仍然面临 issue.Please 建议我一个解决方案。
使用 jpype.isJVMStarted() 函数我可以检查 jvm 是否启动我解决了我的问题。
当我试图解决这个问题时,我检查了在这个 jaydebeapi 连接之前我正在做一个到 HDFS 系统的 pyarrow 连接。我意识到这个问题中的顺序很重要,如果你以相反的方式进行连接(Jaydebeapi 然后是 pyarrow 连接)它就有效。
不知道这是否与您的问题有关,但这可以帮助缓解其他人的问题。