python - presto - 时间戳和 decimal(38,18) 作为字符串返回?
python - presto - timestamps and decimal(38,18) returned as strings?
为什么 presto timestamp/decimal(38,18) 数据类型返回一个字符串(包含在 u'' 中)而不是 python datetime/numeric 类型?
急速 jdbc:
select 来自 hive.x.y
的 typeof(col1),typeof(col2),typeof(col3),typeof(col4),typeof(col5),typeof(col6)
结果是
timestamp timestamp bigint decimal(38,18) varchar varchar
desc hive.x.y
#result is
for_dt timestamp NO NO NO NO 1
for_d timestamp NO NO NO NO 2
for_h bigint NO NO NO NO 3
value decimal(38,18) NO NO NO NO 4
metric varchar(2147483647) NO NO NO NO 5
lat_lon varchar(2147483647) NO NO NO NO 6
attempt 1
#python
from sqlalchemy.engine import create_engine
engine = create_engine('presto://u:p@host:port',connect_args={'protocol': 'https', 'requests_kwargs': {'verify': 'mypem'}})
result = engine.execute('select * from hive.x.y limit 1')
print(result.fetchall())
#result is
[(u'2010-02-18 03:00:00.000', u'2010-02-18 00:00:00.000', 3, u'-0.191912651062011660', u'hey', u'there')]
attempt 2
#python
from pyhive import presto
import requests
from requests.auth import HTTPBasicAuth
req_kw = {
'verify': 'mypem',
'auth': HTTPBasicAuth('u', 'p')
}
cursor = presto.connect(
host='host',
port=port,
protocol='https',
username='u',
requests_kwargs=req_kw,
).cursor()
query = '''select * from x.y limit 1'''
cursor.execute(query)
print cursor.fetchall()
#result is
[(u'2010-02-18 03:00:00.000', u'2010-02-18 00:00:00.000', 3, u'-0.191912651062011660', u'hey', u'there')]
您从 sql 查询中获得的输出来自该格式的数据库。
你有两个选择
- 自己映射数据(编写自己的 ORM)
- 学习使用 ORM
选项 1
请注意,我刚刚在此处硬编码了您的查询结果以供测试。
from sqlalchemy.engine import create_engine
from datetime import datetime
from decimal import Decimal
# 2010-02-18 03:00:00.000
dateTimeFormat = "%Y-%m-%d %H:%M:%S.%f"
class hivexy:
def __init__(self, for_dt, for_d, for_h, value, metric, lat_lon):
self.for_dt = for_dt
self.for_d = for_d
self.for_h = for_h
self.value = value
self.metric = metric
self.lat_lon = lat_lon
# Pretty Printing on print(hivexy)
def __str__(self):
baseString = ("for_dt: {}\n"
"for_d: {}\n"
"for_h: {}\n"
"value: {}\n"
"metric: {}\n"
"lat_lon: {}\n")
return baseString.format(for_dt, for_d, for_h, value, metric, lat_lon)
#engine = create_engine('presto://u:p@host:port',connect_args={'protocol': 'https', 'requests_kwargs': {'verify': 'mypem'}})
#results = engine.execute("select * from 'hive.x.y' limit 1")
results = [(u'2010-02-18 03:00:00.000', u'2010-02-18 00:00:00.000', 3, u'-0.191912651062011660', u'hey', u'there')]
hiveObjects = []
for row in results:
for_dt = datetime.strptime(row[0], dateTimeFormat)
for_d = datetime.strptime(row[1], dateTimeFormat)
for_h = row[2]
value = Decimal(row[3])
metric = row[4]
lat_lon = row[5]
hiveObjects.append(hivexy(for_dt, for_d, for_h, value, metric, lat_lon))
for hiveObject in hiveObjects:
print(hiveObject)
选项 2
这使用反射 - 它查询数据库元数据的字段类型,因此您不必在选项 1 中执行所有这些操作。
from sqlalchemy import *
from sqlalchemy.engine import create_engine
from sqlalchemy.schema import *
engine = create_engine('presto://u:p@host:port',connect_args={'protocol': 'https', 'requests_kwargs': {'verify': 'mypem'}})
# Reflection - SQLAlchemy will get metadata from database including field types
hiveXYTable = Table('hive.x.y', MetaData(bind=engine), autoload=True)
s = select([hiveXYTable]).limit(1)
results = engine.execute(s)
for row in results:
print(row)
为什么 presto timestamp/decimal(38,18) 数据类型返回一个字符串(包含在 u'' 中)而不是 python datetime/numeric 类型?
急速 jdbc:
select 来自 hive.x.y
的 typeof(col1),typeof(col2),typeof(col3),typeof(col4),typeof(col5),typeof(col6)结果是
timestamp timestamp bigint decimal(38,18) varchar varchar
desc hive.x.y
#result is
for_dt timestamp NO NO NO NO 1
for_d timestamp NO NO NO NO 2
for_h bigint NO NO NO NO 3
value decimal(38,18) NO NO NO NO 4
metric varchar(2147483647) NO NO NO NO 5
lat_lon varchar(2147483647) NO NO NO NO 6
attempt 1
#python
from sqlalchemy.engine import create_engine
engine = create_engine('presto://u:p@host:port',connect_args={'protocol': 'https', 'requests_kwargs': {'verify': 'mypem'}})
result = engine.execute('select * from hive.x.y limit 1')
print(result.fetchall())
#result is
[(u'2010-02-18 03:00:00.000', u'2010-02-18 00:00:00.000', 3, u'-0.191912651062011660', u'hey', u'there')]
attempt 2
#python
from pyhive import presto
import requests
from requests.auth import HTTPBasicAuth
req_kw = {
'verify': 'mypem',
'auth': HTTPBasicAuth('u', 'p')
}
cursor = presto.connect(
host='host',
port=port,
protocol='https',
username='u',
requests_kwargs=req_kw,
).cursor()
query = '''select * from x.y limit 1'''
cursor.execute(query)
print cursor.fetchall()
#result is
[(u'2010-02-18 03:00:00.000', u'2010-02-18 00:00:00.000', 3, u'-0.191912651062011660', u'hey', u'there')]
您从 sql 查询中获得的输出来自该格式的数据库。
你有两个选择
- 自己映射数据(编写自己的 ORM)
- 学习使用 ORM
选项 1
请注意,我刚刚在此处硬编码了您的查询结果以供测试。
from sqlalchemy.engine import create_engine
from datetime import datetime
from decimal import Decimal
# 2010-02-18 03:00:00.000
dateTimeFormat = "%Y-%m-%d %H:%M:%S.%f"
class hivexy:
def __init__(self, for_dt, for_d, for_h, value, metric, lat_lon):
self.for_dt = for_dt
self.for_d = for_d
self.for_h = for_h
self.value = value
self.metric = metric
self.lat_lon = lat_lon
# Pretty Printing on print(hivexy)
def __str__(self):
baseString = ("for_dt: {}\n"
"for_d: {}\n"
"for_h: {}\n"
"value: {}\n"
"metric: {}\n"
"lat_lon: {}\n")
return baseString.format(for_dt, for_d, for_h, value, metric, lat_lon)
#engine = create_engine('presto://u:p@host:port',connect_args={'protocol': 'https', 'requests_kwargs': {'verify': 'mypem'}})
#results = engine.execute("select * from 'hive.x.y' limit 1")
results = [(u'2010-02-18 03:00:00.000', u'2010-02-18 00:00:00.000', 3, u'-0.191912651062011660', u'hey', u'there')]
hiveObjects = []
for row in results:
for_dt = datetime.strptime(row[0], dateTimeFormat)
for_d = datetime.strptime(row[1], dateTimeFormat)
for_h = row[2]
value = Decimal(row[3])
metric = row[4]
lat_lon = row[5]
hiveObjects.append(hivexy(for_dt, for_d, for_h, value, metric, lat_lon))
for hiveObject in hiveObjects:
print(hiveObject)
选项 2 这使用反射 - 它查询数据库元数据的字段类型,因此您不必在选项 1 中执行所有这些操作。
from sqlalchemy import *
from sqlalchemy.engine import create_engine
from sqlalchemy.schema import *
engine = create_engine('presto://u:p@host:port',connect_args={'protocol': 'https', 'requests_kwargs': {'verify': 'mypem'}})
# Reflection - SQLAlchemy will get metadata from database including field types
hiveXYTable = Table('hive.x.y', MetaData(bind=engine), autoload=True)
s = select([hiveXYTable]).limit(1)
results = engine.execute(s)
for row in results:
print(row)