如何从 KDB 获取表(table)或视图(view)的元数据并保存到数据结构中?
How to fetch the table or view metadata from KDB and save to a data-structure?
我一直在尝试使用 python 从 KDB+ 数据库中获取元数据。基本上,我安装了一个名为 qpython 的库,并使用这个库连接和查询 KDB+ 数据库。
我想使用 python 存储 KDB+ 数据库中 table/view 所有相关列的元数据。尽管尝试了无数种不同的方法(例如将输出转换为 list/tuple、使用 for 循环进行迭代等),但我仍无法把元数据部分分离出来。
from qpython import qconnection
def fetch_metadata_from_kdb(params):
    """Run ``meta`` on a KDB+ table or view and return the raw result.

    Args:
        params: Mapping with the keys 'host', 'port', 'username',
            'password' and 'table' (name of the table or view to describe).

    Returns:
        Whatever the connection returns for the ``meta[<table>]`` query.
        If anything fails, the caught exception object is returned instead
        of being raised.
    """
    try:
        kdb_connection_obj = qconnection.QConnection(
            host=params['host'], port=params['port'],
            username=params['username'], password=params['password'])
        kdb_connection_obj.open()
        try:
            # NOTE(review): the table name is interpolated into the q
            # expression unescaped; only pass trusted names.
            return kdb_connection_obj("meta[{}]".format(params['table']))
        finally:
            # Close even when the query raises so the connection is not
            # leaked (previously close() was skipped on any error).
            kdb_connection_obj.close()
    except Exception as error_msg:
        # Existing contract: errors are handed back to the caller.
        return error_msg
def fetch_tables_from_kdb(params):
    """List the tables and views defined in a KDB+ process.

    Args:
        params: Mapping with the keys 'host', 'port', 'username' and
            'password' used to open the connection.

    Returns:
        A tuple ``(table_names, view_names)`` of decoded strings, built from
        the results of ``tables[]`` and ``views[]``. If anything fails, the
        caught exception object is returned instead of being raised.
    """
    try:
        kdb_connection_obj = qconnection.QConnection(
            host=params['host'], port=params['port'],
            username=params['username'], password=params['password'])
        kdb_connection_obj.open()
        try:
            tables = kdb_connection_obj("tables[]")
            views = kdb_connection_obj("views[]")
        finally:
            # Close even when a query raises so the connection is not leaked.
            kdb_connection_obj.close()
        # Each name is decoded to str; the results are iterated directly.
        return [table.decode() for table in tables], [view.decode() for view in views]
    except Exception as error_msg:
        # Existing contract: errors are handed back to the caller.
        return error_msg
# Connection settings for the KDB+ process plus the table to describe.
parms_q = {
    'host': 'localhost',
    'port': 5010,
    'username': 'kdb',
    'password': 'kdb',
    'table': 'testing',
}
# Print the table/view listing first, then the metadata result.
print("fetch_tables_from_kdb:", fetch_tables_from_kdb(parms_q), "\n")
print("fetch_metadata_from_kdb:", fetch_metadata_from_kdb(parms_q), "\n")
我目前得到的输出如下;
fetch_tables_from_kdb: (['testing'], ['viewname'])
fetch_metadata_from_kdb: [(b'time',) (b'sym',) (b'price',) (b'qty',)]![(b'p', b'', b'') (b's', b'', b'') (b'f', b'', b'') (b'j', b'', b'')]
我无法将列部分和元数据部分分开。如何使用 python 仅存储 KDB 中 table/view 相关列的元数据?
您从 kdb 返回的元数据是正确的,但在 python 中显示为 kdb 字典格式,我同意这不是很有用。
如果您在创建 qconnection 连接时传入 pandas=True 标志,qPython 会把 kdb 数据结构(例如 table)解析为 pandas 数据结构或合理的 python 类型,这对您的情况会更有用。
请看下面的例子-
kdb 设置(全部在本地主机上)
$ q -p 5000
q)testing:([]date:.z.d+0 1 2;`g#sym:`abc`def`ghi;num:`s#10 20 30)
q)testing
date sym num
------------------
2022.01.31 abc 10
2022.02.01 def 20
2022.02.02 ghi 30
q)meta testing
c | t f a
----| -----
date| d
sym | s g
num | j s
Python代码
from qpython import qconnection
#create and open 2 connections to kdb process - one without the pandas flag and one with it
q = qconnection.QConnection(host="localhost", port=5000)
qpandas = qconnection.QConnection(host="localhost", port=5000, pandas=True)
q.open()
qpandas.open()
#see what is returned with a q table
print(q("testing"))
[(8066, b'abc', 10) (8067, b'def', 20) (8068, b'ghi', 30)]
#the data is a qPython data object
type(q("testing"))
qpython.qcollection.QTable
#whereas using the pandas=True flag a dataframe is returned.
print(qpandas("testing"))
date sym num
0 2022-01-31 b'abc' 10
1 2022-02-01 b'def' 20
2 2022-02-02 b'ghi' 30
#This is the same for the meta of a table
print(q("meta testing"))
[(b'date',) (b'sym',) (b'num',)]![(b'd', b'', b'') (b's', b'', b'g') (b'j', b'', b's')]
print(qpandas("meta testing"))
t f a
c
b'date' d b'' b''
b'sym' s b'' b'g'
b'num' j b'' b's'
通过上面的内容,您现在可以使用 pandas 访问列和行(b'num' 等是 qPython 表示 q 中带反引号的 symbol(如 `num)的方式)。
此外,如果您对 python 数据结构而不是 kdb 数据 structure/types 更感兴趣,现在您还可以使用 DataFrame.info()
提取数据类型。 qPython 会自动将 q 类型转换为合理的 python 类型。
qpandas("testing").info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3 entries, 0 to 2
Data columns (total 3 columns):
# Column Non-Null Count Dtype
--- ------ -------------- -----
0 date 3 non-null datetime64[ns]
1 sym 3 non-null object
2 num 3 non-null int64
dtypes: datetime64[ns](1), int64(1), object(1)
memory usage: 200.0+ bytes
同时,我查阅了相当多的 KDB 文档,发现 meta 的输出包含以下各列(可参见 kdb metadata 文档):
c | t f a
c - 列名(columns)
t - 列的数据类型字符(type)
f - 外键关联(foreign key)
a - 与列关联的属性(attributes)
我们可以在 for 循环中迭代元数据对象来访问其内容,如下所示:
from qpython import qconnection
def fetch_metadata_from_kdb(params):
    """Return the column names and q type characters of a KDB+ table or view.

    Args:
        params: Mapping with the keys 'host', 'port', 'username',
            'password' and 'table' (name of the table or view to describe).

    Returns:
        A tuple ``(col_list, metadata_list)``. ``col_list`` holds the decoded
        first field of every key in the ``meta[<table>]`` result, and
        ``metadata_list`` holds the decoded first field of every value.
        If anything fails, the caught exception object is returned instead
        of being raised.
    """
    try:
        kdb_connection_obj = qconnection.QConnection(
            host=params['host'], port=params['port'],
            username=params['username'], password=params['password'])
        kdb_connection_obj.open()
        try:
            # NOTE(review): the table name is interpolated into the q
            # expression unescaped; only pass trusted names.
            meta_result = kdb_connection_obj("meta[{}]".format(params['table']))
            # Materialise the pairs so both lists can be built from them.
            meta_items = list(meta_result.items())
        finally:
            # Close even when the query raises so the connection is not leaked.
            kdb_connection_obj.close()
        col_list = [key[0].decode() for key, _ in meta_items]
        metadata_list = [value[0].decode() for _, value in meta_items]
        return col_list, metadata_list
    except Exception as error_msg:
        # Existing contract: errors are handed back to the caller.
        return error_msg
# Connection settings for the KDB+ process plus the table to describe.
parms_q = {
    'host': 'localhost',
    'port': 5010,
    'username': 'kdb',
    'password': 'kdb',
    'table': 'testing',
}
print(fetch_metadata_from_kdb(parms_q))
Output: ['time', 'sym', 'price', 'qty'], ['p', 's', 'f', 'j']
我还从这里的文档中获得了 KDB 字符类型/q 数据类型的对应关系。下面是相应的实现:
import pandas as pd
from qpython import qconnection
# Read the HTML tables on the q4m3 "Basic Data Types" page, take the second
# one, and keep the 'Type' and 'CharType' columns of its first 17 rows.
df = pd.read_html(
    'https://code.kx.com/q4m3/2_Basic_Data_Types_Atoms/'
)[1].iloc[:17, 0:3][['Type', 'CharType']]
# Map each 'CharType' entry to its 'Type' entry, both converted to str.
kdb_type_char_dict = {
    str(char_type): str(type_name)
    for type_name, char_type in zip(df.iloc[:, 0], df.iloc[:, 1])
}
####### Q DATA TYPES DICTIONARY #######
print("Chat types/ q data types dictionary:", kdb_type_char_dict)
def fetch_metadata_from_kdb(params):
    """Return the column names and q type names of a KDB+ table or view.

    Type characters from the ``meta[<table>]`` result are translated through
    the module-level ``kdb_type_char_dict``.

    Args:
        params: Mapping with the keys 'host', 'port', 'username',
            'password' and 'table' (name of the table or view to describe).

    Returns:
        A tuple ``(col_list, metadata_list)`` of column names and type names.
        A type character missing from ``kdb_type_char_dict`` is returned
        unchanged rather than failing the whole call. If anything else
        fails, the caught exception object is returned instead of raised.
    """
    try:
        kdb_connection_obj = qconnection.QConnection(
            host=params['host'], port=params['port'],
            username=params['username'], password=params['password'])
        kdb_connection_obj.open()
        try:
            # NOTE(review): the table name is interpolated into the q
            # expression unescaped; only pass trusted names.
            meta_result = kdb_connection_obj("meta[{}]".format(params['table']))
            meta_items = list(meta_result.items())
        finally:
            # Close even when the query raises so the connection is not leaked.
            kdb_connection_obj.close()
        col_list = [key[0].decode() for key, _ in meta_items]
        type_chars = [value[0].decode() for _, value in meta_items]
        # Fall back to the raw character so one unmapped type does not
        # discard the metadata of every other column.
        metadata_list = [
            kdb_type_char_dict.get(type_char, type_char)
            for type_char in type_chars
        ]
        return col_list, metadata_list
    except Exception as error_msg:
        # Existing contract: errors are handed back to the caller.
        return error_msg
# Connection settings for the KDB+ process plus the table to describe.
params = {
    'host': 'localhost',
    'port': 5010,
    'username': 'kdb',
    'password': 'kdb',
    'table': 'testing',
}
print(fetch_metadata_from_kdb(params))
输出:
Chat types/ q data types dictionary: {'b': 'boolean', 'x': 'byte', 'h': 'short', 'i': 'int', 'j': 'long', 'e': 'real', 'f': 'float', 'c': 'char', 's': 'symbol', 'p': 'timestamp', 'm': 'month', 'd': 'date', 'z': '(datetime)', 'n': 'timespan', 'u': 'minute', 'v': 'second', 't': 'time'}
(['time', 'sym', 'price', 'qty'], ['timestamp', 'symbol', 'float', 'long'])
我一直在尝试使用 python 从 KDB+ 数据库中获取元数据。基本上,我安装了一个名为 qpython 的库,并使用这个库连接和查询 KDB+ 数据库。
我想使用 python 存储 KDB+ 数据库中 table/view 所有相关列的元数据。尽管尝试了无数种不同的方法(例如将输出转换为 list/tuple、使用 for 循环进行迭代等),但我仍无法把元数据部分分离出来。
from qpython import qconnection
def fetch_metadata_from_kdb(params):
    """Run ``meta`` on a KDB+ table or view and return the raw result.

    Args:
        params: Mapping with the keys 'host', 'port', 'username',
            'password' and 'table' (name of the table or view to describe).

    Returns:
        Whatever the connection returns for the ``meta[<table>]`` query.
        If anything fails, the caught exception object is returned instead
        of being raised.
    """
    try:
        kdb_connection_obj = qconnection.QConnection(
            host=params['host'], port=params['port'],
            username=params['username'], password=params['password'])
        kdb_connection_obj.open()
        try:
            # NOTE(review): the table name is interpolated into the q
            # expression unescaped; only pass trusted names.
            return kdb_connection_obj("meta[{}]".format(params['table']))
        finally:
            # Close even when the query raises so the connection is not
            # leaked (previously close() was skipped on any error).
            kdb_connection_obj.close()
    except Exception as error_msg:
        # Existing contract: errors are handed back to the caller.
        return error_msg
def fetch_tables_from_kdb(params):
    """List the tables and views defined in a KDB+ process.

    Args:
        params: Mapping with the keys 'host', 'port', 'username' and
            'password' used to open the connection.

    Returns:
        A tuple ``(table_names, view_names)`` of decoded strings, built from
        the results of ``tables[]`` and ``views[]``. If anything fails, the
        caught exception object is returned instead of being raised.
    """
    try:
        kdb_connection_obj = qconnection.QConnection(
            host=params['host'], port=params['port'],
            username=params['username'], password=params['password'])
        kdb_connection_obj.open()
        try:
            tables = kdb_connection_obj("tables[]")
            views = kdb_connection_obj("views[]")
        finally:
            # Close even when a query raises so the connection is not leaked.
            kdb_connection_obj.close()
        # Each name is decoded to str; the results are iterated directly.
        return [table.decode() for table in tables], [view.decode() for view in views]
    except Exception as error_msg:
        # Existing contract: errors are handed back to the caller.
        return error_msg
# Connection settings for the KDB+ process plus the table to describe.
parms_q = {
    'host': 'localhost',
    'port': 5010,
    'username': 'kdb',
    'password': 'kdb',
    'table': 'testing',
}
# Print the table/view listing first, then the metadata result.
print("fetch_tables_from_kdb:", fetch_tables_from_kdb(parms_q), "\n")
print("fetch_metadata_from_kdb:", fetch_metadata_from_kdb(parms_q), "\n")
我目前得到的输出如下;
fetch_tables_from_kdb: (['testing'], ['viewname'])
fetch_metadata_from_kdb: [(b'time',) (b'sym',) (b'price',) (b'qty',)]![(b'p', b'', b'') (b's', b'', b'') (b'f', b'', b'') (b'j', b'', b'')]
我无法将列部分和元数据部分分开。如何使用 python 仅存储 KDB 中 table/view 相关列的元数据?
您从 kdb 返回的元数据是正确的,但在 python 中显示为 kdb 字典格式,我同意这不是很有用。
如果您在创建 qconnection 连接时传入 pandas=True 标志,qPython 会把 kdb 数据结构(例如 table)解析为 pandas 数据结构或合理的 python 类型,这对您的情况会更有用。
请看下面的例子- kdb 设置(全部在本地主机上)
$ q -p 5000
q)testing:([]date:.z.d+0 1 2;`g#sym:`abc`def`ghi;num:`s#10 20 30)
q)testing
date sym num
------------------
2022.01.31 abc 10
2022.02.01 def 20
2022.02.02 ghi 30
q)meta testing
c | t f a
----| -----
date| d
sym | s g
num | j s
Python代码
from qpython import qconnection
#create and open 2 connections to kdb process - one without the pandas flag and one with it
q = qconnection.QConnection(host="localhost", port=5000)
qpandas = qconnection.QConnection(host="localhost", port=5000, pandas=True)
q.open()
qpandas.open()
#see what is returned with a q table
print(q("testing"))
[(8066, b'abc', 10) (8067, b'def', 20) (8068, b'ghi', 30)]
#the data is a qPython data object
type(q("testing"))
qpython.qcollection.QTable
#whereas using the pandas=True flag a dataframe is returned.
print(qpandas("testing"))
date sym num
0 2022-01-31 b'abc' 10
1 2022-02-01 b'def' 20
2 2022-02-02 b'ghi' 30
#This is the same for the meta of a table
print(q("meta testing"))
[(b'date',) (b'sym',) (b'num',)]![(b'd', b'', b'') (b's', b'', b'g') (b'j', b'', b's')]
print(qpandas("meta testing"))
t f a
c
b'date' d b'' b''
b'sym' s b'' b'g'
b'num' j b'' b's'
通过上面的内容,您现在可以使用 pandas 访问列和行(b'num' 等是 qPython 表示 q 中带反引号的 symbol(如 `num)的方式)。
此外,如果您对 python 数据结构而不是 kdb 数据 structure/types 更感兴趣,现在您还可以使用 DataFrame.info()
提取数据类型。 qPython 会自动将 q 类型转换为合理的 python 类型。
qpandas("testing").info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3 entries, 0 to 2
Data columns (total 3 columns):
# Column Non-Null Count Dtype
--- ------ -------------- -----
0 date 3 non-null datetime64[ns]
1 sym 3 non-null object
2 num 3 non-null int64
dtypes: datetime64[ns](1), int64(1), object(1)
memory usage: 200.0+ bytes
同时,我查阅了相当多的 KDB 文档,发现 meta 的输出包含以下各列(可参见 kdb metadata 文档):
c | t f a
c - 列名(columns)
t - 列的数据类型字符(type)
f - 外键关联(foreign key)
a - 与列关联的属性(attributes)
我们可以在 for 循环中迭代元数据对象来访问其内容,如下所示:
from qpython import qconnection
def fetch_metadata_from_kdb(params):
    """Return the column names and q type characters of a KDB+ table or view.

    Args:
        params: Mapping with the keys 'host', 'port', 'username',
            'password' and 'table' (name of the table or view to describe).

    Returns:
        A tuple ``(col_list, metadata_list)``. ``col_list`` holds the decoded
        first field of every key in the ``meta[<table>]`` result, and
        ``metadata_list`` holds the decoded first field of every value.
        If anything fails, the caught exception object is returned instead
        of being raised.
    """
    try:
        kdb_connection_obj = qconnection.QConnection(
            host=params['host'], port=params['port'],
            username=params['username'], password=params['password'])
        kdb_connection_obj.open()
        try:
            # NOTE(review): the table name is interpolated into the q
            # expression unescaped; only pass trusted names.
            meta_result = kdb_connection_obj("meta[{}]".format(params['table']))
            # Materialise the pairs so both lists can be built from them.
            meta_items = list(meta_result.items())
        finally:
            # Close even when the query raises so the connection is not leaked.
            kdb_connection_obj.close()
        col_list = [key[0].decode() for key, _ in meta_items]
        metadata_list = [value[0].decode() for _, value in meta_items]
        return col_list, metadata_list
    except Exception as error_msg:
        # Existing contract: errors are handed back to the caller.
        return error_msg
# Connection settings for the KDB+ process plus the table to describe.
parms_q = {
    'host': 'localhost',
    'port': 5010,
    'username': 'kdb',
    'password': 'kdb',
    'table': 'testing',
}
print(fetch_metadata_from_kdb(parms_q))
Output: ['time', 'sym', 'price', 'qty'], ['p', 's', 'f', 'j']
我还从这里的文档中获得了 KDB 字符类型/q 数据类型的对应关系。下面是相应的实现:
import pandas as pd
from qpython import qconnection
# Read the HTML tables on the q4m3 "Basic Data Types" page, take the second
# one, and keep the 'Type' and 'CharType' columns of its first 17 rows.
df = pd.read_html(
    'https://code.kx.com/q4m3/2_Basic_Data_Types_Atoms/'
)[1].iloc[:17, 0:3][['Type', 'CharType']]
# Map each 'CharType' entry to its 'Type' entry, both converted to str.
kdb_type_char_dict = {
    str(char_type): str(type_name)
    for type_name, char_type in zip(df.iloc[:, 0], df.iloc[:, 1])
}
####### Q DATA TYPES DICTIONARY #######
print("Chat types/ q data types dictionary:", kdb_type_char_dict)
def fetch_metadata_from_kdb(params):
    """Return the column names and q type names of a KDB+ table or view.

    Type characters from the ``meta[<table>]`` result are translated through
    the module-level ``kdb_type_char_dict``.

    Args:
        params: Mapping with the keys 'host', 'port', 'username',
            'password' and 'table' (name of the table or view to describe).

    Returns:
        A tuple ``(col_list, metadata_list)`` of column names and type names.
        A type character missing from ``kdb_type_char_dict`` is returned
        unchanged rather than failing the whole call. If anything else
        fails, the caught exception object is returned instead of raised.
    """
    try:
        kdb_connection_obj = qconnection.QConnection(
            host=params['host'], port=params['port'],
            username=params['username'], password=params['password'])
        kdb_connection_obj.open()
        try:
            # NOTE(review): the table name is interpolated into the q
            # expression unescaped; only pass trusted names.
            meta_result = kdb_connection_obj("meta[{}]".format(params['table']))
            meta_items = list(meta_result.items())
        finally:
            # Close even when the query raises so the connection is not leaked.
            kdb_connection_obj.close()
        col_list = [key[0].decode() for key, _ in meta_items]
        type_chars = [value[0].decode() for _, value in meta_items]
        # Fall back to the raw character so one unmapped type does not
        # discard the metadata of every other column.
        metadata_list = [
            kdb_type_char_dict.get(type_char, type_char)
            for type_char in type_chars
        ]
        return col_list, metadata_list
    except Exception as error_msg:
        # Existing contract: errors are handed back to the caller.
        return error_msg
# Connection settings for the KDB+ process plus the table to describe.
params = {
    'host': 'localhost',
    'port': 5010,
    'username': 'kdb',
    'password': 'kdb',
    'table': 'testing',
}
print(fetch_metadata_from_kdb(params))
输出:
Chat types/ q data types dictionary: {'b': 'boolean', 'x': 'byte', 'h': 'short', 'i': 'int', 'j': 'long', 'e': 'real', 'f': 'float', 'c': 'char', 's': 'symbol', 'p': 'timestamp', 'm': 'month', 'd': 'date', 'z': '(datetime)', 'n': 'timespan', 'u': 'minute', 'v': 'second', 't': 'time'}
(['time', 'sym', 'price', 'qty'], ['timestamp', 'symbol', 'float', 'long'])