Python中是否有解析完整SQL查询的函数?
Is there any function to parse a complete SQL query in Python?
我正在使用 PostgreSQL 查询。我想从 SQL 查询中提取所有信息,例如:
# Example star-schema query used as parser input. The literal is split clause
# by clause for readability; the leading and trailing spaces of the original
# single-line string are preserved.
sql = (
    " select d_year, s_nation, p_category,"
    " sum(lo_revenue - lo_supplycost) as profit"
    " from DATES, CUSTOMER, SUPPLIER, PART, LINEORDER"
    " where lo_custkey = c_custkey"
    " and lo_suppkey = s_suppkey"
    " and lo_partkey = p_partkey"
    " and lo_orderdate = d_datekey"
    " and c_region = 'AFRICA'"
    " and s_region = 'AFRICA'"
    " and (d_year = 1996 or d_year = 1997)"
    " and (p_mfgr = 'MFGR#2' or p_mfgr = 'MFGR#4')"
    " group by d_year, s_nation, p_category"
    " order by d_year, s_nation, p_category "
)
我想获取所有涉及的表、所有选择谓词和所有连接谓词,以及 GROUP BY 部分和 ORDER BY 部分。
我使用了 sqlparse
并且找到了一种只获取相关表的方法。
有没有关于如何提取此信息的示例?
下面的算法可以给出各个关键字之间的确切元素。我使用了 sqlparse:
# Walk the top-level tokens of the first parsed statement and print what each
# clause contains: SELECT attributes, FROM tables, WHERE predicates, and the
# GROUP BY / ORDER BY attributes.
#
# NOTE(review): `sql` must already be defined, and IdentifierList, Identifier,
# Where, Comparison, Parenthesis (sqlparse.sql) as well as Keyword, DML
# (sqlparse.tokens) are assumed to be imported elsewhere -- confirm.
parsed = sqlparse.parse(sql)
stmt = parsed[0]

# Output label for every clause whose identifiers are listed. WHERE is absent
# on purpose: its predicates are printed when the Where token itself is met.
clause_labels = {
    "SELECT": "Attr = ",
    "FROM": "TAB = ",
    "ORDER BY": "ORDERBY att = ",
    "GROUP BY": "GROUPBY att = ",
}

# Clause opened by the most recent clause keyword; None until one is seen.
# A single state variable replaces five mutually exclusive boolean flags.
current_clause = None

for token in stmt.tokens:
    # Identifiers belong to the clause opened by an *earlier* token, so they
    # are reported before the state is updated for the current token.
    label = clause_labels.get(current_clause)
    if label is not None:
        if isinstance(token, IdentifierList):
            for identifier in token.get_identifiers():
                print("{} {}\n".format(label, identifier))
        elif isinstance(token, Identifier):
            print("{} {}\n".format(label, token))

    if isinstance(token, Where):
        # The WHERE clause arrives as one grouped token; only its direct
        # children that are Comparison or Parenthesis instances are reported.
        current_clause = "WHERE"
        for where_token in token:
            if isinstance(where_token, Comparison):
                print("{} {}\n".format("Comparaison = ", where_token))
            elif isinstance(where_token, Parenthesis):
                print("{} {}\n".format("Parenthesis = ", where_token))
    elif token.ttype is DML and token.value.upper() == "SELECT":
        current_clause = "SELECT"
    elif token.ttype is Keyword and token.value.upper() in (
        "FROM",
        "GROUP BY",
        "ORDER BY",
    ):
        current_clause = token.value.upper()
我正在使用 PostgreSQL 查询。我想从 SQL 查询中提取所有信息,例如:
# Example star-schema query used as parser input. The literal is split clause
# by clause for readability; the leading and trailing spaces of the original
# single-line string are preserved.
sql = (
    " select d_year, s_nation, p_category,"
    " sum(lo_revenue - lo_supplycost) as profit"
    " from DATES, CUSTOMER, SUPPLIER, PART, LINEORDER"
    " where lo_custkey = c_custkey"
    " and lo_suppkey = s_suppkey"
    " and lo_partkey = p_partkey"
    " and lo_orderdate = d_datekey"
    " and c_region = 'AFRICA'"
    " and s_region = 'AFRICA'"
    " and (d_year = 1996 or d_year = 1997)"
    " and (p_mfgr = 'MFGR#2' or p_mfgr = 'MFGR#4')"
    " group by d_year, s_nation, p_category"
    " order by d_year, s_nation, p_category "
)
我想获取所有涉及的表、所有选择谓词和所有连接谓词,以及 GROUP BY 部分和 ORDER BY 部分。
我使用了 sqlparse
并且找到了一种只获取相关表的方法。
有没有关于如何提取此信息的示例?
下面的算法可以给出各个关键字之间的确切元素。我使用了 sqlparse:
# Walk the top-level tokens of the first parsed statement and print what each
# clause contains: SELECT attributes, FROM tables, WHERE predicates, and the
# GROUP BY / ORDER BY attributes.
#
# NOTE(review): `sql` must already be defined, and IdentifierList, Identifier,
# Where, Comparison, Parenthesis (sqlparse.sql) as well as Keyword, DML
# (sqlparse.tokens) are assumed to be imported elsewhere -- confirm.
parsed = sqlparse.parse(sql)
stmt = parsed[0]

# Output label for every clause whose identifiers are listed. WHERE is absent
# on purpose: its predicates are printed when the Where token itself is met.
clause_labels = {
    "SELECT": "Attr = ",
    "FROM": "TAB = ",
    "ORDER BY": "ORDERBY att = ",
    "GROUP BY": "GROUPBY att = ",
}

# Clause opened by the most recent clause keyword; None until one is seen.
# A single state variable replaces five mutually exclusive boolean flags.
current_clause = None

for token in stmt.tokens:
    # Identifiers belong to the clause opened by an *earlier* token, so they
    # are reported before the state is updated for the current token.
    label = clause_labels.get(current_clause)
    if label is not None:
        if isinstance(token, IdentifierList):
            for identifier in token.get_identifiers():
                print("{} {}\n".format(label, identifier))
        elif isinstance(token, Identifier):
            print("{} {}\n".format(label, token))

    if isinstance(token, Where):
        # The WHERE clause arrives as one grouped token; only its direct
        # children that are Comparison or Parenthesis instances are reported.
        current_clause = "WHERE"
        for where_token in token:
            if isinstance(where_token, Comparison):
                print("{} {}\n".format("Comparaison = ", where_token))
            elif isinstance(where_token, Parenthesis):
                print("{} {}\n".format("Parenthesis = ", where_token))
    elif token.ttype is DML and token.value.upper() == "SELECT":
        current_clause = "SELECT"
    elif token.ttype is Keyword and token.value.upper() in (
        "FROM",
        "GROUP BY",
        "ORDER BY",
    ):
        current_clause = token.value.upper()