在 Python 中解析 SQL SELECT 语句以获取 WHERE 子句中的条件

Parse sql select statement to fetch the where clause conditions in python

我有一个 sql 查询,我想将 where 子句中的所有条件提取到 Python 字典中。

例如,

import sqlparse

# Sample query whose WHERE clause should be split into individual conditions.
s = "select count(*) from users where employee_type = 'Employee' AND (employment_status = 'Active' OR employment_status = 'On Leave') AND (time_type='Full time' OR country_code <> 'US') AND hire_date < NOW() AND email_work IS NOT NULL AND LENGTH(email_work) > 0 AND NOT (job_profile_id IN ('8802 - Comm Ops - 1', '8801 - CityOps - 2', '10034', '10455', '21014', '21015', '21016', '21018', '21017', '21019') AND country_code = 'IE') AND job_profile_id NOT IN ('20992', '20993', '20994', '20995', '20996', '20997') AND country_code NOT IN ('CN', 'MO', 'SG', 'MY', 'TH', 'VN', 'MM', 'KH', 'PH', 'ID')"

# Parse the query text; the first parsed statement is used below.
parsed = sqlparse.parse(s)
# Take the last top-level token of that statement -- for this query it is
# expected to be the WHERE clause (NOTE(review): holds only when nothing
# follows the WHERE clause).
where = parsed[0][-1]

# Accumulator for the {'key': ..., 'value': ...} entries found in the clause.
sql_tokens = []
def get_tokens(where):
    """Recursively collect named conditions from a parsed token group.

    For each child of ``where.tokens`` that reports a real name and is not
    a Parenthesis, a ``{'key': name, 'value': token text}`` dict is appended
    to the module-level ``sql_tokens`` list. Every other child is descended
    into with a recursive call. Nothing is returned.
    """
    for i in where.tokens:
        try:
            name = i.get_real_name()
            if name and not isinstance(i, sqlparse.sql.Parenthesis):
                # sql_tokens.append("{0} - {1} - {2}".format(str(i), str(name), i.value))
                sql_tokens.append({
                    'key': str(name),
                    # Only this token's own text is stored; sibling tokens
                    # that follow it are not part of the value.
                    'value': i.value,
                })
            else:
                # Unnamed group or parenthesised expression: look inside it.
                get_tokens(i)
        except Exception as e:
            # Any error raised while handling this child is discarded and
            # the child is skipped (presumably this is how leaf tokens
            # without get_real_name()/tokens are passed over -- confirm).
            pass


# Fill sql_tokens from the WHERE clause, then print one entry per line.
get_tokens(where)
for i in sql_tokens:
    # Parenthesised form prints the same text on Python 2 and also runs on
    # Python 3, where the bare `print i` statement is a SyntaxError.
    print(i)

输出如下

{'value': u"employee_type = 'Employee'", 'key': 'employee_type'}
{'value': u"employment_status = 'Active'", 'key': 'employment_status'}
{'value': u"employment_status = 'On Leave'", 'key': 'employment_status'}
{'value': u"time_type='Full time'", 'key': 'time_type'}
{'value': u"country_code <> 'US'", 'key': 'country_code'}
{'value': u'hire_date < NOW()', 'key': 'hire_date'}
{'value': u'email_work', 'key': 'email_work'}
{'value': u'LENGTH(email_work) > 0', 'key': 'LENGTH'}
{'value': u'job_profile_id', 'key': 'job_profile_id'}
{'value': u"country_code = 'IE'", 'key': 'country_code'}
{'value': u'job_profile_id', 'key': 'job_profile_id'}
{'value': u'country_code', 'key': 'country_code'}

这里的问题是 IN 运算符。 检查 job_profile_id,它不包含列表。

调试时,不显示列表。

我无法解决这个问题。

请帮忙。

感谢帮助。

这是因为 IN 关键字和比较的树结构不同。例如,比较包括树中它下面的整个表达式。

如果您使用 parsed[0]._pprint_tree(),您可以看到嵌套在比较节点下的所有内容:

   |- 2 Comparison 'employ...'
   |  |- 0 Identifier 'employ...'
   |  |  `- 0 Name 'employ...'
   |  |- 1 Whitespace ' '
   |  |- 2 Comparison '='
   |  |- 3 Whitespace ' '
   |  `- 4 Single ''Emplo...'

但是,NOT IN 子句是一系列顺序节点:

   |- 36 Identifier 'job_pr...'
   |  `- 0 Name 'job_pr...'
   |- 37 Whitespace ' '
   |- 38 Keyword 'NOT'
   |- 39 Whitespace ' '
   |- 40 Keyword 'IN'
   |- 41 Whitespace ' '
   |- 42 Parenthesis '('2099...'
   |  |- 0 Punctuation '('
   |  |- 1 IdentifierList ''20992...'
   |  |  |- 0 Single "'20992'"
   |  |  |- 1 Punctuation ','
   |  |  |- 2 Whitespace ' '
   |  |  |- 3 Single "'20993'"
   |  |  |- 4 Punctuation ','
   |  |  |- 5 Whitespace ' '
   |  |  |- 6 Single "'20994'"
   |  |  |- 7 Punctuation ','
   |  |  |- 8 Whitespace ' '
   |  |  |- 9 Single "'20995'"
   |  |  |- 10 Punctuation ','
   |  |  |- 11 Whitespace ' '
   |  |  |- 12 Single "'20996'"
   |  |  |- 13 Punctuation ','
   |  |  |- 14 Whitespace ' '
   |  |  `- 15 Single "'20997'"
   |  `- 2 Punctuation ')'

最好的办法是观察标识符,然后向前跳转并保存下一个括号节点的值。虽然这不能处理所有可能的情况,但它确实可以处理您的 SQL 语句,并返回 job_profile_id 的值。

这是我修改后的代码:

import sqlparse

# Sample query whose WHERE clause should be split into individual conditions.
s = "select count(*) from users where employee_type = 'Employee' AND (employment_status = 'Active' OR employment_status = 'On Leave') AND (time_type='Full time' OR country_code <> 'US') AND hire_date < NOW() AND email_work IS NOT NULL AND LENGTH(email_work) > 0 AND NOT (job_profile_id IN ('8802 - Comm Ops - 1', '8801 - CityOps - 2', '10034', '10455', '21014', '21015', '21016', '21018', '21017', '21019') AND country_code = 'IE') AND job_profile_id NOT IN ('20992', '20993', '20994', '20995', '20996', '20997') AND country_code NOT IN ('CN', 'MO', 'SG', 'MY', 'TH', 'VN', 'MM', 'KH', 'PH', 'ID')"

# Parse the query text; the first parsed statement is used below.
parsed = sqlparse.parse(s)
# Take the last top-level token of that statement -- for this query it is
# expected to be the WHERE clause (NOTE(review): holds only when nothing
# follows the WHERE clause).
where = parsed[0][-1]

# Accumulator for the {'key': ..., 'value': ...} entries found in the clause.
sql_tokens = []
def get_tokens(where):
    """Collect WHERE-clause conditions from a parsed sqlparse token group.

    Walks ``where.tokens`` and appends ``{'key': ..., 'value': ...}`` dicts
    to the module-level ``sql_tokens`` list:

    * An Identifier is remembered. If a Parenthesis shows up later among
      the same siblings (the ``col [NOT] IN (...)`` layout), the pair is
      recorded with the identifier text as key and the parenthesis text as
      value.
    * Any other group that reports a real name (e.g. a Comparison) is
      recorded with that name as key and its flattened text as value.
    * Groups without a name are searched recursively.

    This is a heuristic for queries shaped like the sample, not a complete
    SQL condition parser. Nothing is returned.
    """
    identifier = None
    for i in where.tokens:
        try:
            name = i.get_real_name()
        except AttributeError:
            # Leaf tokens (whitespace, keywords, punctuation) do not offer
            # get_real_name(); they carry no condition of their own.
            continue

        if name and isinstance(i, sqlparse.sql.Identifier):
            # Hold on to the column; its value list may follow as a sibling.
            identifier = i
        elif identifier is not None and isinstance(i, sqlparse.sql.Parenthesis):
            sql_tokens.append({
                'key': str(identifier),
                # Previously `token.value`: `token` is not defined in this
                # scope, so the lookup raised NameError, the blanket except
                # hid it, and the IN-list was never stored.
                'value': i.value,
            })
            # The identifier is consumed; do not let it pair with a later,
            # unrelated parenthesised group.
            identifier = None
        elif name:
            identifier = None
            sql_tokens.append({
                'key': str(name),
                'value': u''.join(token.value for token in i.flatten()),
            })
        else:
            get_tokens(i)

# Fill sql_tokens from the WHERE clause, then show everything collected.
get_tokens(where)
# Parenthesised form prints the same text on Python 2 and also runs on
# Python 3, where the bare `print sql_tokens` statement is a SyntaxError.
print(sql_tokens)