如何使用Python分析SQL查询?
How to analyse SQL Query using Python?
我想分析 SQL 查询以了解哪些硬编码值属于哪一列?例如,我有以下 SQL 查询:-
SELECT *
FROM (
SELECT DISTINCT id
,substring([data], 0, 497) AS [Instructions]
,'500' AS [Care_Code]
,cast(id AS VARCHAR) + cast(number AS VARCHAR) + 'pp' AS key
FROM people
WHERE ([data] LIKE '%communicated %')
UNION ALL
SELECT DISTINCT Patientid
,substring(pp, 0, 497) AS [Instructions]
,'500' AS [Care_Code]
,cast(id AS VARCHAR) + cast(number AS VARCHAR) + 'aa' AS key
FROM people
WHERE Instructions LIKE '%[A-Z]%'
我希望输出如下:-
Harcoded_value Column_Name
500 Care_Code
%communicated % data
%[A-Z]% Instructions
示例 2:-
查询:-
select distinct eid, count(distinct d.pid)
from SOAP s
inner join demographics d on s.pid=d.pid
inner join PS p on p.providerId=s.pid
where
p.npi in ('1316987761','1437366473','1912915638','1740253822')
and Convert(datetime,Convert(varchar,EncounterDate,101)) >='08/01/2016'
and Convert(datetime,Convert(varchar,EncounterDate,101)) <= '07/31/2017'
group by eid
预期输出:-
Harcoded_value Column
('1316987761','1437366473','1912915638','1740253822') p.npi
你可以试试
import re
import pandas as pd
s = """SELECT *
FROM (
SELECT DISTINCT id
,substring([data], 0, 497) AS [Instructions]
,'500' AS [Care_Code]
,cast(id AS VARCHAR) + cast(number AS VARCHAR) + 'pp' AS key
FROM people
WHERE ([data] LIKE '%communicated %')
UNION ALL
SELECT DISTINCT Patientid
,substring(pp, 0, 497) AS [Instructions]
,'500' AS [Care_Code]
,cast(id AS VARCHAR) + cast(number AS VARCHAR) + 'aa' AS key
FROM people
WHERE Instructions LIKE '%[A-Z]%'
and p.npi in ('1316987761','1437366473','1912915638','1740253822')
"""
results = {}
for value in re.findall(r"(([A-Za-z.]+ in )*(((\[.*\]|\w*) LIKE )*\(*'%*.+%*'\)*( AS (\w|\[.*\])*)*))", s):
splited_values = value[0].split(" ")
val = "".join(splited_values[2:])
if "AS" in value[0] and splited_values[2] != "key":
results[re.sub("\'|\"", "", splited_values[0])] = re.sub(r"\W", "", val)
elif "LIKE" in value[0] or "in" in value[0]:
val = val[:-1] if val[-1] == ")" and val[0] != "(" else val
results[re.sub("\'|\"", "", val)] = re.sub(r"\[|\]", "", splited_values[0])
df = pd.DataFrame(results.items(), columns=["Harcoded_value", "Column_Name"])
print(df)
输出
Harcoded_value Column_Name
500 Care_Code
communicated% data
%[A-Z]% nstructions
(1316987761,1437366473,1912915638,1740253822) p.npi
此代码将从查询中提取所有包含 '' 的值以及 LIKE 之前或 AS 之后的单词,而不是名称 key
,并将其存储在结果字典中。
收集完所有值后,它将创建一个包含 "Harcoded_value"
和 "Column_Name"
列的 DataFrame
我想分析 SQL 查询以了解哪些硬编码值属于哪一列?例如,我有以下 SQL 查询:-
SELECT *
FROM (
SELECT DISTINCT id
,substring([data], 0, 497) AS [Instructions]
,'500' AS [Care_Code]
,cast(id AS VARCHAR) + cast(number AS VARCHAR) + 'pp' AS key
FROM people
WHERE ([data] LIKE '%communicated %')
UNION ALL
SELECT DISTINCT Patientid
,substring(pp, 0, 497) AS [Instructions]
,'500' AS [Care_Code]
,cast(id AS VARCHAR) + cast(number AS VARCHAR) + 'aa' AS key
FROM people
WHERE Instructions LIKE '%[A-Z]%'
我希望输出如下:-
Harcoded_value Column_Name
500 Care_Code
%communicated % data
%[A-Z]% Instructions
示例 2:-
查询:-
select distinct eid, count(distinct d.pid)
from SOAP s
inner join demographics d on s.pid=d.pid
inner join PS p on p.providerId=s.pid
where
p.npi in ('1316987761','1437366473','1912915638','1740253822')
and Convert(datetime,Convert(varchar,EncounterDate,101)) >='08/01/2016'
and Convert(datetime,Convert(varchar,EncounterDate,101)) <= '07/31/2017'
group by eid
预期输出:-
Harcoded_value Column
('1316987761','1437366473','1912915638','1740253822') p.npi
你可以试试
import re
import pandas as pd
s = """SELECT *
FROM (
SELECT DISTINCT id
,substring([data], 0, 497) AS [Instructions]
,'500' AS [Care_Code]
,cast(id AS VARCHAR) + cast(number AS VARCHAR) + 'pp' AS key
FROM people
WHERE ([data] LIKE '%communicated %')
UNION ALL
SELECT DISTINCT Patientid
,substring(pp, 0, 497) AS [Instructions]
,'500' AS [Care_Code]
,cast(id AS VARCHAR) + cast(number AS VARCHAR) + 'aa' AS key
FROM people
WHERE Instructions LIKE '%[A-Z]%'
and p.npi in ('1316987761','1437366473','1912915638','1740253822')
"""
results = {}
for value in re.findall(r"(([A-Za-z.]+ in )*(((\[.*\]|\w*) LIKE )*\(*'%*.+%*'\)*( AS (\w|\[.*\])*)*))", s):
splited_values = value[0].split(" ")
val = "".join(splited_values[2:])
if "AS" in value[0] and splited_values[2] != "key":
results[re.sub("\'|\"", "", splited_values[0])] = re.sub(r"\W", "", val)
elif "LIKE" in value[0] or "in" in value[0]:
val = val[:-1] if val[-1] == ")" and val[0] != "(" else val
results[re.sub("\'|\"", "", val)] = re.sub(r"\[|\]", "", splited_values[0])
df = pd.DataFrame(results.items(), columns=["Harcoded_value", "Column_Name"])
print(df)
输出
Harcoded_value Column_Name
500 Care_Code
communicated% data
%[A-Z]% nstructions
(1316987761,1437366473,1912915638,1740253822) p.npi
此代码将从查询中提取所有包含 '' 的值以及 LIKE 之前或 AS 之后的单词,而不是名称 key
,并将其存储在结果字典中。
收集完所有值后,它将创建一个包含 "Harcoded_value"
和 "Column_Name"