解析 T-SQL 以提取部分 WHERE 子句
Parsing T-SQL To Extract Part of WHERE Clause
我有一个包含 'curves' 的大型 SQL 数据库。每条曲线都有一个ID(curveid)。我正在尝试确定每条曲线的主要用户以及是否使用它。为此,DBA 提供了针对数据库执行的所有语句的日志。
这些语句可能非常复杂。我想要做的就是提取正在查询的 curveids。
示例语句如下:
WITH G AS ( SELECT [Timevalue] FROM [mc].[GranularityLookup]
WHERE [TimeValue] BETWEEN '19-Jul-2017 00:00' AND '30-Sep-2017 00:00'
AND [1 Hr] = 1),
D AS ( SELECT [CurveID], [DeliveryDate], [PublishDate], AVG([Value]) Value, MAX([PeriodNumber]) PeriodNumber
FROM mc.CURVEID_6657_1_LATEST data
JOIN
(SELECT CurveID ID, DeliveryDate dDate, MAX(PublishDate) pDate
FROM mc.CURVEID_6657_1_LATEST
WHERE CurveID = 90564
AND DeliveryDate >= '19-Jul-2017 00:00' AND DeliveryDate <= '30-Sep-2017 00:00'
GROUP BY DeliveryDate, CurveID ) Dates
ON data.DeliveryDate = dates.dDate AND data.PublishDate = dates.pDate
WHERE data.CurveID = 90564
AND data.DeliveryDate >= '19-Jul-2017 00:00' AND data.DeliveryDate <= '30-Sep-2017 00:00'
GROUP BY [CurveID], [PublishDate], [DeliveryDate] )
SELECT
G.[TimeValue] [DeliveryDate] , D.[PublishDate], D.[Value], D.[PeriodNumber]
FROM
G
LEFT JOIN
D
ON
G.[TimeValue] = D.[DeliveryDate]
ORDER BY DeliveryDate ASC, PeriodNumber ASC, publishDate DESC
从这个语句中,我感兴趣的是提取用户查询 curveid 90564。
该语句也可能类似于以下任一内容:
SELECT * FROM anytable WHERE curveid = 123 AND deliverydate BETWEEN '2017-01-01' AND '2017-02-01'
或
SELECT * FROM mc.anytable WHERE curveid IN (1,2,3,4,5,6,7)
同样,我只想知道曲线 ID。我不关心任何其他子句。
我正在使用 Microsoft.SqlServer.TransactSql.ScriptDom 命名空间来解析 SQL 并且已经到了可以使用类似于下面的代码(从其他一些示例拼凑而成)来识别所有 WHERE 语句的地步:
// Sample statement from the statement log; it filters on CurveID = 90564.
// The result-column alias is written as [DeliveryDate] on a single line so that the
// final ORDER BY DeliveryDate refers to an existing column name.
string sql = @"WITH
G AS ( SELECT [Timevalue] FROM [mc].[GranularityLookup]
WHERE [TimeValue] BETWEEN '19-Jul-2017 00:00' AND '30-Sep-2017 00:00'
AND [1 Hr] = 1),
D AS ( SELECT [CurveID], [DeliveryDate], [PublishDate], AVG([Value]) Value, MAX([PeriodNumber]) PeriodNumber
FROM mc.CURVEID_6657_1_LATEST data
JOIN
(SELECT CurveID ID, DeliveryDate dDate, MAX(PublishDate) pDate
FROM mc.CURVEID_6657_1_LATEST
WHERE CurveID = 90564
AND DeliveryDate >= '19-Jul-2017 00:00' AND DeliveryDate <= '30-Sep-2017 00:00'
GROUP BY DeliveryDate, CurveID ) Dates
ON data.DeliveryDate = dates.dDate AND data.PublishDate = dates.pDate
WHERE data.CurveID = 90564
AND data.DeliveryDate >= '19-Jul-2017 00:00' AND data.DeliveryDate <= '30-Sep-2017 00:00'
GROUP BY [CurveID], [PublishDate], [DeliveryDate] )
SELECT
G.[TimeValue] [DeliveryDate] , D.[PublishDate], D.[Value], D.[PeriodNumber]
FROM
G
LEFT JOIN
D
ON
G.[TimeValue] = D.[DeliveryDate]
ORDER BY DeliveryDate ASC, PeriodNumber ASC, publishDate DESC";

var parser = new TSql120Parser(false);
IList<ParseError> errors;
TSqlFragment fragment;

// The reader is only needed while parsing, so release it right afterwards.
using (var reader = new StringReader(sql))
{
    fragment = parser.Parse(reader, out errors);
}

// Stop here rather than walk a missing or partially built tree.
if (fragment == null || (errors != null && errors.Count > 0))
{
    throw new System.InvalidOperationException("The T-SQL statement could not be parsed.");
}

var whereVisitor = new WhereVisitor();
fragment.Accept(whereVisitor);
// All WHERE clauses of the statement are now in whereVisitor.WhereStatements.
/// <summary>
/// Fragment visitor that keeps a reference to every <see cref="WhereClause"/> passed to it.
/// </summary>
class WhereVisitor : TSqlConcreteFragmentVisitor
{
    /// <summary>The WHERE clauses received so far, in the order they were visited.</summary>
    public readonly List<WhereClause> WhereStatements;

    /// <summary>Creates a visitor with an empty result list.</summary>
    public WhereVisitor()
    {
        WhereStatements = new List<WhereClause>();
    }

    /// <summary>Appends <paramref name="node"/> to <see cref="WhereStatements"/>.</summary>
    /// <param name="node">The WHERE clause being visited.</param>
    public override void Visit(WhereClause node)
    {
        this.WhereStatements.Add(node);
    }
}
whereVisitor.WhereStatements 中的每个子句(本例中为 3 个)都公开一个名为 SearchCondition 的属性。不幸的是,到这里我就没有思路了。我想要实现的是如下逻辑:
// Desired logic (pseudo-code only, nothing is implemented yet): walk every WHERE clause
// collected in whereVisitor.WhereStatements and pull out the curve ids it filters on.
foreach (var clause in whereVisitor.WhereStatements)
{
// IF any part of the clause filters based on curveid THEN
// Capture curveIDs
// END IF
}
其他详情:
- 使用 C# (.net 4.0)
- SQL 服务器 2008
- 此 DLL 是 Microsoft.SqlServer.TransactSql.ScriptDom(在我的环境中位于 'c:\Program Files (x86)\Microsoft SQL Server\120\Tools\PowerShell\Modules\SQLPS\Microsoft.SqlServer.TransactSql.ScriptDom.dll')
编辑 1
一些附加信息:
- CurveID 是另一张表的键。在这种情况下,对它进行运算或范围比较(例如 curveId+1 或 curveId <= 10)没有意义。
编辑 2(部分解决方案)
下面的访问者有助于处理 WHERE 子句形如 curveid = 123 的情况:
/// <summary>
/// Collects the integer literals that a statement compares with a column named
/// <c>curveid</c>, for example <c>curveid = 123</c> or <c>123 = alias.CurveID</c>.
/// </summary>
class CurveIdVisitor : TSqlConcreteFragmentVisitor
{
    /// <summary>Curve ids captured so far, in visiting order; duplicates are kept.</summary>
    public readonly List<int> CurveIds = new List<int>();

    /// <summary>
    /// Captures the literal when <paramref name="exp"/> compares a <c>curveid</c> column
    /// with an integer literal, whichever side the column is on.
    /// </summary>
    /// <param name="exp">The comparison being visited; a null value is ignored.</param>
    public override void Visit(BooleanComparisonExpression exp)
    {
        if (exp == null)
        {
            return;
        }

        // At most one of the two orientations can match.
        Capture(exp.FirstExpression, exp.SecondExpression);
        Capture(exp.SecondExpression, exp.FirstExpression);
    }

    /// <summary>
    /// Adds the literal to <see cref="CurveIds"/> when <paramref name="columnSide"/> is a
    /// column whose last name part is <c>curveid</c> and <paramref name="literalSide"/> is
    /// an integer literal that fits in an <c>int</c>.
    /// </summary>
    private void Capture(ScalarExpression columnSide, ScalarExpression literalSide)
    {
        var column = columnSide as ColumnReferenceExpression;
        var literal = literalSide as IntegerLiteral;
        if (column == null || literal == null)
        {
            return;
        }

        // NOTE(review): presumably wildcard / pseudo-column references have no identifier
        // list - guard instead of dereferencing null.
        var name = column.MultiPartIdentifier;
        if (name == null || name.Identifiers == null || name.Identifiers.Count == 0)
        {
            return;
        }

        // Only the last part is the column name: "alias.curveid" yields alias and curveid.
        string columnName = name.Identifiers[name.Identifiers.Count - 1].Value;

        // Ordinal, case-insensitive match; lower-casing with the current culture can turn
        // "CURVEID" into something other than "curveid".
        if (!string.Equals(columnName, "curveid", System.StringComparison.OrdinalIgnoreCase))
        {
            return;
        }

        // Literals that do not fit in an int are skipped instead of throwing.
        int curveId;
        if (int.TryParse(
                literal.Value,
                System.Globalization.NumberStyles.Integer,
                System.Globalization.CultureInfo.InvariantCulture,
                out curveId))
        {
            CurveIds.Add(curveId);
        }
    }
}
终于解决了这个问题,希望这对以后的其他人有所帮助。也许其他人读到这里后能提供更好的解决方案。
/// <summary>
/// Extracts the curve ids that a T-SQL statement filters on.
/// </summary>
public class SqlParser
{
    /// <summary>
    /// Parses <paramref name="sql"/> and returns the distinct integer values found by
    /// <c>CurveIdVisitor</c> (comparisons) and <c>InPredicateVisitor</c> (IN lists).
    /// </summary>
    /// <param name="sql">The T-SQL text to analyse.</param>
    /// <returns>The distinct curve ids; empty when none are found.</returns>
    /// <exception cref="System.ArgumentNullException"><paramref name="sql"/> is null.</exception>
    public List<int> GetQueriedCurveIds(string sql)
    {
        if (sql == null)
        {
            throw new System.ArgumentNullException("sql");
        }

        var parser = new TSql120Parser(false);
        IList<ParseError> errors;
        TSqlFragment fragment;

        // The reader is only needed while parsing, so release it right afterwards.
        using (var reader = new StringReader(sql))
        {
            fragment = parser.Parse(reader, out errors);
        }

        // Parse errors are deliberately not fatal: whatever tree was produced is scanned.
        // NOTE(review): the parser may hand back no tree at all for unparsable input -
        // confirm against the ScriptDom documentation; the guard is harmless either way.
        List<int> curveIds = new List<int>();
        if (fragment == null)
        {
            return curveIds;
        }

        CurveIdVisitor comparisonVisitor = new CurveIdVisitor();
        InPredicateVisitor inListVisitor = new InPredicateVisitor();
        fragment.AcceptChildren(comparisonVisitor);
        fragment.AcceptChildren(inListVisitor);

        curveIds.AddRange(comparisonVisitor.CurveIds);
        curveIds.AddRange(inListVisitor.CurveIds);
        return curveIds.Distinct().ToList();
    }
}
/// <summary>
/// Collects the integer literals that a statement compares with a column named
/// <c>curveid</c>, for example <c>curveid = 123</c> or <c>123 = alias.CurveID</c>.
/// </summary>
class CurveIdVisitor : TSqlConcreteFragmentVisitor
{
    /// <summary>Curve ids captured so far, in visiting order; duplicates are kept.</summary>
    public readonly List<int> CurveIds = new List<int>();

    /// <summary>
    /// Captures the literal when <paramref name="exp"/> compares a <c>curveid</c> column
    /// with an integer literal, whichever side the column is on.
    /// </summary>
    /// <param name="exp">The comparison being visited; a null value is ignored.</param>
    public override void Visit(BooleanComparisonExpression exp)
    {
        if (exp == null)
        {
            return;
        }

        // At most one of the two orientations can match.
        Capture(exp.FirstExpression, exp.SecondExpression);
        Capture(exp.SecondExpression, exp.FirstExpression);
    }

    /// <summary>
    /// Adds the literal to <see cref="CurveIds"/> when <paramref name="columnSide"/> is a
    /// column whose last name part is <c>curveid</c> and <paramref name="literalSide"/> is
    /// an integer literal that fits in an <c>int</c>.
    /// </summary>
    private void Capture(ScalarExpression columnSide, ScalarExpression literalSide)
    {
        var column = columnSide as ColumnReferenceExpression;
        var literal = literalSide as IntegerLiteral;
        if (column == null || literal == null)
        {
            return;
        }

        // NOTE(review): presumably wildcard / pseudo-column references have no identifier
        // list - guard instead of dereferencing null.
        var name = column.MultiPartIdentifier;
        if (name == null || name.Identifiers == null || name.Identifiers.Count == 0)
        {
            return;
        }

        // Only the last part is the column name: "alias.curveid" yields alias and curveid.
        string columnName = name.Identifiers[name.Identifiers.Count - 1].Value;

        // Ordinal, case-insensitive match; lower-casing with the current culture can turn
        // "CURVEID" into something other than "curveid".
        if (!string.Equals(columnName, "curveid", System.StringComparison.OrdinalIgnoreCase))
        {
            return;
        }

        // Literals that do not fit in an int are skipped instead of throwing.
        int curveId;
        if (int.TryParse(
                literal.Value,
                System.Globalization.NumberStyles.Integer,
                System.Globalization.CultureInfo.InvariantCulture,
                out curveId))
        {
            CurveIds.Add(curveId);
        }
    }
}
/// <summary>
/// Collects the integer literals listed in <c>IN (...)</c> predicates whose tested
/// expression is a column named <c>curveid</c>, for example <c>curveid IN (1,2,3)</c>.
/// </summary>
class InPredicateVisitor : TSqlConcreteFragmentVisitor
{
    /// <summary>Curve ids captured so far, in visiting order; duplicates are kept.</summary>
    public readonly List<int> CurveIds = new List<int>();

    /// <summary>
    /// Captures every integer literal in the value list of <paramref name="exp"/> when the
    /// predicate tests a <c>curveid</c> column. Non-literal list entries are ignored.
    /// </summary>
    /// <param name="exp">The IN predicate being visited; a null value is ignored.</param>
    public override void Visit(InPredicate exp)
    {
        if (exp == null || exp.Values == null)
        {
            return;
        }

        var column = exp.Expression as ColumnReferenceExpression;
        if (column == null)
        {
            return;
        }

        // NOTE(review): presumably wildcard / pseudo-column references have no identifier
        // list - guard instead of dereferencing null.
        var name = column.MultiPartIdentifier;
        if (name == null || name.Identifiers == null || name.Identifiers.Count == 0)
        {
            return;
        }

        // Only the last part is the column name: "alias.curveid" yields alias and curveid.
        string columnName = name.Identifiers[name.Identifiers.Count - 1].Value;

        // Ordinal, case-insensitive match; lower-casing with the current culture can turn
        // "CURVEID" into something other than "curveid".
        if (!string.Equals(columnName, "curveid", System.StringComparison.OrdinalIgnoreCase))
        {
            return;
        }

        foreach (var value in exp.Values)
        {
            var literal = value as IntegerLiteral;
            if (literal == null)
            {
                continue;
            }

            // Literals that do not fit in an int are skipped instead of throwing.
            int curveId;
            if (int.TryParse(
                    literal.Value,
                    System.Globalization.NumberStyles.Integer,
                    System.Globalization.CultureInfo.InvariantCulture,
                    out curveId))
            {
                CurveIds.Add(curveId);
            }
        }
    }
}
这是演示答案的缩减代码。在现实生活中,您可能希望检查 ParseError 集合并添加一些错误处理!
我有一个包含 'curves' 的大型 SQL 数据库。每条曲线都有一个ID(curveid)。我正在尝试确定每条曲线的主要用户以及是否使用它。为此,DBA 提供了针对数据库执行的所有语句的日志。
这些语句可能非常复杂。我想要做的就是提取正在查询的 curveids。
示例语句如下:
WITH G AS ( SELECT [Timevalue] FROM [mc].[GranularityLookup]
WHERE [TimeValue] BETWEEN '19-Jul-2017 00:00' AND '30-Sep-2017 00:00'
AND [1 Hr] = 1),
D AS ( SELECT [CurveID], [DeliveryDate], [PublishDate], AVG([Value]) Value, MAX([PeriodNumber]) PeriodNumber
FROM mc.CURVEID_6657_1_LATEST data
JOIN
(SELECT CurveID ID, DeliveryDate dDate, MAX(PublishDate) pDate
FROM mc.CURVEID_6657_1_LATEST
WHERE CurveID = 90564
AND DeliveryDate >= '19-Jul-2017 00:00' AND DeliveryDate <= '30-Sep-2017 00:00'
GROUP BY DeliveryDate, CurveID ) Dates
ON data.DeliveryDate = dates.dDate AND data.PublishDate = dates.pDate
WHERE data.CurveID = 90564
AND data.DeliveryDate >= '19-Jul-2017 00:00' AND data.DeliveryDate <= '30-Sep-2017 00:00'
GROUP BY [CurveID], [PublishDate], [DeliveryDate] )
SELECT
G.[TimeValue] [DeliveryDate] , D.[PublishDate], D.[Value], D.[PeriodNumber]
FROM
G
LEFT JOIN
D
ON
G.[TimeValue] = D.[DeliveryDate]
ORDER BY DeliveryDate ASC, PeriodNumber ASC, publishDate DESC
从这个语句中,我感兴趣的是提取用户查询 curveid 90564。
该语句也可能类似于以下任一内容:
SELECT * FROM anytable WHERE curveid = 123 AND deliverydate BETWEEN '2017-01-01' AND '2017-02-01'
或
SELECT * FROM mc.anytable WHERE curveid IN (1,2,3,4,5,6,7)
同样,我只想知道曲线 ID。我不关心任何其他子句。
我正在使用 Microsoft.SqlServer.TransactSql.ScriptDom 命名空间来解析 SQL 并且已经到了可以使用类似于下面的代码(从其他一些示例拼凑而成)来识别所有 WHERE 语句的地步:
// Sample statement from the statement log; it filters on CurveID = 90564.
// The result-column alias is written as [DeliveryDate] on a single line so that the
// final ORDER BY DeliveryDate refers to an existing column name.
string sql = @"WITH
G AS ( SELECT [Timevalue] FROM [mc].[GranularityLookup]
WHERE [TimeValue] BETWEEN '19-Jul-2017 00:00' AND '30-Sep-2017 00:00'
AND [1 Hr] = 1),
D AS ( SELECT [CurveID], [DeliveryDate], [PublishDate], AVG([Value]) Value, MAX([PeriodNumber]) PeriodNumber
FROM mc.CURVEID_6657_1_LATEST data
JOIN
(SELECT CurveID ID, DeliveryDate dDate, MAX(PublishDate) pDate
FROM mc.CURVEID_6657_1_LATEST
WHERE CurveID = 90564
AND DeliveryDate >= '19-Jul-2017 00:00' AND DeliveryDate <= '30-Sep-2017 00:00'
GROUP BY DeliveryDate, CurveID ) Dates
ON data.DeliveryDate = dates.dDate AND data.PublishDate = dates.pDate
WHERE data.CurveID = 90564
AND data.DeliveryDate >= '19-Jul-2017 00:00' AND data.DeliveryDate <= '30-Sep-2017 00:00'
GROUP BY [CurveID], [PublishDate], [DeliveryDate] )
SELECT
G.[TimeValue] [DeliveryDate] , D.[PublishDate], D.[Value], D.[PeriodNumber]
FROM
G
LEFT JOIN
D
ON
G.[TimeValue] = D.[DeliveryDate]
ORDER BY DeliveryDate ASC, PeriodNumber ASC, publishDate DESC";

var parser = new TSql120Parser(false);
IList<ParseError> errors;
TSqlFragment fragment;

// The reader is only needed while parsing, so release it right afterwards.
using (var reader = new StringReader(sql))
{
    fragment = parser.Parse(reader, out errors);
}

// Stop here rather than walk a missing or partially built tree.
if (fragment == null || (errors != null && errors.Count > 0))
{
    throw new System.InvalidOperationException("The T-SQL statement could not be parsed.");
}

var whereVisitor = new WhereVisitor();
fragment.Accept(whereVisitor);
// All WHERE clauses of the statement are now in whereVisitor.WhereStatements.
/// <summary>
/// Fragment visitor that keeps a reference to every <see cref="WhereClause"/> passed to it.
/// </summary>
class WhereVisitor : TSqlConcreteFragmentVisitor
{
    /// <summary>The WHERE clauses received so far, in the order they were visited.</summary>
    public readonly List<WhereClause> WhereStatements;

    /// <summary>Creates a visitor with an empty result list.</summary>
    public WhereVisitor()
    {
        WhereStatements = new List<WhereClause>();
    }

    /// <summary>Appends <paramref name="node"/> to <see cref="WhereStatements"/>.</summary>
    /// <param name="node">The WHERE clause being visited.</param>
    public override void Visit(WhereClause node)
    {
        this.WhereStatements.Add(node);
    }
}
whereVisitor.WhereStatements 中的每个子句(本例中为 3 个)都公开一个名为 SearchCondition 的属性。不幸的是,到这里我就没有思路了。我想要实现的是如下逻辑:
// Desired logic (pseudo-code only, nothing is implemented yet): walk every WHERE clause
// collected in whereVisitor.WhereStatements and pull out the curve ids it filters on.
foreach (var clause in whereVisitor.WhereStatements)
{
// IF any part of the clause filters based on curveid THEN
// Capture curveIDs
// END IF
}
其他详情:
- 使用 C# (.net 4.0)
- SQL 服务器 2008
- 此 DLL 是 Microsoft.SqlServer.TransactSql.ScriptDom(在我的环境中位于 'c:\Program Files (x86)\Microsoft SQL Server\120\Tools\PowerShell\Modules\SQLPS\Microsoft.SqlServer.TransactSql.ScriptDom.dll')
编辑 1
一些附加信息:
- CurveID 是另一张表的键。在这种情况下,对它进行运算或范围比较(例如 curveId+1 或 curveId <= 10)没有意义。
编辑 2(部分解决方案)
下面的访问者有助于处理 WHERE 子句形如 curveid = 123 的情况:
/// <summary>
/// Collects the integer literals that a statement compares with a column named
/// <c>curveid</c>, for example <c>curveid = 123</c> or <c>123 = alias.CurveID</c>.
/// </summary>
class CurveIdVisitor : TSqlConcreteFragmentVisitor
{
    /// <summary>Curve ids captured so far, in visiting order; duplicates are kept.</summary>
    public readonly List<int> CurveIds = new List<int>();

    /// <summary>
    /// Captures the literal when <paramref name="exp"/> compares a <c>curveid</c> column
    /// with an integer literal, whichever side the column is on.
    /// </summary>
    /// <param name="exp">The comparison being visited; a null value is ignored.</param>
    public override void Visit(BooleanComparisonExpression exp)
    {
        if (exp == null)
        {
            return;
        }

        // At most one of the two orientations can match.
        Capture(exp.FirstExpression, exp.SecondExpression);
        Capture(exp.SecondExpression, exp.FirstExpression);
    }

    /// <summary>
    /// Adds the literal to <see cref="CurveIds"/> when <paramref name="columnSide"/> is a
    /// column whose last name part is <c>curveid</c> and <paramref name="literalSide"/> is
    /// an integer literal that fits in an <c>int</c>.
    /// </summary>
    private void Capture(ScalarExpression columnSide, ScalarExpression literalSide)
    {
        var column = columnSide as ColumnReferenceExpression;
        var literal = literalSide as IntegerLiteral;
        if (column == null || literal == null)
        {
            return;
        }

        // NOTE(review): presumably wildcard / pseudo-column references have no identifier
        // list - guard instead of dereferencing null.
        var name = column.MultiPartIdentifier;
        if (name == null || name.Identifiers == null || name.Identifiers.Count == 0)
        {
            return;
        }

        // Only the last part is the column name: "alias.curveid" yields alias and curveid.
        string columnName = name.Identifiers[name.Identifiers.Count - 1].Value;

        // Ordinal, case-insensitive match; lower-casing with the current culture can turn
        // "CURVEID" into something other than "curveid".
        if (!string.Equals(columnName, "curveid", System.StringComparison.OrdinalIgnoreCase))
        {
            return;
        }

        // Literals that do not fit in an int are skipped instead of throwing.
        int curveId;
        if (int.TryParse(
                literal.Value,
                System.Globalization.NumberStyles.Integer,
                System.Globalization.CultureInfo.InvariantCulture,
                out curveId))
        {
            CurveIds.Add(curveId);
        }
    }
}
终于解决了这个问题,希望这对以后的其他人有所帮助。也许其他人读到这里后能提供更好的解决方案。
/// <summary>
/// Extracts the curve ids that a T-SQL statement filters on.
/// </summary>
public class SqlParser
{
    /// <summary>
    /// Parses <paramref name="sql"/> and returns the distinct integer values found by
    /// <c>CurveIdVisitor</c> (comparisons) and <c>InPredicateVisitor</c> (IN lists).
    /// </summary>
    /// <param name="sql">The T-SQL text to analyse.</param>
    /// <returns>The distinct curve ids; empty when none are found.</returns>
    /// <exception cref="System.ArgumentNullException"><paramref name="sql"/> is null.</exception>
    public List<int> GetQueriedCurveIds(string sql)
    {
        if (sql == null)
        {
            throw new System.ArgumentNullException("sql");
        }

        var parser = new TSql120Parser(false);
        IList<ParseError> errors;
        TSqlFragment fragment;

        // The reader is only needed while parsing, so release it right afterwards.
        using (var reader = new StringReader(sql))
        {
            fragment = parser.Parse(reader, out errors);
        }

        // Parse errors are deliberately not fatal: whatever tree was produced is scanned.
        // NOTE(review): the parser may hand back no tree at all for unparsable input -
        // confirm against the ScriptDom documentation; the guard is harmless either way.
        List<int> curveIds = new List<int>();
        if (fragment == null)
        {
            return curveIds;
        }

        CurveIdVisitor comparisonVisitor = new CurveIdVisitor();
        InPredicateVisitor inListVisitor = new InPredicateVisitor();
        fragment.AcceptChildren(comparisonVisitor);
        fragment.AcceptChildren(inListVisitor);

        curveIds.AddRange(comparisonVisitor.CurveIds);
        curveIds.AddRange(inListVisitor.CurveIds);
        return curveIds.Distinct().ToList();
    }
}
/// <summary>
/// Collects the integer literals that a statement compares with a column named
/// <c>curveid</c>, for example <c>curveid = 123</c> or <c>123 = alias.CurveID</c>.
/// </summary>
class CurveIdVisitor : TSqlConcreteFragmentVisitor
{
    /// <summary>Curve ids captured so far, in visiting order; duplicates are kept.</summary>
    public readonly List<int> CurveIds = new List<int>();

    /// <summary>
    /// Captures the literal when <paramref name="exp"/> compares a <c>curveid</c> column
    /// with an integer literal, whichever side the column is on.
    /// </summary>
    /// <param name="exp">The comparison being visited; a null value is ignored.</param>
    public override void Visit(BooleanComparisonExpression exp)
    {
        if (exp == null)
        {
            return;
        }

        // At most one of the two orientations can match.
        Capture(exp.FirstExpression, exp.SecondExpression);
        Capture(exp.SecondExpression, exp.FirstExpression);
    }

    /// <summary>
    /// Adds the literal to <see cref="CurveIds"/> when <paramref name="columnSide"/> is a
    /// column whose last name part is <c>curveid</c> and <paramref name="literalSide"/> is
    /// an integer literal that fits in an <c>int</c>.
    /// </summary>
    private void Capture(ScalarExpression columnSide, ScalarExpression literalSide)
    {
        var column = columnSide as ColumnReferenceExpression;
        var literal = literalSide as IntegerLiteral;
        if (column == null || literal == null)
        {
            return;
        }

        // NOTE(review): presumably wildcard / pseudo-column references have no identifier
        // list - guard instead of dereferencing null.
        var name = column.MultiPartIdentifier;
        if (name == null || name.Identifiers == null || name.Identifiers.Count == 0)
        {
            return;
        }

        // Only the last part is the column name: "alias.curveid" yields alias and curveid.
        string columnName = name.Identifiers[name.Identifiers.Count - 1].Value;

        // Ordinal, case-insensitive match; lower-casing with the current culture can turn
        // "CURVEID" into something other than "curveid".
        if (!string.Equals(columnName, "curveid", System.StringComparison.OrdinalIgnoreCase))
        {
            return;
        }

        // Literals that do not fit in an int are skipped instead of throwing.
        int curveId;
        if (int.TryParse(
                literal.Value,
                System.Globalization.NumberStyles.Integer,
                System.Globalization.CultureInfo.InvariantCulture,
                out curveId))
        {
            CurveIds.Add(curveId);
        }
    }
}
/// <summary>
/// Collects the integer literals listed in <c>IN (...)</c> predicates whose tested
/// expression is a column named <c>curveid</c>, for example <c>curveid IN (1,2,3)</c>.
/// </summary>
class InPredicateVisitor : TSqlConcreteFragmentVisitor
{
    /// <summary>Curve ids captured so far, in visiting order; duplicates are kept.</summary>
    public readonly List<int> CurveIds = new List<int>();

    /// <summary>
    /// Captures every integer literal in the value list of <paramref name="exp"/> when the
    /// predicate tests a <c>curveid</c> column. Non-literal list entries are ignored.
    /// </summary>
    /// <param name="exp">The IN predicate being visited; a null value is ignored.</param>
    public override void Visit(InPredicate exp)
    {
        if (exp == null || exp.Values == null)
        {
            return;
        }

        var column = exp.Expression as ColumnReferenceExpression;
        if (column == null)
        {
            return;
        }

        // NOTE(review): presumably wildcard / pseudo-column references have no identifier
        // list - guard instead of dereferencing null.
        var name = column.MultiPartIdentifier;
        if (name == null || name.Identifiers == null || name.Identifiers.Count == 0)
        {
            return;
        }

        // Only the last part is the column name: "alias.curveid" yields alias and curveid.
        string columnName = name.Identifiers[name.Identifiers.Count - 1].Value;

        // Ordinal, case-insensitive match; lower-casing with the current culture can turn
        // "CURVEID" into something other than "curveid".
        if (!string.Equals(columnName, "curveid", System.StringComparison.OrdinalIgnoreCase))
        {
            return;
        }

        foreach (var value in exp.Values)
        {
            var literal = value as IntegerLiteral;
            if (literal == null)
            {
                continue;
            }

            // Literals that do not fit in an int are skipped instead of throwing.
            int curveId;
            if (int.TryParse(
                    literal.Value,
                    System.Globalization.NumberStyles.Integer,
                    System.Globalization.CultureInfo.InvariantCulture,
                    out curveId))
            {
                CurveIds.Add(curveId);
            }
        }
    }
}
这是演示答案的缩减代码。在现实生活中,您可能希望检查 ParseError 集合并添加一些错误处理!