在不改变查询流程的情况下对数据进行分组
Group data without changing query flow
我很难解释清楚我想要什么,所以标题可能不太明确,但我希望能用代码把它描述清楚。
我有一些数据,其中最重要的是两个值:时间 t 和对应的值 f(t)。它们存储在一张表中,例如:
1 - 1000
2 - 1200
3 - 1100
4 - 1500
...
我想用这些数据画一张图,这张图应该包含 N 个点。如果表的行数少于 N,就直接返回整张表;否则就需要对这些点进行分组,例如当 N = Count/2 时,对于上面的例子:
1 - (1000+1200)/2 = 1100
2 - (1100+1500)/2 = 1300
...
我写了一个 SQL 脚本(当 N >> Count 时它工作正常),其中 MonitoringDateTime 是 t,ResultCount 是 f(t):
-- Returns request-statistics points (ResourceType, MonitoringDateTime, ResultCount)
-- for agents whose ResourceTypeID shares at least one bit with @ResourceTypeID,
-- restricted to 'Result' events with Result = 1 generated between @DateFrom and
-- @DateTo (BETWEEN, so both bounds are inclusive).
--
-- Parameters:
--   @ResourceTypeID       bit mask tested against Agent.ResourceTypeID and ResourceType.ID
--   @DateFrom, @DateTo    inclusive bounds on AgentData.GeneratedOnUtc
--   @EstimatedPointCount  requested point count; it is multiplied by the number of
--                         matching resource types to form the divisor of the row count
ALTER PROCEDURE [dbo].[usp_GetRequestStatisticsData]
    @ResourceTypeID bigint,
    @DateFrom datetime,
    @DateTo datetime,
    @EstimatedPointCount int
AS
BEGIN
    SET NOCOUNT ON;
    SET ARITHABORT ON;

    DECLARE @groupSize int;
    DECLARE @resourceCount int;

    -- Number of resource types selected by the bit mask.
    SELECT @resourceCount = COUNT(*)
    FROM ResourceType
    WHERE ID & @ResourceTypeID > 0;

    -- Stage the matching rows, numbered chronologically across all resource types.
    SELECT
        d.ResultCount,
        MonitoringDateTime = d.GeneratedOnUtc,
        ResourceType = a.ResourceTypeID,
        ROW_NUMBER() OVER (ORDER BY d.GeneratedOnUtc ASC) AS [Row]
    INTO #t
    FROM dbo.AgentData d
    INNER JOIN dbo.Agent a ON a.CheckID = d.CheckID
    WHERE d.EventType = 'Result'
        AND a.ResourceTypeID & @ResourceTypeID > 0
        AND d.GeneratedOnUtc BETWEEN @DateFrom AND @DateTo
        AND d.Result = 1;

    -- Rows per output point (integer division). A zero or NULL divisor (no
    -- matching resource type, or @EstimatedPointCount of 0 / NULL) falls back
    -- to 0, i.e. "return every point", instead of raising a divide-by-zero
    -- error or silently returning no rows.
    SELECT @groupSize = COALESCE(COUNT(*) / NULLIF(@EstimatedPointCount * @resourceCount, 0), 0)
    FROM #t;

    IF @groupSize = 0
    BEGIN
        -- Not enough rows to fill even one group: return all points.
        SELECT ResourceType, MonitoringDateTime, ResultCount
        FROM #t;
    END
    ELSE
    BEGIN
        -- NOTE: [Row] is unique, so every (ResourceType, [Row]) group holds a
        -- single row and the AVG calls return that row's own values. In effect
        -- this keeps every @groupSize-th row rather than averaging neighbours.
        SELECT
            ResourceType,
            CAST(AVG(CAST(#t.MonitoringDateTime AS DECIMAL(18, 6))) AS DATETIME) MonitoringDateTime,
            AVG(ResultCount) ResultCount
        FROM #t
        WHERE [Row] % @groupSize = 0
        GROUP BY ResourceType, [Row]
        ORDER BY MonitoringDateTime;
    END
END
但是当 N ~= Count 时它就不行了,而且 insert 要花很多时间。这就是为什么我想使用 CTE,但 CTE 不能和 if else 语句一起使用。
所以我计算了组号的公式(用于 GroupBy 子句),因为我们有
GroupNumber = Count < N ? Row : Row*NumberOfGroups
其中 Count - table 中的行数,NumberOfGroups = Count/EstimatedPointCount
使用一些简单的数学运算我们得到一个公式
GroupNumber = Row + (Row*Count/EstimatedPointCount - Row)*MAX(Count - Count/EstimatedPointCount,0)/(Count - Count/EstimatedPointCount)
但由于 Count 聚合函数,它不起作用:
Column 'dbo.AgentData.ResultCount' is invalid in the select list because it is not contained in either an aggregate function or the GROUP BY clause.
我知道我的英语很差(我正在努力提高),但我仍抱有希望,所以请多多指教。
查询结果
-- Ad-hoc form of the data query: 'Result' events with Result = 1 generated
-- between 2015-01-28 and 2015-01-30 (BETWEEN includes both bounds), for agents
-- whose ResourceTypeID has at least one bit in common with the mask 1376256.
SELECT
    d.ResultCount,
    d.GeneratedOnUtc AS MonitoringDateTime,
    a.ResourceTypeID AS ResourceType
FROM dbo.AgentData AS d
INNER JOIN dbo.Agent AS a
    ON d.CheckID = a.CheckID
WHERE d.EventType = 'Result'
    AND d.Result = 1
    AND a.ResourceTypeID & 1376256 > 0
    AND d.GeneratedOnUtc BETWEEN '2015-01-28' AND '2015-01-30'
我没试过,不过与其使用下面这段查询:
-- Keeps only the rows whose number is a multiple of @groupSize. Because [Row]
-- is also a grouping column, each AVG is taken over the rows that share one
-- ResourceType and one [Row] value.
SELECT
    t.ResourceType,
    CAST(AVG(CAST(t.MonitoringDateTime AS DECIMAL(18, 6))) AS DATETIME) AS MonitoringDateTime,
    AVG(t.ResultCount) AS ResultCount
FROM #t AS t
WHERE t.[Row] % @groupSize = 0
GROUP BY t.ResourceType, t.[Row]
ORDER BY MonitoringDateTime
或许可以改成:
-- Averages consecutive runs of @groupSize rows. [Row] starts at 1, so it is
-- shifted to 0-based before the integer division; without the shift the first
-- bucket would hold only @groupSize - 1 rows and every later bucket would be
-- misaligned (rows 1..4 with @groupSize = 2 would land in buckets 0,1,1,2).
-- Assumes @groupSize > 0 -- confirm the caller handles the 0 case separately.
SELECT
    t.ResourceType,
    CAST(AVG(CAST(t.MonitoringDateTime AS DECIMAL(18, 6))) AS DATETIME) AS MonitoringDateTime,
    AVG(t.ResultCount) AS ResultCount
FROM #t AS t
GROUP BY t.ResourceType, CONVERT(int, (t.[Row] - 1) / @groupSize)
ORDER BY MonitoringDateTime
也许这能为您指出一个新的方向?转换为 int 会截断小数点后的所有内容,所以我希望这能给您更好的分组。您可能还需要让行号按资源类型分区(PARTITION BY)才能正常工作。
下面是一个使用 NTILE 的示例,使用的是您问题开头的简单示例数据:
-- Sample data taken from the simple example at the top of the question.
DECLARE @samples TABLE (ID int, sample int);
INSERT INTO @samples (ID, sample) VALUES
    (1, 1000),
    (2, 1200),
    (3, 1100),
    (4, 1500);

-- Number of points (buckets) wanted in the output.
DECLARE @results int;
SET @results = 2;

-- NTILE assigns each row, taken in ID order, to one of @results buckets.
WITH grouped AS (
    SELECT
        ID,
        sample,
        NTILE(@results) OVER (ORDER BY ID) AS nt
    FROM @samples
)
SELECT nt, AVG(sample)
FROM grouped
GROUP BY nt
-- Explicit ordering: without it the order of the buckets in the result is not guaranteed.
ORDER BY nt;
产生:
nt
-------------------- -----------
1 1100
2 1300
如果把 @results 改为 4(或任何更大的数字),那么得到的就是原始结果集。
不幸的是,我没有你的完整数据,我也无法完全理解你想用完整的存储过程做什么,所以上面的内容可能需要做一些调整。
我很难解释清楚我想要什么,所以标题可能不太明确,但我希望能用代码把它描述清楚。
我有一些数据,其中最重要的是两个值:时间 t 和对应的值 f(t)。它们存储在一张表中,例如:
1 - 1000
2 - 1200
3 - 1100
4 - 1500
...
我想用这些数据画一张图,这张图应该包含 N 个点。如果表的行数少于 N,就直接返回整张表;否则就需要对这些点进行分组,例如当 N = Count/2 时,对于上面的例子:
1 - (1000+1200)/2 = 1100
2 - (1100+1500)/2 = 1300
...
我写了一个 SQL 脚本(当 N >> Count 时它工作正常),其中 MonitoringDateTime 是 t,ResultCount 是 f(t):
-- Returns request-statistics points (ResourceType, MonitoringDateTime, ResultCount)
-- for agents whose ResourceTypeID shares at least one bit with @ResourceTypeID,
-- restricted to 'Result' events with Result = 1 generated between @DateFrom and
-- @DateTo (BETWEEN, so both bounds are inclusive).
--
-- Parameters:
--   @ResourceTypeID       bit mask tested against Agent.ResourceTypeID and ResourceType.ID
--   @DateFrom, @DateTo    inclusive bounds on AgentData.GeneratedOnUtc
--   @EstimatedPointCount  requested point count; it is multiplied by the number of
--                         matching resource types to form the divisor of the row count
ALTER PROCEDURE [dbo].[usp_GetRequestStatisticsData]
    @ResourceTypeID bigint,
    @DateFrom datetime,
    @DateTo datetime,
    @EstimatedPointCount int
AS
BEGIN
    SET NOCOUNT ON;
    SET ARITHABORT ON;

    DECLARE @groupSize int;
    DECLARE @resourceCount int;

    -- Number of resource types selected by the bit mask.
    SELECT @resourceCount = COUNT(*)
    FROM ResourceType
    WHERE ID & @ResourceTypeID > 0;

    -- Stage the matching rows, numbered chronologically across all resource types.
    SELECT
        d.ResultCount,
        MonitoringDateTime = d.GeneratedOnUtc,
        ResourceType = a.ResourceTypeID,
        ROW_NUMBER() OVER (ORDER BY d.GeneratedOnUtc ASC) AS [Row]
    INTO #t
    FROM dbo.AgentData d
    INNER JOIN dbo.Agent a ON a.CheckID = d.CheckID
    WHERE d.EventType = 'Result'
        AND a.ResourceTypeID & @ResourceTypeID > 0
        AND d.GeneratedOnUtc BETWEEN @DateFrom AND @DateTo
        AND d.Result = 1;

    -- Rows per output point (integer division). A zero or NULL divisor (no
    -- matching resource type, or @EstimatedPointCount of 0 / NULL) falls back
    -- to 0, i.e. "return every point", instead of raising a divide-by-zero
    -- error or silently returning no rows.
    SELECT @groupSize = COALESCE(COUNT(*) / NULLIF(@EstimatedPointCount * @resourceCount, 0), 0)
    FROM #t;

    IF @groupSize = 0
    BEGIN
        -- Not enough rows to fill even one group: return all points.
        SELECT ResourceType, MonitoringDateTime, ResultCount
        FROM #t;
    END
    ELSE
    BEGIN
        -- NOTE: [Row] is unique, so every (ResourceType, [Row]) group holds a
        -- single row and the AVG calls return that row's own values. In effect
        -- this keeps every @groupSize-th row rather than averaging neighbours.
        SELECT
            ResourceType,
            CAST(AVG(CAST(#t.MonitoringDateTime AS DECIMAL(18, 6))) AS DATETIME) MonitoringDateTime,
            AVG(ResultCount) ResultCount
        FROM #t
        WHERE [Row] % @groupSize = 0
        GROUP BY ResourceType, [Row]
        ORDER BY MonitoringDateTime;
    END
END
但是当 N ~= Count 时它就不行了,而且 insert 要花很多时间。这就是为什么我想使用 CTE,但 CTE 不能和 if else 语句一起使用。
所以我计算了组号的公式(用于 GroupBy 子句),因为我们有
GroupNumber = Count < N ? Row : Row*NumberOfGroups
其中 Count - table 中的行数,NumberOfGroups = Count/EstimatedPointCount
使用一些简单的数学运算我们得到一个公式
GroupNumber = Row + (Row*Count/EstimatedPointCount - Row)*MAX(Count - Count/EstimatedPointCount,0)/(Count - Count/EstimatedPointCount)
但由于 Count 聚合函数,它不起作用:
Column 'dbo.AgentData.ResultCount' is invalid in the select list because it is not contained in either an aggregate function or the GROUP BY clause.
我知道我的英语很差(我正在努力提高),但我仍抱有希望,所以请多多指教。
查询结果
-- Ad-hoc form of the data query: 'Result' events with Result = 1 generated
-- between 2015-01-28 and 2015-01-30 (BETWEEN includes both bounds), for agents
-- whose ResourceTypeID has at least one bit in common with the mask 1376256.
SELECT
    d.ResultCount,
    d.GeneratedOnUtc AS MonitoringDateTime,
    a.ResourceTypeID AS ResourceType
FROM dbo.AgentData AS d
INNER JOIN dbo.Agent AS a
    ON d.CheckID = a.CheckID
WHERE d.EventType = 'Result'
    AND d.Result = 1
    AND a.ResourceTypeID & 1376256 > 0
    AND d.GeneratedOnUtc BETWEEN '2015-01-28' AND '2015-01-30'
我没试过,但是
怎么样?
-- Keeps only the rows whose number is a multiple of @groupSize. Because [Row]
-- is also a grouping column, each AVG is taken over the rows that share one
-- ResourceType and one [Row] value.
SELECT
    t.ResourceType,
    CAST(AVG(CAST(t.MonitoringDateTime AS DECIMAL(18, 6))) AS DATETIME) AS MonitoringDateTime,
    AVG(t.ResultCount) AS ResultCount
FROM #t AS t
WHERE t.[Row] % @groupSize = 0
GROUP BY t.ResourceType, t.[Row]
ORDER BY MonitoringDateTime
或许可以改成:
-- Averages consecutive runs of @groupSize rows. [Row] starts at 1, so it is
-- shifted to 0-based before the integer division; without the shift the first
-- bucket would hold only @groupSize - 1 rows and every later bucket would be
-- misaligned (rows 1..4 with @groupSize = 2 would land in buckets 0,1,1,2).
-- Assumes @groupSize > 0 -- confirm the caller handles the 0 case separately.
SELECT
    t.ResourceType,
    CAST(AVG(CAST(t.MonitoringDateTime AS DECIMAL(18, 6))) AS DATETIME) AS MonitoringDateTime,
    AVG(t.ResultCount) AS ResultCount
FROM #t AS t
GROUP BY t.ResourceType, CONVERT(int, (t.[Row] - 1) / @groupSize)
ORDER BY MonitoringDateTime
也许这能为您指出一个新的方向?转换为 int 会截断小数点后的所有内容,所以我希望这能给您更好的分组。您可能还需要让行号按资源类型分区(PARTITION BY)才能正常工作。
下面是一个使用 NTILE 的示例,使用的是您问题开头的简单示例数据:
-- Sample data taken from the simple example at the top of the question.
DECLARE @samples TABLE (ID int, sample int);
INSERT INTO @samples (ID, sample) VALUES
    (1, 1000),
    (2, 1200),
    (3, 1100),
    (4, 1500);

-- Number of points (buckets) wanted in the output.
DECLARE @results int;
SET @results = 2;

-- NTILE assigns each row, taken in ID order, to one of @results buckets.
WITH grouped AS (
    SELECT
        ID,
        sample,
        NTILE(@results) OVER (ORDER BY ID) AS nt
    FROM @samples
)
SELECT nt, AVG(sample)
FROM grouped
GROUP BY nt
-- Explicit ordering: without it the order of the buckets in the result is not guaranteed.
ORDER BY nt;
产生:
nt
-------------------- -----------
1 1100
2 1300
如果把 @results 改为 4(或任何更大的数字),那么得到的就是原始结果集。
不幸的是,我没有你的完整数据,我也无法完全理解你想用完整的存储过程做什么,所以上面的内容可能需要做一些调整。