SQL Server 查询:按顺序统计列中值发生变化的次数
SQL Server Query to Count Number of Changing Values in a Column Sequentially
我需要按顺序统计某一列中值发生变化的次数。请参见图片中的说明(正确的/预期的输出)。
此处 Area 列的值在变化,counter 列应随 Area 值的每次变化依次递增。
我一开始使用的是这段代码:
-- Asker's starting attempt: joins every row to the per-tenant number of
-- distinct Area values, so counter is identical on all rows of a tenant
-- (it is not a sequential change counter).
-- The derived table uses its own alias (e); it cannot see the outer alias a.
SELECT
    a.tenant,
    a.area,
    a.date,
    a.gsc,
    f.counter
FROM TENANT AS a
INNER JOIN (
    SELECT
        e.tenant,
        COUNT(DISTINCT e.Area) AS counter
    FROM TENANT AS e
    GROUP BY e.tenant
) AS f
    ON a.tenant = f.tenant
ORDER BY a.tenant, a.date
但它给我的是这样的输出:它统计的是所有行的 Area 列中不同值的数量。
这是使用窗口函数的一种方法:
-- Numbers each run of consecutive equal area values (in date order): 1, 2, 3, ...
SELECT
    islands.tenant,
    islands.area,
    islands.[date],
    islands.sales,
    -- Runs are ranked by their first date.
    DENSE_RANK() OVER (ORDER BY islands.grpOrder) AS counter
FROM (
    SELECT
        runs.tenant,
        runs.area,
        runs.[date],
        runs.sales,
        -- Earliest date of the run identified by (area, grp).
        MIN(runs.[date]) OVER (PARTITION BY runs.area, runs.grp) AS grpOrder
    FROM (
        SELECT
            tenant,
            area,
            [date],
            sales,
            -- Overall row number minus per-area row number: the difference
            -- stays the same while the same area repeats on consecutive rows.
            ROW_NUMBER() OVER (ORDER BY [date])
                - ROW_NUMBER() OVER (PARTITION BY area ORDER BY [date]) AS grp
        FROM tenant
    ) AS runs
) AS islands
最内层的查询识别出由连续相同 area 值构成的“岛”(island)。请看该子查询部分输出中的 grp 值:
area date grp
--------------------
18 2015-01-01 0
18 2015-01-02 0
18 2015-01-05 2
18 2015-01-06 2
20 2015-01-03 2
20 2015-01-04 2
借助 MIN 的窗口函数版本,我们可以确定各个 grp 分组的先后顺序:字段 grpOrder 保存每个分组的最小日期。
在外部查询中使用 DENSE_RANK(),我们现在可以轻松计算 counter 值:第一组的值为 1,下一组的值为 2,依此类推。
您可以像下面这样使用窗口函数:
-- Sample data: one tenant whose area goes 18 -> 20 -> 18 over six consecutive days.
-- name is varchar(20): 'Little Asia' is 11 characters and does not fit in
-- varchar(10) (the INSERT raises a truncation error under default settings).
DECLARE @data TABLE (name varchar(20), area int, dates datetime, sales int);

INSERT INTO @data (name, area, dates, sales) VALUES
      ('Little Asia', 18, '20150101', 10)
    , ('Little Asia', 18, '20150102', 20)
    , ('Little Asia', 20, '20150103', 30)
    , ('Little Asia', 20, '20150104', 10)
    , ('Little Asia', 18, '20150105', 20)
    , ('Little Asia', 18, '20150106', 30);

-- counter: sequential number of each run of consecutive equal area values, in dates order.
-- count:   number of rows in that run.
SELECT name, area, dates, sales
     , [counter] = DENSE_RANK() OVER (ORDER BY island_start)
     , [count]   = COUNT(*) OVER (PARTITION BY area, n)
FROM (
    SELECT name, area, dates, sales, n
         -- First date of the run. Ranking runs by this value keeps the counter
         -- in date order even when areas interleave (e.g. 1,1,2,1,2,2).
         , island_start = MIN(dates) OVER (PARTITION BY area, n)
    FROM (
        SELECT name, area, dates, sales
             -- Overall row number minus per-area row number: constant within a
             -- run of one area, so (area, n) identifies the run.
             , n = ROW_NUMBER() OVER (ORDER BY dates)
                 - ROW_NUMBER() OVER (PARTITION BY area ORDER BY dates)
        FROM @data
    ) AS x
) AS v
ORDER BY dates;
输出:
name area dates sales counter count
Little Asia 18 2015-01-01 10 1 2
Little Asia 18 2015-01-02 20 1 2
Little Asia 20 2015-01-03 30 2 2
Little Asia 20 2015-01-04 10 2 2
Little Asia 18 2015-01-05 20 3 2
Little Asia 18 2015-01-06 30 3 2
只要 Area 的变化量至少为 1(@Threshold),我们就会开始一个新组。该查询按租户分区。
-- Sample data: one tenant whose Area goes 18 -> 20 -> 18 over six consecutive days.
DECLARE @Table AS TABLE (
    Tenant varchar(20),
    Area   int,
    [Date] date,
    Sales  int
);

-- Dates use the 'YYYYMMDD' form, which does not depend on the session's
-- language / DATEFORMAT setting (unlike '1/2/2015').
INSERT INTO @Table (Tenant, Area, [Date], Sales)
VALUES
    ('Little Asia', 18, '20150101', 10),
    ('Little Asia', 18, '20150102', 20),
    ('Little Asia', 20, '20150103', 30),
    ('Little Asia', 20, '20150104', 10),
    ('Little Asia', 18, '20150105', 20),
    ('Little Asia', 18, '20150106', 30);

/***** Begin Query *****/
-- Smallest absolute change in Area (versus the previous row of the same tenant)
-- that opens a new group.
DECLARE @Threshold int = 1;

WITH C1 AS
(
    -- isstart = 1 on the first row of every group, NULL otherwise.
    -- The first row of a tenant has no previous row: LAG returns NULL, the
    -- comparison is not true, and the ELSE branch marks it as a group start.
    -- Strict "<" so that a change of exactly @Threshold starts a new group.
    SELECT
        Tenant,
        Area,
        [Date],
        Sales,
        CASE
            WHEN ABS(Area - LAG(Area) OVER (PARTITION BY Tenant ORDER BY [Date])) < @Threshold THEN NULL
            ELSE 1
        END AS isstart
    FROM @Table
),
C2 AS
(
    -- Running count of group starts per tenant = sequential group number.
    SELECT
        Tenant,
        Area,
        [Date],
        Sales,
        COUNT(isstart) OVER (PARTITION BY Tenant ORDER BY [Date] ROWS UNBOUNDED PRECEDING) AS grp
    FROM C1
)
SELECT Tenant, Area, [Date], Sales, grp
FROM C2
ORDER BY Tenant, [Date];
被采纳的答案效果很好,SQL Fiddle 演示也很棒。但是,它没有考虑存在多个租户的情况。
我扩展了 SQL Fiddle 上的答案(链接见此处)。对于数据集中包含多个租户的情况,只需确保分区和排序中都包含 tenant 列。
假设 tenant 列中多了一个租户:
-- Sample data with two tenants: 'little asia' switches area 18 -> 20 -> 18,
-- 'little' stays on area 18 and switches to 11 on the last day.
CREATE TABLE #tenant (
    tenant varchar(20),
    area   int,
    [date] date,
    sales  int
);

INSERT INTO #tenant (tenant, area, [date], sales)
VALUES
    ('little asia', 18, '20150101', 10),
    ('little asia', 18, '20150102', 20),
    ('little asia', 20, '20150103', 30),
    ('little asia', 20, '20150104', 10),
    ('little asia', 18, '20150105', 20),
    ('little asia', 18, '20150106', 30),
    ('little', 18, '20150101', 10),
    ('little', 18, '20150102', 20),
    ('little', 18, '20150103', 30),
    ('little', 18, '20150104', 10),
    ('little', 18, '20150105', 20),
    ('little', 11, '20150106', 30);
代码会这样写:
-- Multi-tenant variant: every window is partitioned by tenant, so each
-- tenant's counter is computed independently and starts at 1.
SELECT
    islands.tenant,
    islands.area,
    islands.[date],
    islands.sales,
    -- Runs are ranked by their first date within the tenant.
    DENSE_RANK() OVER (PARTITION BY islands.tenant ORDER BY islands.grpOrder) AS counter
FROM (
    SELECT
        runs.tenant,
        runs.area,
        runs.[date],
        runs.sales,
        -- Earliest date of the run identified by (tenant, area, grp).
        MIN(runs.[date]) OVER (PARTITION BY runs.tenant, runs.area, runs.grp) AS grpOrder
    FROM (
        SELECT
            tenant,
            area,
            [date],
            sales,
            -- Per-tenant row number minus per-(tenant, area) row number: the
            -- difference stays the same while the same area repeats on
            -- consecutive rows of a tenant.
            ROW_NUMBER() OVER (PARTITION BY tenant ORDER BY [date])
                - ROW_NUMBER() OVER (PARTITION BY tenant, area ORDER BY [date]) AS grp
        FROM #tenant
    ) AS runs
) AS islands
ORDER BY islands.tenant, islands.[date]
我需要按顺序统计某一列中值发生变化的次数。请参见图片中的说明(正确的/预期的输出)。
此处 Area 列的值在变化,counter 列应随 Area 值的每次变化依次递增。
我一开始使用的是这段代码:
-- Asker's starting attempt: joins every row to the per-tenant number of
-- distinct Area values, so counter is identical on all rows of a tenant
-- (it is not a sequential change counter).
-- The derived table uses its own alias (e); it cannot see the outer alias a.
SELECT
    a.tenant,
    a.area,
    a.date,
    a.gsc,
    f.counter
FROM TENANT AS a
INNER JOIN (
    SELECT
        e.tenant,
        COUNT(DISTINCT e.Area) AS counter
    FROM TENANT AS e
    GROUP BY e.tenant
) AS f
    ON a.tenant = f.tenant
ORDER BY a.tenant, a.date
但它给我的是这样的输出:它统计的是所有行的 Area 列中不同值的数量。
这是使用窗口函数的一种方法:
-- Numbers each run of consecutive equal area values (in date order): 1, 2, 3, ...
SELECT
    islands.tenant,
    islands.area,
    islands.[date],
    islands.sales,
    -- Runs are ranked by their first date.
    DENSE_RANK() OVER (ORDER BY islands.grpOrder) AS counter
FROM (
    SELECT
        runs.tenant,
        runs.area,
        runs.[date],
        runs.sales,
        -- Earliest date of the run identified by (area, grp).
        MIN(runs.[date]) OVER (PARTITION BY runs.area, runs.grp) AS grpOrder
    FROM (
        SELECT
            tenant,
            area,
            [date],
            sales,
            -- Overall row number minus per-area row number: the difference
            -- stays the same while the same area repeats on consecutive rows.
            ROW_NUMBER() OVER (ORDER BY [date])
                - ROW_NUMBER() OVER (PARTITION BY area ORDER BY [date]) AS grp
        FROM tenant
    ) AS runs
) AS islands
最内层的查询识别出由连续相同 area 值构成的“岛”(island)。请看该子查询部分输出中的 grp 值:
area date grp
--------------------
18 2015-01-01 0
18 2015-01-02 0
18 2015-01-05 2
18 2015-01-06 2
20 2015-01-03 2
20 2015-01-04 2
借助 MIN 的窗口函数版本,我们可以确定各个 grp 分组的先后顺序:字段 grpOrder 保存每个分组的最小日期。
在外部查询中使用 DENSE_RANK(),我们现在可以轻松计算 counter 值:第一组的值为 1,下一组的值为 2,依此类推。
您可以像下面这样使用窗口函数:
-- Sample data: one tenant whose area goes 18 -> 20 -> 18 over six consecutive days.
-- name is varchar(20): 'Little Asia' is 11 characters and does not fit in
-- varchar(10) (the INSERT raises a truncation error under default settings).
DECLARE @data TABLE (name varchar(20), area int, dates datetime, sales int);

INSERT INTO @data (name, area, dates, sales) VALUES
      ('Little Asia', 18, '20150101', 10)
    , ('Little Asia', 18, '20150102', 20)
    , ('Little Asia', 20, '20150103', 30)
    , ('Little Asia', 20, '20150104', 10)
    , ('Little Asia', 18, '20150105', 20)
    , ('Little Asia', 18, '20150106', 30);

-- counter: sequential number of each run of consecutive equal area values, in dates order.
-- count:   number of rows in that run.
SELECT name, area, dates, sales
     , [counter] = DENSE_RANK() OVER (ORDER BY island_start)
     , [count]   = COUNT(*) OVER (PARTITION BY area, n)
FROM (
    SELECT name, area, dates, sales, n
         -- First date of the run. Ranking runs by this value keeps the counter
         -- in date order even when areas interleave (e.g. 1,1,2,1,2,2).
         , island_start = MIN(dates) OVER (PARTITION BY area, n)
    FROM (
        SELECT name, area, dates, sales
             -- Overall row number minus per-area row number: constant within a
             -- run of one area, so (area, n) identifies the run.
             , n = ROW_NUMBER() OVER (ORDER BY dates)
                 - ROW_NUMBER() OVER (PARTITION BY area ORDER BY dates)
        FROM @data
    ) AS x
) AS v
ORDER BY dates;
输出:
name area dates sales counter count
Little Asia 18 2015-01-01 10 1 2
Little Asia 18 2015-01-02 20 1 2
Little Asia 20 2015-01-03 30 2 2
Little Asia 20 2015-01-04 10 2 2
Little Asia 18 2015-01-05 20 3 2
Little Asia 18 2015-01-06 30 3 2
只要 Area 的变化量至少为 1(@Threshold),我们就会开始一个新组。该查询按租户分区。
-- Sample data: one tenant whose Area goes 18 -> 20 -> 18 over six consecutive days.
DECLARE @Table AS TABLE (
    Tenant varchar(20),
    Area   int,
    [Date] date,
    Sales  int
);

-- Dates use the 'YYYYMMDD' form, which does not depend on the session's
-- language / DATEFORMAT setting (unlike '1/2/2015').
INSERT INTO @Table (Tenant, Area, [Date], Sales)
VALUES
    ('Little Asia', 18, '20150101', 10),
    ('Little Asia', 18, '20150102', 20),
    ('Little Asia', 20, '20150103', 30),
    ('Little Asia', 20, '20150104', 10),
    ('Little Asia', 18, '20150105', 20),
    ('Little Asia', 18, '20150106', 30);

/***** Begin Query *****/
-- Smallest absolute change in Area (versus the previous row of the same tenant)
-- that opens a new group.
DECLARE @Threshold int = 1;

WITH C1 AS
(
    -- isstart = 1 on the first row of every group, NULL otherwise.
    -- The first row of a tenant has no previous row: LAG returns NULL, the
    -- comparison is not true, and the ELSE branch marks it as a group start.
    -- Strict "<" so that a change of exactly @Threshold starts a new group.
    SELECT
        Tenant,
        Area,
        [Date],
        Sales,
        CASE
            WHEN ABS(Area - LAG(Area) OVER (PARTITION BY Tenant ORDER BY [Date])) < @Threshold THEN NULL
            ELSE 1
        END AS isstart
    FROM @Table
),
C2 AS
(
    -- Running count of group starts per tenant = sequential group number.
    SELECT
        Tenant,
        Area,
        [Date],
        Sales,
        COUNT(isstart) OVER (PARTITION BY Tenant ORDER BY [Date] ROWS UNBOUNDED PRECEDING) AS grp
    FROM C1
)
SELECT Tenant, Area, [Date], Sales, grp
FROM C2
ORDER BY Tenant, [Date];
被采纳的答案效果很好,SQL Fiddle 演示也很棒。但是,它没有考虑存在多个租户的情况。
我扩展了 SQL Fiddle 上的答案(链接见此处)。对于数据集中包含多个租户的情况,只需确保分区和排序中都包含 tenant 列。
假设 tenant 列中多了一个租户:
-- Sample data with two tenants: 'little asia' switches area 18 -> 20 -> 18,
-- 'little' stays on area 18 and switches to 11 on the last day.
CREATE TABLE #tenant (
    tenant varchar(20),
    area   int,
    [date] date,
    sales  int
);

INSERT INTO #tenant (tenant, area, [date], sales)
VALUES
    ('little asia', 18, '20150101', 10),
    ('little asia', 18, '20150102', 20),
    ('little asia', 20, '20150103', 30),
    ('little asia', 20, '20150104', 10),
    ('little asia', 18, '20150105', 20),
    ('little asia', 18, '20150106', 30),
    ('little', 18, '20150101', 10),
    ('little', 18, '20150102', 20),
    ('little', 18, '20150103', 30),
    ('little', 18, '20150104', 10),
    ('little', 18, '20150105', 20),
    ('little', 11, '20150106', 30);
代码会这样写:
-- Multi-tenant variant: every window is partitioned by tenant, so each
-- tenant's counter is computed independently and starts at 1.
SELECT
    islands.tenant,
    islands.area,
    islands.[date],
    islands.sales,
    -- Runs are ranked by their first date within the tenant.
    DENSE_RANK() OVER (PARTITION BY islands.tenant ORDER BY islands.grpOrder) AS counter
FROM (
    SELECT
        runs.tenant,
        runs.area,
        runs.[date],
        runs.sales,
        -- Earliest date of the run identified by (tenant, area, grp).
        MIN(runs.[date]) OVER (PARTITION BY runs.tenant, runs.area, runs.grp) AS grpOrder
    FROM (
        SELECT
            tenant,
            area,
            [date],
            sales,
            -- Per-tenant row number minus per-(tenant, area) row number: the
            -- difference stays the same while the same area repeats on
            -- consecutive rows of a tenant.
            ROW_NUMBER() OVER (PARTITION BY tenant ORDER BY [date])
                - ROW_NUMBER() OVER (PARTITION BY tenant, area ORDER BY [date]) AS grp
        FROM #tenant
    ) AS runs
) AS islands
ORDER BY islands.tenant, islands.[date]