选择连续出现多次的记录
Selecting records that appear several times in a row
我的问题是我想select一些连续出现的记录。
例如,我们有这样的 table:
x
x
x
y
y
x
x
y
查询应该给出这样的答案:
x 3
y 2
x 2
y 1
SQL 表表示 无序 集。如果有一列指定顺序,您的问题才有意义。如果是这样,您可以使用行号差来确定组,然后聚合:
select col1, count(*)
from (select t.*,
row_number() over (order by <ordering col>) as seqnum,
row_number() over (partition by col1 order by <ordering col>) as seqnum_2
from t
) t
group by col1, (seqnum - seqnum_2)
房间里的大象是缺少确定行顺序的列。
SELECT col1, count(*)
FROM (
SELECT col1, order_column
, row_number() OVER (ORDER BY order_column)
- row_number() OVER (PARTITION BY col1 ORDER BY order_column) AS grp
FROM tbl
) t
GROUP BY col1, grp
ORDER BY min(order_column);
要排除只有一行的分区,请添加 HAVING
子句:
SELECT col1, count(*)
FROM (
SELECT col1, order_column
, row_number() OVER (ORDER BY order_column)
- row_number() OVER (PARTITION BY col1 ORDER BY order_column) AS grp
FROM tbl
) t
GROUP BY col1, grp
HAVING count(*) > 1
ORDER BY min(order_column);
db<>fiddle here
添加最后的 ORDER BY
以保持原始顺序(和有意义的结果)。您可能还想添加像 min(order_column)
这样的列。
相关:
- Select longest continuous sequence
- Group by repeating attribute
我做了一个SQLFiddle
http://sqlfiddle.com/#!18/f8900/5
CREATE TABLE [dbo].[SomeTable](
[data] [nchar](1) NULL,
[id] [int] IDENTITY(1,1) NOT NULL
);
INSERT INTO SomeTable
([data])
VALUES
('x'),
('x'),
('x'),
('y'),
('y'),
('x'),
('x'),
('y')
;
select * from SomeTable;
WITH SomeTable_CTE (Data, total, BaseId, NextId)
AS
(
SELECT
Data,
1 as total,
Id as BaseId,
Id+1 as NextId
FROM SomeTable
where not exists(
Select * from SomeTable Previous
where Previous.Id+1 = SomeTable.Id
and Previous.Data = SomeTable.Data)
UNION ALL
select SomeTable_CTE.Data, SomeTable_CTE.total+1, SomeTable_CTE.BaseId as BaseId, SomeTable.Id+1 as NextId
from SomeTable_CTE inner join SomeTable on
SomeTable.Data = SomeTable_CTE.Data
and
SomeTable.Id = SomeTable_CTE.NextId
)
SELECT Data, max(total) as total
FROM SomeTable_CTE
group by Data, BaseId
order by BaseId
我的问题是我想select一些连续出现的记录。 例如,我们有这样的 table:
x
x
x
y
y
x
x
y
查询应该给出这样的答案:
x 3
y 2
x 2
y 1
SQL 表表示 无序 集。如果有一列指定顺序,您的问题才有意义。如果是这样,您可以使用行号差来确定组,然后聚合:
select col1, count(*)
from (select t.*,
row_number() over (order by <ordering col>) as seqnum,
row_number() over (partition by col1 order by <ordering col>) as seqnum_2
from t
) t
group by col1, (seqnum - seqnum_2)
房间里的大象是缺少确定行顺序的列。
SELECT col1, count(*)
FROM (
SELECT col1, order_column
, row_number() OVER (ORDER BY order_column)
- row_number() OVER (PARTITION BY col1 ORDER BY order_column) AS grp
FROM tbl
) t
GROUP BY col1, grp
ORDER BY min(order_column);
要排除只有一行的分区,请添加 HAVING
子句:
SELECT col1, count(*)
FROM (
SELECT col1, order_column
, row_number() OVER (ORDER BY order_column)
- row_number() OVER (PARTITION BY col1 ORDER BY order_column) AS grp
FROM tbl
) t
GROUP BY col1, grp
HAVING count(*) > 1
ORDER BY min(order_column);
db<>fiddle here
添加最后的 ORDER BY
以保持原始顺序(和有意义的结果)。您可能还想添加像 min(order_column)
这样的列。
相关:
- Select longest continuous sequence
- Group by repeating attribute
我做了一个SQLFiddle http://sqlfiddle.com/#!18/f8900/5
CREATE TABLE [dbo].[SomeTable](
[data] [nchar](1) NULL,
[id] [int] IDENTITY(1,1) NOT NULL
);
INSERT INTO SomeTable
([data])
VALUES
('x'),
('x'),
('x'),
('y'),
('y'),
('x'),
('x'),
('y')
;
select * from SomeTable;
WITH SomeTable_CTE (Data, total, BaseId, NextId)
AS
(
SELECT
Data,
1 as total,
Id as BaseId,
Id+1 as NextId
FROM SomeTable
where not exists(
Select * from SomeTable Previous
where Previous.Id+1 = SomeTable.Id
and Previous.Data = SomeTable.Data)
UNION ALL
select SomeTable_CTE.Data, SomeTable_CTE.total+1, SomeTable_CTE.BaseId as BaseId, SomeTable.Id+1 as NextId
from SomeTable_CTE inner join SomeTable on
SomeTable.Data = SomeTable_CTE.Data
and
SomeTable.Id = SomeTable_CTE.NextId
)
SELECT Data, max(total) as total
FROM SomeTable_CTE
group by Data, BaseId
order by BaseId