如何在不扫描所有记录的情况下按复杂条件查询前几行
How to query the top rows by a complex condition without scanning all records
我有一张表,其结构和数据如下:
-- Sample table: one amount per (objectId, unixTimestamp) pair.
-- The composite primary key orders rows by object first, then by time.
CREATE TABLE AmountObjects
(
    objectId      integer,
    unixTimestamp integer,
    amount        integer,
    PRIMARY KEY (objectId ASC, unixTimestamp ASC)
);
-- Seed data: ten consecutive timestamps (1..10) for object 1, each with
-- amount 33. The column list is spelled out so the statement does not depend
-- on the physical column order of the table.
INSERT INTO AmountObjects (objectId, unixTimestamp, amount)
VALUES
    (1,  1, 33),
    (1,  2, 33),
    (1,  3, 33),
    (1,  4, 33),
    (1,  5, 33),
    (1,  6, 33),
    (1,  7, 33),
    (1,  8, 33),
    (1,  9, 33),
    (1, 10, 33);
我想查询按日期和累计金额过滤后的最新记录,但像下面这样的查询会对每个对象扫描其所有记录:
-- Question's formulation: for every row, a correlated subquery sums the
-- amounts of all rows of the same object whose timestamp is at or after
-- that row's timestamp. A row is kept when its timestamp is >= 9 or when
-- that backwards running total is <= 150.
SELECT
    cur.objectId,
    cur.unixTimestamp,
    cur.amount,
    tail.total
FROM AmountObjects AS cur
CROSS APPLY
(
    SELECT SUM(later.amount) AS total
    FROM AmountObjects AS later
    WHERE later.objectId = cur.objectId
      AND later.unixTimestamp >= cur.unixTimestamp
) AS tail
WHERE cur.unixTimestamp >= 9
   OR tail.total <= 150;
我的问题是:如何在不扫描每个对象全部数据的情况下查询出这些数据?
谢谢
下面的查询应该能实现相同的逻辑,并且效率更高:
-- Backwards running total per object computed with a window function, then
-- filtered: keep rows with timestamp >= 9 or running total <= 150.
--
-- Notes:
--  * Column names are spelled exactly as declared (unixTimestamp) so the
--    query also works in a database with a case-sensitive collation.
--  * The frame is stated explicitly as ROWS UNBOUNDED PRECEDING. Without it,
--    an ORDER BY window defaults to a RANGE frame, which treats rows with
--    equal sort keys as peers. (objectId, unixTimestamp) is the primary key,
--    so there are no ties and the result is unchanged; ROWS is usually the
--    cheaper frame to evaluate in SQL Server.
--  * The output columns are listed explicitly instead of using "*".
SELECT
    t.objectId,
    t.unixTimestamp,
    t.amount,
    t.total
FROM
(
    SELECT
        a.objectId,
        a.unixTimestamp,
        a.amount,
        SUM(a.amount) OVER (
            PARTITION BY a.objectId
            ORDER BY a.unixTimestamp DESC
            ROWS UNBOUNDED PRECEDING
        ) AS total
    FROM AmountObjects AS a
) AS t
WHERE t.unixTimestamp >= 9
   OR t.total <= 150;
但是,它仍然会扫描所有行。
我能想到的唯一方法是把反向累计(running total)计算两次:第一次用来找出累计金额首次超过阈值的那一行的时间戳(使用 TOP 1
短路,找到后立即停止),第二次再计算该时间戳之后各行的累计值(使用索引查找(seek),只读取这一范围内的行)。
除非需要忽略的行占很高的比例,否则这种做法不太可能比“先计算所有行的累计值、再丢弃不需要的行”这种更简单的方法更好。
-- Two-pass backwards running total, intended to avoid reading every row of
-- each object.
--   Pass 1 (MinTimeStampsByObjectId): per object, walk the rows from newest
--     to oldest and take the first row whose running total exceeds 150.
--   Pass 2 (final SELECT): recompute the running total only for rows newer
--     than that cut-off, using a range predicate on the primary key columns.
-- NOTE(review): matching the simple "timestamp >= 9 OR total <= 150" filter
-- presumes amounts are never negative (so the running total never drops back
-- under 150 once it has exceeded it) -- confirm against real data.
WITH DistinctObjects AS
(
    SELECT DISTINCT a.objectId
    FROM AmountObjects AS a
),
MinTimeStampsByObjectId AS
(
    SELECT
        obj.objectId,
        ca.minUnixTimeStamp
    FROM DistinctObjects AS obj
    CROSS APPLY
    (
        -- ISNULL rather than COALESCE: COALESCE is expanded into a CASE
        -- expression, which can evaluate the subquery more than once.
        -- -1 is the "threshold never exceeded" sentinel; it assumes that
        -- timestamps are non-negative so "> -1" keeps every row -- TODO confirm.
        SELECT ISNULL(
            (
                SELECT TOP (1) d.unixTimestamp
                FROM
                (
                    SELECT
                        ao.unixTimestamp,
                        -- Explicit ROWS frame; timestamps are unique per
                        -- object, so this equals the default RANGE result.
                        SUM(ao.amount) OVER (
                            ORDER BY ao.unixTimestamp DESC
                            ROWS UNBOUNDED PRECEDING
                        ) AS total
                    FROM AmountObjects AS ao
                    WHERE ao.objectId = obj.objectId
                ) AS d
                WHERE d.total > 150
                ORDER BY d.unixTimestamp DESC
            ),
            -1
        )
    ) AS ca (minUnixTimeStamp)
)
SELECT
    ca2.objectId,
    ca2.unixTimestamp,
    ca2.amount,
    ca2.total
FROM MinTimeStampsByObjectId AS mts
CROSS APPLY
(
    SELECT
        ao.objectId,
        ao.unixTimestamp,
        ao.amount,
        SUM(ao.amount) OVER (
            ORDER BY ao.unixTimestamp DESC
            ROWS UNBOUNDED PRECEDING
        ) AS total
    FROM AmountObjects AS ao
    WHERE ao.objectId = mts.objectId
      -- The lower bound is capped at 8 (= 9 - 1): for integer timestamps
      -- "> 8" is the same as ">= 9", so rows from timestamp 9 onwards are
      -- always returned even if the total is exceeded earlier.
      AND ao.unixTimestamp > CASE
                                 WHEN mts.minUnixTimeStamp > 8 THEN 8
                                 ELSE mts.minUnixTimeStamp
                             END
) AS ca2;
我有一张表,其结构和数据如下:
-- Sample table: one amount per (objectId, unixTimestamp) pair.
-- The composite primary key orders rows by object first, then by time.
CREATE TABLE AmountObjects
(
    objectId      integer,
    unixTimestamp integer,
    amount        integer,
    PRIMARY KEY (objectId ASC, unixTimestamp ASC)
);
-- Seed data: ten consecutive timestamps (1..10) for object 1, each with
-- amount 33. The column list is spelled out so the statement does not depend
-- on the physical column order of the table.
INSERT INTO AmountObjects (objectId, unixTimestamp, amount)
VALUES
    (1,  1, 33),
    (1,  2, 33),
    (1,  3, 33),
    (1,  4, 33),
    (1,  5, 33),
    (1,  6, 33),
    (1,  7, 33),
    (1,  8, 33),
    (1,  9, 33),
    (1, 10, 33);
我想查询按日期和累计金额过滤后的最新记录,但像下面这样的查询会对每个对象扫描其所有记录:
-- Question's formulation: for every row, a correlated subquery sums the
-- amounts of all rows of the same object whose timestamp is at or after
-- that row's timestamp. A row is kept when its timestamp is >= 9 or when
-- that backwards running total is <= 150.
SELECT
    cur.objectId,
    cur.unixTimestamp,
    cur.amount,
    tail.total
FROM AmountObjects AS cur
CROSS APPLY
(
    SELECT SUM(later.amount) AS total
    FROM AmountObjects AS later
    WHERE later.objectId = cur.objectId
      AND later.unixTimestamp >= cur.unixTimestamp
) AS tail
WHERE cur.unixTimestamp >= 9
   OR tail.total <= 150;
我的问题是:如何在不扫描每个对象全部数据的情况下查询出这些数据?
谢谢
下面的查询应该能实现相同的逻辑,并且效率更高:
-- Backwards running total per object computed with a window function, then
-- filtered: keep rows with timestamp >= 9 or running total <= 150.
--
-- Notes:
--  * Column names are spelled exactly as declared (unixTimestamp) so the
--    query also works in a database with a case-sensitive collation.
--  * The frame is stated explicitly as ROWS UNBOUNDED PRECEDING. Without it,
--    an ORDER BY window defaults to a RANGE frame, which treats rows with
--    equal sort keys as peers. (objectId, unixTimestamp) is the primary key,
--    so there are no ties and the result is unchanged; ROWS is usually the
--    cheaper frame to evaluate in SQL Server.
--  * The output columns are listed explicitly instead of using "*".
SELECT
    t.objectId,
    t.unixTimestamp,
    t.amount,
    t.total
FROM
(
    SELECT
        a.objectId,
        a.unixTimestamp,
        a.amount,
        SUM(a.amount) OVER (
            PARTITION BY a.objectId
            ORDER BY a.unixTimestamp DESC
            ROWS UNBOUNDED PRECEDING
        ) AS total
    FROM AmountObjects AS a
) AS t
WHERE t.unixTimestamp >= 9
   OR t.total <= 150;
但是,它仍然会扫描所有行。
我能想到的唯一方法是把反向累计(running total)计算两次:第一次用来找出累计金额首次超过阈值的那一行的时间戳(使用 TOP 1
短路,找到后立即停止),第二次再计算该时间戳之后各行的累计值(使用索引查找(seek),只读取这一范围内的行)。
除非需要忽略的行占很高的比例,否则这种做法不太可能比“先计算所有行的累计值、再丢弃不需要的行”这种更简单的方法更好。
-- Two-pass backwards running total, intended to avoid reading every row of
-- each object.
--   Pass 1 (MinTimeStampsByObjectId): per object, walk the rows from newest
--     to oldest and take the first row whose running total exceeds 150.
--   Pass 2 (final SELECT): recompute the running total only for rows newer
--     than that cut-off, using a range predicate on the primary key columns.
-- NOTE(review): matching the simple "timestamp >= 9 OR total <= 150" filter
-- presumes amounts are never negative (so the running total never drops back
-- under 150 once it has exceeded it) -- confirm against real data.
WITH DistinctObjects AS
(
    SELECT DISTINCT a.objectId
    FROM AmountObjects AS a
),
MinTimeStampsByObjectId AS
(
    SELECT
        obj.objectId,
        ca.minUnixTimeStamp
    FROM DistinctObjects AS obj
    CROSS APPLY
    (
        -- ISNULL rather than COALESCE: COALESCE is expanded into a CASE
        -- expression, which can evaluate the subquery more than once.
        -- -1 is the "threshold never exceeded" sentinel; it assumes that
        -- timestamps are non-negative so "> -1" keeps every row -- TODO confirm.
        SELECT ISNULL(
            (
                SELECT TOP (1) d.unixTimestamp
                FROM
                (
                    SELECT
                        ao.unixTimestamp,
                        -- Explicit ROWS frame; timestamps are unique per
                        -- object, so this equals the default RANGE result.
                        SUM(ao.amount) OVER (
                            ORDER BY ao.unixTimestamp DESC
                            ROWS UNBOUNDED PRECEDING
                        ) AS total
                    FROM AmountObjects AS ao
                    WHERE ao.objectId = obj.objectId
                ) AS d
                WHERE d.total > 150
                ORDER BY d.unixTimestamp DESC
            ),
            -1
        )
    ) AS ca (minUnixTimeStamp)
)
SELECT
    ca2.objectId,
    ca2.unixTimestamp,
    ca2.amount,
    ca2.total
FROM MinTimeStampsByObjectId AS mts
CROSS APPLY
(
    SELECT
        ao.objectId,
        ao.unixTimestamp,
        ao.amount,
        SUM(ao.amount) OVER (
            ORDER BY ao.unixTimestamp DESC
            ROWS UNBOUNDED PRECEDING
        ) AS total
    FROM AmountObjects AS ao
    WHERE ao.objectId = mts.objectId
      -- The lower bound is capped at 8 (= 9 - 1): for integer timestamps
      -- "> 8" is the same as ">= 9", so rows from timestamp 9 onwards are
      -- always returned even if the total is exceeded earlier.
      AND ao.unixTimestamp > CASE
                                 WHEN mts.minUnixTimeStamp > 8 THEN 8
                                 ELSE mts.minUnixTimeStamp
                             END
) AS ca2;