Postgres:获取最大值和最小值,以及它们发生的时间戳
Postgres: getting the maximum and minimum values, and timestamps they occur
我正在运行 Postgres 9.2,并且有一个包含温度和时间戳的表,每分钟一条记录,时间戳以毫秒级纪元时间(epoch)表示:
weather=# \d weather_data
Table "public.weather_data"
Column | Type | Modifiers
-------------+--------------+-----------
timestamp | bigint | not null
sensor_id | integer | not null
temperature | numeric(4,1) |
humidity | integer |
date | date | not null
Indexes:
"weather_data_pkey" PRIMARY KEY, btree ("timestamp", sensor_id)
"weather_data_date_idx" btree (date)
"weather_data_humidity_idx" btree (humidity)
"weather_data_sensor_id_idx" btree (sensor_id)
"weather_data_temperature_idx" btree (temperature)
"weather_data_time_idx" btree ("timestamp")
Foreign-key constraints:
"weather_data_sensor_id_fkey" FOREIGN KEY (sensor_id) REFERENCES weather_sensors(sensor_id)
weather=# select * from weather_data order by timestamp desc;
timestamp | sensor_id | temperature | humidity | date
---------------+-----------+-------------+----------+------------
1483272420000 | 2 | 22.3 | 57 | 2017-01-01
1483272420000 | 1 | 24.9 | 53 | 2017-01-01
1483272360000 | 2 | 22.3 | 57 | 2017-01-01
1483272360000 | 1 | 24.9 | 58 | 2017-01-01
1483272300000 | 2 | 22.4 | 57 | 2017-01-01
1483272300000 | 1 | 24.9 | 57 | 2017-01-01
[...]
我有这个现有的查询,它获取每天的高点和低点,但不是高点或低点出现的特定时间:
-- Daily highest and lowest temperature for the sensor named 'outdoor'.
-- max() and min() are computed in the same GROUP BY, so weather_data is read
-- once instead of joining two separately aggregated copies of it. Both
-- aggregates ignore NULL temperatures.
SELECT
    date,
    max(temperature) AS highest,
    min(temperature) AS lowest
FROM weather_data
WHERE sensor_id = (SELECT sensor_id FROM weather_sensors WHERE sensor_name = 'outdoor')
GROUP BY date
ORDER BY date ASC;
数据库中有超过 200 万行,该查询运行大约需要 1.2 秒,这还不错。我现在想知道最高值和最低值出现的具体时间,于是用窗口函数写出了下面这个查询,它确实可以工作,但需要约 5.6 秒:
-- Daily high and low temperature for the sensor named 'outdoor', together with
-- the timestamp at which each extreme was recorded.
-- When several readings share the extreme temperature, the latest timestamp
-- is reported (timestamp DESC is the tie-breaker on both sides).
SELECT
    h.date,
    h.high_time,
    h.high_temp,
    l.low_time,
    l.low_temp
FROM (
    SELECT date, high_temp, high_time
    FROM (
        SELECT
            date,
            temperature AS high_temp,
            timestamp AS high_time,
            -- temperature is nullable and DESC sorts NULLs first by default;
            -- NULLS LAST keeps a missing reading from being ranked as the high.
            row_number() OVER (
                PARTITION BY date
                ORDER BY temperature DESC NULLS LAST, timestamp DESC
            ) AS rn
        FROM weather_data
        WHERE sensor_id = (SELECT sensor_id FROM weather_sensors WHERE sensor_name = 'outdoor')
    ) highs
    WHERE rn = 1
) h
INNER JOIN (
    SELECT date, low_temp, low_time
    FROM (
        SELECT
            date,
            temperature AS low_temp,
            timestamp AS low_time,
            -- ASC already sorts NULLs last; spelled out to mirror the high side.
            row_number() OVER (
                PARTITION BY date
                ORDER BY temperature ASC NULLS LAST, timestamp DESC
            ) AS rn
        FROM weather_data
        WHERE sensor_id = (SELECT sensor_id FROM weather_sensors WHERE sensor_name = 'outdoor')
    ) lows
    WHERE rn = 1
) l
    ON h.date = l.date
ORDER BY h.date ASC;
有没有什么相对简单的补充,可以加到第一个查询上,而不会大幅增加执行时间?我猜是有的,但我想我在这个问题上已经钻研太久了!
这与您的第二个查询相同,但只需要对 weather_data 表进行一次扫描:
-- Daily high/low temperature and the timestamp of each for the sensor named
-- 'outdoor', using a single pass over weather_data.
-- Each row is ranked twice within its day (hottest-first and coldest-first);
-- only the rank-1 rows are kept and then pivoted into one row per day with
-- conditional aggregation.
select date,
       max(case when high_rn = 1 then timestamp end) as high_time,
       max(case when high_rn = 1 then temperature end) as high_temp,
       max(case when low_rn = 1 then timestamp end) as low_time,
       max(case when low_rn = 1 then temperature end) as low_temp
from (
  select timestamp, temperature, date,
         -- temperature is nullable and DESC sorts NULLs first by default;
         -- NULLS LAST keeps a missing reading from being ranked as the high.
         row_number() over (partition by date order by temperature desc nulls last, timestamp desc) as high_rn,
         row_number() over (partition by date order by temperature asc nulls last, timestamp desc) as low_rn
  from weather_data
  where sensor_id = (select sensor_id from weather_sensors where sensor_name = 'outdoor')
) t
where (high_rn = 1 or low_rn = 1)
group by date
order by date;
它使用条件聚合,对只包含最低和最高温度的结果做交叉表(又称 "pivot")查询。
与问题无关,但是:date 和 timestamp 作为列名很糟糕。一方面是因为它们是关键字,但更重要的是因为它们没有说明该列的实际含义。是 "due date"(到期日)吗?是 "reading date"(读数日期)?还是 "processing date"(处理日期)?
-- Per-day minimum and maximum temperature of sensor 2, with the timestamp of
-- each. zdate / ztimestamp are the z-prefixed names of the date / timestamp
-- columns.
-- Two named windows are used so that each extreme is the FIRST row of its own
-- ordering with NULLS LAST. With a single ascending window, last_value() would
-- return a NULL-temperature row as the maximum, because ASC sorts NULLs last.
-- The price is a second sort per partition.
-- Ties: the minimum reports its earliest timestamp, the maximum its latest.
SELECT
DISTINCT ON (zdate) zdate
, first_value(ztimestamp) OVER coldest AS stamp_at_min
, first_value(temperature) OVER coldest AS tmin
, first_value(ztimestamp) OVER hottest AS stamp_at_max
, first_value(temperature) OVER hottest AS tmax
FROM weather_data
WHERE sensor_id = 2
WINDOW coldest AS (PARTITION BY zdate ORDER BY temperature ASC NULLS LAST, ztimestamp ASC)
, hottest AS (PARTITION BY zdate ORDER BY temperature DESC NULLS LAST, ztimestamp DESC)
ORDER BY zdate
;
- 我给 date 和 timestamp 两个列名加了前缀 z(即 zdate 和 ztimestamp)
- 我把 ztimestamp 加入了 ORDER BY,作为温度相同时的决胜条件(tie-breaker)
我正在运行 Postgres 9.2,并且有一个包含温度和时间戳的表,每分钟一条记录,时间戳以毫秒级纪元时间(epoch)表示:
weather=# \d weather_data
Table "public.weather_data"
Column | Type | Modifiers
-------------+--------------+-----------
timestamp | bigint | not null
sensor_id | integer | not null
temperature | numeric(4,1) |
humidity | integer |
date | date | not null
Indexes:
"weather_data_pkey" PRIMARY KEY, btree ("timestamp", sensor_id)
"weather_data_date_idx" btree (date)
"weather_data_humidity_idx" btree (humidity)
"weather_data_sensor_id_idx" btree (sensor_id)
"weather_data_temperature_idx" btree (temperature)
"weather_data_time_idx" btree ("timestamp")
Foreign-key constraints:
"weather_data_sensor_id_fkey" FOREIGN KEY (sensor_id) REFERENCES weather_sensors(sensor_id)
weather=# select * from weather_data order by timestamp desc;
timestamp | sensor_id | temperature | humidity | date
---------------+-----------+-------------+----------+------------
1483272420000 | 2 | 22.3 | 57 | 2017-01-01
1483272420000 | 1 | 24.9 | 53 | 2017-01-01
1483272360000 | 2 | 22.3 | 57 | 2017-01-01
1483272360000 | 1 | 24.9 | 58 | 2017-01-01
1483272300000 | 2 | 22.4 | 57 | 2017-01-01
1483272300000 | 1 | 24.9 | 57 | 2017-01-01
[...]
我有这个现有的查询,它获取每天的高点和低点,但不是高点或低点出现的特定时间:
-- Daily highest and lowest temperature for the sensor named 'outdoor'.
-- max() and min() are computed in the same GROUP BY, so weather_data is read
-- once instead of joining two separately aggregated copies of it. Both
-- aggregates ignore NULL temperatures.
SELECT
    date,
    max(temperature) AS highest,
    min(temperature) AS lowest
FROM weather_data
WHERE sensor_id = (SELECT sensor_id FROM weather_sensors WHERE sensor_name = 'outdoor')
GROUP BY date
ORDER BY date ASC;
数据库中有超过 200 万行,该查询运行大约需要 1.2 秒,这还不错。我现在想知道最高值和最低值出现的具体时间,于是用窗口函数写出了下面这个查询,它确实可以工作,但需要约 5.6 秒:
-- Daily high and low temperature for the sensor named 'outdoor', together with
-- the timestamp at which each extreme was recorded.
-- When several readings share the extreme temperature, the latest timestamp
-- is reported (timestamp DESC is the tie-breaker on both sides).
SELECT
    h.date,
    h.high_time,
    h.high_temp,
    l.low_time,
    l.low_temp
FROM (
    SELECT date, high_temp, high_time
    FROM (
        SELECT
            date,
            temperature AS high_temp,
            timestamp AS high_time,
            -- temperature is nullable and DESC sorts NULLs first by default;
            -- NULLS LAST keeps a missing reading from being ranked as the high.
            row_number() OVER (
                PARTITION BY date
                ORDER BY temperature DESC NULLS LAST, timestamp DESC
            ) AS rn
        FROM weather_data
        WHERE sensor_id = (SELECT sensor_id FROM weather_sensors WHERE sensor_name = 'outdoor')
    ) highs
    WHERE rn = 1
) h
INNER JOIN (
    SELECT date, low_temp, low_time
    FROM (
        SELECT
            date,
            temperature AS low_temp,
            timestamp AS low_time,
            -- ASC already sorts NULLs last; spelled out to mirror the high side.
            row_number() OVER (
                PARTITION BY date
                ORDER BY temperature ASC NULLS LAST, timestamp DESC
            ) AS rn
        FROM weather_data
        WHERE sensor_id = (SELECT sensor_id FROM weather_sensors WHERE sensor_name = 'outdoor')
    ) lows
    WHERE rn = 1
) l
    ON h.date = l.date
ORDER BY h.date ASC;
有没有什么相对简单的补充,可以加到第一个查询上,而不会大幅增加执行时间?我猜是有的,但我想我在这个问题上已经钻研太久了!
这与您的第二个查询相同,但只需要对 weather_data 表
进行一次扫描:
-- Daily high/low temperature and the timestamp of each for the sensor named
-- 'outdoor', using a single pass over weather_data.
-- Each row is ranked twice within its day (hottest-first and coldest-first);
-- only the rank-1 rows are kept and then pivoted into one row per day with
-- conditional aggregation.
select date,
       max(case when high_rn = 1 then timestamp end) as high_time,
       max(case when high_rn = 1 then temperature end) as high_temp,
       max(case when low_rn = 1 then timestamp end) as low_time,
       max(case when low_rn = 1 then temperature end) as low_temp
from (
  select timestamp, temperature, date,
         -- temperature is nullable and DESC sorts NULLs first by default;
         -- NULLS LAST keeps a missing reading from being ranked as the high.
         row_number() over (partition by date order by temperature desc nulls last, timestamp desc) as high_rn,
         row_number() over (partition by date order by temperature asc nulls last, timestamp desc) as low_rn
  from weather_data
  where sensor_id = (select sensor_id from weather_sensors where sensor_name = 'outdoor')
) t
where (high_rn = 1 or low_rn = 1)
group by date
order by date;
它使用条件聚合,对只包含最低和最高温度的结果做交叉表(又称 "pivot")查询。
与问题无关,但是:date 和 timestamp 作为列名很糟糕。一方面是因为它们是关键字,但更重要的是因为它们没有说明该列的实际含义。是 "due date"(到期日)吗?是 "reading date"(读数日期)?还是 "processing date"(处理日期)?
-- Per-day minimum and maximum temperature of sensor 2, with the timestamp of
-- each. zdate / ztimestamp are the z-prefixed names of the date / timestamp
-- columns.
-- Two named windows are used so that each extreme is the FIRST row of its own
-- ordering with NULLS LAST. With a single ascending window, last_value() would
-- return a NULL-temperature row as the maximum, because ASC sorts NULLs last.
-- The price is a second sort per partition.
-- Ties: the minimum reports its earliest timestamp, the maximum its latest.
SELECT
DISTINCT ON (zdate) zdate
, first_value(ztimestamp) OVER coldest AS stamp_at_min
, first_value(temperature) OVER coldest AS tmin
, first_value(ztimestamp) OVER hottest AS stamp_at_max
, first_value(temperature) OVER hottest AS tmax
FROM weather_data
WHERE sensor_id = 2
WINDOW coldest AS (PARTITION BY zdate ORDER BY temperature ASC NULLS LAST, ztimestamp ASC)
, hottest AS (PARTITION BY zdate ORDER BY temperature DESC NULLS LAST, ztimestamp DESC)
ORDER BY zdate
;
- 我给 date 和 timestamp 两个列名加了前缀 z(即 zdate 和 ztimestamp)
- 我把 ztimestamp 加入了 ORDER BY,作为温度相同时的决胜条件(tie-breaker)