使用 PostgreSql 以 5 分钟为间隔插入最后已知价格
Inserting last known price on a 5 minute interval using PostgreSql
我有这个 table 用于逐笔存储阿根廷债券数据:
bonos=# \d preciosrt
Table "public.preciosrt"
Column | Type | Collation | Nullable | Default
------------+------------------------+-----------+----------+-----------------------------------------------
idpreciort | integer | | not null | nextval('preciosrt_idpreciort_seq'::regclass)
fecha | date | | not null | now()::date
ticker | text | | not null |
bid | numeric | | |
ask | numeric | | |
hora | time without time zone | | not null |
bidqty | integer | | |
askqty | integer | | |
last | numeric | | |
volume | integer | | |
settlement | character varying(4) | | |
通过此查询,我可以获得 13:55 和 14:30 之间的“AL29”债券的所有最新价格:
select
row_number() over(), hora, last
from preciosrt
where
ticker='AL29' and
settlement='48hs' and
fecha=now()::date - interval '1 day' and
hora >= '13:55:00' and hora <= '14:30:00'
order by hora;
row_number | hora | last
------------+----------+--------
1 | 13:55:18 | 6260.0
2 | 13:56:00 | 6264.5
3 | 13:56:29 | 6264.5
4 | 13:57:08 | 6265.0
5 | 14:03:07 | 6266.0
6 | 14:05:16 | 6266.0
7 | 14:06:19 | 6266.0
8 | 14:06:33 | 6270.0
9 | 14:07:08 | 6276.5
10 | 14:07:24 | 6299.5
11 | 14:09:56 | 6318.5
12 | 14:11:24 | 6300.0
13 | 14:20:58 | 6315.0
14 | 14:24:58 | 6315.0
15 | 14:27:54 | 6315.0
16 | 14:28:06 | 6319.0
(16 rows)
让我们注意第 12 行和第 13 行,其中一个刻度位于 14:11:24
,下一个刻度位于 14:20:58
。
现在,通过这个查询,我可以以 5 分钟为间隔显示报价(从 14:00:00
到 15:00:00
):
select
ticker,
last,
intervalo
from(
select
ticker, last,
row_number() over (
partition by to_timestamp(floor((extract('epoch' from hora) / 300 )) * 300) at time zone 'UTC'
order by hora desc
) as rownum,
to_char(to_timestamp(floor((extract('epoch' from hora) / 300 )) * 300) at time zone 'UTC', 'HH24:MI:SS') as intervalo
from preciosrt p1
where
p1.ticker='AL29' and p1.settlement='48hs' and
p1.fecha=now()::date - interval '1 day'
) x
where x.rownum=1 and x.intervalo >= '14:00:00' and x.intervalo <= '15:00:00';
ticker | last | intervalo
--------+--------+-----------
AL29 | 6266.0 | 14:00:00
AL29 | 6318.5 | 14:05:00
AL29 | 6300.0 | 14:10:00
AL29 | 6315.0 | 14:20:00
AL29 | 6319.0 | 14:25:00
AL29 | 6323.0 | 14:30:00
AL29 | 6320.0 | 14:35:00
AL29 | 6318.0 | 14:40:00
AL29 | 6329.5 | 14:50:00
AL29 | 6315.0 | 14:55:00
AL29 | 6316.0 | 15:00:00
(11 rows)
如您所见,在 14:10 的最后一个价格是 6300.0
,下一个价格在 14:20 6315
。
AL29 | 6300.0 | 14:10:00
AL29 | 6315.0 | 14:20:00
现在我想在 14:15:00
添加缺失的数据,其中包含最后一个已知报价的值,即 6300
。我如何使用 PostgreSql 查询来执行此操作?
编辑
使用 generate_series
我可以创建整个区间,然后左连接到我的数据,但你可以猜到差距仍然存在:
with minutes as (
select generate_series(
timestamp '2000-01-01 11:00:00',
timestamp '2000-01-01 17:00:00',
'5 minute'::interval
) as tt
)
select
to_char(minutes.tt, 'HH24:MI:SS'),
x2.*
from minutes
left join
(
select
ticker,
last,
intervalo
from(
select
ticker, last,
row_number() over (
partition by to_timestamp(floor((extract('epoch' from hora) / 300 )) * 300) at time zone 'UTC'
order by hora desc
) as rownum,
to_char(to_timestamp(floor((extract('epoch' from hora) / 300 )) * 300) at time zone 'UTC', 'HH24:MI:SS') as intervalo
from preciosrt p1
where
p1.ticker='AL29' and p1.settlement='48hs' and
p1.fecha=now()::date - interval '1 day'
) x
where x.rownum=1
) x2 on x2.intervalo = to_char(minutes.tt, 'HH24:MI:SS');
14:05:00 | AL29 | 6318.5 | 14:05:00
14:10:00 | AL29 | 6300.0 | 14:10:00
14:15:00 | | |
14:20:00 | AL29 | 6315.0 | 14:20:00
14:25:00 | AL29 | 6319.0 | 14:25:00
14:30:00 | AL29 | 6323.0 | 14:30:00
14:35:00 | AL29 | 6320.0 | 14:35:00
14:40:00 | AL29 | 6318.0 | 14:40:00
14:45:00 | | |
14:50:00 | AL29 | 6329.5 | 14:50:00
14:55:00 | AL29 | 6315.0 | 14:55:00
15:00:00 | AL29 | 6316.0 | 15:00:00
15:05:00 | AL29 | 6320.5 | 15:05:00
15:10:00 | AL29 | 6374.5 | 15:10:00
15:15:00 | AL29 | 6344.0 | 15:15:00
15:20:00 | AL29 | 6395.0 | 15:20:00
15:25:00 | AL29 | 6400.0 | 15:25:00
15:30:00 | AL29 | 6355.0 | 15:30:00
15:35:00 | AL29 | 6400.0 | 15:35:00
15:40:00 | AL29 | 6395.0 | 15:40:00
编辑 2
我想我找到了:
with minutes as (
select generate_series(
timestamp '2000-01-01 11:00:00',
timestamp '2000-01-01 17:00:00',
'5 minute'::interval
) as tt
)
select
to_char(minutes.tt, 'HH24:MI:SS'),
coalesce(x2.last, lag(last) over()) as last
from minutes
left join
(
select
ticker,
last,
intervalo
from(
select
ticker, last,
row_number() over (
partition by to_timestamp(floor((extract('epoch' from hora) / 300 )) * 300) at time zone 'UTC'
order by hora desc
) as rownum,
to_char(to_timestamp(floor((extract('epoch' from hora) / 300 )) * 300) at time zone 'UTC', 'HH24:MI:SS') as intervalo
from preciosrt p1
where
p1.ticker='AL29' and p1.settlement='48hs' and
p1.fecha=now()::date - interval '1 day'
) x
where x.rownum=1
) x2 on x2.intervalo = to_char(minutes.tt, 'HH24:MI:SS');
给出这个结果:
to_char | last
----------+--------
11:00:00 |
11:05:00 |
11:10:00 |
11:15:00 |
11:20:00 |
11:25:00 |
11:30:00 |
11:35:00 | 6225.0
11:40:00 | 6225.0
11:45:00 | 6205.0
11:50:00 | 6233.5
11:55:00 | 6233.0
12:00:00 | 6225.0
12:05:00 | 6222.0
12:10:00 | 6222.0
12:15:00 | 6228.5
12:20:00 | 6220.0
12:25:00 | 6249.0
12:30:00 | 6258.5
12:35:00 | 6257.5
12:40:00 | 6265.0
12:45:00 | 6280.0
12:50:00 | 6296.0
12:55:00 | 6277.0
13:00:00 | 6270.0
13:05:00 | 6270.0
13:10:00 | 6269.0
13:15:00 | 6250.0
13:20:00 | 6260.0
13:25:00 | 6250.0
13:30:00 | 6250.0
13:35:00 | 6250.0
13:40:00 | 6250.0
13:45:00 | 6250.0
13:50:00 | 6260.0
13:55:00 | 6265.0
14:00:00 | 6266.0
14:05:00 | 6318.5
14:10:00 | 6300.0
14:15:00 | 6300.0
14:20:00 | 6315.0
14:25:00 | 6319.0
14:30:00 | 6323.0
14:35:00 | 6320.0
14:40:00 | 6318.0
14:45:00 | 6318.0
14:50:00 | 6329.5
14:55:00 | 6315.0
15:00:00 | 6316.0
15:05:00 | 6320.5
15:10:00 | 6374.5
15:15:00 | 6344.0
15:20:00 | 6395.0
15:25:00 | 6400.0
15:30:00 | 6355.0
15:35:00 | 6400.0
15:40:00 | 6395.0
15:45:00 | 6372.0
15:50:00 | 6405.0
15:55:00 | 6420.0
16:00:00 | 6420.0
16:05:00 | 6410.5
16:10:00 | 6400.0
16:15:00 | 6417.0
16:20:00 | 6395.0
16:25:00 | 6390.0
16:30:00 | 6388.5
16:35:00 | 6390.0
16:40:00 | 6390.0
16:45:00 | 6381.0
16:50:00 | 6380.5
16:55:00 | 6380.5
17:00:00 | 6379.0
编辑 3
改编自 Tomáš Záluský 的回答:
with minutes as (
select generate_series(
timestamp '2000-01-01 11:00:00',
timestamp '2000-01-01 17:00:00',
'5 minute'::interval
) as tt
), t as (
select
to_char(minutes.tt, 'HH24:MI:SS'),
x2.*,
minutes.tt
from minutes
left join
(
select
ticker,
last,
intervalo
from(
select
ticker, last,
row_number() over (
partition by to_timestamp(floor((extract('epoch' from hora) / 300 )) * 300) at time zone 'UTC'
order by hora desc
) as rownum,
to_char(to_timestamp(floor((extract('epoch' from hora) / 300 )) * 300) at time zone 'UTC', 'HH24:MI:SS') as intervalo
from preciosrt p1
where
p1.ticker='AL29' and p1.settlement='48hs' and
p1.fecha=now()::date - interval '1 day'
) x
where x.rownum=1
) x2 on x2.intervalo = to_char(minutes.tt, 'HH24:MI:SS')
), x as (
select t.*, count(ticker) over(order by tt) as parttn from t
)
select
tt::time,
first_value(ticker) over(partition by parttn order by tt),
first_value(last) over(partition by parttn order by tt)
from x;
您正在解决填充前几行缺失值的问题。
左连接是好的开始。您需要将 ticker
和 last
列的任何 null
值替换为按 minutes.tt
排序的最后一个非空值。这通常可以使用 lag
window 函数来实现。来自上一行的 lag
函数 returns 值。虽然它适用于您的具体情况,但我假设通常会有许多连续 5m 间隔的间隙。在这种情况下,lag
只能与 ignore nulls
子句一起使用:case when ticker is null then lag(ticker) ignore nulls over (order by tt) else ticker end
。不幸的是,Postgres 目前不支持它。
解决方法:使用 count
分析函数获取按 tt
排序的仅非空值序列(即通过左连接连接的值)。函数 returns 数字随每个非空值增加而随每个 null
值保持不变。这个数字描述了由第一个非空值和可能的许多后续 null
值组成的块。因此,您可以使用此数字来定义分区。您想要的数据是该分区上的 first_value
。
with minutes as (
...YOUR SERIES...
), t as (
select
to_char(minutes.tt, 'HH24:MI:SS') as tt,
...YOUR QUERY...
), x as (
select t.*, count(ticker) over (order by tt) as parttn from t
)
select tt
, first_value(ticker) over (partition by parttn order by tt)
, first_value(last) over (partition by parttn order by tt)
from x
Simplified fiddle here. 如果我在你的数据中忽略了一些重要的东西,那么请你自己滚动 fiddle。
我有这个 table 用于逐笔存储阿根廷债券数据:
bonos=# \d preciosrt
Table "public.preciosrt"
Column | Type | Collation | Nullable | Default
------------+------------------------+-----------+----------+-----------------------------------------------
idpreciort | integer | | not null | nextval('preciosrt_idpreciort_seq'::regclass)
fecha | date | | not null | now()::date
ticker | text | | not null |
bid | numeric | | |
ask | numeric | | |
hora | time without time zone | | not null |
bidqty | integer | | |
askqty | integer | | |
last | numeric | | |
volume | integer | | |
settlement | character varying(4) | | |
通过此查询,我可以获得 13:55 和 14:30 之间的“AL29”债券的所有最新价格:
select
row_number() over(), hora, last
from preciosrt
where
ticker='AL29' and
settlement='48hs' and
fecha=now()::date - interval '1 day' and
hora >= '13:55:00' and hora <= '14:30:00'
order by hora;
row_number | hora | last
------------+----------+--------
1 | 13:55:18 | 6260.0
2 | 13:56:00 | 6264.5
3 | 13:56:29 | 6264.5
4 | 13:57:08 | 6265.0
5 | 14:03:07 | 6266.0
6 | 14:05:16 | 6266.0
7 | 14:06:19 | 6266.0
8 | 14:06:33 | 6270.0
9 | 14:07:08 | 6276.5
10 | 14:07:24 | 6299.5
11 | 14:09:56 | 6318.5
12 | 14:11:24 | 6300.0
13 | 14:20:58 | 6315.0
14 | 14:24:58 | 6315.0
15 | 14:27:54 | 6315.0
16 | 14:28:06 | 6319.0
(16 rows)
让我们注意第 12 行和第 13 行,其中一个刻度位于 14:11:24
,下一个刻度位于 14:20:58
。
现在,通过这个查询,我可以以 5 分钟为间隔显示报价(从 14:00:00
到 15:00:00
):
select
ticker,
last,
intervalo
from(
select
ticker, last,
row_number() over (
partition by to_timestamp(floor((extract('epoch' from hora) / 300 )) * 300) at time zone 'UTC'
order by hora desc
) as rownum,
to_char(to_timestamp(floor((extract('epoch' from hora) / 300 )) * 300) at time zone 'UTC', 'HH24:MI:SS') as intervalo
from preciosrt p1
where
p1.ticker='AL29' and p1.settlement='48hs' and
p1.fecha=now()::date - interval '1 day'
) x
where x.rownum=1 and x.intervalo >= '14:00:00' and x.intervalo <= '15:00:00';
ticker | last | intervalo
--------+--------+-----------
AL29 | 6266.0 | 14:00:00
AL29 | 6318.5 | 14:05:00
AL29 | 6300.0 | 14:10:00
AL29 | 6315.0 | 14:20:00
AL29 | 6319.0 | 14:25:00
AL29 | 6323.0 | 14:30:00
AL29 | 6320.0 | 14:35:00
AL29 | 6318.0 | 14:40:00
AL29 | 6329.5 | 14:50:00
AL29 | 6315.0 | 14:55:00
AL29 | 6316.0 | 15:00:00
(11 rows)
如您所见,在 14:10 的最后一个价格是 6300.0
,下一个价格在 14:20 6315
。
AL29 | 6300.0 | 14:10:00
AL29 | 6315.0 | 14:20:00
现在我想在 14:15:00
添加缺失的数据,其中包含最后一个已知报价的值,即 6300
。我如何使用 PostgreSql 查询来执行此操作?
编辑
使用 generate_series
我可以创建整个区间,然后左连接到我的数据,但你可以猜到差距仍然存在:
with minutes as (
select generate_series(
timestamp '2000-01-01 11:00:00',
timestamp '2000-01-01 17:00:00',
'5 minute'::interval
) as tt
)
select
to_char(minutes.tt, 'HH24:MI:SS'),
x2.*
from minutes
left join
(
select
ticker,
last,
intervalo
from(
select
ticker, last,
row_number() over (
partition by to_timestamp(floor((extract('epoch' from hora) / 300 )) * 300) at time zone 'UTC'
order by hora desc
) as rownum,
to_char(to_timestamp(floor((extract('epoch' from hora) / 300 )) * 300) at time zone 'UTC', 'HH24:MI:SS') as intervalo
from preciosrt p1
where
p1.ticker='AL29' and p1.settlement='48hs' and
p1.fecha=now()::date - interval '1 day'
) x
where x.rownum=1
) x2 on x2.intervalo = to_char(minutes.tt, 'HH24:MI:SS');
14:05:00 | AL29 | 6318.5 | 14:05:00
14:10:00 | AL29 | 6300.0 | 14:10:00
14:15:00 | | |
14:20:00 | AL29 | 6315.0 | 14:20:00
14:25:00 | AL29 | 6319.0 | 14:25:00
14:30:00 | AL29 | 6323.0 | 14:30:00
14:35:00 | AL29 | 6320.0 | 14:35:00
14:40:00 | AL29 | 6318.0 | 14:40:00
14:45:00 | | |
14:50:00 | AL29 | 6329.5 | 14:50:00
14:55:00 | AL29 | 6315.0 | 14:55:00
15:00:00 | AL29 | 6316.0 | 15:00:00
15:05:00 | AL29 | 6320.5 | 15:05:00
15:10:00 | AL29 | 6374.5 | 15:10:00
15:15:00 | AL29 | 6344.0 | 15:15:00
15:20:00 | AL29 | 6395.0 | 15:20:00
15:25:00 | AL29 | 6400.0 | 15:25:00
15:30:00 | AL29 | 6355.0 | 15:30:00
15:35:00 | AL29 | 6400.0 | 15:35:00
15:40:00 | AL29 | 6395.0 | 15:40:00
编辑 2
我想我找到了:
with minutes as (
select generate_series(
timestamp '2000-01-01 11:00:00',
timestamp '2000-01-01 17:00:00',
'5 minute'::interval
) as tt
)
select
to_char(minutes.tt, 'HH24:MI:SS'),
coalesce(x2.last, lag(last) over()) as last
from minutes
left join
(
select
ticker,
last,
intervalo
from(
select
ticker, last,
row_number() over (
partition by to_timestamp(floor((extract('epoch' from hora) / 300 )) * 300) at time zone 'UTC'
order by hora desc
) as rownum,
to_char(to_timestamp(floor((extract('epoch' from hora) / 300 )) * 300) at time zone 'UTC', 'HH24:MI:SS') as intervalo
from preciosrt p1
where
p1.ticker='AL29' and p1.settlement='48hs' and
p1.fecha=now()::date - interval '1 day'
) x
where x.rownum=1
) x2 on x2.intervalo = to_char(minutes.tt, 'HH24:MI:SS');
给出这个结果:
to_char | last
----------+--------
11:00:00 |
11:05:00 |
11:10:00 |
11:15:00 |
11:20:00 |
11:25:00 |
11:30:00 |
11:35:00 | 6225.0
11:40:00 | 6225.0
11:45:00 | 6205.0
11:50:00 | 6233.5
11:55:00 | 6233.0
12:00:00 | 6225.0
12:05:00 | 6222.0
12:10:00 | 6222.0
12:15:00 | 6228.5
12:20:00 | 6220.0
12:25:00 | 6249.0
12:30:00 | 6258.5
12:35:00 | 6257.5
12:40:00 | 6265.0
12:45:00 | 6280.0
12:50:00 | 6296.0
12:55:00 | 6277.0
13:00:00 | 6270.0
13:05:00 | 6270.0
13:10:00 | 6269.0
13:15:00 | 6250.0
13:20:00 | 6260.0
13:25:00 | 6250.0
13:30:00 | 6250.0
13:35:00 | 6250.0
13:40:00 | 6250.0
13:45:00 | 6250.0
13:50:00 | 6260.0
13:55:00 | 6265.0
14:00:00 | 6266.0
14:05:00 | 6318.5
14:10:00 | 6300.0
14:15:00 | 6300.0
14:20:00 | 6315.0
14:25:00 | 6319.0
14:30:00 | 6323.0
14:35:00 | 6320.0
14:40:00 | 6318.0
14:45:00 | 6318.0
14:50:00 | 6329.5
14:55:00 | 6315.0
15:00:00 | 6316.0
15:05:00 | 6320.5
15:10:00 | 6374.5
15:15:00 | 6344.0
15:20:00 | 6395.0
15:25:00 | 6400.0
15:30:00 | 6355.0
15:35:00 | 6400.0
15:40:00 | 6395.0
15:45:00 | 6372.0
15:50:00 | 6405.0
15:55:00 | 6420.0
16:00:00 | 6420.0
16:05:00 | 6410.5
16:10:00 | 6400.0
16:15:00 | 6417.0
16:20:00 | 6395.0
16:25:00 | 6390.0
16:30:00 | 6388.5
16:35:00 | 6390.0
16:40:00 | 6390.0
16:45:00 | 6381.0
16:50:00 | 6380.5
16:55:00 | 6380.5
17:00:00 | 6379.0
编辑 3
改编自 Tomáš Záluský 的回答:
with minutes as (
select generate_series(
timestamp '2000-01-01 11:00:00',
timestamp '2000-01-01 17:00:00',
'5 minute'::interval
) as tt
), t as (
select
to_char(minutes.tt, 'HH24:MI:SS'),
x2.*,
minutes.tt
from minutes
left join
(
select
ticker,
last,
intervalo
from(
select
ticker, last,
row_number() over (
partition by to_timestamp(floor((extract('epoch' from hora) / 300 )) * 300) at time zone 'UTC'
order by hora desc
) as rownum,
to_char(to_timestamp(floor((extract('epoch' from hora) / 300 )) * 300) at time zone 'UTC', 'HH24:MI:SS') as intervalo
from preciosrt p1
where
p1.ticker='AL29' and p1.settlement='48hs' and
p1.fecha=now()::date - interval '1 day'
) x
where x.rownum=1
) x2 on x2.intervalo = to_char(minutes.tt, 'HH24:MI:SS')
), x as (
select t.*, count(ticker) over(order by tt) as parttn from t
)
select
tt::time,
first_value(ticker) over(partition by parttn order by tt),
first_value(last) over(partition by parttn order by tt)
from x;
您正在解决填充前几行缺失值的问题。
左连接是好的开始。您需要将 ticker
和 last
列的任何 null
值替换为按 minutes.tt
排序的最后一个非空值。这通常可以使用 lag
window 函数来实现。来自上一行的 lag
函数 returns 值。虽然它适用于您的具体情况,但我假设通常会有许多连续 5m 间隔的间隙。在这种情况下,lag
只能与 ignore nulls
子句一起使用:case when ticker is null then lag(ticker) ignore nulls over (order by tt) else ticker end
。不幸的是,Postgres 目前不支持它。
解决方法:使用 count
分析函数获取按 tt
排序的仅非空值序列(即通过左连接连接的值)。函数 returns 数字随每个非空值增加而随每个 null
值保持不变。这个数字描述了由第一个非空值和可能的许多后续 null
值组成的块。因此,您可以使用此数字来定义分区。您想要的数据是该分区上的 first_value
。
with minutes as (
...YOUR SERIES...
), t as (
select
to_char(minutes.tt, 'HH24:MI:SS') as tt,
...YOUR QUERY...
), x as (
select t.*, count(ticker) over (order by tt) as parttn from t
)
select tt
, first_value(ticker) over (partition by parttn order by tt)
, first_value(last) over (partition by parttn order by tt)
from x
Simplified fiddle here. 如果我在你的数据中忽略了一些重要的东西,那么请你自己滚动 fiddle。