在 PostgreSQL 中对多列按区间(范围)分组
Grouping by multiple columns in a range in postgres
我有几张表(pega.race 也存在,只是这里用不到它的任何数据):
-- One row per pega taking part in a race, with the result it achieved.
-- The statement is now terminated with ';' so it can be run together with
-- the CREATE TABLE that follows it.
CREATE TABLE pega.pega_race (
    id bigserial NOT NULL,                -- surrogate key
    race_id int8 NOT NULL,                -- joined to pega.race.id by the queries in this file (no FK declared)
    pega_id int8 NOT NULL,                -- joined to pega.pega.id by the queries in this file (no FK declared)
    "result" int4 NOT NULL,               -- race result; the accompanying text treats 1 as first place
    ts timestamp NULL DEFAULT now_utc(),  -- now_utc() is defined elsewhere; NOTE(review): presumably the current UTC time - confirm
    CONSTRAINT pega_race_pk PRIMARY KEY (id)
);
-- One row per pega with its six numeric attributes.
-- The attribute columns are nullable; the accompanying text states that
-- existing values all lie between 0 and 9 (not enforced by a constraint here).
CREATE TABLE pega.pega (
    id        BIGINT  NOT NULL CONSTRAINT pega_pk PRIMARY KEY,
    speed     NUMERIC,
    strength  NUMERIC,
    lightning NUMERIC,
    wind      NUMERIC,
    water     NUMERIC,
    fire      NUMERIC
);
我想要的是:对 speed/strength/wind/lightning/water/fire 各自落在某个区间内的每一种组合(排列?),计算平均 result。区间分别为 0-2.25、2.25-4.5、4.5-6.75 和 6.75-9。所有取值都必须在 0 到 9 之间,不存在超出这个范围的值。
也就是说,我想得到速度 6.75-9、力量 2.25-4.5、风 4.5-6.75、闪电 2.25-4.5、水 0-2.25、火 0-2.25 这一组合,以及其他所有组合各自的平均 result。我的数据量不够,没法像下面这样简单地按四舍五入后的值分组:
-- Average race result and row count per combination of the six attributes,
-- each rounded to the nearest integer. Combinations with 20 or fewer race
-- rows are dropped; output is sorted by ascending average result.
SELECT
    round(p.speed)     AS speed,
    round(p.strength)  AS strength,
    round(p.lightning) AS lightning,
    round(p.wind)      AS wind,
    round(p.water)     AS water,
    round(p.fire)      AS fire,
    avg(pr.result),
    count(*)
FROM pega.pega_race AS pr
JOIN pega.pega AS p
    ON p.id = pr.pega_id
-- Group on the rounded expressions themselves: a bare name such as "speed"
-- in GROUP BY would resolve to the unrounded input column, not the alias.
GROUP BY
    round(p.speed),
    round(p.strength),
    round(p.lightning),
    round(p.wind),
    round(p.water),
    round(p.fire)
HAVING count(*) > 20
ORDER BY avg(pr.result)
所以我想把这些区间放宽,把可能的组合数量减少几倍。
查看与此类似的其他帖子,我找到了在单个列上按范围分组的解决方案,我正在努力将其扩展到多个列。
这是我的尝试
-- For each of the four equal-width ranges over 0..9 (width 2.25), report how
-- many race rows have each attribute inside that range, plus the average
-- result of rows whose six attributes ALL fall inside that same range.
WITH series AS (
    -- Lower bounds of the ranges: 0, 2.25, 4.5, 6.75.
    SELECT generate_series(0, 9 - 2.25, 2.25) AS r_from
),
range AS (
    SELECT
        r_from,
        r_from + 2.25 AS r_to,
        -- Half-open [r_from, r_to) so a value sitting exactly on a boundary
        -- (2.25, 4.5, 6.75) is counted in one range only; BETWEEN is closed
        -- on both ends and counted such values twice. The last range is
        -- closed so the maximum value 9 is still counted.
        numrange(
            r_from,
            r_from + 2.25,
            CASE WHEN r_from + 2.25 >= 9 THEN '[]' ELSE '[)' END
        ) AS bucket
    FROM series
),
pega_data AS (
    -- One row per race participation with the pega's attributes.
    -- The inner join to pega.race keeps only rows whose race exists.
    SELECT
        pega.speed,
        pega.strength,
        pega.lightning,
        pega.wind,
        pega.water,
        pega.fire,
        pr.result
    FROM pega.pega
    JOIN pega.pega_race pr ON pega.id = pr.pega_id
    JOIN pega.race ON pr.race_id = race.id
)
SELECT
    r_from,
    r_to,
    (SELECT count(*) FROM pega_data WHERE speed <@ bucket)     AS speed_count,
    (SELECT count(*) FROM pega_data WHERE strength <@ bucket)  AS strength_count,
    (SELECT count(*) FROM pega_data WHERE lightning <@ bucket) AS lightning_count,
    (SELECT count(*) FROM pega_data WHERE wind <@ bucket)      AS wind_count,
    (SELECT count(*) FROM pega_data WHERE water <@ bucket)     AS water_count,
    (SELECT count(*) FROM pega_data WHERE fire <@ bucket)      AS fire_count,
    (
        SELECT avg(result)
        FROM pega_data
        WHERE speed <@ bucket
          AND strength <@ bucket
          AND lightning <@ bucket
          AND wind <@ bucket
          AND water <@ bucket
          AND fire <@ bucket
    ) AS avg
FROM range
我觉得已经很接近了?但不确定如何把结果拆分到每一种组合上。
谢谢。
另外,如果你能指点我如何算出获得第一名(result = 1)的比例,加分。
如果你能指点我一条更有统计学意义的路线,找出这些列与较低的 result(或较高的第一名比例)之间的相关性,则额外加分。这个我甚至不知道该从哪里开始……我上一次(也是唯一一次)上统计课已经是 5 年前的事了。
由于这些区间是规则的(等宽),您无需显式生成序列再逐个检查范围。可以直接用算术运算算出区间索引 0..3,再按索引分组统计。例如:
-- Count rows per combination of range indexes 0..3 (equal-width ranges of
-- 2.25 over 0..9) for speed, strength, lightning and wind.
-- pega_data is not defined in this snippet; it is the CTE of that name from
-- the question's query.
-- floor(9 / 2.25) is 4, so the maximum value 9 is folded into the last
-- range (index 3). CASE is used instead of least(): least() ignores NULL
-- arguments and would put NULL attributes into index 3.
select
    case when speed >= 9 then 3 else floor(speed / 2.25)::int end         r_speed,
    case when strength >= 9 then 3 else floor(strength / 2.25)::int end   r_strength,
    case when lightning >= 9 then 3 else floor(lightning / 2.25)::int end r_lightning,
    case when wind >= 9 then 3 else floor(wind / 2.25)::int end           r_wind,
    count(*) n
from pega_data
group by r_speed, r_strength, r_lightning, r_wind
order by r_speed, r_strength, r_lightning, r_wind
我有几张表(pega.race 也存在,只是这里用不到它的任何数据):
-- One row per pega taking part in a race, with the result it achieved.
-- The statement is now terminated with ';' so it can be run together with
-- the CREATE TABLE that follows it.
CREATE TABLE pega.pega_race (
    id bigserial NOT NULL,                -- surrogate key
    race_id int8 NOT NULL,                -- joined to pega.race.id by the queries in this file (no FK declared)
    pega_id int8 NOT NULL,                -- joined to pega.pega.id by the queries in this file (no FK declared)
    "result" int4 NOT NULL,               -- race result; the accompanying text treats 1 as first place
    ts timestamp NULL DEFAULT now_utc(),  -- now_utc() is defined elsewhere; NOTE(review): presumably the current UTC time - confirm
    CONSTRAINT pega_race_pk PRIMARY KEY (id)
);
-- One row per pega with its six numeric attributes.
-- The attribute columns are nullable; the accompanying text states that
-- existing values all lie between 0 and 9 (not enforced by a constraint here).
CREATE TABLE pega.pega (
    id        BIGINT  NOT NULL CONSTRAINT pega_pk PRIMARY KEY,
    speed     NUMERIC,
    strength  NUMERIC,
    lightning NUMERIC,
    wind      NUMERIC,
    water     NUMERIC,
    fire      NUMERIC
);
我想要的是:对 speed/strength/wind/lightning/water/fire 各自落在某个区间内的每一种组合(排列?),计算平均 result。区间分别为 0-2.25、2.25-4.5、4.5-6.75 和 6.75-9。所有取值都必须在 0 到 9 之间,不存在超出这个范围的值。
也就是说,我想得到速度 6.75-9、力量 2.25-4.5、风 4.5-6.75、闪电 2.25-4.5、水 0-2.25、火 0-2.25 这一组合,以及其他所有组合各自的平均 result。我的数据量不够,没法像下面这样简单地按四舍五入后的值分组:
-- Average race result and row count per combination of the six attributes,
-- each rounded to the nearest integer. Combinations with 20 or fewer race
-- rows are dropped; output is sorted by ascending average result.
SELECT
    round(p.speed)     AS speed,
    round(p.strength)  AS strength,
    round(p.lightning) AS lightning,
    round(p.wind)      AS wind,
    round(p.water)     AS water,
    round(p.fire)      AS fire,
    avg(pr.result),
    count(*)
FROM pega.pega_race AS pr
JOIN pega.pega AS p
    ON p.id = pr.pega_id
-- Group on the rounded expressions themselves: a bare name such as "speed"
-- in GROUP BY would resolve to the unrounded input column, not the alias.
GROUP BY
    round(p.speed),
    round(p.strength),
    round(p.lightning),
    round(p.wind),
    round(p.water),
    round(p.fire)
HAVING count(*) > 20
ORDER BY avg(pr.result)
所以我想把这些区间放宽,把可能的组合数量减少几倍。
查看与此类似的其他帖子,我找到了在单个列上按范围分组的解决方案,我正在努力将其扩展到多个列。
这是我的尝试
-- For each of the four equal-width ranges over 0..9 (width 2.25), report how
-- many race rows have each attribute inside that range, plus the average
-- result of rows whose six attributes ALL fall inside that same range.
WITH series AS (
    -- Lower bounds of the ranges: 0, 2.25, 4.5, 6.75.
    SELECT generate_series(0, 9 - 2.25, 2.25) AS r_from
),
range AS (
    SELECT
        r_from,
        r_from + 2.25 AS r_to,
        -- Half-open [r_from, r_to) so a value sitting exactly on a boundary
        -- (2.25, 4.5, 6.75) is counted in one range only; BETWEEN is closed
        -- on both ends and counted such values twice. The last range is
        -- closed so the maximum value 9 is still counted.
        numrange(
            r_from,
            r_from + 2.25,
            CASE WHEN r_from + 2.25 >= 9 THEN '[]' ELSE '[)' END
        ) AS bucket
    FROM series
),
pega_data AS (
    -- One row per race participation with the pega's attributes.
    -- The inner join to pega.race keeps only rows whose race exists.
    SELECT
        pega.speed,
        pega.strength,
        pega.lightning,
        pega.wind,
        pega.water,
        pega.fire,
        pr.result
    FROM pega.pega
    JOIN pega.pega_race pr ON pega.id = pr.pega_id
    JOIN pega.race ON pr.race_id = race.id
)
SELECT
    r_from,
    r_to,
    (SELECT count(*) FROM pega_data WHERE speed <@ bucket)     AS speed_count,
    (SELECT count(*) FROM pega_data WHERE strength <@ bucket)  AS strength_count,
    (SELECT count(*) FROM pega_data WHERE lightning <@ bucket) AS lightning_count,
    (SELECT count(*) FROM pega_data WHERE wind <@ bucket)      AS wind_count,
    (SELECT count(*) FROM pega_data WHERE water <@ bucket)     AS water_count,
    (SELECT count(*) FROM pega_data WHERE fire <@ bucket)      AS fire_count,
    (
        SELECT avg(result)
        FROM pega_data
        WHERE speed <@ bucket
          AND strength <@ bucket
          AND lightning <@ bucket
          AND wind <@ bucket
          AND water <@ bucket
          AND fire <@ bucket
    ) AS avg
FROM range
我觉得已经很接近了?但不确定如何把结果拆分到每一种组合上。
谢谢。
另外,如果你能指点我如何算出获得第一名(result = 1)的比例,加分。如果你能指点我一条更有统计学意义的路线,找出这些列与较低的 result(或较高的第一名比例)之间的相关性,则额外加分。这个我甚至不知道该从哪里开始……我上一次(也是唯一一次)上统计课已经是 5 年前的事了。
由于这些区间是规则的(等宽),您无需显式生成序列再逐个检查范围。可以直接用算术运算算出区间索引 0..3,再按索引分组统计。例如:
-- Count rows per combination of range indexes 0..3 (equal-width ranges of
-- 2.25 over 0..9) for speed, strength, lightning and wind.
-- pega_data is not defined in this snippet; it is the CTE of that name from
-- the question's query.
-- floor(9 / 2.25) is 4, so the maximum value 9 is folded into the last
-- range (index 3). CASE is used instead of least(): least() ignores NULL
-- arguments and would put NULL attributes into index 3.
select
    case when speed >= 9 then 3 else floor(speed / 2.25)::int end         r_speed,
    case when strength >= 9 then 3 else floor(strength / 2.25)::int end   r_strength,
    case when lightning >= 9 then 3 else floor(lightning / 2.25)::int end r_lightning,
    case when wind >= 9 then 3 else floor(wind / 2.25)::int end           r_wind,
    count(*) n
from pega_data
group by r_speed, r_strength, r_lightning, r_wind
order by r_speed, r_strength, r_lightning, r_wind