对一组数字进行舍入,使其中尽可能多的数字成为“整齐(round)”的数,且总和为 100
Rounding a series of numbers to get as many "round" numbers as possible while summing up to 100
我有一系列数字加起来接近 100(它们是百分比),例如:
A | B | C |
---|---|---|
49.99 | 9.99 | 40.01 |
我想调整这些以获得类似的东西:
A | B | C |
---|---|---|
50.00 | 10.00 | 40.00 |
约束条件:
- 数字的个数可能会有所不同,
- 初始总和可以有点over/under 100(我使用阈值 0.1:如果总和低于 99.9 或高于 100.1,那么我不会调整),
- 有时可能没有“nice/round”数字。在那种情况下,我想分配 missing/exceeding 1/100 以支持最低数字,如下所示:
A | B | C | D | E | F | G |
---|---|---|---|---|---|---|
33.33 | 16.66 | 8.33 | 8.33 | 9.99 | 12.50 | 10.83 |
33.33 | 16.66 | 8.34 | 8.34 | 10.00 | 12.50 | 10.83 |
我目前使用的函数能够很好地在循环中分配缺少/多出的 1/100,但没有解决 49.99 → 50.00 的问题。
它使用的原理是:
- 将所有 % 乘以 100(得到整数)
- 根据尾随零的数量为每个整数分配一个“权重”(以便我最好调整非整数):
- 30 当 (int mod 1000)=0
- 20 当 (int mod 100)=0
- 10 当 (int mod 10)=0
- 5 当 (int mod 5) = 0
- 其他0.
- 计算最低权重的missing/exceeding 1/100s
- 对整数进行排序以首先获得最小的整数(排序考虑了权重)
- add/substract 1 到每个 int,直到我得到 100 的总和。
生成的函数将用 Postgres 的 Pl/PgSql 编写,但我最感兴趣的是知道是否存在这样的算法以及它们是如何命名的(当然,link 到一个工作函数将不胜感激)。
我最终将问题分成了 2 个部分:
- 调整原始份额以获得“更整齐”的数字 (49.99 → 50.00),以及
- 在这里和那里添加几个 1/100 以获得 100% 的总数。
结果令人满意。我还为常见分数(200/3、100/3、100/6、100/12……)加入了一些特殊值,这样 3 个 33.33 就不会最终变成 33.35、33.35 和 33.30——这样做虽然不那么“整齐(nice/round)”,但更公平。请注意,最后的调整只需一次循环。性能可以接受:处理 100000 行耗时 3.5 秒。
下面这个 SO 问题及其中引用的 Wikipedia 文章帮助我理解了可能产生的偏差(bias)及其优缺点。
感兴趣的朋友可以参考以下代码:
CREATE OR REPLACE FUNCTION public.normalizeshares_weight(share INT) RETURNS INT AS
$BODY$
    -- Scores how "round" a share is. The share is an integer number of
    -- hundredths of a percent (e.g. 5000 stands for 50.00).
    -- Higher score = rounder value. The first matching rule wins, so the
    -- rules are listed from the most to the least round:
    --   40  multiple of 10000
    --   30  multiple of 1000
    --   20  multiple of 100
    --   15  multiple of 50, or one of the listed special values
    --       (6666, 3333, 1666, 833, 416)
    --   10  multiple of 10
    --    5  multiple of 5
    --    0  anything else (including a NULL share)
    SELECT
        CASE
            WHEN MOD(share, 10000) = 0 THEN 40
            WHEN MOD(share, 1000) = 0 THEN 30
            WHEN MOD(share, 100) = 0 THEN 20
            WHEN share IN (6666, 3333, 1666, 833, 416)
                OR MOD(share, 50) = 0 THEN 15
            WHEN MOD(share, 10) = 0 THEN 10
            WHEN MOD(share, 5) = 0 THEN 5
            ELSE 0
        END;
$BODY$ LANGUAGE SQL IMMUTABLE;
CREATE OR REPLACE FUNCTION public.normalizeshares(shares NUMERIC[]) RETURNS NUMERIC(5,2)[] AS
$BODY$
-- Adjusts an array of percentage shares so that they read as "rounder"
-- numbers and, when the total is close enough to 100, add up to exactly 100.00.
--
-- Parameter:
--   shares  percentages (e.g. 49.99); NULL elements are treated as 0.
-- Returns:
--   an array of the same length with two decimals per element. A NULL or
--   empty input is returned unchanged.
--
-- Steps (all arithmetic is done on integers = share * 100, floored):
--   1. Each share may move by up to +/-3 hundredths to the neighbour with the
--      highest normalizeshares_weight(); it stays put unless a neighbour is
--      strictly rounder.
--   2. If the adjusted shares sum to 10000, they are returned as is.
--   3. If the floored input already summed to 10000, or the adjusted total is
--      more than 90 hundredths away from 10000, the floored input is returned
--      without any adjustment.
--   4. Otherwise the remaining gap is spread, in array order, over the shares
--      that have the lowest weight (the least round ones).
DECLARE
    offsets    CONSTANT INT[] := ARRAY[1, 2, 3, -1, -2, -3];  -- tried in this order
    n          INT := COALESCE(ARRAY_LENGTH(shares, 1), 0);   -- ARRAY_LENGTH is NULL for empty/NULL arrays
    intshares  INT[];      -- floored input, in hundredths
    adjshares  INT[];      -- input after the "make it rounder" step
    weight     INT[];      -- roundness weight of each adjusted share
    result     NUMERIC[];
    initot     INT := 0;   -- sum of intshares
    tot        INT := 0;   -- sum of adjshares
    w          INT;        -- lowest weight seen so far (NULL until the first share)
    nb         INT := 0;   -- number of shares having weight w
    diff       INT;        -- hundredths still missing (>0) or in excess (<0)
    sgn        INT;
    step_size  INT;
    step       INT;
    delta      INT;
    candweight INT;
    bestweight INT;
BEGIN
    -- Without this guard the FOR loops below would raise on a NULL upper bound.
    IF n = 0 THEN
        RETURN shares;
    END IF;

    FOR i IN 1..n LOOP
        intshares[i] := FLOOR(COALESCE(shares[i], 0) * 100);
        adjshares[i] := intshares[i];
        bestweight := normalizeshares_weight(intshares[i]);

        -- Keep the first candidate that is strictly rounder than the best so far.
        -- The stored value and the recorded weight always refer to the same
        -- candidate (the +/-3 candidates used to store the +/-2 value).
        FOREACH delta IN ARRAY offsets LOOP
            candweight := normalizeshares_weight(intshares[i] + delta);
            IF candweight > bestweight THEN
                adjshares[i] := intshares[i] + delta;
                bestweight := candweight;
            END IF;
        END LOOP;

        weight[i] := bestweight;
        tot := tot + adjshares[i];
        initot := initot + intshares[i];

        -- Track the lowest weight and how many shares carry it.
        IF w IS NULL OR bestweight < w THEN
            w := bestweight;
            nb := 1;
        ELSIF bestweight = w THEN
            nb := nb + 1;
        END IF;
    END LOOP;

    IF tot <> 10000 THEN
        IF initot = 10000 OR ABS(10000 - tot) > 90 THEN
            -- No adjustment: fall back to the floored input values.
            adjshares := intshares;
        ELSE
            diff := 10000 - tot;            -- non-zero in this branch
            sgn := CASE WHEN diff < 0 THEN -1 ELSE 1 END;
            -- Ceiling of |diff| / nb: enough per share to close the gap
            -- using only the nb least-round shares.
            step_size := (ABS(diff) + nb - 1) / nb;

            FOR i IN 1..n LOOP
                EXIT WHEN diff = 0;
                IF weight[i] = w THEN
                    -- Never move further than what is still missing, so no share
                    -- is pushed past the target and none is moved the other way.
                    step := sgn * LEAST(step_size, ABS(diff));
                    adjshares[i] := adjshares[i] + step;
                    diff := diff - step;
                END IF;
            END LOOP;
        END IF;
    END IF;

    FOR i IN 1..n LOOP
        result[i] := (adjshares[i]::NUMERIC / 100)::NUMERIC(5,2);
    END LOOP;
    RETURN result;
END;
$BODY$ LANGUAGE plpgsql IMMUTABLE;
还有一些结果:
% select normalizeshares('{49.99,9.99,40.01}');
normalizeshares
---------------------
{50.00,10.00,40.00}
% select normalizeshares('{33.33,16.66,8.33,8.33,9.99,12.5,10.83}');
normalizeshares
-------------------------------------------
{33.33,16.66,8.33,8.33,10.00,12.50,10.85}
我有一系列数字加起来接近 100(它们是百分比),例如:
A | B | C |
---|---|---|
49.99 | 9.99 | 40.01 |
我想调整这些以获得类似的东西:
A | B | C |
---|---|---|
50.00 | 10.00 | 40.00 |
约束条件:
- 数字的个数可能会有所不同,
- 初始总和可以有点over/under 100(我使用阈值 0.1:如果总和低于 99.9 或高于 100.1,那么我不会调整),
- 有时可能没有“nice/round”数字。在那种情况下,我想分配 missing/exceeding 1/100 以支持最低数字,如下所示:
A | B | C | D | E | F | G |
---|---|---|---|---|---|---|
33.33 | 16.66 | 8.33 | 8.33 | 9.99 | 12.50 | 10.83 |
33.33 | 16.66 | 8.34 | 8.34 | 10.00 | 12.50 | 10.83 |
我目前使用的函数能够很好地在循环中分配缺少/多出的 1/100,但没有解决 49.99 → 50.00 的问题。
它使用的原理是:
- 将所有 % 乘以 100(得到整数)
- 根据尾随零的数量为每个整数分配一个“权重”(以便我最好调整非整数):
- 30 当 (int mod 1000)=0
- 20 当 (int mod 100)=0
- 10 当 (int mod 10)=0
- 5 当 (int mod 5) = 0
- 其他0.
- 计算最低权重的missing/exceeding 1/100s
- 对整数进行排序以首先获得最小的整数(排序考虑了权重)
- add/substract 1 到每个 int,直到我得到 100 的总和。
生成的函数将用 Postgres 的 Pl/PgSql 编写,但我最感兴趣的是知道是否存在这样的算法以及它们是如何命名的(当然,link 到一个工作函数将不胜感激)。
我最终将问题分成了 2 个部分:
- 调整原始份额以获得“更整齐”的数字 (49.99 → 50.00),以及
- 在这里和那里添加几个 1/100 以获得 100% 的总数。
结果令人满意。我还为常见分数(200/3、100/3、100/6、100/12……)加入了一些特殊值,这样 3 个 33.33 就不会最终变成 33.35、33.35 和 33.30——这样做虽然不那么“整齐(nice/round)”,但更公平。请注意,最后的调整只需一次循环。性能可以接受:处理 100000 行耗时 3.5 秒。
下面这个 SO 问题及其中引用的 Wikipedia 文章帮助我理解了可能产生的偏差(bias)及其优缺点。
感兴趣的朋友可以参考以下代码:
CREATE OR REPLACE FUNCTION public.normalizeshares_weight(share INT) RETURNS INT AS
$BODY$
    -- Scores how "round" a share is. The share is an integer number of
    -- hundredths of a percent (e.g. 5000 stands for 50.00).
    -- Higher score = rounder value. The first matching rule wins, so the
    -- rules are listed from the most to the least round:
    --   40  multiple of 10000
    --   30  multiple of 1000
    --   20  multiple of 100
    --   15  multiple of 50, or one of the listed special values
    --       (6666, 3333, 1666, 833, 416)
    --   10  multiple of 10
    --    5  multiple of 5
    --    0  anything else (including a NULL share)
    SELECT
        CASE
            WHEN MOD(share, 10000) = 0 THEN 40
            WHEN MOD(share, 1000) = 0 THEN 30
            WHEN MOD(share, 100) = 0 THEN 20
            WHEN share IN (6666, 3333, 1666, 833, 416)
                OR MOD(share, 50) = 0 THEN 15
            WHEN MOD(share, 10) = 0 THEN 10
            WHEN MOD(share, 5) = 0 THEN 5
            ELSE 0
        END;
$BODY$ LANGUAGE SQL IMMUTABLE;
CREATE OR REPLACE FUNCTION public.normalizeshares(shares NUMERIC[]) RETURNS NUMERIC(5,2)[] AS
$BODY$
-- Adjusts an array of percentage shares so that they read as "rounder"
-- numbers and, when the total is close enough to 100, add up to exactly 100.00.
--
-- Parameter:
--   shares  percentages (e.g. 49.99); NULL elements are treated as 0.
-- Returns:
--   an array of the same length with two decimals per element. A NULL or
--   empty input is returned unchanged.
--
-- Steps (all arithmetic is done on integers = share * 100, floored):
--   1. Each share may move by up to +/-3 hundredths to the neighbour with the
--      highest normalizeshares_weight(); it stays put unless a neighbour is
--      strictly rounder.
--   2. If the adjusted shares sum to 10000, they are returned as is.
--   3. If the floored input already summed to 10000, or the adjusted total is
--      more than 90 hundredths away from 10000, the floored input is returned
--      without any adjustment.
--   4. Otherwise the remaining gap is spread, in array order, over the shares
--      that have the lowest weight (the least round ones).
DECLARE
    offsets    CONSTANT INT[] := ARRAY[1, 2, 3, -1, -2, -3];  -- tried in this order
    n          INT := COALESCE(ARRAY_LENGTH(shares, 1), 0);   -- ARRAY_LENGTH is NULL for empty/NULL arrays
    intshares  INT[];      -- floored input, in hundredths
    adjshares  INT[];      -- input after the "make it rounder" step
    weight     INT[];      -- roundness weight of each adjusted share
    result     NUMERIC[];
    initot     INT := 0;   -- sum of intshares
    tot        INT := 0;   -- sum of adjshares
    w          INT;        -- lowest weight seen so far (NULL until the first share)
    nb         INT := 0;   -- number of shares having weight w
    diff       INT;        -- hundredths still missing (>0) or in excess (<0)
    sgn        INT;
    step_size  INT;
    step       INT;
    delta      INT;
    candweight INT;
    bestweight INT;
BEGIN
    -- Without this guard the FOR loops below would raise on a NULL upper bound.
    IF n = 0 THEN
        RETURN shares;
    END IF;

    FOR i IN 1..n LOOP
        intshares[i] := FLOOR(COALESCE(shares[i], 0) * 100);
        adjshares[i] := intshares[i];
        bestweight := normalizeshares_weight(intshares[i]);

        -- Keep the first candidate that is strictly rounder than the best so far.
        -- The stored value and the recorded weight always refer to the same
        -- candidate (the +/-3 candidates used to store the +/-2 value).
        FOREACH delta IN ARRAY offsets LOOP
            candweight := normalizeshares_weight(intshares[i] + delta);
            IF candweight > bestweight THEN
                adjshares[i] := intshares[i] + delta;
                bestweight := candweight;
            END IF;
        END LOOP;

        weight[i] := bestweight;
        tot := tot + adjshares[i];
        initot := initot + intshares[i];

        -- Track the lowest weight and how many shares carry it.
        IF w IS NULL OR bestweight < w THEN
            w := bestweight;
            nb := 1;
        ELSIF bestweight = w THEN
            nb := nb + 1;
        END IF;
    END LOOP;

    IF tot <> 10000 THEN
        IF initot = 10000 OR ABS(10000 - tot) > 90 THEN
            -- No adjustment: fall back to the floored input values.
            adjshares := intshares;
        ELSE
            diff := 10000 - tot;            -- non-zero in this branch
            sgn := CASE WHEN diff < 0 THEN -1 ELSE 1 END;
            -- Ceiling of |diff| / nb: enough per share to close the gap
            -- using only the nb least-round shares.
            step_size := (ABS(diff) + nb - 1) / nb;

            FOR i IN 1..n LOOP
                EXIT WHEN diff = 0;
                IF weight[i] = w THEN
                    -- Never move further than what is still missing, so no share
                    -- is pushed past the target and none is moved the other way.
                    step := sgn * LEAST(step_size, ABS(diff));
                    adjshares[i] := adjshares[i] + step;
                    diff := diff - step;
                END IF;
            END LOOP;
        END IF;
    END IF;

    FOR i IN 1..n LOOP
        result[i] := (adjshares[i]::NUMERIC / 100)::NUMERIC(5,2);
    END LOOP;
    RETURN result;
END;
$BODY$ LANGUAGE plpgsql IMMUTABLE;
还有一些结果:
% select normalizeshares('{49.99,9.99,40.01}');
normalizeshares
---------------------
{50.00,10.00,40.00}
% select normalizeshares('{33.33,16.66,8.33,8.33,9.99,12.5,10.83}');
normalizeshares
-------------------------------------------
{33.33,16.66,8.33,8.33,10.00,12.50,10.85}