对一组数字进行舍入,使其中尽可能多的数字成为“整”数 ("round" numbers),并且总和为 100

Rounding a series of numbers to get as many "round" numbers as possible while still summing to 100

我有一系列数字加起来接近 100(它们是百分比),例如:

A B C
49.99 9.99 40.01

我想调整这些以获得类似的东西:

A B C
50.00 10.00 40.00

约束条件:

A B C D E F G
33.33 16.66 8.33 8.33 9.99 12.50 10.83
33.33 16.66 8.34 8.34 10.00 12.50 10.83

我目前使用的函数能够很好地在循环中分配缺少或多出的若干个 1/100,但没有解决 49.99 → 50.00 的问题。

它使用的原理是:

最终的函数将用 Postgres 的 PL/pgSQL 编写,但我最感兴趣的是知道是否存在这样的算法以及它们叫什么名字(当然,如果能提供一个可用函数的链接,将不胜感激)。

我最终将问题分成了 2 个部分:

  1. 调整原始份额以获得“更好”的数字 (49.99 → 50.00),以及
  2. 在这里和那里添加几个 1/100 以获得 100% 的总数。

结果令人满意,我还为常见分数(200/3、100/3、100/6、100/12...)加入了一些特殊值,这样 3×33.33 就不会最终变成 33.35、33.35 和 33.30——结果不那么“漂亮/整” (nice/round),但更公平。请注意,最后的调整只需要一次循环。性能是可以接受的:100000 行耗时 3.5 秒。

下面这个 SO 问题以及其中引用的 Wikipedia 文章帮助我理解了各种可能的偏差 (bias) 及其优缺点 (pros/cons)。

感兴趣的朋友可以参考以下代码:

-- Score how "round" an integer share is; a higher score means a rounder number.
-- The caller in this file passes percentages multiplied by 100 (5000 = 50.00 %).
--
-- Tiers are checked from roundest to least round and the first match wins:
--   40  multiple of 10000
--   30  multiple of 1000
--   20  multiple of 100
--   15  multiple of 50, or one of the special values 6666, 3333, 1666, 833, 416
--   10  multiple of 10
--    5  multiple of 5
--    0  anything else (including a NULL input, for which no test is true)
CREATE OR REPLACE FUNCTION public.normalizeshares_weight(share INT) RETURNS INT AS
$BODY$
    SELECT CASE
        WHEN MOD(share, 10000) = 0 THEN 40
        WHEN MOD(share, 1000) = 0 THEN 30
        WHEN MOD(share, 100) = 0 THEN 20
        -- Special values sit in the same tier as multiples of 50.
        WHEN share IN (6666, 3333, 1666, 833, 416) OR MOD(share, 50) = 0 THEN 15
        WHEN MOD(share, 10) = 0 THEN 10
        WHEN MOD(share, 5) = 0 THEN 5
        ELSE 0
    END;
$BODY$ LANGUAGE SQL IMMUTABLE;


-- Nudge a list of percentage shares towards "rounder" values while making the
-- total exactly 100.00 whenever the input is close enough to 100.
--
-- Parameters:
--   shares  percentages (e.g. 49.99); NULL elements are treated as 0.
-- Returns:
--   an array of the same length with values cast to NUMERIC(5,2).
--   A NULL or empty input is returned unchanged.
--
-- Algorithm (all arithmetic is done on integers = percentage * 100, floored):
--   1. For each share, try the offsets +1, +2, +3, -1, -2, -3 and keep the
--      candidate with the highest normalizeshares_weight(); the unmodified
--      value wins ties, and among offsets the earlier one wins ties.
--   2. If the adjusted values sum to 10000, return them.
--   3. Otherwise, if the floored inputs already sum to 10000, or the adjusted
--      total is more than 90 (0.90 %) away from 10000, return the floored
--      inputs without adjustment.
--   4. Otherwise spread the missing/excess hundredths over the elements that
--      have the LOWEST weight (the least "round" ones), so that round values
--      are left untouched.
--
-- Example: {50.03, 49.97} -> {50.00, 50.00} (each share moves by 3 hundredths).
CREATE OR REPLACE FUNCTION public.normalizeshares(shares NUMERIC[]) RETURNS NUMERIC(5,2)[] AS
$BODY$
DECLARE
    -- Candidate offsets in hundredths, in evaluation order.
    offsets    CONSTANT INT[] := ARRAY[1, 2, 3, -1, -2, -3];
    -- ARRAY_LENGTH yields NULL for a NULL or empty array; normalise that to 0.
    n          INT := COALESCE(ARRAY_LENGTH(shares, 1), 0);
    intshares  INT[];      -- floored inputs, in hundredths
    adjshares  INT[];      -- inputs after the "make it rounder" step
    weight     INT[];      -- weight of each adjusted share
    result     NUMERIC[];
    initot     INT := 0;   -- sum of intshares
    tot        INT := 0;   -- sum of adjshares
    w          INT;        -- lowest weight present among the adjusted shares
    nb         INT := 0;   -- number of shares having weight w
    diff       INT;        -- hundredths still to distribute (may be negative)
    per_item   INT;        -- amount given to each receiver except the last
    delta      INT;
    bestweight INT;
    candweight INT;
BEGIN
    -- Nothing to normalise; also avoids a FOR loop with a NULL upper bound.
    IF n = 0 THEN
        RETURN shares;
    END IF;

    FOR i IN 1..n LOOP
        intshares[i] := FLOOR(COALESCE(shares[i], 0) * 100);
        adjshares[i] := intshares[i];
        bestweight := normalizeshares_weight(intshares[i]);

        -- Keep the candidate with the strictly highest weight. The value
        -- stored is always the one that was scored.
        FOREACH delta IN ARRAY offsets LOOP
            candweight := normalizeshares_weight(intshares[i] + delta);
            IF candweight > bestweight THEN
                adjshares[i] := intshares[i] + delta;
                bestweight := candweight;
            END IF;
        END LOOP;

        weight[i] := bestweight;
        tot := tot + adjshares[i];
        initot := initot + intshares[i];

        -- Track the lowest weight seen so far and how many shares carry it.
        IF w IS NULL OR bestweight < w THEN
            w := bestweight;
            nb := 1;
        ELSIF bestweight = w THEN
            nb := nb + 1;
        END IF;

        -- Default output: the floored input (used when no adjustment applies).
        result[i] := (intshares[i]::NUMERIC / 100)::NUMERIC(5,2);
    END LOOP;

    IF tot = 10000 THEN
        -- The rounder values already add up to 100.00.
        FOR i IN 1..n LOOP
            result[i] := (adjshares[i]::NUMERIC / 100)::NUMERIC(5,2);
        END LOOP;
    ELSIF initot = 10000 OR ABS(10000 - tot) > 90 THEN
        -- Floored inputs are already exact, or the total is too far from
        -- 100.00 to be fixed by small nudges: keep the floored inputs.
        NULL;
    ELSE
        diff := 10000 - tot;   -- non-zero here, since tot <> 10000
        -- Integer quotient plus one unit in the direction of diff.
        per_item := diff / nb + diff / ABS(diff);

        FOR i IN 1..n LOOP
            IF weight[i] = w THEN
                IF diff = 0 THEN
                    NULL;   -- everything has been distributed already
                ELSIF nb = 1 THEN
                    -- Last receiver absorbs whatever remains.
                    adjshares[i] := adjshares[i] + diff;
                ELSIF nb > 1 THEN
                    adjshares[i] := adjshares[i] + per_item;
                    diff := diff - per_item;
                END IF;
                nb := nb - 1;
            END IF;
            result[i] := (adjshares[i]::NUMERIC / 100)::NUMERIC(5,2);
        END LOOP;
    END IF;

    RETURN result;
END;
$BODY$ LANGUAGE plpgsql IMMUTABLE;

还有一些结果:

% select normalizeshares('{49.99,9.99,40.01}');
   normalizeshares   
---------------------
 {50.00,10.00,40.00}

% select normalizeshares('{33.33,16.66,8.33,8.33,9.99,12.5,10.83}');
              normalizeshares              
-------------------------------------------
 {33.33,16.66,8.33,8.33,10.00,12.50,10.85}