使用 R 中的人口比率和 min/max 标准将价值分配给区域
Distribute value to zones using population ratio and min/max criteria in R
我有以下数据:
require("data.table")
dt1 <- data.table(ZONE = c("A34","G345","H62","D563","T63","P983","S24","J54","W953","L97","V56","R99"), POPULATION = c(40,110,80,70,90,90,130,140,80,30,80,50), MIN = c(1,0,0,1,0,1,0,1,1,0,1,1), MAX = c(10,9,2,11,12,8,5,3,2,0,8,8))
我想向这些按人口加权的区域分发 50 顶帽子。但是,其中一些区域至少需要 1 顶帽子,而其他区域只能收到很少的帽子或根本不需要帽子。
有没有一种方法可以根据人口分配 50 顶帽子(尽可能按比例分配),但要考虑最小和最大标准,并在一个区域无法分配时将帽子分配重新分配给其他区域'没有收到 any/anymore?例如如果根据精确的比例分配,一个区域应分配 20 顶帽子但只能接受 10 顶,则应将其他 10 顶分配给按其人口加权的其他区域。
此函数执行您描述的算法。
它首先检查您是否有足够的帽子来满足最低要求。如果不是,它会抛出一个错误。
然后它会查看是否有足够多的帽子进行循环,在这种情况下它会给出最多的帽子。
否则,它分配最小数量的帽子,并从剩余的帽子中减去这个总和。然后它会循环,给剩余行一顶帽子,当前帽子和最大帽子之间的最大差距乘以人口规模,直到没有帽子可以分配。
distribute_hats <- function(df, hats)
{
if (hats < sum(df$MIN)) stop("Not enough hats to go round!")
if (hats >= sum(df$MAX)) {df$HATS <- df$MAX; return(df)}
df$HATS <- df$MIN
hats <- hats - sum(df$MIN)
while(hats)
{
weights <- df$HATS/df$POPULATION
allowed <- which(df$HATS < df$MAX)
smallest <- which.min(weights[allowed])[1]
df$HATS[allowed][smallest] <- df$HATS[allowed][smallest] + 1
hats <- hats - 1
}
return(df)
}
现在我们用合理的数字试试:
dt1 %>% distribute_hats(50)
#> ZONE POPULATION MIN MAX HATS
#> 1: A34 40 1 10 3
#> 2: G345 110 0 9 8
#> 3: H62 80 0 2 2
#> 4: D563 70 1 11 5
#> 5: T63 90 0 12 6
#> 6: P983 90 1 8 6
#> 7: S24 130 0 5 5
#> 8: J54 140 1 3 3
#> 9: W953 80 1 2 2
#> 10: L97 30 0 0 0
#> 11: V56 80 1 8 6
#> 12: R99 50 1 8 4
dt1 %>% distribute_hats(10)
#> ZONE POPULATION MIN MAX HATS
#> 1: A34 40 1 10 1
#> 2: G345 110 0 9 1
#> 3: H62 80 0 2 1
#> 4: D563 70 1 11 1
#> 5: T63 90 0 12 1
#> 6: P983 90 1 8 1
#> 7: S24 130 0 5 0
#> 8: J54 140 1 3 1
#> 9: W953 80 1 2 1
#> 10: L97 30 0 0 0
#> 11: V56 80 1 8 1
#> 12: R99 50 1 8 1
和边缘情况:
dt1 %>% distribute_hats(1000)
#> ZONE POPULATION MIN MAX HATS
#> 1: A34 40 1 10 10
#> 2: G345 110 0 9 9
#> 3: H62 80 0 2 2
#> 4: D563 70 1 11 11
#> 5: T63 90 0 12 12
#> 6: P983 90 1 8 8
#> 7: S24 130 0 5 5
#> 8: J54 140 1 3 3
#> 9: W953 80 1 2 2
#> 10: L97 30 0 0 0
#> 11: V56 80 1 8 8
#> 12: R99 50 1 8 8
由 reprex package (v0.3.0)
于 2020-05-09 创建
我不确定。听起来像是优化或线性规划任务
函数如下:
allocate <- function(dt, N){
if(N>dt[,sum(MAX)])
stop("Too many hats to go around")
if(N<dt[,sum(MIN)])
stop("Not enough hats to go around")
# Allocate hats initially based on proportion but use clamping
dt[, HATS := pmax(MIN, pmin(MAX, round(N * +(MAX>0) * POPULATION / sum(POPULATION[MAX>0]))))]
n <- N - dt[,sum(HATS)]
if(n==0) # All hats accouted for
return(dt)
if(n>0){ # Allocate the extra hats, again proportional to pop with clamping
dt[HATS<MAX, HATS := HATS + pmax(MIN, pmin(MAX,
round(n * +(MAX>0) * POPULATION / sum(POPULATION[MAX>0]))))]
} else { # Or subtract the superfluous hats, according to pop
dt[HATS>MIN, HATS := HATS - pmax(MIN, pmin(MAX,
round(abs(n) * +(MAX>0) * POPULATION / sum(POPULATION[MAX>0]))))]
}
n <- N - dt[,sum(HATS)] # Check again
if(n==0) # All hats accouted for
return(dt)
if(n>0){ # This time, just add 1 hat to those that require them
dt[HATS<MAX, i:=.I][i<=n, HATS := HATS + 1]
} else { # Or reduce the number of hats by one
dt[HATS>MIN, i:=.I][i<=abs(n), HATS := HATS - 1]
}
dt[, i:=NULL] # Remove this guy
return(dt)
}
测试 50:
dt2 <- allocate(dt1, 50)
dt2
ZONE POPULATION MIN MAX HATS
1: A34 40 1 10 2
2: G345 110 0 9 8
3: H62 80 0 2 2
4: D563 70 1 11 5
5: T63 90 0 12 7
6: P983 90 1 8 7
7: S24 130 0 5 5
8: J54 140 1 3 3
9: W953 80 1 2 2
10: L97 30 0 0 0
11: V56 80 1 8 5
12: R99 50 1 8 4
分配了 50 顶帽子。
它可能不优雅或数学上不合理,但 that 是我对 what 的尝试,它是值得的。希望能有点用。
将此公式化为整数规划,其中 objective 函数根据最小和最大分配约束最小化分配和目标分配之间的平方和:
dt1[, TARGET := POPULATION / sum(POPULATION) * TOTAL]
system.time({
library(CVXR)
x <- Variable(nrow(dt1), integer=TRUE)
mini <- dt1$MIN
maxi <- dt1$MAX
target <- dt1$TARGET
obj <- Minimize(sum_squares(x - target))
constr <- list(mini <= x, x <= maxi, sum(x) == TOTAL)
prob <- Problem(obj, constr)
result <- solve(prob)
})
# user system elapsed
# 1.60 0.17 1.76
dt1[, ALLOCATION := as.integer(round(result$getValue(x)))]
输出:
ZONE POPULATION MIN MAX TARGET ALLOCATION
1: A34 40 1 10 2.020202 4
2: G345 110 0 9 5.555556 7
3: H62 80 0 2 4.040404 2
4: D563 70 1 11 3.535354 5
5: T63 90 0 12 4.545455 6
6: P983 90 1 8 4.545455 6
7: S24 130 0 5 6.565657 5
8: J54 140 1 3 7.070707 3
9: W953 80 1 2 4.040404 2
10: L97 30 0 0 1.515152 0
11: V56 80 1 8 4.040404 6
12: R99 50 1 8 2.525253 4
数据:
library(data.table)
dt1 <- data.table(ZONE = c("A34","G345","H62","D563","T63","P983","S24","J54","W953","L97","V56","R99"),
POPULATION = c(40,110,80,70,90,90,130,140,80,30,80,50),
MIN = c(1,0,0,1,0,1,0,1,1,0,1,1),
MAX = c(10,9,2,11,12,8,5,3,2,0,8,8))
TOTAL <- 50L
我有以下数据:
require("data.table")
dt1 <- data.table(ZONE = c("A34","G345","H62","D563","T63","P983","S24","J54","W953","L97","V56","R99"), POPULATION = c(40,110,80,70,90,90,130,140,80,30,80,50), MIN = c(1,0,0,1,0,1,0,1,1,0,1,1), MAX = c(10,9,2,11,12,8,5,3,2,0,8,8))
我想向这些按人口加权的区域分发 50 顶帽子。但是,其中一些区域至少需要 1 顶帽子,而其他区域只能收到很少的帽子或根本不需要帽子。
有没有一种方法可以根据人口分配 50 顶帽子(尽可能按比例分配),但要考虑最小和最大标准,并在一个区域无法分配时将帽子分配重新分配给其他区域'没有收到 any/anymore?例如如果根据精确的比例分配,一个区域应分配 20 顶帽子但只能接受 10 顶,则应将其他 10 顶分配给按其人口加权的其他区域。
此函数执行您描述的算法。
它首先检查您是否有足够的帽子来满足最低要求。如果不是,它会抛出一个错误。
然后它会查看是否有足够多的帽子进行循环,在这种情况下它会给出最多的帽子。
否则,它分配最小数量的帽子,并从剩余的帽子中减去这个总和。然后它会循环,给剩余行一顶帽子,当前帽子和最大帽子之间的最大差距乘以人口规模,直到没有帽子可以分配。
distribute_hats <- function(df, hats)
{
if (hats < sum(df$MIN)) stop("Not enough hats to go round!")
if (hats >= sum(df$MAX)) {df$HATS <- df$MAX; return(df)}
df$HATS <- df$MIN
hats <- hats - sum(df$MIN)
while(hats)
{
weights <- df$HATS/df$POPULATION
allowed <- which(df$HATS < df$MAX)
smallest <- which.min(weights[allowed])[1]
df$HATS[allowed][smallest] <- df$HATS[allowed][smallest] + 1
hats <- hats - 1
}
return(df)
}
现在我们用合理的数字试试:
dt1 %>% distribute_hats(50)
#> ZONE POPULATION MIN MAX HATS
#> 1: A34 40 1 10 3
#> 2: G345 110 0 9 8
#> 3: H62 80 0 2 2
#> 4: D563 70 1 11 5
#> 5: T63 90 0 12 6
#> 6: P983 90 1 8 6
#> 7: S24 130 0 5 5
#> 8: J54 140 1 3 3
#> 9: W953 80 1 2 2
#> 10: L97 30 0 0 0
#> 11: V56 80 1 8 6
#> 12: R99 50 1 8 4
dt1 %>% distribute_hats(10)
#> ZONE POPULATION MIN MAX HATS
#> 1: A34 40 1 10 1
#> 2: G345 110 0 9 1
#> 3: H62 80 0 2 1
#> 4: D563 70 1 11 1
#> 5: T63 90 0 12 1
#> 6: P983 90 1 8 1
#> 7: S24 130 0 5 0
#> 8: J54 140 1 3 1
#> 9: W953 80 1 2 1
#> 10: L97 30 0 0 0
#> 11: V56 80 1 8 1
#> 12: R99 50 1 8 1
和边缘情况:
dt1 %>% distribute_hats(1000)
#> ZONE POPULATION MIN MAX HATS
#> 1: A34 40 1 10 10
#> 2: G345 110 0 9 9
#> 3: H62 80 0 2 2
#> 4: D563 70 1 11 11
#> 5: T63 90 0 12 12
#> 6: P983 90 1 8 8
#> 7: S24 130 0 5 5
#> 8: J54 140 1 3 3
#> 9: W953 80 1 2 2
#> 10: L97 30 0 0 0
#> 11: V56 80 1 8 8
#> 12: R99 50 1 8 8
由 reprex package (v0.3.0)
于 2020-05-09 创建我不确定。听起来像是优化或线性规划任务
函数如下:
allocate <- function(dt, N){
if(N>dt[,sum(MAX)])
stop("Too many hats to go around")
if(N<dt[,sum(MIN)])
stop("Not enough hats to go around")
# Allocate hats initially based on proportion but use clamping
dt[, HATS := pmax(MIN, pmin(MAX, round(N * +(MAX>0) * POPULATION / sum(POPULATION[MAX>0]))))]
n <- N - dt[,sum(HATS)]
if(n==0) # All hats accouted for
return(dt)
if(n>0){ # Allocate the extra hats, again proportional to pop with clamping
dt[HATS<MAX, HATS := HATS + pmax(MIN, pmin(MAX,
round(n * +(MAX>0) * POPULATION / sum(POPULATION[MAX>0]))))]
} else { # Or subtract the superfluous hats, according to pop
dt[HATS>MIN, HATS := HATS - pmax(MIN, pmin(MAX,
round(abs(n) * +(MAX>0) * POPULATION / sum(POPULATION[MAX>0]))))]
}
n <- N - dt[,sum(HATS)] # Check again
if(n==0) # All hats accouted for
return(dt)
if(n>0){ # This time, just add 1 hat to those that require them
dt[HATS<MAX, i:=.I][i<=n, HATS := HATS + 1]
} else { # Or reduce the number of hats by one
dt[HATS>MIN, i:=.I][i<=abs(n), HATS := HATS - 1]
}
dt[, i:=NULL] # Remove this guy
return(dt)
}
测试 50:
dt2 <- allocate(dt1, 50)
dt2
ZONE POPULATION MIN MAX HATS
1: A34 40 1 10 2
2: G345 110 0 9 8
3: H62 80 0 2 2
4: D563 70 1 11 5
5: T63 90 0 12 7
6: P983 90 1 8 7
7: S24 130 0 5 5
8: J54 140 1 3 3
9: W953 80 1 2 2
10: L97 30 0 0 0
11: V56 80 1 8 5
12: R99 50 1 8 4
分配了 50 顶帽子。
它可能不优雅或数学上不合理,但 that 是我对 what 的尝试,它是值得的。希望能有点用。
将此公式化为整数规划,其中 objective 函数根据最小和最大分配约束最小化分配和目标分配之间的平方和:
dt1[, TARGET := POPULATION / sum(POPULATION) * TOTAL]
system.time({
library(CVXR)
x <- Variable(nrow(dt1), integer=TRUE)
mini <- dt1$MIN
maxi <- dt1$MAX
target <- dt1$TARGET
obj <- Minimize(sum_squares(x - target))
constr <- list(mini <= x, x <= maxi, sum(x) == TOTAL)
prob <- Problem(obj, constr)
result <- solve(prob)
})
# user system elapsed
# 1.60 0.17 1.76
dt1[, ALLOCATION := as.integer(round(result$getValue(x)))]
输出:
ZONE POPULATION MIN MAX TARGET ALLOCATION
1: A34 40 1 10 2.020202 4
2: G345 110 0 9 5.555556 7
3: H62 80 0 2 4.040404 2
4: D563 70 1 11 3.535354 5
5: T63 90 0 12 4.545455 6
6: P983 90 1 8 4.545455 6
7: S24 130 0 5 6.565657 5
8: J54 140 1 3 7.070707 3
9: W953 80 1 2 4.040404 2
10: L97 30 0 0 1.515152 0
11: V56 80 1 8 4.040404 6
12: R99 50 1 8 2.525253 4
数据:
library(data.table)
dt1 <- data.table(ZONE = c("A34","G345","H62","D563","T63","P983","S24","J54","W953","L97","V56","R99"),
POPULATION = c(40,110,80,70,90,90,130,140,80,30,80,50),
MIN = c(1,0,0,1,0,1,0,1,1,0,1,1),
MAX = c(10,9,2,11,12,8,5,3,2,0,8,8))
TOTAL <- 50L