使用 optim 求最小值，同时强制参数总和为 1

Question

我正在尝试使用 R 和 optim 来计算混合比例。因此，例如，假设我的岩石成分为 60% SiO2 和 40% CaO。我想知道我必须混合多少两种不同的相才能生产出我的岩石。假设 phase2 是 35% SiO2 和 65% CaO，Phase2 是 80% SiO2 和 20% CaO。

***编辑：我更新了代码以包含第 3 阶段，并且我尝试使用合成 package.I 也尝试设置最佳搜索范围的界限。

#Telling R the composition of both phases and the target rock

library(compositions)

Phase1 <- c(35, 65)
Phase2 <- c(80, 20)
Phase3 <- c(10, 90)
Target_Composition <- c(60, 40)

#My function to minimize
my.function <- function(n){
    n <- clo(n) #I though this would make 1 = n[1] + n[2] + n[3]
    (Target_Composition[1] - (n[1]*Phase1[1] + n[2]*Phase2[1] + n[3]*Phase3[1]))^2 + 
    (Target_Composition[2] - (n[1]*Phase1[2] + n[2]*Phase2[2] + n[3]*Phase3[2]))^2 
}

然后我运行优化使用：

optim(c(.54,.46), my.function)

当我运行这个时，我得到 0.536 和 0.487，这确实是最小值，但是，我需要设置一个额外的参数 n[1] + n[2] = 1 。反正有没有使用optim来做到这一点？这就是让我困惑的地方。作为旁注，我要解决的实际问题有更多的阶段，每个阶段都更复杂，但是，一旦我让这部分工作，我会扩大规模。

我现在使用：

optim(c(.5, .4, .1), my.function, lower=0, upper=1, method="L-BFGS-B")

我现在得到 0.4434595、0.4986680 和 0.2371141 作为结果，它们的总和不为 1。我该如何解决这个问题？另外，我将搜索范围设置在 0 到 1 之间的新方法是否可行？

感谢您的帮助。

Answer 1

对于一个参数估计 - optimize() 函数用于最小化函数。

对于两个或多个参数估计，optim()函数用于最小化一个函数。基 R 中还有另一个函数，称为 constrOptim()，可用于执行具有不等式约束的参数估计。在您的问题中，您打算应用框约束。 L-BFGS-B 求解器是合适的。

注意： 并不是所有的求解器都会收敛，但是它们都会根据求解器中实现的一些条件终止（你可以控制它们使用 control 参数），因此您必须检查从求解器获得的最小值是否通过 KKT（Karush、Kuhn 和 Tucker）条件，以确保您的求解器实际收敛，同时最优地估计参数。

KKT条件

至少，梯度（一阶导数）必须为“零”
至少，Hessian（二阶偏导数）必须是正定对称矩阵。

您还可以使用 hessian 矩阵的行列式来查找最小值是局部、全局还是鞍点。

在这里，我展示了两种为给定表达式创建 objective 函数的方法，例如 符号 - 代数形式 和 矩阵形式。我还比较了这两个函数的输出，但是，我使用矩阵形式 objective 函数来显示优化步骤。请注意，优化步骤满足您在问题中提到的 constraints。

n1 + n2 + n3 = 1
n1 = (-2, 2)
n2 = (-1, 1)
n3 = (-1, 1)

您可能想尝试使用库 optimx，这会使工作变得更容易。

# load library
library('compositions')

## function for getting algebraic expressions 
get_fexpr <- function( phase_len, n_ingredients ) # len = length of n or phase1 or phase2 or target composition
{
  ## phase_len = number of phases (eg: phase1, phase2, phase3: = 3)
  ## n_ingredients = number of ingredients that form a composition (Eg: sio2 and cao: = 2)
  ## y = target composition
  ## x = phase data as c(phase1[1], phase2[1], phase1[2], phase2[2])
  ## n = parameters to be estimated

  p_n <- paste( rep("n", phase_len), 1:phase_len, sep = '')  # n
  p_x <- paste( rep("x", phase_len), paste( "[", 1:( phase_len * n_ingredients ), "]", sep = ''), sep = '' ) # x
  p_y <- paste( "y[", 1:n_ingredients, "]", sep = ""  )    # y

  # combine n, x, and y
  p_n_x <- paste( "(", paste( p_n, p_x, sep = "*" ), ")", sep = '')
  p_n_x <- lapply( split( p_n_x, ceiling( seq_along( p_n_x ) / phase_len ) ), paste, collapse = " + " )
  p_n_x <- lapply( p_n_x, function( x ) paste( "(", x, ")", sep = "" ) )

  # get deviations and sum of squares
  dev    <- mapply( paste, p_y, p_n_x, sep = " - " )  # deviations
  dev_sq <- paste( "(", dev, ")**2", sep = '', collapse = " + ")  # sum of squares of deviations

  return( dev_sq )
}

get_fexpr( phase_len = 1, n_ingredients = 1 )
# [1] "(y[1] - ((n1*x[1])))**2"
get_fexpr( phase_len = 1, n_ingredients = 2 )
# [1] "(y[1] - ((n1*x[1])))**2 + (y[2] - ((n1*x[2])))**2"
get_fexpr( phase_len = 2, n_ingredients = 2 )
# [1] "(y[1] - ((n1*x[1]) + (n2*x[2])))**2 + (y[2] - ((n1*x[3]) + (n2*x[4])))**2"
get_fexpr( phase_len = 3, n_ingredients = 2 )
# [1] "(y[1] - ((n1*x[1]) + (n2*x[2]) + (n3*x[3])))**2 + (y[2] - ((n1*x[4]) + (n2*x[5]) + (n3*x[6])))**2"
get_fexpr( phase_len = 4, n_ingredients = 2 )
# [1] "(y[1] - ((n1*x[1]) + (n2*x[2]) + (n3*x[3]) + (n4*x[4])))**2 + (y[2] - ((n1*x[5]) + (n2*x[6]) + (n3*x[7]) + (n4*x[8])))**2"

## objective functions for max/min
## matrix form
myfun1 <- function( n, phase_data, Target_Composition )
{
  print(n)
  Target_Composition <- matrix(Target_Composition, ncol = length(Target_Composition), byrow = TRUE)
  dot_product <- n %*% phase_data   # get dot product
  sum((Target_Composition - dot_product)^2)
}

## algebraic expression form
myfun2 <- function( y, x, n, fexpr )
{
  names(n) <- paste( "n", 1:length( n ), sep = "" ) # assign names to n
  list2env( as.list(n), envir = environment() )     # create new variables with a list of n
  return( eval( parse( text = fexpr ) ) )
} 

## Comparison of functions of matrix and algebriac forms
## 1. raw data for two parameters estimation
Target_Composition <- clo( c(60, 40), total = 1 )
Phase1 <- clo( c(35, 65), total = 1 )
Phase2 <- clo( c(80, 20), total = 1 )
stopifnot( length( Phase1 ) == length( Phase2 ))  
n      <- clo( c(0.54, 0.46) , total = 1 )

## data for matrix form
phase_data_concat <- c(Phase1, Phase2)
n_row <- length(phase_data_concat) / length(Phase1)
n_col <- length(phase_data_concat) / n_row
phase_data <- matrix( data = phase_data_concat, 
                      nrow = n_row, 
                      ncol = n_col, 
                      byrow = TRUE,
                      dimnames = list(c('phase1', 'phase2'), 
                                      c('sio2', 'cao')))

## data for algebraic form
y <- Target_Composition
x <- c(matrix( data = phase_data_concat, nrow = nrow( phase_data ), ncol = ncol( phase_data ), byrow = TRUE ))
n <- n
fexpr <- get_fexpr( phase_len = length( n ), n_ingredients = 2 )


# compare algebraic form and matrix form functions
myfun1(n, phase_data, Target_Composition)
# [1] 0.0036979999999999969
myfun2( y = y, x = x, n = n, fexpr = fexpr )
# [1] 0.0036979999999999969

## 2. raw data for three parameters estimation
Target_Composition <- clo( c(60, 40), total = 1)
Phase1 <- clo( c(35, 65), total = 1)
Phase2 <- clo( c(80, 20), total = 1)
Phase3 <- clo( c(10, 90), total = 1)
stopifnot( length( Phase1 ) == length( Phase2 ) && length( Phase1 ) == length( Phase3 ))  
n <- clo( c(0.5, 0.4, 0.1) )   # starting guess for n1, n2, and n3

## data for matrix form
phase_data_concat <- c(Phase1, Phase2, Phase3)
n_row <- length(phase_data_concat) / length(Phase1)
n_col <- length(phase_data_concat) / n_row
phase_data <- matrix( data = phase_data_concat, 
                      nrow = n_row, 
                      ncol = n_col, 
                      byrow = TRUE,
                      dimnames = list(c('phase1', 'phase2', 'phase3'), 
                                      c('sio2', 'cao')))
## data for algebraic form
y <- Target_Composition
x <- c( matrix( phase_data_concat, nrow = nrow( phase_data), ncol = ncol(phase_data), byrow = TRUE ) )
n <- n
fexpr <- get_fexpr( phase_len = length( n ), n_ingredients = 2 )

# compare algebraic form and matrix form functions
myfun1(n, phase_data, Target_Composition)
# [1] 0.01805
myfun2( y = y, x = x, n = n, fexpr = fexpr )
# [1] 0.01805


## Optimization using matrix form objective function (myfun1)
# three parameter estimation
phase_data
#                       sio2                 cao
# phase1 0.34999999999999998 0.65000000000000002
# phase2 0.80000000000000004 0.20000000000000001
# phase3 0.10000000000000001 0.90000000000000002

# target data
Target_Composition <- clo( c(60, 40), total = 1 )
# [1] 0.59999999999999998 0.40000000000000002

n <- c(0.5, 0.4, 0.1)
# [1] 0.50000000000000000 0.40000000000000002 0.10000000000000001

## Harker diagram; also called scatterplot of two componenets without any transformation
plot( phase_data, type = "b", main = "Harker Diagram" )   

optim_model <- optim(par = n, 
                     fn = myfun1,
                     method = "L-BFGS-B", 
                     lower = c(-2, -1, -1),  # lower bounds: n1 = -2; n2 = -1; n3 = -1 
                     upper = c( 2, 1, 1 ),   # upper bounds: n1 = 2;  n2 = 1   
                     phase_data = phase_data,
                     Target_Composition = Target_Composition)

# [1]  0.50000000000000000   0.40000000000000002   0.10000000000000001
# [1]  0.50100000000000000   0.40000000000000002   0.10000000000000001
# [1]  0.49900000000000000   0.40000000000000002   0.10000000000000001
# [1]  0.50000000000000000   0.40100000000000002   0.10000000000000001
# [1]  0.50000000000000000   0.39900000000000002   0.10000000000000001
# [1]  0.50000000000000000   0.40000000000000002   0.10100000000000001
# [1]  0.500000000000000000  0.400000000000000022  0.099000000000000005
# [1]  0.443000000000007166  0.514000000000010004 -0.051999999999988944
# [1]  0.444000000000007167  0.514000000000010004 -0.051999999999988944
# [1]  0.442000000000007165  0.514000000000010004 -0.051999999999988944
# [1]  0.443000000000007166  0.515000000000010005 -0.051999999999988944
# [1]  0.443000000000007166  0.513000000000010004 -0.051999999999988944
# [1]  0.443000000000007166  0.514000000000010004 -0.050999999999988943
# [1]  0.443000000000007166  0.514000000000010004 -0.052999999999988945
# [1]  0.479721654922847740  0.560497432130581008 -0.020709332414779191
# [1]  0.480721654922847741  0.560497432130581008 -0.020709332414779191
# [1]  0.478721654922847739  0.560497432130581008 -0.020709332414779191
# [1]  0.479721654922847740  0.561497432130581009 -0.020709332414779191
# [1]  0.479721654922847740  0.559497432130581007 -0.020709332414779191
# [1]  0.47972165492284774   0.56049743213058101  -0.01970933241477919
# [1]  0.479721654922847740  0.560497432130581008 -0.021709332414779191
# [1]  0.474768384608834304  0.545177697419992557 -0.019903455841806163
# [1]  0.475768384608834305  0.545177697419992557 -0.019903455841806163
# [1]  0.473768384608834303  0.545177697419992557 -0.019903455841806163
# [1]  0.474768384608834304  0.546177697419992558 -0.019903455841806163
# [1]  0.474768384608834304  0.544177697419992557 -0.019903455841806163
# [1]  0.474768384608834304  0.545177697419992557 -0.018903455841806163
# [1]  0.474768384608834304  0.545177697419992557 -0.020903455841806164
# [1]  0.474833910147636595  0.544703104470840138 -0.019537864476362268
# [1]  0.475833910147636596  0.544703104470840138 -0.019537864476362268
# [1]  0.473833910147636594  0.544703104470840138 -0.019537864476362268
# [1]  0.474833910147636595  0.545703104470840139 -0.019537864476362268
# [1]  0.474833910147636595  0.543703104470840137 -0.019537864476362268
# [1]  0.474833910147636595  0.544703104470840138 -0.018537864476362267
# [1]  0.474833910147636595  0.544703104470840138 -0.020537864476362269
# [1]  0.474834452107517901  0.544702005703077585 -0.019536411001123268
# [1]  0.475834452107517902  0.544702005703077585 -0.019536411001123268
# [1]  0.473834452107517901  0.544702005703077585 -0.019536411001123268
# [1]  0.474834452107517901  0.545702005703077586 -0.019536411001123268
# [1]  0.474834452107517901  0.543702005703077584 -0.019536411001123268
# [1]  0.474834452107517901  0.544702005703077585 -0.018536411001123267
# [1]  0.474834452107517901  0.544702005703077585 -0.020536411001123269

optim_model$par   # values of n after minimization of function my.function using starting guess of n1 = 0.54, n2 = 0.46
# [1]  0.474834452107517901  0.544702005703077585 -0.019536411001123268

sum(optim_model$par)
# [1] 1.0000000468094723

# different starting guess - n
n <- c(0.2, 0.2, 0.6)

optim_model <- optim(par = n, 
                     fn = myfun1,
                     method = "L-BFGS-B", 
                     lower = c(-2, -1, -1),  # lower bounds: n1 = -2; n2 = -1; n3 = -1 
                     upper = c( 2, 1, 1 ),   # upper bounds: n1 = 2;  n2 = 1   
                     phase_data = phase_data,
                     Target_Composition = Target_Composition)

# [1] 0.20000000000000001 0.20000000000000001 0.59999999999999998
# [1] 0.20100000000000001 0.20000000000000001 0.59999999999999998
# [1] 0.19900000000000001 0.20000000000000001 0.59999999999999998
# [1] 0.20000000000000001 0.20100000000000001 0.59999999999999998
# [1] 0.20000000000000001 0.19900000000000001 0.59999999999999998
# [1] 0.20000000000000001 0.20000000000000001 0.60099999999999998
# [1] 0.20000000000000001 0.20000000000000001 0.59899999999999998
# [1] 0.014000000000008284 0.571999999999969644 0.103999999999989656
# [1] 0.015000000000008284 0.571999999999969644 0.103999999999989656
# [1] 0.013000000000008283 0.571999999999969644 0.103999999999989656
# [1] 0.014000000000008284 0.572999999999969645 0.103999999999989656
# [1] 0.014000000000008284 0.570999999999969643 0.103999999999989656
# [1] 0.014000000000008284 0.571999999999969644 0.104999999999989657
# [1] 0.014000000000008284 0.571999999999969644 0.102999999999989655
# [1] 0.13382855816928069 0.72372846274181657 0.20610638896226857
# [1] 0.13482855816928069 0.72372846274181657 0.20610638896226857
# [1] 0.13282855816928069 0.72372846274181657 0.20610638896226857
# [1] 0.13382855816928069 0.72472846274181657 0.20610638896226857
# [1] 0.13382855816928069 0.72272846274181657 0.20610638896226857
# [1] 0.13382855816928069 0.72372846274181657 0.20710638896226857
# [1] 0.13382855816928069 0.72372846274181657 0.20510638896226857
# [1] 0.11766525503937687 0.67373774947575715 0.20873609146356592
# [1] 0.11866525503937687 0.67373774947575715 0.20873609146356592
# [1] 0.11666525503937687 0.67373774947575715 0.20873609146356592
# [1] 0.11766525503937687 0.67473774947575715 0.20873609146356592
# [1] 0.11766525503937687 0.67273774947575715 0.20873609146356592
# [1] 0.11766525503937687 0.67373774947575715 0.20973609146356592
# [1] 0.11766525503937687 0.67373774947575715 0.20773609146356592
# [1] 0.11787907521862623 0.67218907774694359 0.20992907381396153
# [1] 0.11887907521862623 0.67218907774694359 0.20992907381396153
# [1] 0.11687907521862623 0.67218907774694359 0.20992907381396153
# [1] 0.11787907521862623 0.67318907774694359 0.20992907381396153
# [1] 0.11787907521862623 0.67118907774694359 0.20992907381396153
# [1] 0.11787907521862623 0.67218907774694359 0.21092907381396153
# [1] 0.11787907521862623 0.67218907774694359 0.20892907381396153
# [1] 0.11788084371929158 0.67218549229424496 0.20993381673316230
# [1] 0.11888084371929158 0.67218549229424496 0.20993381673316230
# [1] 0.11688084371929158 0.67218549229424496 0.20993381673316230
# [1] 0.11788084371929158 0.67318549229424496 0.20993381673316230
# [1] 0.11788084371929158 0.67118549229424496 0.20993381673316230
# [1] 0.11788084371929158 0.67218549229424496 0.21093381673316230
# [1] 0.11788084371929158 0.67218549229424496 0.20893381673316230

optim_model$par   # values of n after minimization of function my.function using starting guess of n1 = 0.54, n2 = 0.46
# [1] 0.11788084371929158 0.67218549229424496 0.20993381673316230

sum(optim_model$par)
# [1] 1.0000001527466988

可视化： 我将 myfun1 修改为 myfun3 以使用框约束可视化 objective 函数。我使用了两个参数估计模型。

# modified function for visualization
myfun3 <- function( n1, n2, phase_data, Target_Composition )
{
  Target_Composition <- matrix(Target_Composition, ncol = length(Target_Composition), byrow = TRUE)
  dot_product <- c(n1, n2) %*% phase_data   # get dot product
  sum((Target_Composition - dot_product)^2)
}
myfun3 <- Vectorize(FUN = myfun3, vectorize.args = list( "n1", "n2"))

Target_Composition <- clo( c(60, 40), total = 1 )
Phase1 <- clo( c(35, 65), total = 1 )
Phase2 <- clo( c(80, 20), total = 1 )
stopifnot( length( Phase1 ) == length( Phase2 ))  
n      <- clo( c(0.54, 0.46) , total = 1 )

## data for matrix form
phase_data_concat <- c(Phase1, Phase2)
n_row <- length(phase_data_concat) / length(Phase1)
n_col <- length(phase_data_concat) / n_row
phase_data <- matrix( data = phase_data_concat, 
                      nrow = n_row, 
                      ncol = n_col, 
                      byrow = TRUE,
                      dimnames = list(c('phase1', 'phase2'), 
                                      c('sio2', 'cao')))


n1 = seq(-2, 2, 0.1)  # bounds for n1
n2 = seq(-1, 1, 0.1)  # bounds for n2
z <- outer( n1, n2, phase_data, Target_Composition, FUN = myfun3)
persp(n1, n2, z,theta=150)

使用 optim 求最小值，同时强制参数总和为 1

Using optim to find minimization while also forcing the parameters to sum to 1

optimization

r

least-squares