scipy.optimize.least_squares not giving correct result when using C++ program from Python
I am trying to use the scipy.optimize.least_squares function to fit a model generated by C++ code.
Here is a minimal example demonstrating the problem (taken from the SciPy docs).
I use two functions: one in pure Python, and one that calls the C++ code to generate the model. Both functions return the same results when tested, but give different results when used inside scipy.optimize.least_squares.
import numpy as np
from scipy.optimize import least_squares
import os

def gen_data(t, a, b, c, noise=0, n_outliers=0, random_state=0):
    # Noisy exponential data with a few amplified outliers.
    y = a + b * np.exp(t * c)
    rnd = np.random.RandomState(random_state)
    error = noise * rnd.randn(t.size)
    outliers = rnd.randint(0, t.size, n_outliers)
    error[outliers] *= 10
    return y + error

a = 0.5
b = 2.0
c = -1
t_min = 0
t_max = 10
n_points = 15

t_train = np.linspace(t_min, t_max, n_points)
y_train = gen_data(t_train, a, b, c, noise=0.1, n_outliers=3)

def fun_cpp(x, t, y):
    # Residuals computed by the external C++ program.
    out = os.popen("./a.out " + str(x[0]) + " " + str(x[1]) + " " + str(x[2])).readlines()
    # The trailing space in the output leaves an empty final token, hence [:-1].
    model = [float(i) for i in out[0].split(' ')[:-1]]
    return np.array(model) - y

def fun_python(x, t, y):
    # Residuals computed directly in Python.
    return (x[0] + x[1] * np.exp(x[2] * t)) - y

x0 = np.array([1.0, 1.0, 0.0])
res_lsq = least_squares(fun_python, x0, args=(t_train, y_train), verbose=2)
res_lsq2 = least_squares(fun_cpp, x0, args=(t_train, y_train), verbose=2)

t_test = np.linspace(t_min, t_max, n_points * 10)
y_true = gen_data(t_test, a, b, c)
y_lsq = gen_data(t_test, *res_lsq.x)
y_lsq2 = gen_data(t_test, *res_lsq2.x)

import matplotlib.pyplot as plt
plt.ion()
plt.plot(t_train, y_train, 'o')
plt.plot(t_test, y_true, 'k', linewidth=2, label='true')
plt.plot(t_test, y_lsq, label='pure Python')
plt.plot(t_test, y_lsq2, label='C++/Python')
plt.legend()
plt.show()
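As an aside, os.popen silently returns empty output if the external call fails. A sketch of a more robust wrapper using the standard subprocess module (fun_cpp_subprocess is just an illustrative name; the behavior is otherwise identical):

import subprocess

def fun_cpp_subprocess(x, t, y):
    # Same residuals as fun_cpp, but subprocess.run raises on a non-zero
    # exit code instead of silently yielding empty output.
    out = subprocess.run(["./a.out", str(x[0]), str(x[1]), str(x[2])],
                         capture_output=True, text=True, check=True).stdout
    model = np.array([float(v) for v in out.split()])
    return model - y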
Here is the C++ code (run with: ./a.out 0.5 2 -1):
#include <iostream>
#include <stdlib.h>
#include <math.h>
#include <vector>

using std::vector;

// Evenly spaced values from a to b, inclusive, with exactly n points.
// Computing each point as a + step * i avoids the floating-point drift of
// repeatedly adding step, which can drop or duplicate the last point.
vector<double> linspace(double a, double b, int n) {
    vector<double> array;
    double step = (b - a) / (n - 1);
    for (int i = 0; i < n; i++) {
        array.push_back(a + step * i);
    }
    return array;
}

int main(int argc, char** argv) { // argv values must be ints or floats, e.g. 0.5 2.0 -1
    using namespace std;
    vector<double> x = linspace(0, 10, 15);
    vector<double> model;
    vector<double> parameters(3);
    if (argc < 4) {  // fall back to default parameters
        parameters[0] = 0.5;
        parameters[1] = 2.0;
        parameters[2] = -1;
    } else {
        parameters[0] = atof(argv[1]);
        parameters[1] = atof(argv[2]);
        parameters[2] = atof(argv[3]);
    }
    for (size_t i = 0; i < x.size(); i++) {
        model.push_back(parameters[0] + parameters[1] * exp(parameters[2] * x[i]));
        cout << model[i] << " ";
    }
    return 0;
}
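Once the program is compiled (e.g. with g++ -o a.out main.cpp, where main.cpp is a placeholder file name), the claim that the two residual functions agree can be verified with a quick sanity check (a sketch; the tolerance is loose because cout prints only about 6 significant digits):

# Sanity check: the two residual functions should agree at the starting point.
r_py = fun_python(x0, t_train, y_train)
r_cpp = fun_cpp(x0, t_train, y_train)
print(np.allclose(r_py, r_cpp, atol=1e-5))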
The result of this code is shown in the figure below:
Any suggestions on how to proceed?
Thanks
OK, I think I have identified my problem.
The input function needs to be differentiable; fun_python is, while fun_cpp is not, because it calls an external program.
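A related detail: least_squares approximates the Jacobian with finite differences, and the default relative step (on the order of 1e-8) is far smaller than the ~6 significant digits that cout prints, so the estimated derivatives are essentially noise. A sketch of a workaround one could try, which I have not verified, is to enlarge the step via the diff_step argument (res_lsq3 is just an illustrative name):

# Unverified sketch: a coarser finite-difference step may survive the
# limited precision of the C++ program's text output.
res_lsq3 = least_squares(fun_cpp, x0, args=(t_train, y_train), diff_step=1e-4)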
To work around this, a black-box optimization algorithm can be used, for example: https://github.com/paulknysh/blackbox
The example code above can be modified as follows:
import numpy as np
from scipy.optimize import least_squares
import csv
import os

def gen_data(t, a, b, c, noise=0, n_outliers=0, random_state=0):
    y = a + b * np.exp(t * c)
    rnd = np.random.RandomState(random_state)
    error = noise * rnd.randn(t.size)
    outliers = rnd.randint(0, t.size, n_outliers)
    error[outliers] *= 10
    return y + error

a = 0.5
b = 2.0
c = -1
t_min = 0
t_max = 10
n_points = 15

t_train = np.linspace(t_min, t_max, n_points)
y_train = gen_data(t_train, a, b, c, noise=0.1, n_outliers=3)

def fun_cpp(x):
    # Scalar objective for the black-box optimizer: chi-squared between the
    # training data and the model computed by the external C++ program.
    global y_train
    out = os.popen("./a.out " + str(x[0]) + " " + str(x[1]) + " " + str(x[2])).readlines()
    model = [float(i) for i in out[0].split(' ')[:-1]]
    chisquared = np.sum([(y1 - y2)**2. for y1, y2 in zip(y_train, model)])
    return chisquared

def fun_python(x, t, y):
    return (x[0] + x[1] * np.exp(x[2] * t)) - y

x0 = np.array([1.0, 1.0, 0.0])
res_lsq = least_squares(fun_python, x0, args=(t_train, y_train))

import blackbox as bb
bb.search(f=fun_cpp,                            # given function
          box=[[0., 1.], [0., 3.], [-2., 1.]],  # range of values for each parameter
          n=40,                                 # number of function calls on initial stage (global search)
          m=40,                                 # number of function calls on subsequent stage (local search)
          batch=4,                              # number of calls that will be evaluated in parallel
          resfile='output.csv')                 # text file where results will be saved

# Read the results file; the row right after the header is taken as the
# best-fit parameter set.
rowSelect = []
with open('output.csv') as csvfile:
    readCSV = csv.reader(csvfile, delimiter=',')
    for row in readCSV:
        rowSelect.append(row)
bestFitParam = [float(x) for x in rowSelect[1][0:-1]]

t_test = np.linspace(t_min, t_max, n_points * 10)
y_true = gen_data(t_test, a, b, c)
y_lsq = gen_data(t_test, *res_lsq.x)
y_lsq2 = gen_data(t_test, *bestFitParam)

import matplotlib.pyplot as plt
plt.ion()
plt.plot(t_train, y_train, 'o')
plt.plot(t_test, y_true, 'k', linewidth=2, label='true')
plt.plot(t_test, y_lsq, label='pure Python')
plt.plot(t_test, y_lsq2, label='Black-box C++/Python')
plt.legend()
plt.show()
The result is shown in the figure below:
The black-box optimization gives a better fit, but takes longer to compute.