scipy.optimize.least_squares not giving correct result when using C++ program from Python
I am trying to use the scipy.optimize.least_squares function to fit a model generated by C++ code.
Here is a minimal example demonstrating the problem (taken from the SciPy docs).
I use two functions: one in pure Python, and one that calls the C++ code to generate the model. Both functions return the same results when tested, but give different results when used inside scipy.optimize.least_squares.
import numpy as np
from scipy.optimize import least_squares
import os

def gen_data(t, a, b, c, noise=0, n_outliers=0, random_state=0):
    # Noisy exponential data with a few amplified outliers.
    y = a + b * np.exp(t * c)
    rnd = np.random.RandomState(random_state)
    error = noise * rnd.randn(t.size)
    outliers = rnd.randint(0, t.size, n_outliers)
    error[outliers] *= 10
    return y + error

a = 0.5
b = 2.0
c = -1
t_min = 0
t_max = 10
n_points = 15

t_train = np.linspace(t_min, t_max, n_points)
y_train = gen_data(t_train, a, b, c, noise=0.1, n_outliers=3)

def fun_cpp(x, t, y):
    # Residuals computed by the external C++ program.
    out = os.popen("./a.out " + str(x[0]) + " " + str(x[1]) + " " + str(x[2])).readlines()
    # The trailing space in the output leaves an empty final token, hence [:-1].
    model = [float(i) for i in out[0].split(' ')[:-1]]
    return np.array(model) - y

def fun_python(x, t, y):
    # Residuals computed directly in Python.
    return (x[0] + x[1] * np.exp(x[2] * t)) - y

x0 = np.array([1.0, 1.0, 0.0])
res_lsq = least_squares(fun_python, x0, args=(t_train, y_train), verbose=2)
res_lsq2 = least_squares(fun_cpp, x0, args=(t_train, y_train), verbose=2)

t_test = np.linspace(t_min, t_max, n_points * 10)
y_true = gen_data(t_test, a, b, c)
y_lsq = gen_data(t_test, *res_lsq.x)
y_lsq2 = gen_data(t_test, *res_lsq2.x)

import matplotlib.pyplot as plt
plt.ion()
plt.plot(t_train, y_train, 'o')
plt.plot(t_test, y_true, 'k', linewidth=2, label='true')
plt.plot(t_test, y_lsq, label='pure Python')
plt.plot(t_test, y_lsq2, label='C++/Python')
plt.legend()
plt.show()
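As an aside, os.popen silently returns empty output if the external call fails. A sketch of a more robust wrapper using the standard subprocess module (fun_cpp_subprocess is just an illustrative name; the behavior is otherwise identical):

import subprocess

def fun_cpp_subprocess(x, t, y):
    # Same residuals as fun_cpp, but subprocess.run raises on a non-zero
    # exit code instead of silently yielding empty output.
    out = subprocess.run(["./a.out", str(x[0]), str(x[1]), str(x[2])],
                         capture_output=True, text=True, check=True).stdout
    model = np.array([float(v) for v in out.split()])
    return model - y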
Here is the C++ code (run with: ./a.out 0.5 2 -1):
#include <iostream>
#include <stdlib.h>
#include <math.h>
#include <vector>

using std::vector;

// Evenly spaced values from a to b, inclusive, with exactly n points.
// Computing each point as a + step * i avoids the floating-point drift of
// repeatedly adding step, which can drop or duplicate the last point.
vector<double> linspace(double a, double b, int n) {
    vector<double> array;
    double step = (b - a) / (n - 1);
    for (int i = 0; i < n; i++) {
        array.push_back(a + step * i);
    }
    return array;
}

int main(int argc, char** argv) { // argv values must be ints or floats, e.g. 0.5 2.0 -1
    using namespace std;
    vector<double> x = linspace(0, 10, 15);
    vector<double> model;
    vector<double> parameters(3);
    if (argc < 4) {  // fall back to default parameters
        parameters[0] = 0.5;
        parameters[1] = 2.0;
        parameters[2] = -1;
    } else {
        parameters[0] = atof(argv[1]);
        parameters[1] = atof(argv[2]);
        parameters[2] = atof(argv[3]);
    }
    for (size_t i = 0; i < x.size(); i++) {
        model.push_back(parameters[0] + parameters[1] * exp(parameters[2] * x[i]));
        cout << model[i] << " ";
    }
    return 0;
}
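Once the program is compiled (e.g. with g++ -o a.out main.cpp, where main.cpp is a placeholder file name), the claim that the two residual functions agree can be verified with a quick sanity check (a sketch; the tolerance is loose because cout prints only about 6 significant digits):

# Sanity check: the two residual functions should agree at the starting point.
r_py = fun_python(x0, t_train, y_train)
r_cpp = fun_cpp(x0, t_train, y_train)
print(np.allclose(r_py, r_cpp, atol=1e-5))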
The result of this code is shown in the figure below:
Any suggestions on how to proceed?
Thanks
OK, I think I have identified my problem.
The input function needs to be differentiable; fun_python is, while fun_cpp is not, because it calls an external program.
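A related detail: least_squares approximates the Jacobian with finite differences, and the default relative step (on the order of 1e-8) is far smaller than the ~6 significant digits that cout prints, so the estimated derivatives are essentially noise. A sketch of a workaround one could try, which I have not verified, is to enlarge the step via the diff_step argument (res_lsq3 is just an illustrative name):

# Unverified sketch: a coarser finite-difference step may survive the
# limited precision of the C++ program's text output.
res_lsq3 = least_squares(fun_cpp, x0, args=(t_train, y_train), diff_step=1e-4)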
To work around this, a black-box optimization algorithm can be used, for example: https://github.com/paulknysh/blackbox
The example code above can be modified as follows:
import numpy as np
from scipy.optimize import least_squares
import csv
import os

def gen_data(t, a, b, c, noise=0, n_outliers=0, random_state=0):
    y = a + b * np.exp(t * c)
    rnd = np.random.RandomState(random_state)
    error = noise * rnd.randn(t.size)
    outliers = rnd.randint(0, t.size, n_outliers)
    error[outliers] *= 10
    return y + error

a = 0.5
b = 2.0
c = -1
t_min = 0
t_max = 10
n_points = 15

t_train = np.linspace(t_min, t_max, n_points)
y_train = gen_data(t_train, a, b, c, noise=0.1, n_outliers=3)

def fun_cpp(x):
    # Scalar objective for the black-box optimizer: chi-squared between the
    # training data and the model computed by the external C++ program.
    global y_train
    out = os.popen("./a.out " + str(x[0]) + " " + str(x[1]) + " " + str(x[2])).readlines()
    model = [float(i) for i in out[0].split(' ')[:-1]]
    chisquared = np.sum([(y1 - y2)**2. for y1, y2 in zip(y_train, model)])
    return chisquared

def fun_python(x, t, y):
    return (x[0] + x[1] * np.exp(x[2] * t)) - y

x0 = np.array([1.0, 1.0, 0.0])
res_lsq = least_squares(fun_python, x0, args=(t_train, y_train))

import blackbox as bb
bb.search(f=fun_cpp,                            # given function
          box=[[0., 1.], [0., 3.], [-2., 1.]],  # range of values for each parameter
          n=40,                                 # number of function calls on initial stage (global search)
          m=40,                                 # number of function calls on subsequent stage (local search)
          batch=4,                              # number of calls that will be evaluated in parallel
          resfile='output.csv')                 # text file where results will be saved

# Read the results file; the row right after the header is taken as the
# best-fit parameter set.
rowSelect = []
with open('output.csv') as csvfile:
    readCSV = csv.reader(csvfile, delimiter=',')
    for row in readCSV:
        rowSelect.append(row)
bestFitParam = [float(x) for x in rowSelect[1][0:-1]]

t_test = np.linspace(t_min, t_max, n_points * 10)
y_true = gen_data(t_test, a, b, c)
y_lsq = gen_data(t_test, *res_lsq.x)
y_lsq2 = gen_data(t_test, *bestFitParam)

import matplotlib.pyplot as plt
plt.ion()
plt.plot(t_train, y_train, 'o')
plt.plot(t_test, y_true, 'k', linewidth=2, label='true')
plt.plot(t_test, y_lsq, label='pure Python')
plt.plot(t_test, y_lsq2, label='Black-box C++/Python')
plt.legend()
plt.show()
The result is shown in the figure below:
The black-box optimization gives a better fit, but takes longer to compute.