Python: CVXPY SolverError
Python: CVXPY SolverError
目的:我试图在 python 中使用 cvxpy 来最大化 dual_func
,但是我得到以下 SolverError,我相信它可能是 Variable
具有不同的维度,但似乎无法弄清楚这个问题。我试过使用其他求解器,如 ECOS,但无济于事。我是凸优化的新手,如果有人能帮助我指出代码中错误的正确方向,那就太好了。
问题:代码在 cvxpy 版本 0.4 中有效,但在最新的 cvxpy 版本中无效,给我错误:
SolverError: Either candidate conic solvers (['CVXOPT']) do not support the
cones output by the problem (SOC, ExpCone, PSD), or there are not enough
constraints in the problem.
dual_func
是凸的,是DCP,所以我还是不知道为什么解不出来
附加问题:是否可以在 cvxpy 中使用严格的不等式约束?文档指出这是不可能的,因为它在现实世界中没有意义,但是,我真的不明白为什么会这样。
from cvxpy import *
alpha_k = Variable()
mu_k_1 = Variable(shape=(int(len(h_undl_list))))
mu_k_2 = Variable(shape=(int(len(h_undl_list))))
covar_k_1 = Variable(shape=(int(len(h_undl_list)), int(len(h_undl_list))), PSD=True)
covar_k_2 = Variable(shape=(int(len(h_undl_list)), int(len(h_undl_list))), PSD=True)
# fitting the first 1000 returns with normal GMM
from sklearn import mixture
clf = mixture.GaussianMixture(n_components=2, max_iter=1000, tol=1e-7).fit(ret_df_period)
clf = mixture.GaussianMixture(n_components=2, max_iter=500, tol=1e-7, weights_init=[0.2, 0.8]).fit(ret_df_period)
print('weights')
print(clf.weights_)
# sorting the values to the normal regime or the stressed regime
if clf.weights_[0] > clf.weights_[1]:
alpha_k.value = clf.weights_[0]
mu_k_1.value = clf.means_[0]
mu_k_2.value = clf.means_[1]
covar_k_1.value = clf.covariances_[0]
covar_k_2.value = clf.covariances_[1]
else:
alpha_k.value = clf.weights_[1]
mu_k_1.value = clf.means_[1]
mu_k_2.value = clf.means_[0]
covar_k_1.value = clf.covariances_[1]
covar_k_2.value = clf.covariances_[0]
iter_t = 3
N = ret_df_period.shape[0] #time step, e.g. return at t
K = 2
for t_iter in range(iter_t):
q_k = [alpha_k.value, 1 - alpha_k.value]
mu_sk_list = [np.array(mu_k_1.value).flatten(), np.array(mu_k_2.value).flatten()]
covar_sk_list = [covar_k_1.value, covar_k_2.value]
q_posterior_list = []
q_posterior_map = {}
# E-Step
import math
# marginal probability, lambda_k * multivariate_normal.pdf, returns a matrix with K rows, N columns
mar_prob_list = np.zeros((K,N))
for k_idx_temp in range(0, K):
for j_temp in range(0, N):
mar_prob_list[k_idx_temp, j_temp] = (q_k[k_idx_temp] * multivariate_normal.pdf(ret_df_period.iloc[j_temp].values, mean = mu_sk_list[k_idx_temp], cov = covar_sk_list[k_idx_temp]))
# lambda_k_n, q_n, q_posterior_distribution, returns a matrix with K rows, N columns
for k_idx in range(0, K):
for j in range(0, N):
joint_prob_k = q_k[k_idx] * multivariate_normal.pdf(ret_df_period.iloc[j], mean = mu_sk_list[k_idx], cov = covar_sk_list[k_idx])
q_posterior = joint_prob_k / np.sum(mar_prob_list[:, j])
q_posterior_list.append(q_posterior)
q_posterior_map[k_idx] = q_posterior_list
q_posterior_list = []
#####a priori after E-Step, i.e. M-Step
# Empirical probability, returns the two lambda_sk values
alpha_sk_list = []
for k_idx in range(0, K):
alpha_sk = 0
for j in range(0, N):
alpha_sk += q_posterior_map[k_idx][j]
alpha_sk_list.append(alpha_sk / N)
alpha_sk_list[1] = 1 - alpha_sk_list[0]
# Mean, returns two sets of means
mu_sk_list = []
weighted_xs_map = {}
for k_idx in range(0, K):
weighted_xs_map[k_idx] = np.empty([int(len(h_undl_list)), N])
for k_idx in range(0, K):
mu_sk = 0
for j in range(0, N):
weighted_x = (q_posterior_map[k_idx][j]) * ret_df_period.iloc[j].values
mu_sk += weighted_x
weighted_xs_map[k_idx][:, j] = weighted_x / alpha_sk_list[k_idx] # should divide twice of alpha_sk here? Come back and double check later!!!
mu_sk_list.append(mu_sk / N / alpha_sk_list[k_idx])
# Cov, returns two cov
covar_sk_list = []
for k_idx in range(0, K):
sum_covar = 0
for j in range(0, N):
w0_temp = (ret_df_period.iloc[j] - mu_sk_list[k_idx]).values
w0_covar = np.outer(w0_temp, w0_temp)
weighted_covar = (q_posterior_map[k_idx][j]) * w0_covar
sum_covar += weighted_covar
sum_covar = sum_covar / (alpha_sk_list[k_idx] * N)
covar_sk_list.append(sum_covar)
#####
#information params
n_sk_list = []
for k_idx in range(0, K - 1):
n_sk = np.log(alpha_sk_list[k_idx] / (1 - (np.sum(alpha_sk_list[: -1]))))
n_sk_list.append(n_sk)
m_sk_list = []
for k_idx in range(0, K):
m_sk = np.dot(np.linalg.inv(covar_sk_list[k_idx]), mu_sk_list[k_idx])
m_sk_list.append(m_sk)
S_sk_list = []
for k_idx in range(0, K):
S_sk = np.linalg.inv(covar_sk_list[k_idx])
S_sk_list.append(S_sk)
alpha_sk_1 = Parameter(nonneg=True)
alpha_sk_2 = Parameter(nonneg=True)
n_sk_pr = Parameter()
dim = Parameter(nonneg=True)
m_sk_1 = Parameter(shape=(int(len(h_undl_list)),))
m_sk_2 = Parameter(shape=(int(len(h_undl_list)),))
alpha_sk_1.value = alpha_sk_list[0]
alpha_sk_2.value = alpha_sk_list[1]
n_sk_pr.value = n_sk_list[0]
m_sk_1.value = m_sk_list[0]
m_sk_2.value = m_sk_list[1]
dim.value = int(len(h_undl_list))
covar_k_tilde = Variable(shape=(int(len(h_undl_list)), int(len(h_undl_list))), PSD=True)
dual_func = entr(alpha_k) + entr(1 - alpha_k) + \
alpha_sk_1*0.5*log_det(covar_k_1) + alpha_sk_1*dim*0.5*log(2*np.e*np.pi) + alpha_sk_2*0.5*log_det(covar_k_1 + covar_k_tilde) + alpha_sk_2*dim*0.5*log(2*np.e*np.pi) + alpha_k*n_sk_pr + \
alpha_sk_1*(mu_k_1.T)*m_sk_1 - alpha_sk_1*0.5*trace(covar_k_1*S_sk_list[0]) - alpha_sk_1*0.5*quad_form(mu_k_1, S_sk_list[0]) + \
alpha_sk_2*(mu_k_1.T)*m_sk_2 - alpha_sk_2*0.5*trace((covar_k_1 + covar_k_tilde)*S_sk_list[1]) - alpha_sk_2*0.5*quad_form(mu_k_2, S_sk_list[1])
prob = Problem(Maximize(dual_func), [alpha_k == 0.96, mu_k_1 >= 0, mu_k_2 - mu_k_1 <= 0, mu_k_2 >= - mu_k_1, mu_k_2 <= 0])
print(prob.solve(solver=CVXOPT, verbose=True, kktsolver='robust', max_iters=1000))
print(t_iter)
covar_k_2.value = covar_k_1.value + covar_k_tilde.value
print(is_pos_def(covar_k_1.value))
print(is_pos_def(covar_k_tilde.value))
print(is_pos_def(covar_k_2.value))
covar_k_1_temp = np.array(covar_k_1.value)
covar_k_2_temp = np.array(covar_k_2.value)
covar_k_tilde_temp = np.array(covar_k_tilde.value)
mu_k_1_temp = np.array(mu_k_1.value).flatten()
mu_k_2_temp = np.array(mu_k_2.value).flatten()
D_1 = np.diag(np.sqrt(np.diag(covar_k_1_temp)))
correl_k_1 = np.dot(np.dot(np.linalg.inv(D_1), covar_k_1_temp), np.linalg.inv(D_1))
D2 = np.diag(np.sqrt(np.diag(covar_k_2_temp)))
correl_k_2 = np.dot(np.dot(np.linalg.inv(D_2), covar_k_2_temp), np.linalg.inv(D_2))
错误:
---------------------------------------------------------------------------
SolverError Traceback (most recent call last)
<ipython-input-28-1e1e9c7b0d73> in <module>
109 prob = Problem(Maximize(dual_func), [alpha_k == 0.96, mu_k_1 >= 0, mu_k_2 - mu_k_1 <= 0, mu_k_2 >= - mu_k_1, mu_k_2 <= 0])
110
--> 111 print(prob.solve(solver=CVXOPT, verbose=True, kktsolver='robust', max_iters=1000))
112 print(t_iter)
113 covar_k_2.value = covar_k_1.value + covar_k_tilde.value
~\Anaconda3\lib\site-packages\cvxpy\problems\problem.py in solve(self, *args, **kwargs)
287 else:
288 solve_func = Problem._solve
--> 289 return solve_func(self, *args, **kwargs)
290
291 @classmethod
~\Anaconda3\lib\site-packages\cvxpy\problems\problem.py in _solve(self, solver, warm_start, verbose, parallel, gp, qcp, **kwargs)
565 solver, warm_start, verbose, **kwargs)
566
--> 567 self._construct_chains(solver=solver, gp=gp)
568 data, solving_inverse_data = self._solving_chain.apply(
569 self._intermediate_problem)
~\Anaconda3\lib\site-packages\cvxpy\problems\problem.py in _construct_chains(self, solver, gp)
508
509 except Exception as e:
--> 510 raise e
511
512 def _solve(self,
~\Anaconda3\lib\site-packages\cvxpy\problems\problem.py in _construct_chains(self, solver, gp)
503 self._solving_chain = \
504 construct_solving_chain(self._intermediate_problem,
--> 505 candidate_solvers)
506
507 self._cached_chain_key = chain_key
~\Anaconda3\lib\site-packages\cvxpy\reductions\solvers\solving_chain.py in construct_solving_chain(problem, candidates)
93 "enough constraints in the problem." % (
94 candidates['conic_solvers'],
---> 95 ", ".join([cone.__name__ for cone in cones])))
96
97
SolverError: Either candidate conic solvers (['CVXOPT']) do not support the cones output by the problem (SOC, ExpCone, PSD), or there are not enough constraints in the problem.
代码引用自:https://www.maths.ox.ac.uk/system/files/attachments/TT18_dissertation_Vu_0.pdf
您的 objective 包含 PSD 矩阵,log_det 和 entr,因此您需要一个支持半定锥和指数锥的求解器。据我所知,只有 MOSEK 9 和 SCS 可以在 CVXPY 中为您完成这两项工作。
严格的不等式没有实际意义,因为浮点运算的精度和未达到的问题。考虑最小化 x 服从 x>0。具有严格不等式的问题往往在极限中有最佳点,这意味着您无论如何都在合并非严格闭包。
目的:我试图在 python 中使用 cvxpy 来最大化 dual_func
,但是我得到以下 SolverError,我相信它可能是 Variable
具有不同的维度,但似乎无法弄清楚这个问题。我试过使用其他求解器,如 ECOS,但无济于事。我是凸优化的新手,如果有人能帮助我指出代码中错误的正确方向,那就太好了。
问题:代码在 cvxpy 版本 0.4 中有效,但在最新的 cvxpy 版本中无效,给我错误:
SolverError: Either candidate conic solvers (['CVXOPT']) do not support the
cones output by the problem (SOC, ExpCone, PSD), or there are not enough
constraints in the problem.
dual_func
是凸的,是DCP,所以我还是不知道为什么解不出来
附加问题:是否可以在 cvxpy 中使用严格的不等式约束?文档指出这是不可能的,因为它在现实世界中没有意义,但是,我真的不明白为什么会这样。
from cvxpy import *
alpha_k = Variable()
mu_k_1 = Variable(shape=(int(len(h_undl_list))))
mu_k_2 = Variable(shape=(int(len(h_undl_list))))
covar_k_1 = Variable(shape=(int(len(h_undl_list)), int(len(h_undl_list))), PSD=True)
covar_k_2 = Variable(shape=(int(len(h_undl_list)), int(len(h_undl_list))), PSD=True)
# fitting the first 1000 returns with normal GMM
from sklearn import mixture
clf = mixture.GaussianMixture(n_components=2, max_iter=1000, tol=1e-7).fit(ret_df_period)
clf = mixture.GaussianMixture(n_components=2, max_iter=500, tol=1e-7, weights_init=[0.2, 0.8]).fit(ret_df_period)
print('weights')
print(clf.weights_)
# sorting the values to the normal regime or the stressed regime
if clf.weights_[0] > clf.weights_[1]:
alpha_k.value = clf.weights_[0]
mu_k_1.value = clf.means_[0]
mu_k_2.value = clf.means_[1]
covar_k_1.value = clf.covariances_[0]
covar_k_2.value = clf.covariances_[1]
else:
alpha_k.value = clf.weights_[1]
mu_k_1.value = clf.means_[1]
mu_k_2.value = clf.means_[0]
covar_k_1.value = clf.covariances_[1]
covar_k_2.value = clf.covariances_[0]
iter_t = 3
N = ret_df_period.shape[0] #time step, e.g. return at t
K = 2
for t_iter in range(iter_t):
q_k = [alpha_k.value, 1 - alpha_k.value]
mu_sk_list = [np.array(mu_k_1.value).flatten(), np.array(mu_k_2.value).flatten()]
covar_sk_list = [covar_k_1.value, covar_k_2.value]
q_posterior_list = []
q_posterior_map = {}
# E-Step
import math
# marginal probability, lambda_k * multivariate_normal.pdf, returns a matrix with K rows, N columns
mar_prob_list = np.zeros((K,N))
for k_idx_temp in range(0, K):
for j_temp in range(0, N):
mar_prob_list[k_idx_temp, j_temp] = (q_k[k_idx_temp] * multivariate_normal.pdf(ret_df_period.iloc[j_temp].values, mean = mu_sk_list[k_idx_temp], cov = covar_sk_list[k_idx_temp]))
# lambda_k_n, q_n, q_posterior_distribution, returns a matrix with K rows, N columns
for k_idx in range(0, K):
for j in range(0, N):
joint_prob_k = q_k[k_idx] * multivariate_normal.pdf(ret_df_period.iloc[j], mean = mu_sk_list[k_idx], cov = covar_sk_list[k_idx])
q_posterior = joint_prob_k / np.sum(mar_prob_list[:, j])
q_posterior_list.append(q_posterior)
q_posterior_map[k_idx] = q_posterior_list
q_posterior_list = []
#####a priori after E-Step, i.e. M-Step
# Empirical probability, returns the two lambda_sk values
alpha_sk_list = []
for k_idx in range(0, K):
alpha_sk = 0
for j in range(0, N):
alpha_sk += q_posterior_map[k_idx][j]
alpha_sk_list.append(alpha_sk / N)
alpha_sk_list[1] = 1 - alpha_sk_list[0]
# Mean, returns two sets of means
mu_sk_list = []
weighted_xs_map = {}
for k_idx in range(0, K):
weighted_xs_map[k_idx] = np.empty([int(len(h_undl_list)), N])
for k_idx in range(0, K):
mu_sk = 0
for j in range(0, N):
weighted_x = (q_posterior_map[k_idx][j]) * ret_df_period.iloc[j].values
mu_sk += weighted_x
weighted_xs_map[k_idx][:, j] = weighted_x / alpha_sk_list[k_idx] # should divide twice of alpha_sk here? Come back and double check later!!!
mu_sk_list.append(mu_sk / N / alpha_sk_list[k_idx])
# Cov, returns two cov
covar_sk_list = []
for k_idx in range(0, K):
sum_covar = 0
for j in range(0, N):
w0_temp = (ret_df_period.iloc[j] - mu_sk_list[k_idx]).values
w0_covar = np.outer(w0_temp, w0_temp)
weighted_covar = (q_posterior_map[k_idx][j]) * w0_covar
sum_covar += weighted_covar
sum_covar = sum_covar / (alpha_sk_list[k_idx] * N)
covar_sk_list.append(sum_covar)
#####
#information params
n_sk_list = []
for k_idx in range(0, K - 1):
n_sk = np.log(alpha_sk_list[k_idx] / (1 - (np.sum(alpha_sk_list[: -1]))))
n_sk_list.append(n_sk)
m_sk_list = []
for k_idx in range(0, K):
m_sk = np.dot(np.linalg.inv(covar_sk_list[k_idx]), mu_sk_list[k_idx])
m_sk_list.append(m_sk)
S_sk_list = []
for k_idx in range(0, K):
S_sk = np.linalg.inv(covar_sk_list[k_idx])
S_sk_list.append(S_sk)
alpha_sk_1 = Parameter(nonneg=True)
alpha_sk_2 = Parameter(nonneg=True)
n_sk_pr = Parameter()
dim = Parameter(nonneg=True)
m_sk_1 = Parameter(shape=(int(len(h_undl_list)),))
m_sk_2 = Parameter(shape=(int(len(h_undl_list)),))
alpha_sk_1.value = alpha_sk_list[0]
alpha_sk_2.value = alpha_sk_list[1]
n_sk_pr.value = n_sk_list[0]
m_sk_1.value = m_sk_list[0]
m_sk_2.value = m_sk_list[1]
dim.value = int(len(h_undl_list))
covar_k_tilde = Variable(shape=(int(len(h_undl_list)), int(len(h_undl_list))), PSD=True)
dual_func = entr(alpha_k) + entr(1 - alpha_k) + \
alpha_sk_1*0.5*log_det(covar_k_1) + alpha_sk_1*dim*0.5*log(2*np.e*np.pi) + alpha_sk_2*0.5*log_det(covar_k_1 + covar_k_tilde) + alpha_sk_2*dim*0.5*log(2*np.e*np.pi) + alpha_k*n_sk_pr + \
alpha_sk_1*(mu_k_1.T)*m_sk_1 - alpha_sk_1*0.5*trace(covar_k_1*S_sk_list[0]) - alpha_sk_1*0.5*quad_form(mu_k_1, S_sk_list[0]) + \
alpha_sk_2*(mu_k_1.T)*m_sk_2 - alpha_sk_2*0.5*trace((covar_k_1 + covar_k_tilde)*S_sk_list[1]) - alpha_sk_2*0.5*quad_form(mu_k_2, S_sk_list[1])
prob = Problem(Maximize(dual_func), [alpha_k == 0.96, mu_k_1 >= 0, mu_k_2 - mu_k_1 <= 0, mu_k_2 >= - mu_k_1, mu_k_2 <= 0])
print(prob.solve(solver=CVXOPT, verbose=True, kktsolver='robust', max_iters=1000))
print(t_iter)
covar_k_2.value = covar_k_1.value + covar_k_tilde.value
print(is_pos_def(covar_k_1.value))
print(is_pos_def(covar_k_tilde.value))
print(is_pos_def(covar_k_2.value))
covar_k_1_temp = np.array(covar_k_1.value)
covar_k_2_temp = np.array(covar_k_2.value)
covar_k_tilde_temp = np.array(covar_k_tilde.value)
mu_k_1_temp = np.array(mu_k_1.value).flatten()
mu_k_2_temp = np.array(mu_k_2.value).flatten()
D_1 = np.diag(np.sqrt(np.diag(covar_k_1_temp)))
correl_k_1 = np.dot(np.dot(np.linalg.inv(D_1), covar_k_1_temp), np.linalg.inv(D_1))
D2 = np.diag(np.sqrt(np.diag(covar_k_2_temp)))
correl_k_2 = np.dot(np.dot(np.linalg.inv(D_2), covar_k_2_temp), np.linalg.inv(D_2))
错误:
---------------------------------------------------------------------------
SolverError Traceback (most recent call last)
<ipython-input-28-1e1e9c7b0d73> in <module>
109 prob = Problem(Maximize(dual_func), [alpha_k == 0.96, mu_k_1 >= 0, mu_k_2 - mu_k_1 <= 0, mu_k_2 >= - mu_k_1, mu_k_2 <= 0])
110
--> 111 print(prob.solve(solver=CVXOPT, verbose=True, kktsolver='robust', max_iters=1000))
112 print(t_iter)
113 covar_k_2.value = covar_k_1.value + covar_k_tilde.value
~\Anaconda3\lib\site-packages\cvxpy\problems\problem.py in solve(self, *args, **kwargs)
287 else:
288 solve_func = Problem._solve
--> 289 return solve_func(self, *args, **kwargs)
290
291 @classmethod
~\Anaconda3\lib\site-packages\cvxpy\problems\problem.py in _solve(self, solver, warm_start, verbose, parallel, gp, qcp, **kwargs)
565 solver, warm_start, verbose, **kwargs)
566
--> 567 self._construct_chains(solver=solver, gp=gp)
568 data, solving_inverse_data = self._solving_chain.apply(
569 self._intermediate_problem)
~\Anaconda3\lib\site-packages\cvxpy\problems\problem.py in _construct_chains(self, solver, gp)
508
509 except Exception as e:
--> 510 raise e
511
512 def _solve(self,
~\Anaconda3\lib\site-packages\cvxpy\problems\problem.py in _construct_chains(self, solver, gp)
503 self._solving_chain = \
504 construct_solving_chain(self._intermediate_problem,
--> 505 candidate_solvers)
506
507 self._cached_chain_key = chain_key
~\Anaconda3\lib\site-packages\cvxpy\reductions\solvers\solving_chain.py in construct_solving_chain(problem, candidates)
93 "enough constraints in the problem." % (
94 candidates['conic_solvers'],
---> 95 ", ".join([cone.__name__ for cone in cones])))
96
97
SolverError: Either candidate conic solvers (['CVXOPT']) do not support the cones output by the problem (SOC, ExpCone, PSD), or there are not enough constraints in the problem.
代码引用自:https://www.maths.ox.ac.uk/system/files/attachments/TT18_dissertation_Vu_0.pdf
您的 objective 包含 PSD 矩阵,log_det 和 entr,因此您需要一个支持半定锥和指数锥的求解器。据我所知,只有 MOSEK 9 和 SCS 可以在 CVXPY 中为您完成这两项工作。
严格的不等式没有实际意义,因为浮点运算的精度和未达到的问题。考虑最小化 x 服从 x>0。具有严格不等式的问题往往在极限中有最佳点,这意味着您无论如何都在合并非严格闭包。