Python: CVXPY SolverError

Python: CVXPY SolverError

目的:我试图在 python 中使用 cvxpy 来最大化 dual_func,但是我得到以下 SolverError,我相信它可能是 Variable 具有不同的维度,但似乎无法弄清楚这个问题。我试过使用其他求解器,如 ECOS,但无济于事。我是凸优化的新手,如果有人能帮助我指出代码中错误的正确方向,那就太好了。

问题:代码在 cvxpy 版本 0.4 中有效,但在最新的 cvxpy 版本中无效,给我错误:

SolverError: Either candidate conic solvers (['CVXOPT']) do not support the
cones output by the problem (SOC, ExpCone, PSD), or there are not enough 
constraints in the problem.

dual_func是凸的,是DCP,所以我还是不知道为什么解不出来

附加问题:是否可以在 cvxpy 中使用严格的不等式约束?文档指出这是不可能的,因为它在现实世界中没有意义,但是,我真的不明白为什么会这样。

from cvxpy import *
alpha_k = Variable()
mu_k_1 = Variable(shape=(int(len(h_undl_list))))
mu_k_2 = Variable(shape=(int(len(h_undl_list))))
covar_k_1 = Variable(shape=(int(len(h_undl_list)), int(len(h_undl_list))), PSD=True)
covar_k_2 = Variable(shape=(int(len(h_undl_list)), int(len(h_undl_list))), PSD=True)

# fitting the first 1000 returns with normal GMM
from sklearn import mixture
clf = mixture.GaussianMixture(n_components=2, max_iter=1000, tol=1e-7).fit(ret_df_period)
clf = mixture.GaussianMixture(n_components=2, max_iter=500, tol=1e-7, weights_init=[0.2, 0.8]).fit(ret_df_period)

print('weights')
print(clf.weights_)

# sorting the values to the normal regime or the stressed regime
if clf.weights_[0] > clf.weights_[1]:
    alpha_k.value = clf.weights_[0]
    mu_k_1.value = clf.means_[0]
    mu_k_2.value = clf.means_[1]
    covar_k_1.value = clf.covariances_[0]
    covar_k_2.value = clf.covariances_[1]
else:
    alpha_k.value = clf.weights_[1]
    mu_k_1.value = clf.means_[1]
    mu_k_2.value = clf.means_[0]
    covar_k_1.value = clf.covariances_[1]
    covar_k_2.value = clf.covariances_[0]
iter_t = 3
N = ret_df_period.shape[0] #time step, e.g. return at t
K = 2

for t_iter in range(iter_t):
    q_k = [alpha_k.value, 1 - alpha_k.value]
    mu_sk_list = [np.array(mu_k_1.value).flatten(), np.array(mu_k_2.value).flatten()]
    covar_sk_list = [covar_k_1.value, covar_k_2.value]
    q_posterior_list = []
    q_posterior_map = {}
    
    
    # E-Step
    import math
    # marginal probability, lambda_k * multivariate_normal.pdf, returns a matrix with K rows, N columns
    mar_prob_list = np.zeros((K,N))
    for k_idx_temp in range(0, K):
        for j_temp in range(0, N):
            mar_prob_list[k_idx_temp, j_temp] = (q_k[k_idx_temp] * multivariate_normal.pdf(ret_df_period.iloc[j_temp].values, mean = mu_sk_list[k_idx_temp], cov = covar_sk_list[k_idx_temp]))
    
    # lambda_k_n, q_n, q_posterior_distribution, returns a matrix with K rows, N columns
    for k_idx in range(0, K):
        for j in range(0, N):
            joint_prob_k = q_k[k_idx] * multivariate_normal.pdf(ret_df_period.iloc[j], mean = mu_sk_list[k_idx], cov = covar_sk_list[k_idx])
            q_posterior = joint_prob_k / np.sum(mar_prob_list[:, j])
            q_posterior_list.append(q_posterior)
        
        q_posterior_map[k_idx] = q_posterior_list
        q_posterior_list = []
    
    
    #####a priori after E-Step, i.e. M-Step
    # Empirical probability, returns the two lambda_sk values
    alpha_sk_list = []
    for k_idx in range(0, K):
        alpha_sk = 0
        for j in range(0, N):
            alpha_sk += q_posterior_map[k_idx][j]
        alpha_sk_list.append(alpha_sk / N)
    alpha_sk_list[1] = 1 - alpha_sk_list[0]
        
    
    # Mean, returns two sets of means
    mu_sk_list = []
    weighted_xs_map = {}
    for k_idx in range(0, K):
        weighted_xs_map[k_idx] = np.empty([int(len(h_undl_list)), N])
    
    for k_idx in range(0, K):
        mu_sk = 0
        for j in range(0, N):
            weighted_x = (q_posterior_map[k_idx][j]) * ret_df_period.iloc[j].values
            mu_sk += weighted_x
            weighted_xs_map[k_idx][:, j] = weighted_x / alpha_sk_list[k_idx] # should divide twice of alpha_sk here? Come back and double check later!!!
        mu_sk_list.append(mu_sk / N / alpha_sk_list[k_idx])
    
    # Cov, returns two cov
    covar_sk_list = []
    for k_idx in range(0, K):
        sum_covar = 0
        for j in range(0, N):
            w0_temp = (ret_df_period.iloc[j] - mu_sk_list[k_idx]).values
            w0_covar = np.outer(w0_temp, w0_temp)
            weighted_covar = (q_posterior_map[k_idx][j]) * w0_covar
            sum_covar += weighted_covar
        sum_covar = sum_covar / (alpha_sk_list[k_idx] * N)
        covar_sk_list.append(sum_covar)


    #####
    #information params
    n_sk_list = []
    for k_idx in range(0, K - 1):
        n_sk = np.log(alpha_sk_list[k_idx] / (1 - (np.sum(alpha_sk_list[: -1]))))
        n_sk_list.append(n_sk)
    
    m_sk_list = []
    for k_idx in range(0, K):
        m_sk = np.dot(np.linalg.inv(covar_sk_list[k_idx]), mu_sk_list[k_idx])
        m_sk_list.append(m_sk)
    
    S_sk_list = []
    for k_idx in range(0, K):
        S_sk = np.linalg.inv(covar_sk_list[k_idx])
        S_sk_list.append(S_sk)
    
    
    alpha_sk_1 = Parameter(nonneg=True)
    alpha_sk_2 = Parameter(nonneg=True)
    n_sk_pr = Parameter()
    dim = Parameter(nonneg=True)
    m_sk_1 = Parameter(shape=(int(len(h_undl_list)),))
    m_sk_2 = Parameter(shape=(int(len(h_undl_list)),))
    alpha_sk_1.value = alpha_sk_list[0]
    alpha_sk_2.value = alpha_sk_list[1]
    n_sk_pr.value = n_sk_list[0]
    m_sk_1.value = m_sk_list[0]
    m_sk_2.value = m_sk_list[1]
    dim.value = int(len(h_undl_list))
    covar_k_tilde = Variable(shape=(int(len(h_undl_list)), int(len(h_undl_list))), PSD=True)
    
    
    dual_func = entr(alpha_k) + entr(1 - alpha_k) + \
    alpha_sk_1*0.5*log_det(covar_k_1) + alpha_sk_1*dim*0.5*log(2*np.e*np.pi) + alpha_sk_2*0.5*log_det(covar_k_1 + covar_k_tilde) + alpha_sk_2*dim*0.5*log(2*np.e*np.pi) + alpha_k*n_sk_pr + \
    alpha_sk_1*(mu_k_1.T)*m_sk_1 - alpha_sk_1*0.5*trace(covar_k_1*S_sk_list[0]) - alpha_sk_1*0.5*quad_form(mu_k_1, S_sk_list[0]) + \
    alpha_sk_2*(mu_k_1.T)*m_sk_2 - alpha_sk_2*0.5*trace((covar_k_1 + covar_k_tilde)*S_sk_list[1]) - alpha_sk_2*0.5*quad_form(mu_k_2, S_sk_list[1])


    prob = Problem(Maximize(dual_func), [alpha_k == 0.96, mu_k_1 >= 0, mu_k_2 - mu_k_1 <= 0, mu_k_2 >= - mu_k_1, mu_k_2 <= 0])
    
    print(prob.solve(solver=CVXOPT, verbose=True, kktsolver='robust', max_iters=1000))
    print(t_iter)
    covar_k_2.value = covar_k_1.value + covar_k_tilde.value
    print(is_pos_def(covar_k_1.value))
    print(is_pos_def(covar_k_tilde.value))
    print(is_pos_def(covar_k_2.value))
    
    covar_k_1_temp = np.array(covar_k_1.value)
    covar_k_2_temp = np.array(covar_k_2.value)
    covar_k_tilde_temp = np.array(covar_k_tilde.value)
    mu_k_1_temp = np.array(mu_k_1.value).flatten()
    mu_k_2_temp = np.array(mu_k_2.value).flatten()
    D_1 = np.diag(np.sqrt(np.diag(covar_k_1_temp)))
    correl_k_1 = np.dot(np.dot(np.linalg.inv(D_1), covar_k_1_temp), np.linalg.inv(D_1))
    D2 = np.diag(np.sqrt(np.diag(covar_k_2_temp)))
    correl_k_2 = np.dot(np.dot(np.linalg.inv(D_2), covar_k_2_temp), np.linalg.inv(D_2))

错误:

---------------------------------------------------------------------------
SolverError                               Traceback (most recent call last)
<ipython-input-28-1e1e9c7b0d73> in <module>
    109     prob = Problem(Maximize(dual_func), [alpha_k == 0.96, mu_k_1 >= 0, mu_k_2 - mu_k_1 <= 0, mu_k_2 >= - mu_k_1, mu_k_2 <= 0])
    110 
--> 111     print(prob.solve(solver=CVXOPT, verbose=True, kktsolver='robust', max_iters=1000))
    112     print(t_iter)
    113     covar_k_2.value = covar_k_1.value + covar_k_tilde.value

~\Anaconda3\lib\site-packages\cvxpy\problems\problem.py in solve(self, *args, **kwargs)
    287         else:
    288             solve_func = Problem._solve
--> 289         return solve_func(self, *args, **kwargs)
    290 
    291     @classmethod

~\Anaconda3\lib\site-packages\cvxpy\problems\problem.py in _solve(self, solver, warm_start, verbose, parallel, gp, qcp, **kwargs)
    565                     solver, warm_start, verbose, **kwargs)
    566 
--> 567         self._construct_chains(solver=solver, gp=gp)
    568         data, solving_inverse_data = self._solving_chain.apply(
    569             self._intermediate_problem)

~\Anaconda3\lib\site-packages\cvxpy\problems\problem.py in _construct_chains(self, solver, gp)
    508 
    509             except Exception as e:
--> 510                 raise e
    511 
    512     def _solve(self,

~\Anaconda3\lib\site-packages\cvxpy\problems\problem.py in _construct_chains(self, solver, gp)
    503                 self._solving_chain = \
    504                     construct_solving_chain(self._intermediate_problem,
--> 505                                             candidate_solvers)
    506 
    507                 self._cached_chain_key = chain_key

~\Anaconda3\lib\site-packages\cvxpy\reductions\solvers\solving_chain.py in construct_solving_chain(problem, candidates)
     93                       "enough constraints in the problem." % (
     94                           candidates['conic_solvers'],
---> 95                           ", ".join([cone.__name__ for cone in cones])))
     96 
     97 

SolverError: Either candidate conic solvers (['CVXOPT']) do not support the cones output by the problem (SOC, ExpCone, PSD), or there are not enough constraints in the problem.

代码引用自:https://www.maths.ox.ac.uk/system/files/attachments/TT18_dissertation_Vu_0.pdf

您的 objective 包含 PSD 矩阵,log_det 和 entr,因此您需要一个支持半定锥和指数锥的求解器。据我所知,只有 MOSEK 9 和 SCS 可以在 CVXPY 中为您完成这两项工作。

严格的不等式没有实际意义,因为浮点运算的精度和未达到的问题。考虑最小化 x 服从 x>0。具有严格不等式的问题往往在极限中有最佳点,这意味着您无论如何都在合并非严格闭包。