How to fix the TypeError: G must be a 'd' matrix?

Question

目标：尝试用一个玩具数据集跑通一个优化过程。

我遇到以下错误：

---------------------------------------------------------------------------
TypeError                                 Traceback (most recent call last)
<ipython-input-16-2706eba23671> in <module>()
     50 # Solving the problem
     51 problem = cvxpy.Problem(cvxpy.Minimize(cost), constraints=constraints)
---> 52 problem.solve(solver=cvxpy.GLPK_MI)

D:\Anaconda3\lib\site-packages\cvxpy\problems\problem.py in solve(self, *args, **kwargs)
    193             return func(self, *args, **kwargs)
    194         else:
--> 195             return self._solve(*args, **kwargs)
    196 
    197     @classmethod

D:\Anaconda3\lib\site-packages\cvxpy\problems\problem.py in _solve(self, solver, ignore_dcp, warm_start, verbose, parallel, **kwargs)
    319             results_dict = solver.solve(objective, constraints,
    320                                         self._cached_data, warm_start, verbose,
--> 321                                         kwargs)
    322         # Presolve determined problem was unbounded or infeasible.
    323         else:

D:\Anaconda3\lib\site-packages\cvxpy\problems\solvers\glpk_mi_intf.py in solve(self, objective, constraints, cached_data, warm_start, verbose, solver_opts)
     97                                           data[s.B],
     98                                           set(data[s.INT_IDX]),
---> 99                                           set(data[s.BOOL_IDX]))
    100             results_dict = {}
    101             results_dict["status"] = results_tup[0]

TypeError: G must be a 'd' matrix

下面是我使用的代码：

from sklearn.datasets import make_classification
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
import numpy as np
import cvxpy

# Reproduction script from the question. It trains a classifier on synthetic
# data, turns the predicted class probabilities into costs, and poses a
# boolean assignment problem in cvxpy. The final `problem.solve(...)` call is
# the statement flagged in the traceback above (TypeError: G must be a 'd'
# matrix).

# Synthetic data set: 1000 samples, 20 features, 8 classes.
X, y = make_classification(n_samples=1000, n_features=20, n_classes=8,n_informative=5,
                           class_sep=0.2, random_state=2)

# Hold out 20% of the samples for the optimisation step.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2,random_state=1)

model = RandomForestClassifier(random_state=1)
model.fit(X_train, y_train)
# Per-class probabilities for every held-out sample
# (presumably one column per class, i.e. 8 columns -- confirm).
test_probs = model.predict_proba(X_test)

# Clip so that we never take the log of exactly 0 or 1.
test_probs = np.clip(test_probs, 0.0001, 0.9999)

# Turn probabilities into costs: higher probability -> lower cost.
model_costs = -np.log10(test_probs)

# Our allocation cannot exceed our supply
# 150 flyers, 80 pamphlets, 25 bumper stickers
# np.atleast_2d turns the list into a single-row 2-D array.
supply = np.atleast_2d([150, 80, 25])

# Boolean cvxpy variable of assignments, with the same shape as test_probs.
selection = cvxpy.Bool(*test_probs.shape)

# Constant matrix that counts how many of each
# material we sent to each constituent.
# 8 rows by 3 columns; the columns presumably follow the order of `supply`
# (flyers, pamphlets, bumper stickers) -- confirm.
# NOTE(review): the entries are integer literals, so NumPy infers an integer
# dtype; this is presumably what the solver rejects ('d' = double) -- confirm.
TRANSFORMER = np.array([[1,0,0],
                        [0,1,0],
                        [0,0,1],
                        [1,1,0],
                        [1,0,1],
                        [0,1,1],
                        [1,1,1],
                        [0,0,0]])

# Total amount of each material handed out (summed along axis 0) must not
# exceed the supply.
supply_constraint = cvxpy.sum_entries(selection * TRANSFORMER, axis=0) <= supply

# We must make our choice per constituent
# remember that the last column is for "no materials"
# (the last row of TRANSFORMER is all zeros).
feasibility_constraint = cvxpy.sum_entries(selection, axis=1) == 1
constraints = [supply_constraint, feasibility_constraint]

# Total cost: element-wise product of the costs and the selection, summed
# over all entries.
cost = cvxpy.sum_entries(cvxpy.mul_elemwise(model_costs, selection))

# Solving the problem with the GLPK_MI solver; this is the call that raises
# the TypeError shown in the traceback.
problem = cvxpy.Problem(cvxpy.Minimize(cost), constraints=constraints)
problem.solve(solver=cvxpy.GLPK_MI)

如果更多上下文有帮助：我是按照这里（原文链接 "here"）的教程一步步操作的。我确实在 SO 上找到了一个相关的问题，但不明白在我的问题目前的状态下应该如何应用其中的解决方法。

Answer 1

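尝试将 TRANSFORMER 转换为浮点数组，例如 `TRANSFORMER = TRANSFORMER.astype(float)`，或者在创建时指定 `dtype=float`。错误信息中的 'd' 表示双精度浮点（double）矩阵，而由整数字面量创建的 NumPy 数组默认是整数类型。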

顺便说一下，这段代码在 CVXPY v1.0.11 中可以正常运行（做了一些小的修改，修改后的代码如下）：

from sklearn.datasets import make_classification
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
import numpy as np
import cvxpy

# Answer script, written against the CVXPY 1.0 API. It trains a classifier on
# synthetic data, converts the predicted class probabilities into costs, and
# solves a boolean assignment problem subject to a supply limit.

# Synthetic data set: 1000 samples, 20 features, 8 classes.
X, y = make_classification(n_samples=1000, n_features=20, n_classes=8,
                           n_informative=5, class_sep=0.2, random_state=2)

# Hold out 20% of the samples for the optimisation step.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2,
                                                    random_state=1)

model = RandomForestClassifier(random_state=1)
model.fit(X_train, y_train)
test_probs = model.predict_proba(X_test)

# Clip so that we never take the log of exactly 0 or 1.
test_probs = np.clip(test_probs, 0.0001, 0.9999)

# Turn probabilities into costs: higher probability -> lower cost.
model_costs = -np.log10(test_probs)

# Our allocation cannot exceed our supply
# 150 flyers, 80 pamphlets, 25 bumper stickers
supply = np.array([150, 80, 25])

# Boolean cvxpy variable of assignments, with the same shape as test_probs.
selection = cvxpy.Variable(shape=test_probs.shape, boolean=True)

# Constant matrix that counts how many of each
# material we sent to each constituent.
# dtype=float is the fix recommended in the answer: the solver interface
# expects a double-precision ('d') constraint matrix, while integer literals
# alone would give an integer array.
TRANSFORMER = np.array([[1, 0, 0],
                        [0, 1, 0],
                        [0, 0, 1],
                        [1, 1, 0],
                        [1, 0, 1],
                        [0, 1, 1],
                        [1, 1, 1],
                        [0, 0, 0]], dtype=float)

# Total amount of each material handed out (summed along axis 0) must not
# exceed the supply. `@` is the explicit matrix product; newer CVXPY
# releases deprecate `*` for matrix multiplication.
supply_constraint = cvxpy.sum(selection @ TRANSFORMER, axis=0) <= supply

# We must make our choice per constituent
# remember that the last column is for "no materials"
# (the last row of TRANSFORMER is all zeros).
feasibility_constraint = cvxpy.sum(selection, axis=1) == 1
constraints = [supply_constraint, feasibility_constraint]

# Total cost: element-wise product of the costs and the selection, summed
# over all entries.
cost = cvxpy.sum(cvxpy.multiply(model_costs, selection))

# Solving the problem
problem = cvxpy.Problem(cvxpy.Minimize(cost), constraints=constraints)
problem.solve(solver=cvxpy.GLPK_MI)

# Optimal objective value, the first five assignment rows, and the total
# amount of each material actually used.
print(problem.value)
print(selection.value[0:5])
print(np.dot(selection.value, TRANSFORMER).sum(axis=0))

How to fix the TypeError: G must be a 'd' matrix?

How to fix the TypeError: G must be a 'd' matrix?

optimization

numpy

python-3.x

cvxopt

cvxpy