TypeError: Unsupported type <class 'scipy.sparse.csr.csr_matrix'> for StructuredDataAdapter
TypeError: Unsupported type <class 'scipy.sparse.csr.csr_matrix'> for StructuredDataAdapter
谁能帮我解决上面的错误?
### using transformers
from sklearn.compose import ColumnTransformer
from sklearn.feature_extraction.text import CountVectorizer, TfidfVectorizer
from sklearn.preprocessing import OneHotEncoder
from sklearn.preprocessing import MinMaxScaler
# Per-column preprocessing: TF-IDF for the two free-text columns, one-hot
# encoding for the two categorical columns, MinMaxScaler for all remaining columns.
column_trans = ColumnTransformer(
[
('CompanyName_bow', TfidfVectorizer(), 'CompanyName'),
('state_category', OneHotEncoder(), ['state']),
('Termination_Reason_Desc_bow', TfidfVectorizer(), 'Termination_Reason_Desc'),
('TermType_category', OneHotEncoder(), ['TermType'])
],
remainder=MinMaxScaler()
)
# Only the first 100 rows are used; the transformed X ends up as a SciPy sparse
# matrix (type(X_train) is reported as csr_matrix after the split).
X = column_trans.fit_transform(X.head(100))
from sklearn.preprocessing import LabelEncoder
# Encode the first 100 labels as integers.
y = LabelEncoder().fit_transform(y.head(100))
from sklearn.model_selection import train_test_split
# 80/20 train/test split with a fixed seed.
X_train, X_test, y_train, y_test = train_test_split(X,y,test_size=0.2,random_state=5)
X_train.shape #(80, 92)
X_test.shape #(20, 92)
y_train.shape #(80,)
# NOTE: the dense result is only displayed, not assigned, so X_train itself stays sparse.
X_train.todense()
matrix([[0. , 0. , 0. , ..., 0.26921709, 1. ,
0. ],
[0. , 0. , 0. , ..., 0. , 0. ,
1. ],
[0. , 0. , 0. , ..., 0.46148896, 1. ,
0. ],
...,
[0. , 0. , 0. , ..., 0.46148896, 1. ,
0. ],
[0. , 0. , 0. , ..., 0. , 0. ,
1. ],
[0. , 0. , 0. , ..., 0.46148896, 1. ,
0. ]])
type(X_train)
--> scipy.sparse.csr.csr_matrix
print(y_train)
array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0])
type(y_train)
numpy.ndarray
# use autokeras to find a model for the sonar dataset
# NOTE(review): "sonar dataset" looks like a tutorial leftover; X_train/y_train
# here come from the train_test_split earlier in this script.
from numpy import asarray
from pandas import read_csv
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from autokeras import StructuredDataClassifier
print(X_train.shape, X_test.shape, y_train.shape, y_test.shape)
# define the search
search = StructuredDataClassifier(max_trials=15)
# perform the search
# NOTE: X_train is still a scipy.sparse csr_matrix here, which is what raises
# "TypeError: Unsupported type ... for StructuredDataAdapter".
search.fit(x=(X_train), y=y_train, verbose=0)
# evaluate the model
loss, acc = search.evaluate(X_test, y_test, verbose=0)
print('Accuracy: %.3f' % acc)
错误
(80, 92) (20, 92) (80,) (20,)
INFO:tensorflow:Reloading Oracle from existing project .\structured_data_classifier\oracle.json
INFO:tensorflow:Reloading Tuner from .\structured_data_classifier\tuner0.json
---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
<ipython-input-106-94708e5d279d> in <module>
10 search = StructuredDataClassifier(max_trials=15)
11 # perform the search
---> 12 search.fit(x=(X_train), y=y_train, verbose=0)
13 # evaluate the model
14 loss, acc = search.evaluate(X_test, y_test, verbose=0)
~\anaconda3\lib\site-packages\autokeras\tasks\structured_data.py in fit(self, x, y, epochs, callbacks, validation_split, validation_data, **kwargs)
313 [keras.Model.fit](https://www.tensorflow.org/api_docs/python/tf/keras/Model#fit).
314 """
--> 315 super().fit(
316 x=x,
317 y=y,
~\anaconda3\lib\site-packages\autokeras\tasks\structured_data.py in fit(self, x, y, epochs, callbacks, validation_split, validation_data, **kwargs)
132 self.check_in_fit(x)
133
--> 134 super().fit(
135 x=x,
136 y=y,
~\anaconda3\lib\site-packages\autokeras\auto_model.py in fit(self, x, y, batch_size, epochs, callbacks, validation_split, validation_data, **kwargs)
259 validation_split = 0
260
--> 261 dataset, validation_data = self._convert_to_dataset(
262 x=x, y=y, validation_data=validation_data, batch_size=batch_size
263 )
~\anaconda3\lib\site-packages\autokeras\auto_model.py in _convert_to_dataset(self, x, y, validation_data, batch_size)
373 x = dataset.map(lambda x, y: x)
374 y = dataset.map(lambda x, y: y)
--> 375 x = self._adapt(x, self.inputs, batch_size)
376 y = self._adapt(y, self._heads, batch_size)
377 dataset = tf.data.Dataset.zip((x, y))
~\anaconda3\lib\site-packages\autokeras\auto_model.py in _adapt(self, dataset, hms, batch_size)
287 adapted = []
288 for source, hm in zip(sources, hms):
--> 289 source = hm.get_adapter().adapt(source, batch_size)
290 adapted.append(source)
291 if len(adapted) == 1:
~\anaconda3\lib\site-packages\autokeras\engine\adapter.py in adapt(self, dataset, batch_size)
65 tf.data.Dataset. The converted dataset.
66 """
---> 67 self.check(dataset)
68 dataset = self.convert_to_dataset(dataset, batch_size)
69 return dataset
~\anaconda3\lib\site-packages\autokeras\adapters\input_adapters.py in check(self, x)
63 def check(self, x):
64 if not isinstance(x, (pd.DataFrame, np.ndarray, tf.data.Dataset)):
---> 65 raise TypeError(
66 "Unsupported type {type} for "
67 "{name}.".format(type=type(x), name=self.__class__.__name__)
TypeError: Unsupported type <class 'scipy.sparse.csr.csr_matrix'> for StructuredDataAdapter.
(由于上面粘贴的代码可读性较差,以下只是推测)原因可能是在已保存的模型上使用了不同的数据集。我建议你在 BayesianOptimization 的构造代码中添加 overwrite=True。重新安装 TensorFlow 也可能有所帮助。
正如你在与本帖同时提交的 AutoKeras GitHub issue 中得到的答复,AutoKeras(目前)不支持稀疏矩阵,建议将其转换为密集的 NumPy 数组。事实上,正如 StructuredDataClassifier
文档中所注明的,相应 .fit
方法中的训练数据 x
应为:
String, numpy.ndarray, pandas.DataFrame or tensorflow.Dataset
而不是 SciPy 稀疏矩阵。
鉴于这里你的 X_train
真的很小:
X_train.shape
# (80, 92)
你完全没有理由使用稀疏矩阵。虽然在这里你似乎试图将 X_train
转换为密集矩阵,但你没有把结果重新赋值给它,因此它仍然是稀疏的;来自你自己的代码:
X_train.todense()
# ...
type(X_train)
# scipy.sparse.csr.csr_matrix
你需要做的只是把转换得到的密集数组重新赋值给它:
# NOTE: csr_matrix is imported but not used by this two-line snippet.
from scipy.sparse import csr_matrix
# Rebind X_train to the dense result; calling the conversion without assigning leaves it sparse.
X_train = X_train.toarray()
下面用虚拟数据做一个简短的演示,说明这种做法是可行的:
import numpy as np
from scipy.sparse import csr_matrix

# Empty 3x4 sparse matrix used as dummy data. The builtin `float` is used
# instead of the `np.float` alias, which was deprecated in NumPy 1.20 and
# removed in NumPy 1.24 (where it raises AttributeError).
X_train = csr_matrix((3, 4), dtype=float)
type(X_train)
# scipy.sparse.csr.csr_matrix

# this will not work: the dense result is discarded and X_train is not rebound
X_train.todense()
type(X_train)
# scipy.sparse.csr.csr_matrix # still sparse

# this will work: X_train is rebound to the dense array
X_train = X_train.toarray()
type(X_train)
# numpy.ndarray
您应该对 X_test
数据执行类似的过程(您的 y_train
和 y_test
似乎已经是密集的 Numpy 数组)。
谁能帮我解决上面的错误?
### using transformers
from sklearn.compose import ColumnTransformer
from sklearn.feature_extraction.text import CountVectorizer, TfidfVectorizer
from sklearn.preprocessing import OneHotEncoder
from sklearn.preprocessing import MinMaxScaler
# Per-column preprocessing: TF-IDF for the two free-text columns, one-hot
# encoding for the two categorical columns, MinMaxScaler for all remaining columns.
column_trans = ColumnTransformer(
[
('CompanyName_bow', TfidfVectorizer(), 'CompanyName'),
('state_category', OneHotEncoder(), ['state']),
('Termination_Reason_Desc_bow', TfidfVectorizer(), 'Termination_Reason_Desc'),
('TermType_category', OneHotEncoder(), ['TermType'])
],
remainder=MinMaxScaler()
)
# Only the first 100 rows are used; the transformed X ends up as a SciPy sparse
# matrix (type(X_train) is reported as csr_matrix after the split).
X = column_trans.fit_transform(X.head(100))
from sklearn.preprocessing import LabelEncoder
# Encode the first 100 labels as integers.
y = LabelEncoder().fit_transform(y.head(100))
from sklearn.model_selection import train_test_split
# 80/20 train/test split with a fixed seed.
X_train, X_test, y_train, y_test = train_test_split(X,y,test_size=0.2,random_state=5)
X_train.shape #(80, 92)
X_test.shape #(20, 92)
y_train.shape #(80,)
# NOTE: the dense result is only displayed, not assigned, so X_train itself stays sparse.
X_train.todense()
matrix([[0. , 0. , 0. , ..., 0.26921709, 1. ,
0. ],
[0. , 0. , 0. , ..., 0. , 0. ,
1. ],
[0. , 0. , 0. , ..., 0.46148896, 1. ,
0. ],
...,
[0. , 0. , 0. , ..., 0.46148896, 1. ,
0. ],
[0. , 0. , 0. , ..., 0. , 0. ,
1. ],
[0. , 0. , 0. , ..., 0.46148896, 1. ,
0. ]])
type(X_train)
--> scipy.sparse.csr.csr_matrix
print(y_train)
array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0])
type(y_train)
numpy.ndarray
# use autokeras to find a model for the sonar dataset
# NOTE(review): "sonar dataset" looks like a tutorial leftover; X_train/y_train
# here come from the train_test_split earlier in this script.
from numpy import asarray
from pandas import read_csv
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from autokeras import StructuredDataClassifier
print(X_train.shape, X_test.shape, y_train.shape, y_test.shape)
# define the search
search = StructuredDataClassifier(max_trials=15)
# perform the search
# NOTE: X_train is still a scipy.sparse csr_matrix here, which is what raises
# "TypeError: Unsupported type ... for StructuredDataAdapter".
search.fit(x=(X_train), y=y_train, verbose=0)
# evaluate the model
loss, acc = search.evaluate(X_test, y_test, verbose=0)
print('Accuracy: %.3f' % acc)
错误
(80, 92) (20, 92) (80,) (20,)
INFO:tensorflow:Reloading Oracle from existing project .\structured_data_classifier\oracle.json
INFO:tensorflow:Reloading Tuner from .\structured_data_classifier\tuner0.json
---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
<ipython-input-106-94708e5d279d> in <module>
10 search = StructuredDataClassifier(max_trials=15)
11 # perform the search
---> 12 search.fit(x=(X_train), y=y_train, verbose=0)
13 # evaluate the model
14 loss, acc = search.evaluate(X_test, y_test, verbose=0)
~\anaconda3\lib\site-packages\autokeras\tasks\structured_data.py in fit(self, x, y, epochs, callbacks, validation_split, validation_data, **kwargs)
313 [keras.Model.fit](https://www.tensorflow.org/api_docs/python/tf/keras/Model#fit).
314 """
--> 315 super().fit(
316 x=x,
317 y=y,
~\anaconda3\lib\site-packages\autokeras\tasks\structured_data.py in fit(self, x, y, epochs, callbacks, validation_split, validation_data, **kwargs)
132 self.check_in_fit(x)
133
--> 134 super().fit(
135 x=x,
136 y=y,
~\anaconda3\lib\site-packages\autokeras\auto_model.py in fit(self, x, y, batch_size, epochs, callbacks, validation_split, validation_data, **kwargs)
259 validation_split = 0
260
--> 261 dataset, validation_data = self._convert_to_dataset(
262 x=x, y=y, validation_data=validation_data, batch_size=batch_size
263 )
~\anaconda3\lib\site-packages\autokeras\auto_model.py in _convert_to_dataset(self, x, y, validation_data, batch_size)
373 x = dataset.map(lambda x, y: x)
374 y = dataset.map(lambda x, y: y)
--> 375 x = self._adapt(x, self.inputs, batch_size)
376 y = self._adapt(y, self._heads, batch_size)
377 dataset = tf.data.Dataset.zip((x, y))
~\anaconda3\lib\site-packages\autokeras\auto_model.py in _adapt(self, dataset, hms, batch_size)
287 adapted = []
288 for source, hm in zip(sources, hms):
--> 289 source = hm.get_adapter().adapt(source, batch_size)
290 adapted.append(source)
291 if len(adapted) == 1:
~\anaconda3\lib\site-packages\autokeras\engine\adapter.py in adapt(self, dataset, batch_size)
65 tf.data.Dataset. The converted dataset.
66 """
---> 67 self.check(dataset)
68 dataset = self.convert_to_dataset(dataset, batch_size)
69 return dataset
~\anaconda3\lib\site-packages\autokeras\adapters\input_adapters.py in check(self, x)
63 def check(self, x):
64 if not isinstance(x, (pd.DataFrame, np.ndarray, tf.data.Dataset)):
---> 65 raise TypeError(
66 "Unsupported type {type} for "
67 "{name}.".format(type=type(x), name=self.__class__.__name__)
TypeError: Unsupported type <class 'scipy.sparse.csr.csr_matrix'> for StructuredDataAdapter.
(由于上面粘贴的代码可读性较差,以下只是推测)原因可能是在已保存的模型上使用了不同的数据集。我建议你在 BayesianOptimization 的构造代码中添加 overwrite=True。重新安装 TensorFlow 也可能有所帮助。
正如你在与本帖同时提交的 AutoKeras GitHub issue 中得到的答复,AutoKeras(目前)不支持稀疏矩阵,建议将其转换为密集的 NumPy 数组。事实上,正如 StructuredDataClassifier
文档中所注明的,相应 .fit
方法中的训练数据 x
应为:
String, numpy.ndarray, pandas.DataFrame or tensorflow.Dataset
而不是 SciPy 稀疏矩阵。
鉴于这里你的 X_train
真的很小:
X_train.shape
# (80, 92)
你完全没有理由使用稀疏矩阵。虽然在这里你似乎试图将 X_train
转换为密集矩阵,但你没有把结果重新赋值给它,因此它仍然是稀疏的;来自你自己的代码:
X_train.todense()
# ...
type(X_train)
# scipy.sparse.csr.csr_matrix
你需要做的只是把转换得到的密集数组重新赋值给它:
# NOTE: csr_matrix is imported but not used by this two-line snippet.
from scipy.sparse import csr_matrix
# Rebind X_train to the dense result; calling the conversion without assigning leaves it sparse.
X_train = X_train.toarray()
下面用虚拟数据做一个简短的演示,说明这种做法是可行的:
import numpy as np
from scipy.sparse import csr_matrix

# Empty 3x4 sparse matrix used as dummy data. The builtin `float` is used
# instead of the `np.float` alias, which was deprecated in NumPy 1.20 and
# removed in NumPy 1.24 (where it raises AttributeError).
X_train = csr_matrix((3, 4), dtype=float)
type(X_train)
# scipy.sparse.csr.csr_matrix

# this will not work: the dense result is discarded and X_train is not rebound
X_train.todense()
type(X_train)
# scipy.sparse.csr.csr_matrix # still sparse

# this will work: X_train is rebound to the dense array
X_train = X_train.toarray()
type(X_train)
# numpy.ndarray
您应该对 X_test
数据执行类似的过程(您的 y_train
和 y_test
似乎已经是密集的 Numpy 数组)。