在 Flask App 中从 Jupyter Notebook 中解开实例
Unpickle instance from Jupyter Notebook in Flask App
我为 word2vec 矢量化创建了一个 class,它运行良好。
但是当我创建一个模型 pickle 文件并在 Flask 应用程序中使用该 pickle 文件时,我收到如下错误:
AttributeError: module '__main__'
has no attribute 'GensimWord2VecVectorizer'
我正在 Google Colab 上创建模型。
Jupyter Notebook 中的代码:
# Word2Vec Model
import numpy as np
from sklearn.base import BaseEstimator, TransformerMixin
from gensim.models import Word2Vec
class GensimWord2VecVectorizer(BaseEstimator, TransformerMixin):
    """Sklearn-compatible transformer wrapping a gensim ``Word2Vec`` model.

    ``fit`` trains the embedding model on the tokenized sentences in ``X``;
    ``transform`` turns each token sequence into the mean of its known word
    vectors (a zero vector when no token is in the vocabulary).
    """

    def __init__(self, size=100, alpha=0.025, window=5, min_count=5, max_vocab_size=None,
                 sample=0.001, seed=1, workers=3, min_alpha=0.0001, sg=0, hs=0, negative=5,
                 ns_exponent=0.75, cbow_mean=1, hashfxn=hash, iter=5, null_word=0,
                 trim_rule=None, sorted_vocab=1, batch_words=10000, compute_loss=False,
                 callbacks=(), max_final_vocab=None):
        # Every constructor argument is stored verbatim under the same name so
        # that sklearn's get_params()/set_params() (used by GridSearchCV when
        # cloning estimators) keeps working.
        self.size = size
        self.alpha = alpha
        self.window = window
        self.min_count = min_count
        self.max_vocab_size = max_vocab_size
        self.sample = sample
        self.seed = seed
        self.workers = workers
        self.min_alpha = min_alpha
        self.sg = sg
        self.hs = hs
        self.negative = negative
        self.ns_exponent = ns_exponent
        self.cbow_mean = cbow_mean
        self.hashfxn = hashfxn
        self.iter = iter
        self.null_word = null_word
        self.trim_rule = trim_rule
        self.sorted_vocab = sorted_vocab
        self.batch_words = batch_words
        self.compute_loss = compute_loss
        self.callbacks = callbacks
        self.max_final_vocab = max_final_vocab

    def fit(self, X, y=None):
        """Train a Word2Vec model on the token sequences in ``X``."""
        # Forward every stored hyperparameter to gensim unchanged
        # (parameter names match the gensim 3.x Word2Vec signature).
        w2v_kwargs = dict(
            size=self.size, alpha=self.alpha, window=self.window,
            min_count=self.min_count, max_vocab_size=self.max_vocab_size,
            sample=self.sample, seed=self.seed, workers=self.workers,
            min_alpha=self.min_alpha, sg=self.sg, hs=self.hs,
            negative=self.negative, ns_exponent=self.ns_exponent,
            cbow_mean=self.cbow_mean, hashfxn=self.hashfxn, iter=self.iter,
            null_word=self.null_word, trim_rule=self.trim_rule,
            sorted_vocab=self.sorted_vocab, batch_words=self.batch_words,
            compute_loss=self.compute_loss, callbacks=self.callbacks,
            max_final_vocab=self.max_final_vocab,
        )
        self.model_ = Word2Vec(sentences=X, corpus_file=None, **w2v_kwargs)
        return self

    def transform(self, X):
        """Map every token sequence in ``X`` to one embedding row."""
        return np.array([self._get_embedding(tokens) for tokens in X])

    def _get_embedding(self, words):
        # Keep only tokens the trained model actually knows.
        known = [word for word in words if word in self.model_.wv.vocab]
        if not known:
            # No in-vocabulary token: fall back to an all-zero vector.
            return np.zeros(self.size)
        vectors = np.zeros((len(known), self.size), dtype=np.float32)
        for row, word in enumerate(known):
            vectors[row] = self.model_.wv[word]
        return vectors.mean(axis=0)
# column transformer
# Apply the custom Word2Vec vectorizer to the 'STATUS' text column only and
# drop every remaining column.
from sklearn.compose import ColumnTransformer
ct = ColumnTransformer([
('step1', GensimWord2VecVectorizer(), 'STATUS')
], remainder='drop')
# Create Model
from sklearn.svm import SVC
from sklearn.pipeline import Pipeline
from sklearn.model_selection import GridSearchCV
import pickle
import numpy as np
import dill
import torch
# ##########
# SVC - support vector classifier
# ##########
# defining parameter range
# Small grid over C/gamma; 'probability' fixed to True so the refit SVC
# exposes probability estimates.
hyperparameters = {'C': [0.1, 1],
'gamma': [1, 0.1],
'kernel': ['rbf'],
'probability': [True]}
# Full pipeline: raw text column -> mean Word2Vec embedding -> grid-searched SVC.
model_sv = Pipeline([
('column_transformers', ct),
('model', GridSearchCV(SVC(), hyperparameters,
refit=True, verbose=3)),
])
# NOTE(review): X_train / y_train are assumed to be defined earlier in the
# notebook — not visible here.
model_sv_cEXT = model_sv.fit(X_train, y_train['cEXT'])
# Save the trained cEXT - SVM Model.
# Dumping from a notebook records the vectorizer class as
# '__main__.GensimWord2VecVectorizer', which is why loading it from another
# module later raises AttributeError.
import joblib
joblib.dump(model_sv_cEXT, 'model_Word2Vec_sv_cEXT.pkl')
Flask 应用程序中的代码:
# Word2Vec
# Unpickling requires GensimWord2VecVectorizer to be importable under the same
# module path it had when the pickle was written.
model_EXT_WV_SV = joblib.load('utility/model/MachineLearning/SVM/model_Word2Vec_sv_cEXT.pkl')
我试图将相同的 class 复制到我的 Flask 文件中,但它也不起作用。
import numpy as np
from sklearn.base import BaseEstimator, TransformerMixin
from gensim.models import Word2Vec
class GensimWord2VecVectorizer(BaseEstimator, TransformerMixin):
    """Sklearn-compatible transformer wrapping a gensim ``Word2Vec`` model.

    ``fit`` trains the embedding model on the tokenized sentences in ``X``;
    ``transform`` turns each token sequence into the mean of its known word
    vectors (a zero vector when no token is in the vocabulary).
    """

    def __init__(self, size=100, alpha=0.025, window=5, min_count=5, max_vocab_size=None,
                 sample=0.001, seed=1, workers=3, min_alpha=0.0001, sg=0, hs=0, negative=5,
                 ns_exponent=0.75, cbow_mean=1, hashfxn=hash, iter=5, null_word=0,
                 trim_rule=None, sorted_vocab=1, batch_words=10000, compute_loss=False,
                 callbacks=(), max_final_vocab=None):
        # Every constructor argument is stored verbatim under the same name so
        # that sklearn's get_params()/set_params() (used by GridSearchCV when
        # cloning estimators) keeps working.
        self.size = size
        self.alpha = alpha
        self.window = window
        self.min_count = min_count
        self.max_vocab_size = max_vocab_size
        self.sample = sample
        self.seed = seed
        self.workers = workers
        self.min_alpha = min_alpha
        self.sg = sg
        self.hs = hs
        self.negative = negative
        self.ns_exponent = ns_exponent
        self.cbow_mean = cbow_mean
        self.hashfxn = hashfxn
        self.iter = iter
        self.null_word = null_word
        self.trim_rule = trim_rule
        self.sorted_vocab = sorted_vocab
        self.batch_words = batch_words
        self.compute_loss = compute_loss
        self.callbacks = callbacks
        self.max_final_vocab = max_final_vocab

    def fit(self, X, y=None):
        """Train a Word2Vec model on the token sequences in ``X``."""
        # Forward every stored hyperparameter to gensim unchanged
        # (parameter names match the gensim 3.x Word2Vec signature).
        w2v_kwargs = dict(
            size=self.size, alpha=self.alpha, window=self.window,
            min_count=self.min_count, max_vocab_size=self.max_vocab_size,
            sample=self.sample, seed=self.seed, workers=self.workers,
            min_alpha=self.min_alpha, sg=self.sg, hs=self.hs,
            negative=self.negative, ns_exponent=self.ns_exponent,
            cbow_mean=self.cbow_mean, hashfxn=self.hashfxn, iter=self.iter,
            null_word=self.null_word, trim_rule=self.trim_rule,
            sorted_vocab=self.sorted_vocab, batch_words=self.batch_words,
            compute_loss=self.compute_loss, callbacks=self.callbacks,
            max_final_vocab=self.max_final_vocab,
        )
        self.model_ = Word2Vec(sentences=X, corpus_file=None, **w2v_kwargs)
        return self

    def transform(self, X):
        """Map every token sequence in ``X`` to one embedding row."""
        return np.array([self._get_embedding(tokens) for tokens in X])

    def _get_embedding(self, words):
        # Keep only tokens the trained model actually knows.
        known = [word for word in words if word in self.model_.wv.vocab]
        if not known:
            # No in-vocabulary token: fall back to an all-zero vector.
            return np.zeros(self.size)
        vectors = np.zeros((len(known), self.size), dtype=np.float32)
        for row, word in enumerate(known):
            vectors[row] = self.model_.wv[word]
        return vectors.mean(axis=0)
# Word2Vec
# Unpickling requires GensimWord2VecVectorizer to be importable under the same
# module path it had when the pickle was written.
model_EXT_WV_SV = joblib.load('utility/model/MachineLearning/SVM/model_Word2Vec_sv_cEXT.pkl')
GitHub代码:https://github.com/Juned-Ansari/test
Pickle 文件:https://github.com/Juned-Ansari/test/blob/main/model_Word2Vec_sv_cEXT.pkl
Flask 网络应用程序:https://github.com/Juned-Ansari/test/tree/main/WebApp
在您的 Flask Web 应用的 Python 文件中导入 GensimWord2VecVectorizer。
来自 https://docs.python.org/3/library/pickle.html:
pickle
can save and restore class instances transparently, however the class definition must be importable and live in the same module as when the object was stored.
The following types can be pickled:
- ...
- classes that are defined at the top level of a module
- instances of such classes ...
考虑您的目录结构:
├── WebApp/
│ └── app.py
└── Untitled.ipynb
假设您 flask run
来自 WebApp/
,那么 app
是一个 top-level 模块。
首先,将 class GensimWord2VecVectorizer
移动到 WebApp/app.py
的顶层。
接下来,在您的 Jupyter Notebook 中,导入 GensimWord2VecVectorizer
并欺骗 pickle
认为它来自 top-level app
模块:
# In the notebook: import the class from the Flask app package, then make
# pickle record its module as the top-level 'app' instead of '__main__'.
from WebApp.app import GensimWord2VecVectorizer
GensimWord2VecVectorizer.__module__ = 'app'
import sys
# Alias 'app' to the real module so pickle can resolve it while dumping.
sys.modules['app'] = sys.modules['WebApp.app']
那么你应该可以 dump
和 load
pickle 文件。
Google Colab
如果导入本地模块很麻烦,请改为:
# Relabel the class so pickle serializes it as app.GensimWord2VecVectorizer.
GensimWord2VecVectorizer.__module__ = 'app'
import sys
# type(sys) is the module type: build a synthetic top-level 'app' module and
# register it, so no local import of the Flask code is needed (e.g. on Colab).
app = sys.modules['app'] = type(sys)('app')
app.GensimWord2VecVectorizer = GensimWord2VecVectorizer
那么你应该可以 dump
和 load
pickle 文件。
我为 word2vec 矢量化创建了一个 class,它运行良好。 但是当我创建一个模型 pickle 文件并在 Flask 应用程序中使用该 pickle 文件时,我收到如下错误:
AttributeError: module
'__main__'
has no attribute 'GensimWord2VecVectorizer'
我正在 Google Colab 上创建模型。
Jupyter Notebook 中的代码:
# Word2Vec Model
import numpy as np
from sklearn.base import BaseEstimator, TransformerMixin
from gensim.models import Word2Vec
class GensimWord2VecVectorizer(BaseEstimator, TransformerMixin):
    """Sklearn-compatible transformer wrapping a gensim ``Word2Vec`` model.

    ``fit`` trains the embedding model on the tokenized sentences in ``X``;
    ``transform`` turns each token sequence into the mean of its known word
    vectors (a zero vector when no token is in the vocabulary).
    """

    def __init__(self, size=100, alpha=0.025, window=5, min_count=5, max_vocab_size=None,
                 sample=0.001, seed=1, workers=3, min_alpha=0.0001, sg=0, hs=0, negative=5,
                 ns_exponent=0.75, cbow_mean=1, hashfxn=hash, iter=5, null_word=0,
                 trim_rule=None, sorted_vocab=1, batch_words=10000, compute_loss=False,
                 callbacks=(), max_final_vocab=None):
        # Every constructor argument is stored verbatim under the same name so
        # that sklearn's get_params()/set_params() (used by GridSearchCV when
        # cloning estimators) keeps working.
        self.size = size
        self.alpha = alpha
        self.window = window
        self.min_count = min_count
        self.max_vocab_size = max_vocab_size
        self.sample = sample
        self.seed = seed
        self.workers = workers
        self.min_alpha = min_alpha
        self.sg = sg
        self.hs = hs
        self.negative = negative
        self.ns_exponent = ns_exponent
        self.cbow_mean = cbow_mean
        self.hashfxn = hashfxn
        self.iter = iter
        self.null_word = null_word
        self.trim_rule = trim_rule
        self.sorted_vocab = sorted_vocab
        self.batch_words = batch_words
        self.compute_loss = compute_loss
        self.callbacks = callbacks
        self.max_final_vocab = max_final_vocab

    def fit(self, X, y=None):
        """Train a Word2Vec model on the token sequences in ``X``."""
        # Forward every stored hyperparameter to gensim unchanged
        # (parameter names match the gensim 3.x Word2Vec signature).
        w2v_kwargs = dict(
            size=self.size, alpha=self.alpha, window=self.window,
            min_count=self.min_count, max_vocab_size=self.max_vocab_size,
            sample=self.sample, seed=self.seed, workers=self.workers,
            min_alpha=self.min_alpha, sg=self.sg, hs=self.hs,
            negative=self.negative, ns_exponent=self.ns_exponent,
            cbow_mean=self.cbow_mean, hashfxn=self.hashfxn, iter=self.iter,
            null_word=self.null_word, trim_rule=self.trim_rule,
            sorted_vocab=self.sorted_vocab, batch_words=self.batch_words,
            compute_loss=self.compute_loss, callbacks=self.callbacks,
            max_final_vocab=self.max_final_vocab,
        )
        self.model_ = Word2Vec(sentences=X, corpus_file=None, **w2v_kwargs)
        return self

    def transform(self, X):
        """Map every token sequence in ``X`` to one embedding row."""
        return np.array([self._get_embedding(tokens) for tokens in X])

    def _get_embedding(self, words):
        # Keep only tokens the trained model actually knows.
        known = [word for word in words if word in self.model_.wv.vocab]
        if not known:
            # No in-vocabulary token: fall back to an all-zero vector.
            return np.zeros(self.size)
        vectors = np.zeros((len(known), self.size), dtype=np.float32)
        for row, word in enumerate(known):
            vectors[row] = self.model_.wv[word]
        return vectors.mean(axis=0)
# column transformer
# Apply the custom Word2Vec vectorizer to the 'STATUS' text column only and
# drop every remaining column.
from sklearn.compose import ColumnTransformer
ct = ColumnTransformer([
('step1', GensimWord2VecVectorizer(), 'STATUS')
], remainder='drop')
# Create Model
from sklearn.svm import SVC
from sklearn.pipeline import Pipeline
from sklearn.model_selection import GridSearchCV
import pickle
import numpy as np
import dill
import torch
# ##########
# SVC - support vector classifier
# ##########
# defining parameter range
# Small grid over C/gamma; 'probability' fixed to True so the refit SVC
# exposes probability estimates.
hyperparameters = {'C': [0.1, 1],
'gamma': [1, 0.1],
'kernel': ['rbf'],
'probability': [True]}
# Full pipeline: raw text column -> mean Word2Vec embedding -> grid-searched SVC.
model_sv = Pipeline([
('column_transformers', ct),
('model', GridSearchCV(SVC(), hyperparameters,
refit=True, verbose=3)),
])
# NOTE(review): X_train / y_train are assumed to be defined earlier in the
# notebook — not visible here.
model_sv_cEXT = model_sv.fit(X_train, y_train['cEXT'])
# Save the trained cEXT - SVM Model.
# Dumping from a notebook records the vectorizer class as
# '__main__.GensimWord2VecVectorizer', which is why loading it from another
# module later raises AttributeError.
import joblib
joblib.dump(model_sv_cEXT, 'model_Word2Vec_sv_cEXT.pkl')
Flask 应用程序中的代码:
# Word2Vec
# Unpickling requires GensimWord2VecVectorizer to be importable under the same
# module path it had when the pickle was written.
model_EXT_WV_SV = joblib.load('utility/model/MachineLearning/SVM/model_Word2Vec_sv_cEXT.pkl')
我试图将相同的 class 复制到我的 Flask 文件中,但它也不起作用。
import numpy as np
from sklearn.base import BaseEstimator, TransformerMixin
from gensim.models import Word2Vec
class GensimWord2VecVectorizer(BaseEstimator, TransformerMixin):
    """Sklearn-compatible transformer wrapping a gensim ``Word2Vec`` model.

    ``fit`` trains the embedding model on the tokenized sentences in ``X``;
    ``transform`` turns each token sequence into the mean of its known word
    vectors (a zero vector when no token is in the vocabulary).
    """

    def __init__(self, size=100, alpha=0.025, window=5, min_count=5, max_vocab_size=None,
                 sample=0.001, seed=1, workers=3, min_alpha=0.0001, sg=0, hs=0, negative=5,
                 ns_exponent=0.75, cbow_mean=1, hashfxn=hash, iter=5, null_word=0,
                 trim_rule=None, sorted_vocab=1, batch_words=10000, compute_loss=False,
                 callbacks=(), max_final_vocab=None):
        # Every constructor argument is stored verbatim under the same name so
        # that sklearn's get_params()/set_params() (used by GridSearchCV when
        # cloning estimators) keeps working.
        self.size = size
        self.alpha = alpha
        self.window = window
        self.min_count = min_count
        self.max_vocab_size = max_vocab_size
        self.sample = sample
        self.seed = seed
        self.workers = workers
        self.min_alpha = min_alpha
        self.sg = sg
        self.hs = hs
        self.negative = negative
        self.ns_exponent = ns_exponent
        self.cbow_mean = cbow_mean
        self.hashfxn = hashfxn
        self.iter = iter
        self.null_word = null_word
        self.trim_rule = trim_rule
        self.sorted_vocab = sorted_vocab
        self.batch_words = batch_words
        self.compute_loss = compute_loss
        self.callbacks = callbacks
        self.max_final_vocab = max_final_vocab

    def fit(self, X, y=None):
        """Train a Word2Vec model on the token sequences in ``X``."""
        # Forward every stored hyperparameter to gensim unchanged
        # (parameter names match the gensim 3.x Word2Vec signature).
        w2v_kwargs = dict(
            size=self.size, alpha=self.alpha, window=self.window,
            min_count=self.min_count, max_vocab_size=self.max_vocab_size,
            sample=self.sample, seed=self.seed, workers=self.workers,
            min_alpha=self.min_alpha, sg=self.sg, hs=self.hs,
            negative=self.negative, ns_exponent=self.ns_exponent,
            cbow_mean=self.cbow_mean, hashfxn=self.hashfxn, iter=self.iter,
            null_word=self.null_word, trim_rule=self.trim_rule,
            sorted_vocab=self.sorted_vocab, batch_words=self.batch_words,
            compute_loss=self.compute_loss, callbacks=self.callbacks,
            max_final_vocab=self.max_final_vocab,
        )
        self.model_ = Word2Vec(sentences=X, corpus_file=None, **w2v_kwargs)
        return self

    def transform(self, X):
        """Map every token sequence in ``X`` to one embedding row."""
        return np.array([self._get_embedding(tokens) for tokens in X])

    def _get_embedding(self, words):
        # Keep only tokens the trained model actually knows.
        known = [word for word in words if word in self.model_.wv.vocab]
        if not known:
            # No in-vocabulary token: fall back to an all-zero vector.
            return np.zeros(self.size)
        vectors = np.zeros((len(known), self.size), dtype=np.float32)
        for row, word in enumerate(known):
            vectors[row] = self.model_.wv[word]
        return vectors.mean(axis=0)
# Word2Vec
# Unpickling requires GensimWord2VecVectorizer to be importable under the same
# module path it had when the pickle was written.
model_EXT_WV_SV = joblib.load('utility/model/MachineLearning/SVM/model_Word2Vec_sv_cEXT.pkl')
GitHub代码:https://github.com/Juned-Ansari/test
Pickle 文件:https://github.com/Juned-Ansari/test/blob/main/model_Word2Vec_sv_cEXT.pkl
Flask 网络应用程序:https://github.com/Juned-Ansari/test/tree/main/WebApp
在您的 Flask Web 应用的 Python 文件中导入 GensimWord2VecVectorizer。
来自 https://docs.python.org/3/library/pickle.html:
pickle
can save and restore class instances transparently, however the class definition must be importable and live in the same module as when the object was stored.
The following types can be pickled:
- ...
- classes that are defined at the top level of a module
- instances of such classes ...
考虑您的目录结构:
├── WebApp/
│ └── app.py
└── Untitled.ipynb
假设您 flask run
来自 WebApp/
,那么 app
是一个 top-level 模块。
首先,将 class GensimWord2VecVectorizer
移动到 WebApp/app.py
的顶层。
接下来,在您的 Jupyter Notebook 中,导入 GensimWord2VecVectorizer
并欺骗 pickle
认为它来自 top-level app
模块:
# In the notebook: import the class from the Flask app package, then make
# pickle record its module as the top-level 'app' instead of '__main__'.
from WebApp.app import GensimWord2VecVectorizer
GensimWord2VecVectorizer.__module__ = 'app'
import sys
# Alias 'app' to the real module so pickle can resolve it while dumping.
sys.modules['app'] = sys.modules['WebApp.app']
那么你应该可以 dump
和 load
pickle 文件。
Google Colab
如果导入本地模块很麻烦,请改为:
# Relabel the class so pickle serializes it as app.GensimWord2VecVectorizer.
GensimWord2VecVectorizer.__module__ = 'app'
import sys
# type(sys) is the module type: build a synthetic top-level 'app' module and
# register it, so no local import of the Flask code is needed (e.g. on Colab).
app = sys.modules['app'] = type(sys)('app')
app.GensimWord2VecVectorizer = GensimWord2VecVectorizer
那么你应该可以 dump
和 load
pickle 文件。