不能在 hyperas 中使用中间函数
Can't use intermediate function in hyperas
我正在尝试使用 hyperas 来优化我的 keras 模型,但我不断得到 NameError: processing (function_name) is not defined.
我已经看过 hyperas 的这个示例和这个示例,并且完全照着做了,但对我似乎不起作用。
这是我的代码:
def processing():
    """Load the reviews and GloVe vectors and build the model inputs.

    Reads ``balanced_all.json``, derives a binary label from ``rating``,
    tokenizes ``review_text`` into sequences padded to ``maxlen``, encodes
    the labels with ``pd.get_dummies``, and fills an embedding matrix from
    the 100-dimensional GloVe file.

    Returns:
        tuple: ``(sequences, labels, embedding_matrix)``.
    """
    maxlen = 100
    max_words = 2000
    embedding_dim = 100
    glove_dir = '/home/uttam/Documents/Thesis/Glove'

    df = pd.read_json('balanced_all.json')

    def label(row):
        if row['rating'] < 3:
            return 0
        if row['rating'] > 3:
            return 1
        # A rating of exactly 3 matches neither branch; the original fell
        # through and returned None implicitly, which is kept here.
        return None

    df['label'] = df.apply(label, axis=1)
    df = df[['review_text', 'label']]

    tokenizer = Tokenizer(num_words=max_words)
    tokenizer.fit_on_texts(df['review_text'].values)
    sequences = tokenizer.texts_to_sequences(df['review_text'].values)
    word_index = tokenizer.word_index
    sequences = pad_sequences(sequences, maxlen=maxlen)
    labels = pd.get_dummies(df['label']).values

    # Each GloVe line is "<word> <v1> <v2> ...". The context manager closes
    # the file even if a line fails to parse.
    embeddings_index = {}
    glove_path = os.path.join(glove_dir, 'glove.6B.100d.txt')
    with open(glove_path, 'r', encoding='utf-8') as f:
        for line in f:
            values = line.split()
            word = values[0]
            coefs = np.asarray(values[1:], dtype='float32')
            embeddings_index[word] = coefs

    # Rows stay zero for words without a GloVe vector; words whose index is
    # max_words or above are skipped.
    embedding_matrix = np.zeros((max_words, embedding_dim))
    for word, i in word_index.items():
        if i >= max_words:
            continue
        embedding_vector = embeddings_index.get(word)
        if embedding_vector is not None:
            embedding_matrix[i] = embedding_vector

    return sequences, labels, embedding_matrix
def data():
    """Provide the train/test split consumed by hyperas.

    The result is ``(x_train, y_train, x_test, y_test)``, taken from a
    split with ``test_size=0.33`` and a fixed ``random_state``.
    """
    # Call processing() once and unpack: every call re-reads the dataset
    # and the GloVe file, so indexing two separate calls doubled the work.
    sequences, labels, _ = processing()
    x_train, x_test, y_train, y_test = train_test_split(
        sequences, labels, test_size=0.33, random_state=42)
    return x_train, y_train, x_test, y_test
def create_model(x_train, y_train, x_test, y_test):
    """Build, train and score one LSTM candidate for the hyperas search.

    The double-brace expressions are hyperas search-space templates for the
    dropout rate, the optimizer and the batch size. x_test and y_test are
    accepted but not used here; validation uses a 20% split of the
    training data.

    Returns:
        dict: the negated best validation accuracy under ``'loss'``,
        ``STATUS_OK`` under ``'status'`` and the model under ``'model'``.
    """
    vocab_size = 2000
    vector_size = 100
    glove_weights = processing()[2]

    model = Sequential()
    model.add(Embedding(vocab_size, vector_size, input_length=100))
    model.add(LSTM(128))
    model.add(Dropout({{uniform(0, 1)}}))
    model.add(Dense(2, activation='sigmoid'))

    # Load the pretrained vectors into the embedding layer and freeze it.
    embedding_layer = model.layers[0]
    embedding_layer.set_weights([glove_weights])
    embedding_layer.trainable = False

    model.compile(
        optimizer={{choice(['rmsprop', 'adam', 'sgd'])}},
        loss='binary_crossentropy',
        metrics=['acc'],
    )
    history = model.fit(
        x_train,
        y_train,
        epochs=20,
        batch_size={{choice([64, 128])}},
        validation_split=0.2,
    )
    model.save('pre_trained_glove_model.h5')

    # The search minimizes 'loss', so the accuracy is negated.
    best_val_acc = np.amax(history.history['val_acc'])
    print('Best validation acc of epoch:', best_val_acc)
    return {'loss': -best_val_acc, 'status': STATUS_OK, 'model': model}
if __name__ == '__main__':
    # Run the hyperparameter search for at most five evaluations.
    best_run, best_model = optim.minimize(
        model=create_model,
        data=data,
        algo=tpe.suggest,
        max_evals=5,
        trials=Trials(),
    )

    # Rebuild the split so the best model can be scored on the test data.
    x_train, y_train, x_test, y_test = data()

    print("Evalutation of best performing model:")
    print(best_model.evaluate(x_test, y_test))

    print("Best performing model chosen hyper-parameters:")
    print(best_run)
我其实并不需要中间函数,之所以创建它,是因为 hyperas 找不到全局变量。例如,如果我在 hyperas 函数(比如 create_model())之外定义了一个变量 x,它就会报 NameError: x is not defined。
我需要这样做,因为正如你所看到的,我正在使用预训练的 GloVe 词嵌入。我无法把所有内容都只放在 data() 或 create_model() 其中一个函数里。例如,data() 需要变量 sequences 和 labels,而 create_model() 需要变量 embedding_matrix,所以(据我所知)没有办法把所有内容拆分到这两个函数中。
唯一对我有效的方法是把所有内容同时放进 data() 和 create_model() 两个函数中,这显然效率很低,也不是正确的做法。
回答得有点晚了,但为了将来参考:你说得对,hyperas 不识别全局变量。你可以在调用 minimize 时通过 functions 列表把该函数传进去:
# List processing under `functions` so the hyperas run can call it.
best_run, best_model = optim.minimize(
    model=create_model,
    data=data,
    functions=[processing],  # <<
    algo=tpe.suggest,
    max_evals=5,
    trials=Trials(),
)
正如你提到的,如果需要在 hyperas 中传递一个全局变量,可以选择以下方式之一:
使用 data() 返回它:
def data():
    ## ... my code ...
    # Return the extra value (foo) alongside the usual four arrays.
    return x_train, y_train, x_test, y_test, foo


def create_model(x_train, y_train, x_test, y_test, foo):
    # foo arrives as the fifth value returned by data().
    ...  # model code that uses foo goes here
或定义一个新函数并将其传递到函数列表中:
def my_funct():
    # Helper whose only job is to hand foo to the model function.
    return foo


def data():
    return x_train, y_train, x_test, y_test


def create_model(x_train, y_train, x_test, y_test):
    # Fetch foo through the helper instead of reading a global.
    foo = my_funct()


# my_funct is listed under `functions` so create_model can call it.
best_run, best_model = optim.minimize(
    model=create_model,
    data=data,
    functions=[my_funct],  # << foo
    algo=tpe.suggest,
    max_evals=5,
    trials=Trials(),
)
我正在尝试使用 hyperas 来优化我的 keras 模型,但我不断得到 NameError: processing (function_name) is not defined.
我已经看过 hyperas 的这个示例和这个示例,并且完全照着做了,但对我似乎不起作用。
这是我的代码:
def processing():
    """Load the reviews and GloVe vectors and build the model inputs.

    Reads ``balanced_all.json``, derives a binary label from ``rating``,
    tokenizes ``review_text`` into sequences padded to ``maxlen``, encodes
    the labels with ``pd.get_dummies``, and fills an embedding matrix from
    the 100-dimensional GloVe file.

    Returns:
        tuple: ``(sequences, labels, embedding_matrix)``.
    """
    maxlen = 100
    max_words = 2000
    embedding_dim = 100
    glove_dir = '/home/uttam/Documents/Thesis/Glove'

    df = pd.read_json('balanced_all.json')

    def label(row):
        if row['rating'] < 3:
            return 0
        if row['rating'] > 3:
            return 1
        # A rating of exactly 3 matches neither branch; the original fell
        # through and returned None implicitly, which is kept here.
        return None

    df['label'] = df.apply(label, axis=1)
    df = df[['review_text', 'label']]

    tokenizer = Tokenizer(num_words=max_words)
    tokenizer.fit_on_texts(df['review_text'].values)
    sequences = tokenizer.texts_to_sequences(df['review_text'].values)
    word_index = tokenizer.word_index
    sequences = pad_sequences(sequences, maxlen=maxlen)
    labels = pd.get_dummies(df['label']).values

    # Each GloVe line is "<word> <v1> <v2> ...". The context manager closes
    # the file even if a line fails to parse.
    embeddings_index = {}
    glove_path = os.path.join(glove_dir, 'glove.6B.100d.txt')
    with open(glove_path, 'r', encoding='utf-8') as f:
        for line in f:
            values = line.split()
            word = values[0]
            coefs = np.asarray(values[1:], dtype='float32')
            embeddings_index[word] = coefs

    # Rows stay zero for words without a GloVe vector; words whose index is
    # max_words or above are skipped.
    embedding_matrix = np.zeros((max_words, embedding_dim))
    for word, i in word_index.items():
        if i >= max_words:
            continue
        embedding_vector = embeddings_index.get(word)
        if embedding_vector is not None:
            embedding_matrix[i] = embedding_vector

    return sequences, labels, embedding_matrix
def data():
    """Provide the train/test split consumed by hyperas.

    The result is ``(x_train, y_train, x_test, y_test)``, taken from a
    split with ``test_size=0.33`` and a fixed ``random_state``.
    """
    # Call processing() once and unpack: every call re-reads the dataset
    # and the GloVe file, so indexing two separate calls doubled the work.
    sequences, labels, _ = processing()
    x_train, x_test, y_train, y_test = train_test_split(
        sequences, labels, test_size=0.33, random_state=42)
    return x_train, y_train, x_test, y_test
def create_model(x_train, y_train, x_test, y_test):
    """Build, train and score one LSTM candidate for the hyperas search.

    The double-brace expressions are hyperas search-space templates for the
    dropout rate, the optimizer and the batch size. x_test and y_test are
    accepted but not used here; validation uses a 20% split of the
    training data.

    Returns:
        dict: the negated best validation accuracy under ``'loss'``,
        ``STATUS_OK`` under ``'status'`` and the model under ``'model'``.
    """
    vocab_size = 2000
    vector_size = 100
    glove_weights = processing()[2]

    model = Sequential()
    model.add(Embedding(vocab_size, vector_size, input_length=100))
    model.add(LSTM(128))
    model.add(Dropout({{uniform(0, 1)}}))
    model.add(Dense(2, activation='sigmoid'))

    # Load the pretrained vectors into the embedding layer and freeze it.
    embedding_layer = model.layers[0]
    embedding_layer.set_weights([glove_weights])
    embedding_layer.trainable = False

    model.compile(
        optimizer={{choice(['rmsprop', 'adam', 'sgd'])}},
        loss='binary_crossentropy',
        metrics=['acc'],
    )
    history = model.fit(
        x_train,
        y_train,
        epochs=20,
        batch_size={{choice([64, 128])}},
        validation_split=0.2,
    )
    model.save('pre_trained_glove_model.h5')

    # The search minimizes 'loss', so the accuracy is negated.
    best_val_acc = np.amax(history.history['val_acc'])
    print('Best validation acc of epoch:', best_val_acc)
    return {'loss': -best_val_acc, 'status': STATUS_OK, 'model': model}
if __name__ == '__main__':
    # Run the hyperparameter search for at most five evaluations.
    best_run, best_model = optim.minimize(
        model=create_model,
        data=data,
        algo=tpe.suggest,
        max_evals=5,
        trials=Trials(),
    )

    # Rebuild the split so the best model can be scored on the test data.
    x_train, y_train, x_test, y_test = data()

    print("Evalutation of best performing model:")
    print(best_model.evaluate(x_test, y_test))

    print("Best performing model chosen hyper-parameters:")
    print(best_run)
我其实并不需要中间函数,之所以创建它,是因为 hyperas 找不到全局变量。例如,如果我在 hyperas 函数(比如 create_model())之外定义了一个变量 x,它就会报 NameError: x is not defined。
我需要这样做,因为正如你所看到的,我正在使用预训练的 GloVe 词嵌入。我无法把所有内容都只放在 data() 或 create_model() 其中一个函数里。例如,data() 需要变量 sequences 和 labels,而 create_model() 需要变量 embedding_matrix,所以(据我所知)没有办法把所有内容拆分到这两个函数中。
唯一对我有效的方法是把所有内容同时放进 data() 和 create_model() 两个函数中,这显然效率很低,也不是正确的做法。
回答得有点晚了,但为了将来参考:你说得对,hyperas 不识别全局变量。你可以在调用 minimize 时通过 functions 列表把该函数传进去:
# List processing under `functions` so the hyperas run can call it.
best_run, best_model = optim.minimize(
    model=create_model,
    data=data,
    functions=[processing],  # <<
    algo=tpe.suggest,
    max_evals=5,
    trials=Trials(),
)
正如你提到的,如果需要在 hyperas 中传递一个全局变量,可以选择以下方式之一:
使用 data() 返回它:
def data():
    ## ... my code ...
    # Return the extra value (foo) alongside the usual four arrays.
    return x_train, y_train, x_test, y_test, foo


def create_model(x_train, y_train, x_test, y_test, foo):
    # foo arrives as the fifth value returned by data().
    ...  # model code that uses foo goes here
或定义一个新函数并将其传递到函数列表中:
def my_funct():
    # Helper whose only job is to hand foo to the model function.
    return foo


def data():
    return x_train, y_train, x_test, y_test


def create_model(x_train, y_train, x_test, y_test):
    # Fetch foo through the helper instead of reading a global.
    foo = my_funct()


# my_funct is listed under `functions` so create_model can call it.
best_run, best_model = optim.minimize(
    model=create_model,
    data=data,
    functions=[my_funct],  # << foo
    algo=tpe.suggest,
    max_evals=5,
    trials=Trials(),
)