estimator.train throws ValueError: model_fn should return an EstimatorSpec
estimator.train throws ValueError: model_fn should return an EstimatorSpec
这是我正在使用的代码...
我在第 304 行对我来说安装了一个断点...
estimator.train(input_fn=train_input_fn,max_steps=num_train_steps)
有人看过吗?我确定我安装了正确版本的 TensorFlow 和 BERT。
完整的堆栈跟踪如下....
Exception has occurred: ValueError
model_fn should return an EstimatorSpec.
File "C:\Program Files\Python36\Lib\site-packages\tensorflow_estimator\python\estimator\estimator.py", line 1153, in _call_model_fn
raise ValueError('model_fn should return an EstimatorSpec.')
File "C:\Program Files\Python36\Lib\site-packages\tensorflow_estimator\python\estimator\estimator.py", line 1191, in _train_model_default
features, labels, ModeKeys.TRAIN, self.config)
File "C:\Program Files\Python36\Lib\site-packages\tensorflow_estimator\python\estimator\estimator.py", line 1161, in _train_model
return self._train_model_default(input_fn, hooks, saving_listeners)
File "C:\Program Files\Python36\Lib\site-packages\tensorflow_estimator\python\estimator\estimator.py", line 370, in train
loss = self._train_model(input_fn, hooks, saving_listeners)
File "C:\Users\brownru\eclipse-workspace\tiaaNLPPython\org\tiaa\ai\penelope\bertNLP\sentiment\sentiment.py", line 304, in <module>
estimator.train(input_fn=train_input_fn, max_steps=num_train_steps)
File "C:\Program Files\Python36\Lib\runpy.py", line 85, in _run_code
exec(code, run_globals)
File "C:\Program Files\Python36\Lib\runpy.py", line 96, in _run_module_code
mod_name, mod_spec, pkg_name, script_name)
File "C:\Program Files\Python36\Lib\runpy.py", line 263, in run_path
pkg_name=pkg_name, script_name=fname)
ValueError: model_fn should return an EstimatorSpec.
此代码是我尝试从此处 运行 一些 Google colab 代码 -
# Copyright 2019 Google Inc.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
# http://www.apache.org/licenses/LICENSE-2.0
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# install --proxy http://proxy.ops.tiaa-cref.org:8080 tensorFlow
import pandas as pd
import tensorflow as tf
import tensorflow_hub as hub
import tensorflow_estimator as tfe
from datetime import datetime
import bert
from bert import run_classifier
from bert import optimization
from bert import tokenization
# Set the output directory for saving model file
# Optionally, set a GCP bucket location
OUTPUT_DIR = r'C:\Users\brownru\Documents\npsExplanationComplains\sentimentOutput'
#@markdown Whether or not to clear/delete the directory and create a new one
DO_DELETE = True #@param {type:"boolean"}
#@markdown Set USE_BUCKET and BUCKET if you want to (optionally) store model output on GCP bucket.
USE_BUCKET = False #@param {type:"boolean"}
BUCKET = 'BUCKET_NAME' #@param {type:"string"}
if USE_BUCKET:
OUTPUT_DIR = 'gs://{}/{}'.format(BUCKET, OUTPUT_DIR)
#from google.colab import auto
#auth.authenticate_user()
if DO_DELETE:
try:
tf.gfile.DeleteRecursively(OUTPUT_DIR)
except:
# Doesn't matter if the directory didn't exist
pass
tf.gfile.MakeDirs(OUTPUT_DIR)
print('***** Model output directory: {} *****'.format(OUTPUT_DIR))
'''
First, let's download the dataset, hosted by Stanford. The code below, which downloads, extracts, and imports the IMDB Large Movie Review Dataset, is borrowed from [this Tensorflow tutorial](https://www.tensorflow.org/hub/tutorials/text_classification_with_tf_hub).
'''
from tensorflow import keras
import os
import re
# Load all files from a directory in a DataFrame.
def load_directory_data(directory):
data = {}
data["sentence"] = []
data["sentiment"] = []
for file_path in os.listdir(directory):
with tf.gfile.GFile(os.path.join(directory, file_path), "r") as f:
data["sentence"].append(f.read())
data["sentiment"].append(re.match("\d+_(\d+)\.txt", file_path).group(1))
return pd.DataFrame.from_dict(data)
# Merge positive and negative examples, add a polarity column and shuffle.
def load_dataset(directory):
pos_df = load_directory_data(os.path.join(directory, "pos"))
neg_df = load_directory_data(os.path.join(directory, "neg"))
pos_df["polarity"] = 1
neg_df["polarity"] = 0
return pd.concat([pos_df, neg_df]).sample(frac=1).reset_index(drop=True)
# Download and process the dataset files.
def download_and_load_datasets():
#dataset = tf.keras.utils.get_file(fname="aclImdb.tar.gz", origin="http://chapdc3sas51.ops.tiaa-cref.org/nlpAssets/aclImdb_v1.tar.gz", extract=True)
trainPath = r'C:\Users\brownru\.keras\datasets\aclImdb\train'
testPath = r'C:\Users\brownru\.keras\datasets\aclImdb\test'
train_df = load_dataset(trainPath)
test_df = load_dataset(testPath)
return train_df, test_df
train, test = download_and_load_datasets()
#To keep training fast, we'll take a sample of 5000 train and test examples, respectively.
train = train.sample(5000)
test = test.sample(5000)
train.columns
#Index(['sentence', 'sentiment', 'polarity'], dtype='object')
#For us, our input data is the 'sentence' column and our label is the 'polarity' column (0, 1 for negative and positive, respectively)
DATA_COLUMN = 'sentence'
LABEL_COLUMN = 'polarity'
# label_list is the list of labels, i.e. True, False or 0, 1 or 'dog', 'cat'
label_list = [0, 1]
#Data Preprocessing We'll need to transform our data into a format BERT understands. This involves two steps. First, we create InputExample's using the constructor provided in the BERT library.
#text_a is the text we want to classify, which in this case, is the Request field in our Dataframe.
#text_b is used if we're training a model to understand the relationship between sentences (i.e. is text_b a translation of text_a? Is text_b an answer to the question asked by text_a?). This doesn't apply to our task, so we can leave text_b blank.
#label is the label for our example, i.e. True, False
# Use the InputExample class from BERT's run_classifier code to create examples from the data
train_InputExamples = train.apply(lambda x: bert.run_classifier.InputExample(guid=None, # Globally unique ID for bookkeeping, unused in this example
text_a = x[DATA_COLUMN],
text_b = None,
label = x[LABEL_COLUMN]), axis = 1)
test_InputExamples = test.apply(lambda x: bert.run_classifier.InputExample(guid=None,
text_a = x[DATA_COLUMN],
text_b = None,
label = x[LABEL_COLUMN]), axis = 1)
# This is a path to an uncased (all lowercase) version of BERT
BERT_MODEL_HUB = "http://chapdc3sas51.ops.tiaa-cref.org/nlpAssets/1.tar.gz"
def create_tokenizer_from_hub_module():
with tf.Graph().as_default():
bert_module = hub.Module(BERT_MODEL_HUB)
tokenization_info = bert_module(signature="tokenization_info", as_dict=True)
with tf.Session() as sess:
vocab_file, do_lower_case = sess.run([tokenization_info["vocab_file"],tokenization_info["do_lower_case"]])
return bert.tokenization.FullTokenizer(vocab_file=vocab_file, do_lower_case=do_lower_case)
tokenizer = create_tokenizer_from_hub_module()
tokenizer.tokenize("This here's an example of using the BERT tokenizer")
# We'll set sequences to be at most 128 tokens long TEST.
MAX_SEQ_LENGTH = 128
# Convert our train and test features to InputFeatures that BERT understands.
train_features = bert.run_classifier.convert_examples_to_features(train_InputExamples, label_list, MAX_SEQ_LENGTH, tokenizer)
test_features = bert.run_classifier.convert_examples_to_features(test_InputExamples, label_list, MAX_SEQ_LENGTH, tokenizer)
#Creating a model
def create_model(is_predicting, input_ids, input_mask, segment_ids, labels, num_labels):
#Creates a classification model.
bert_module = hub.Module(BERT_MODEL_HUB,trainable=True)
bert_inputs = dict(input_ids=input_ids,input_mask=input_mask,segment_ids=segment_ids)
bert_outputs = bert_module(inputs=bert_inputs,signature="tokens",as_dict=True)
# Use "pooled_output" for classification tasks on an entire sentence.
# Use "sequence_outputs" for token-level output.
output_layer = bert_outputs["pooled_output"]
hidden_size = output_layer.shape[-1].value
# Create our own layer to tune for politeness data.
output_weights = tf.get_variable("output_weights", [num_labels, hidden_size],initializer=tf.truncated_normal_initializer(stddev=0.02))
output_bias = tf.get_variable("output_bias", [num_labels], initializer=tf.zeros_initializer())
with tf.variable_scope("loss"):
# Dropout helps prevent overfitting
output_layer = tf.nn.dropout(output_layer, keep_prob=0.9)
logits = tf.matmul(output_layer, output_weights, transpose_b=True)
logits = tf.nn.bias_add(logits, output_bias)
log_probs = tf.nn.log_softmax(logits, axis=-1)
# Convert labels into one-hot encoding
one_hot_labels = tf.one_hot(labels, depth=num_labels, dtype=tf.float32)
predicted_labels = tf.squeeze(tf.argmax(log_probs, axis=-1, output_type=tf.int32))
# If we're predicting, we want predicted labels and the probabilities.
if is_predicting:
return (predicted_labels, log_probs)
# If we're train/eval, compute loss between predicted and actual label
per_example_loss = tf.reduce_sum(one_hot_labels * log_probs, axis=-1)
loss = tf.reduce_mean(per_example_loss)
return (loss, predicted_labels, log_probs)
'''Next we'll wrap our model function in a model_fn_builder function that adapts our model to work for training, evaluation, and prediction.'''
# model_fn_builder actually creates our model function
# using the passed parameters for num_labels, learning_rate, etc.
def model_fn_builder(num_labels, learning_rate, num_train_steps,
num_warmup_steps):
#Returns `model_fn` closure for TPUEstimator."""
def model_fn(features, labels, mode, params): # pylint: disable=unused-argument
#"""The `model_fn` for TPUEstimator."""
input_ids = features["input_ids"]
input_mask = features["input_mask"]
segment_ids = features["segment_ids"]
label_ids = features["label_ids"]
is_predicting = (mode == tfe.estimator.ModeKeys.PREDICT)
# TRAIN and EVAL
if not is_predicting:
(loss, predicted_labels, log_probs) = create_model(is_predicting, input_ids, input_mask, segment_ids, label_ids, num_labels)
train_op = bert.optimization.create_optimizer(loss, learning_rate, num_train_steps, num_warmup_steps, use_tpu=False)
# Calculate evaluation metrics.
def metric_fn(label_ids, predicted_labels):
accuracy = tf.metrics.accuracy(label_ids, predicted_labels)
f1_score = tf.contrib.metrics.f1_score(
label_ids,
predicted_labels)
auc = tf.metrics.auc(
label_ids,
predicted_labels)
recall = tf.metrics.recall(
label_ids,
predicted_labels)
precision = tf.metrics.precision(
label_ids,
predicted_labels)
true_pos = tf.metrics.true_positives(
label_ids,
predicted_labels)
true_neg = tf.metrics.true_negatives(
label_ids,
predicted_labels)
false_pos = tf.metrics.false_positives(
label_ids,
predicted_labels)
false_neg = tf.metrics.false_negatives(
label_ids,
predicted_labels)
return {
"eval_accuracy": accuracy,
"f1_score": f1_score,
"auc": auc,
"precision": precision,
"recall": recall,
"true_positives": true_pos,
"true_negatives": true_neg,
"false_positives": false_pos,
"false_negatives": false_neg
}
eval_metrics = metric_fn(label_ids, predicted_labels)
if mode == tfe.estimator.ModeKeys.TRAIN:
return tfe.estimator.EstimatorSpec(mode=mode, loss=loss, train_op=train_op)
else:
return tfe.estimator.EstimatorSpec(mode=mode, loss=loss, eval_metric_ops=eval_metrics)
else:
(predicted_labels, log_probs) = create_model(is_predicting, input_ids, input_mask, segment_ids, label_ids, num_labels)
predictions = {'probabilities': log_probs, 'labels': predicted_labels}
return tfe.estimator.EstimatorSpec(mode, predictions=predictions)
# Return the actual model function in the closure
return model_fn
# Compute train and warmup steps from batch size
# These hyperparameters are copied from this colab notebook (https://colab.sandbox.google.com/github/tensorflow/tpu/blob/master/tools/colab/bert_finetuning_with_cloud_tpus.ipynb)
BATCH_SIZE = 32
LEARNING_RATE = 2e-5
NUM_TRAIN_EPOCHS = 3.0
# Warmup is a period of time where hte learning rate
# is small and gradually increases--usually helps training.
WARMUP_PROPORTION = 0.1
# Model configs
SAVE_CHECKPOINTS_STEPS = 500
SAVE_SUMMARY_STEPS = 100
# Compute # train and warmup steps from batch size
num_train_steps = int(len(train_features) / BATCH_SIZE * NUM_TRAIN_EPOCHS)
num_warmup_steps = int(num_train_steps * WARMUP_PROPORTION)
# Specify outpit directory and number of checkpoint steps to save
run_config = tfe.estimator.RunConfig(
model_dir=OUTPUT_DIR,
save_summary_steps=SAVE_SUMMARY_STEPS,
save_checkpoints_steps=SAVE_CHECKPOINTS_STEPS)
model_fn = model_fn_builder(
num_labels=len(label_list),
learning_rate=LEARNING_RATE,
num_train_steps=num_train_steps,
num_warmup_steps=num_warmup_steps)
estimator = tfe.estimator.Estimator(
model_fn=model_fn,
config=run_config,
params={"batch_size": BATCH_SIZE}
)
# Create an input function for training. drop_remainder = True for using TPUs.
train_input_fn = bert.run_classifier.input_fn_builder(
features=train_features,
seq_length=MAX_SEQ_LENGTH,
is_training=True,
drop_remainder=False)
#Now we train our model! For me, using a Colab notebook running on Google's GPUs, my training time was about 14 minutes.
print(f'Beginning Training!')
current_time = datetime.now()
estimator.train(input_fn=train_input_fn, max_steps=num_train_steps)
print("Training took time ", datetime.now() - current_time)
#Now let's use our test data to see how well our model did:
test_input_fn = run_classifier.input_fn_builder(
features=test_features,
seq_length=MAX_SEQ_LENGTH,
is_training=False,
drop_remainder=False)
estimator.evaluate(input_fn=test_input_fn, steps=None)
def getPrediction(in_sentences):
labels = ["Negative", "Positive"]
input_examples = [run_classifier.InputExample(guid="", text_a = x, text_b = None, label = 0) for x in in_sentences] # here, "" is just a dummy label
input_features = run_classifier.convert_examples_to_features(input_examples, label_list, MAX_SEQ_LENGTH, tokenizer)
predict_input_fn = run_classifier.input_fn_builder(features=input_features, seq_length=MAX_SEQ_LENGTH, is_training=False, drop_remainder=False)
predictions = estimator.predict(predict_input_fn)
return [(sentence, prediction['probabilities'], labels[prediction['labels']]) for sentence, prediction in zip(in_sentences, predictions)]
pred_sentences = [
"That movie was absolutely awful",
"The acting was a bit lacking",
"The film was creative and surprising",
"Absolutely fantastic!"
]
predictions = getPrediction(pred_sentences)
predictions
可怕的是,这个问题的答案全在于缩进。在上面发布的 Google Colab 示例中有一个名为 def model_fn 的函数。这似乎是另一个函数的包装函数,该函数实际创建一个模型以传递给 TensorFlow Estimator。当我在 VS 代码中调试它时,我在函数中放置了一个断点来尝试理清发生了什么,并且它一直跳过检查“false pos,false_neg 等的中间位” .
显然我在 VS Code 中编辑时以某种方式破坏了缩进,并且函数是嵌套的,以至于 pylint 没有发现任何语法问题 - 它只是跳过了函数。
修复只是从 colab notebook 中重新复制整个 def model_fn 函数,voila 它起作用了。
这是我正在使用的代码...
我在第 304 行对我来说安装了一个断点...
estimator.train(input_fn=train_input_fn,max_steps=num_train_steps)
有人看过吗?我确定我安装了正确版本的 TensorFlow 和 BERT。
完整的堆栈跟踪如下....
Exception has occurred: ValueError
model_fn should return an EstimatorSpec.
File "C:\Program Files\Python36\Lib\site-packages\tensorflow_estimator\python\estimator\estimator.py", line 1153, in _call_model_fn
raise ValueError('model_fn should return an EstimatorSpec.')
File "C:\Program Files\Python36\Lib\site-packages\tensorflow_estimator\python\estimator\estimator.py", line 1191, in _train_model_default
features, labels, ModeKeys.TRAIN, self.config)
File "C:\Program Files\Python36\Lib\site-packages\tensorflow_estimator\python\estimator\estimator.py", line 1161, in _train_model
return self._train_model_default(input_fn, hooks, saving_listeners)
File "C:\Program Files\Python36\Lib\site-packages\tensorflow_estimator\python\estimator\estimator.py", line 370, in train
loss = self._train_model(input_fn, hooks, saving_listeners)
File "C:\Users\brownru\eclipse-workspace\tiaaNLPPython\org\tiaa\ai\penelope\bertNLP\sentiment\sentiment.py", line 304, in <module>
estimator.train(input_fn=train_input_fn, max_steps=num_train_steps)
File "C:\Program Files\Python36\Lib\runpy.py", line 85, in _run_code
exec(code, run_globals)
File "C:\Program Files\Python36\Lib\runpy.py", line 96, in _run_module_code
mod_name, mod_spec, pkg_name, script_name)
File "C:\Program Files\Python36\Lib\runpy.py", line 263, in run_path
pkg_name=pkg_name, script_name=fname)
ValueError: model_fn should return an EstimatorSpec.
此代码是我尝试从此处 运行 一些 Google colab 代码 -
# Copyright 2019 Google Inc.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
# http://www.apache.org/licenses/LICENSE-2.0
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# install --proxy http://proxy.ops.tiaa-cref.org:8080 tensorFlow
import pandas as pd
import tensorflow as tf
import tensorflow_hub as hub
import tensorflow_estimator as tfe
from datetime import datetime
import bert
from bert import run_classifier
from bert import optimization
from bert import tokenization
# Set the output directory for saving model file
# Optionally, set a GCP bucket location
OUTPUT_DIR = r'C:\Users\brownru\Documents\npsExplanationComplains\sentimentOutput'
#@markdown Whether or not to clear/delete the directory and create a new one
DO_DELETE = True #@param {type:"boolean"}
#@markdown Set USE_BUCKET and BUCKET if you want to (optionally) store model output on GCP bucket.
USE_BUCKET = False #@param {type:"boolean"}
BUCKET = 'BUCKET_NAME' #@param {type:"string"}
if USE_BUCKET:
OUTPUT_DIR = 'gs://{}/{}'.format(BUCKET, OUTPUT_DIR)
#from google.colab import auto
#auth.authenticate_user()
if DO_DELETE:
try:
tf.gfile.DeleteRecursively(OUTPUT_DIR)
except:
# Doesn't matter if the directory didn't exist
pass
tf.gfile.MakeDirs(OUTPUT_DIR)
print('***** Model output directory: {} *****'.format(OUTPUT_DIR))
'''
First, let's download the dataset, hosted by Stanford. The code below, which downloads, extracts, and imports the IMDB Large Movie Review Dataset, is borrowed from [this Tensorflow tutorial](https://www.tensorflow.org/hub/tutorials/text_classification_with_tf_hub).
'''
from tensorflow import keras
import os
import re
# Load all files from a directory in a DataFrame.
def load_directory_data(directory):
data = {}
data["sentence"] = []
data["sentiment"] = []
for file_path in os.listdir(directory):
with tf.gfile.GFile(os.path.join(directory, file_path), "r") as f:
data["sentence"].append(f.read())
data["sentiment"].append(re.match("\d+_(\d+)\.txt", file_path).group(1))
return pd.DataFrame.from_dict(data)
# Merge positive and negative examples, add a polarity column and shuffle.
def load_dataset(directory):
pos_df = load_directory_data(os.path.join(directory, "pos"))
neg_df = load_directory_data(os.path.join(directory, "neg"))
pos_df["polarity"] = 1
neg_df["polarity"] = 0
return pd.concat([pos_df, neg_df]).sample(frac=1).reset_index(drop=True)
# Download and process the dataset files.
def download_and_load_datasets():
#dataset = tf.keras.utils.get_file(fname="aclImdb.tar.gz", origin="http://chapdc3sas51.ops.tiaa-cref.org/nlpAssets/aclImdb_v1.tar.gz", extract=True)
trainPath = r'C:\Users\brownru\.keras\datasets\aclImdb\train'
testPath = r'C:\Users\brownru\.keras\datasets\aclImdb\test'
train_df = load_dataset(trainPath)
test_df = load_dataset(testPath)
return train_df, test_df
train, test = download_and_load_datasets()
#To keep training fast, we'll take a sample of 5000 train and test examples, respectively.
train = train.sample(5000)
test = test.sample(5000)
train.columns
#Index(['sentence', 'sentiment', 'polarity'], dtype='object')
#For us, our input data is the 'sentence' column and our label is the 'polarity' column (0, 1 for negative and positive, respectively)
DATA_COLUMN = 'sentence'
LABEL_COLUMN = 'polarity'
# label_list is the list of labels, i.e. True, False or 0, 1 or 'dog', 'cat'
label_list = [0, 1]
#Data Preprocessing We'll need to transform our data into a format BERT understands. This involves two steps. First, we create InputExample's using the constructor provided in the BERT library.
#text_a is the text we want to classify, which in this case, is the Request field in our Dataframe.
#text_b is used if we're training a model to understand the relationship between sentences (i.e. is text_b a translation of text_a? Is text_b an answer to the question asked by text_a?). This doesn't apply to our task, so we can leave text_b blank.
#label is the label for our example, i.e. True, False
# Use the InputExample class from BERT's run_classifier code to create examples from the data
train_InputExamples = train.apply(lambda x: bert.run_classifier.InputExample(guid=None, # Globally unique ID for bookkeeping, unused in this example
text_a = x[DATA_COLUMN],
text_b = None,
label = x[LABEL_COLUMN]), axis = 1)
test_InputExamples = test.apply(lambda x: bert.run_classifier.InputExample(guid=None,
text_a = x[DATA_COLUMN],
text_b = None,
label = x[LABEL_COLUMN]), axis = 1)
# This is a path to an uncased (all lowercase) version of BERT
BERT_MODEL_HUB = "http://chapdc3sas51.ops.tiaa-cref.org/nlpAssets/1.tar.gz"
def create_tokenizer_from_hub_module():
with tf.Graph().as_default():
bert_module = hub.Module(BERT_MODEL_HUB)
tokenization_info = bert_module(signature="tokenization_info", as_dict=True)
with tf.Session() as sess:
vocab_file, do_lower_case = sess.run([tokenization_info["vocab_file"],tokenization_info["do_lower_case"]])
return bert.tokenization.FullTokenizer(vocab_file=vocab_file, do_lower_case=do_lower_case)
tokenizer = create_tokenizer_from_hub_module()
tokenizer.tokenize("This here's an example of using the BERT tokenizer")
# We'll set sequences to be at most 128 tokens long TEST.
MAX_SEQ_LENGTH = 128
# Convert our train and test features to InputFeatures that BERT understands.
train_features = bert.run_classifier.convert_examples_to_features(train_InputExamples, label_list, MAX_SEQ_LENGTH, tokenizer)
test_features = bert.run_classifier.convert_examples_to_features(test_InputExamples, label_list, MAX_SEQ_LENGTH, tokenizer)
#Creating a model
def create_model(is_predicting, input_ids, input_mask, segment_ids, labels, num_labels):
#Creates a classification model.
bert_module = hub.Module(BERT_MODEL_HUB,trainable=True)
bert_inputs = dict(input_ids=input_ids,input_mask=input_mask,segment_ids=segment_ids)
bert_outputs = bert_module(inputs=bert_inputs,signature="tokens",as_dict=True)
# Use "pooled_output" for classification tasks on an entire sentence.
# Use "sequence_outputs" for token-level output.
output_layer = bert_outputs["pooled_output"]
hidden_size = output_layer.shape[-1].value
# Create our own layer to tune for politeness data.
output_weights = tf.get_variable("output_weights", [num_labels, hidden_size],initializer=tf.truncated_normal_initializer(stddev=0.02))
output_bias = tf.get_variable("output_bias", [num_labels], initializer=tf.zeros_initializer())
with tf.variable_scope("loss"):
# Dropout helps prevent overfitting
output_layer = tf.nn.dropout(output_layer, keep_prob=0.9)
logits = tf.matmul(output_layer, output_weights, transpose_b=True)
logits = tf.nn.bias_add(logits, output_bias)
log_probs = tf.nn.log_softmax(logits, axis=-1)
# Convert labels into one-hot encoding
one_hot_labels = tf.one_hot(labels, depth=num_labels, dtype=tf.float32)
predicted_labels = tf.squeeze(tf.argmax(log_probs, axis=-1, output_type=tf.int32))
# If we're predicting, we want predicted labels and the probabilities.
if is_predicting:
return (predicted_labels, log_probs)
# If we're train/eval, compute loss between predicted and actual label
per_example_loss = tf.reduce_sum(one_hot_labels * log_probs, axis=-1)
loss = tf.reduce_mean(per_example_loss)
return (loss, predicted_labels, log_probs)
'''Next we'll wrap our model function in a model_fn_builder function that adapts our model to work for training, evaluation, and prediction.'''
# model_fn_builder actually creates our model function
# using the passed parameters for num_labels, learning_rate, etc.
def model_fn_builder(num_labels, learning_rate, num_train_steps,
num_warmup_steps):
#Returns `model_fn` closure for TPUEstimator."""
def model_fn(features, labels, mode, params): # pylint: disable=unused-argument
#"""The `model_fn` for TPUEstimator."""
input_ids = features["input_ids"]
input_mask = features["input_mask"]
segment_ids = features["segment_ids"]
label_ids = features["label_ids"]
is_predicting = (mode == tfe.estimator.ModeKeys.PREDICT)
# TRAIN and EVAL
if not is_predicting:
(loss, predicted_labels, log_probs) = create_model(is_predicting, input_ids, input_mask, segment_ids, label_ids, num_labels)
train_op = bert.optimization.create_optimizer(loss, learning_rate, num_train_steps, num_warmup_steps, use_tpu=False)
# Calculate evaluation metrics.
def metric_fn(label_ids, predicted_labels):
accuracy = tf.metrics.accuracy(label_ids, predicted_labels)
f1_score = tf.contrib.metrics.f1_score(
label_ids,
predicted_labels)
auc = tf.metrics.auc(
label_ids,
predicted_labels)
recall = tf.metrics.recall(
label_ids,
predicted_labels)
precision = tf.metrics.precision(
label_ids,
predicted_labels)
true_pos = tf.metrics.true_positives(
label_ids,
predicted_labels)
true_neg = tf.metrics.true_negatives(
label_ids,
predicted_labels)
false_pos = tf.metrics.false_positives(
label_ids,
predicted_labels)
false_neg = tf.metrics.false_negatives(
label_ids,
predicted_labels)
return {
"eval_accuracy": accuracy,
"f1_score": f1_score,
"auc": auc,
"precision": precision,
"recall": recall,
"true_positives": true_pos,
"true_negatives": true_neg,
"false_positives": false_pos,
"false_negatives": false_neg
}
eval_metrics = metric_fn(label_ids, predicted_labels)
if mode == tfe.estimator.ModeKeys.TRAIN:
return tfe.estimator.EstimatorSpec(mode=mode, loss=loss, train_op=train_op)
else:
return tfe.estimator.EstimatorSpec(mode=mode, loss=loss, eval_metric_ops=eval_metrics)
else:
(predicted_labels, log_probs) = create_model(is_predicting, input_ids, input_mask, segment_ids, label_ids, num_labels)
predictions = {'probabilities': log_probs, 'labels': predicted_labels}
return tfe.estimator.EstimatorSpec(mode, predictions=predictions)
# Return the actual model function in the closure
return model_fn
# Compute train and warmup steps from batch size
# These hyperparameters are copied from this colab notebook (https://colab.sandbox.google.com/github/tensorflow/tpu/blob/master/tools/colab/bert_finetuning_with_cloud_tpus.ipynb)
BATCH_SIZE = 32
LEARNING_RATE = 2e-5
NUM_TRAIN_EPOCHS = 3.0
# Warmup is a period of time where hte learning rate
# is small and gradually increases--usually helps training.
WARMUP_PROPORTION = 0.1
# Model configs
SAVE_CHECKPOINTS_STEPS = 500
SAVE_SUMMARY_STEPS = 100
# Compute # train and warmup steps from batch size
num_train_steps = int(len(train_features) / BATCH_SIZE * NUM_TRAIN_EPOCHS)
num_warmup_steps = int(num_train_steps * WARMUP_PROPORTION)
# Specify outpit directory and number of checkpoint steps to save
run_config = tfe.estimator.RunConfig(
model_dir=OUTPUT_DIR,
save_summary_steps=SAVE_SUMMARY_STEPS,
save_checkpoints_steps=SAVE_CHECKPOINTS_STEPS)
model_fn = model_fn_builder(
num_labels=len(label_list),
learning_rate=LEARNING_RATE,
num_train_steps=num_train_steps,
num_warmup_steps=num_warmup_steps)
estimator = tfe.estimator.Estimator(
model_fn=model_fn,
config=run_config,
params={"batch_size": BATCH_SIZE}
)
# Create an input function for training. drop_remainder = True for using TPUs.
train_input_fn = bert.run_classifier.input_fn_builder(
features=train_features,
seq_length=MAX_SEQ_LENGTH,
is_training=True,
drop_remainder=False)
#Now we train our model! For me, using a Colab notebook running on Google's GPUs, my training time was about 14 minutes.
print(f'Beginning Training!')
current_time = datetime.now()
estimator.train(input_fn=train_input_fn, max_steps=num_train_steps)
print("Training took time ", datetime.now() - current_time)
#Now let's use our test data to see how well our model did:
test_input_fn = run_classifier.input_fn_builder(
features=test_features,
seq_length=MAX_SEQ_LENGTH,
is_training=False,
drop_remainder=False)
estimator.evaluate(input_fn=test_input_fn, steps=None)
def getPrediction(in_sentences):
labels = ["Negative", "Positive"]
input_examples = [run_classifier.InputExample(guid="", text_a = x, text_b = None, label = 0) for x in in_sentences] # here, "" is just a dummy label
input_features = run_classifier.convert_examples_to_features(input_examples, label_list, MAX_SEQ_LENGTH, tokenizer)
predict_input_fn = run_classifier.input_fn_builder(features=input_features, seq_length=MAX_SEQ_LENGTH, is_training=False, drop_remainder=False)
predictions = estimator.predict(predict_input_fn)
return [(sentence, prediction['probabilities'], labels[prediction['labels']]) for sentence, prediction in zip(in_sentences, predictions)]
pred_sentences = [
"That movie was absolutely awful",
"The acting was a bit lacking",
"The film was creative and surprising",
"Absolutely fantastic!"
]
predictions = getPrediction(pred_sentences)
predictions
可怕的是,这个问题的答案全在于缩进。在上面发布的 Google Colab 示例中有一个名为 def model_fn 的函数。这似乎是另一个函数的包装函数,该函数实际创建一个模型以传递给 TensorFlow Estimator。当我在 VS 代码中调试它时,我在函数中放置了一个断点来尝试理清发生了什么,并且它一直跳过检查“false pos,false_neg 等的中间位” .
显然我在 VS Code 中编辑时以某种方式破坏了缩进,并且函数是嵌套的,以至于 pylint 没有发现任何语法问题 - 它只是跳过了函数。
修复只是从 colab notebook 中重新复制整个 def model_fn 函数,voila 它起作用了。