estimator.train throws ValueError: model_fn should return an EstimatorSpec

Question

这是我正在使用的代码...

我在第 304 行对我来说安装了一个断点...

estimator.train(input_fn=train_input_fn,max_steps=num_train_steps)

有人看过吗？我确定我安装了正确版本的 TensorFlow 和 BERT。

完整的堆栈跟踪如下....

    Exception has occurred: ValueError
    model_fn should return an EstimatorSpec.
    File "C:\Program Files\Python36\Lib\site-packages\tensorflow_estimator\python\estimator\estimator.py", line 1153, in _call_model_fn
 raise ValueError('model_fn should return an EstimatorSpec.')
    File "C:\Program Files\Python36\Lib\site-packages\tensorflow_estimator\python\estimator\estimator.py", line 1191, in _train_model_default
features, labels, ModeKeys.TRAIN, self.config)
    File "C:\Program Files\Python36\Lib\site-packages\tensorflow_estimator\python\estimator\estimator.py", line 1161, in _train_model
return self._train_model_default(input_fn, hooks, saving_listeners)
    File "C:\Program Files\Python36\Lib\site-packages\tensorflow_estimator\python\estimator\estimator.py", line 370, in train
loss = self._train_model(input_fn, hooks, saving_listeners)
    File "C:\Users\brownru\eclipse-workspace\tiaaNLPPython\org\tiaa\ai\penelope\bertNLP\sentiment\sentiment.py", line 304, in <module>
estimator.train(input_fn=train_input_fn, max_steps=num_train_steps)
    File "C:\Program Files\Python36\Lib\runpy.py", line 85, in _run_code
exec(code, run_globals)
    File "C:\Program Files\Python36\Lib\runpy.py", line 96, in _run_module_code
mod_name, mod_spec, pkg_name, script_name)
    File "C:\Program Files\Python36\Lib\runpy.py", line 263, in run_path
pkg_name=pkg_name, script_name=fname)
    ValueError: model_fn should return an EstimatorSpec.

此代码是我尝试从此处运行一些 Google colab 代码 -

https://colab.research.google.com/github/google-research/bert/blob/master/predicting_movie_reviews_with_bert_on_tf_hub.ipynb#scrollTo=t6Nukby2EB6-

# Copyright 2019 Google Inc.

# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at

# http://www.apache.org/licenses/LICENSE-2.0

# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# install --proxy http://proxy.ops.tiaa-cref.org:8080 tensorFlow

import pandas as pd
import tensorflow as tf
import tensorflow_hub as hub
import tensorflow_estimator as tfe
from datetime import datetime

import bert
from bert import run_classifier
from bert import optimization
from bert import tokenization


# Set the output directory for saving model file
# Optionally, set a GCP bucket location

OUTPUT_DIR = r'C:\Users\brownru\Documents\npsExplanationComplains\sentimentOutput' 
#@markdown Whether or not to clear/delete the directory and create a new one
DO_DELETE = True #@param {type:"boolean"}
#@markdown Set USE_BUCKET and BUCKET if you want to (optionally) store model output on GCP bucket.
USE_BUCKET = False #@param {type:"boolean"}
BUCKET = 'BUCKET_NAME' #@param {type:"string"}

if USE_BUCKET:
    OUTPUT_DIR = 'gs://{}/{}'.format(BUCKET, OUTPUT_DIR)
#from google.colab import auto
#auth.authenticate_user()

if DO_DELETE:
    try:
        tf.gfile.DeleteRecursively(OUTPUT_DIR)
    except:
            # Doesn't matter if the directory didn't exist
            pass
    tf.gfile.MakeDirs(OUTPUT_DIR)
    print('***** Model output directory: {} *****'.format(OUTPUT_DIR))

'''
First, let's download the dataset, hosted by Stanford. The code below, which downloads, extracts, and imports the IMDB Large Movie Review Dataset, is borrowed from [this Tensorflow tutorial](https://www.tensorflow.org/hub/tutorials/text_classification_with_tf_hub).
'''
from tensorflow import keras
import os
import re

# Load all files from a directory in a DataFrame.
def load_directory_data(directory):
    data = {}
    data["sentence"] = []
    data["sentiment"] = []
    for file_path in os.listdir(directory):
        with tf.gfile.GFile(os.path.join(directory, file_path), "r") as f:
            data["sentence"].append(f.read())
            data["sentiment"].append(re.match("\d+_(\d+)\.txt", file_path).group(1))
    return pd.DataFrame.from_dict(data)

# Merge positive and negative examples, add a polarity column and shuffle.
def load_dataset(directory):
    pos_df = load_directory_data(os.path.join(directory, "pos"))
    neg_df = load_directory_data(os.path.join(directory, "neg"))
    pos_df["polarity"] = 1
    neg_df["polarity"] = 0
    return pd.concat([pos_df, neg_df]).sample(frac=1).reset_index(drop=True)

# Download and process the dataset files.
def download_and_load_datasets():
    #dataset = tf.keras.utils.get_file(fname="aclImdb.tar.gz", origin="http://chapdc3sas51.ops.tiaa-cref.org/nlpAssets/aclImdb_v1.tar.gz", extract=True)
    trainPath = r'C:\Users\brownru\.keras\datasets\aclImdb\train'
    testPath = r'C:\Users\brownru\.keras\datasets\aclImdb\test'
    train_df = load_dataset(trainPath)
    test_df = load_dataset(testPath)

    return train_df, test_df

train, test = download_and_load_datasets()

#To keep training fast, we'll take a sample of 5000 train and test examples, respectively.

train = train.sample(5000)
test = test.sample(5000)

train.columns

#Index(['sentence', 'sentiment', 'polarity'], dtype='object')

#For us, our input data is the 'sentence' column and our label is the 'polarity' column (0, 1 for negative and positive, respectively)

DATA_COLUMN = 'sentence'
LABEL_COLUMN = 'polarity'
# label_list is the list of labels, i.e. True, False or 0, 1 or 'dog', 'cat'
label_list = [0, 1]


#Data Preprocessing We'll need to transform our data into a format BERT understands. This involves two steps. First, we create InputExample's using the constructor provided in the BERT library.
#text_a is the text we want to classify, which in this case, is the Request field in our Dataframe. 
#text_b is used if we're training a model to understand the relationship between sentences (i.e. is text_b a translation of text_a? Is text_b an answer to the question asked by text_a?). This doesn't apply to our task, so we can leave text_b blank.
#label is the label for our example, i.e. True, False

# Use the InputExample class from BERT's run_classifier code to create examples from the data
train_InputExamples = train.apply(lambda x: bert.run_classifier.InputExample(guid=None, # Globally unique ID for bookkeeping, unused in this example
                                                                text_a = x[DATA_COLUMN], 
                                                                text_b = None, 
                                                                label = x[LABEL_COLUMN]), axis = 1)

test_InputExamples = test.apply(lambda x: bert.run_classifier.InputExample(guid=None, 
                                                                text_a = x[DATA_COLUMN], 
                                                                text_b = None, 
                                                                label = x[LABEL_COLUMN]), axis = 1)



# This is a path to an uncased (all lowercase) version of BERT
BERT_MODEL_HUB = "http://chapdc3sas51.ops.tiaa-cref.org/nlpAssets/1.tar.gz"

def create_tokenizer_from_hub_module():
    with tf.Graph().as_default():
        bert_module = hub.Module(BERT_MODEL_HUB)
        tokenization_info = bert_module(signature="tokenization_info", as_dict=True)
        with tf.Session() as sess:
                vocab_file, do_lower_case = sess.run([tokenization_info["vocab_file"],tokenization_info["do_lower_case"]])      
    return bert.tokenization.FullTokenizer(vocab_file=vocab_file, do_lower_case=do_lower_case)

tokenizer = create_tokenizer_from_hub_module()

tokenizer.tokenize("This here's an example of using the BERT tokenizer")

# We'll set sequences to be at most 128 tokens long TEST.
MAX_SEQ_LENGTH = 128
# Convert our train and test features to InputFeatures that BERT understands.
train_features = bert.run_classifier.convert_examples_to_features(train_InputExamples, label_list, MAX_SEQ_LENGTH, tokenizer)
test_features = bert.run_classifier.convert_examples_to_features(test_InputExamples, label_list, MAX_SEQ_LENGTH, tokenizer)

#Creating a model

def create_model(is_predicting, input_ids, input_mask, segment_ids, labels, num_labels):
#Creates a classification model.
    bert_module = hub.Module(BERT_MODEL_HUB,trainable=True)
    bert_inputs = dict(input_ids=input_ids,input_mask=input_mask,segment_ids=segment_ids)
    bert_outputs = bert_module(inputs=bert_inputs,signature="tokens",as_dict=True)

# Use "pooled_output" for classification tasks on an entire sentence.
# Use "sequence_outputs" for token-level output.
    output_layer = bert_outputs["pooled_output"]

    hidden_size = output_layer.shape[-1].value

# Create our own layer to tune for politeness data.  
    output_weights = tf.get_variable("output_weights", [num_labels, hidden_size],initializer=tf.truncated_normal_initializer(stddev=0.02))
    output_bias = tf.get_variable("output_bias", [num_labels], initializer=tf.zeros_initializer())

    with tf.variable_scope("loss"):
        # Dropout helps prevent overfitting
        output_layer = tf.nn.dropout(output_layer, keep_prob=0.9)

        logits = tf.matmul(output_layer, output_weights, transpose_b=True)
        logits = tf.nn.bias_add(logits, output_bias)
        log_probs = tf.nn.log_softmax(logits, axis=-1)

        # Convert labels into one-hot encoding
        one_hot_labels = tf.one_hot(labels, depth=num_labels, dtype=tf.float32)
        predicted_labels = tf.squeeze(tf.argmax(log_probs, axis=-1, output_type=tf.int32))
        # If we're predicting, we want predicted labels and the probabilities.
        if is_predicting:
            return (predicted_labels, log_probs)

        # If we're train/eval, compute loss between predicted and actual label
    per_example_loss = tf.reduce_sum(one_hot_labels * log_probs, axis=-1)
    loss = tf.reduce_mean(per_example_loss)
    return (loss, predicted_labels, log_probs)


'''Next we'll wrap our model function in a model_fn_builder function that adapts our model to work for training, evaluation, and prediction.'''

# model_fn_builder actually creates our model function
# using the passed parameters for num_labels, learning_rate, etc.
def model_fn_builder(num_labels, learning_rate, num_train_steps,
                     num_warmup_steps):
#Returns `model_fn` closure for TPUEstimator."""
    def model_fn(features, labels, mode, params):  # pylint: disable=unused-argument
    #"""The `model_fn` for TPUEstimator."""

        input_ids = features["input_ids"]
        input_mask = features["input_mask"]
        segment_ids = features["segment_ids"]
        label_ids = features["label_ids"]

        is_predicting = (mode == tfe.estimator.ModeKeys.PREDICT)

    # TRAIN and EVAL
        if not is_predicting:

            (loss, predicted_labels, log_probs) = create_model(is_predicting, input_ids, input_mask, segment_ids, label_ids, num_labels)
            train_op = bert.optimization.create_optimizer(loss, learning_rate, num_train_steps, num_warmup_steps, use_tpu=False)

# Calculate evaluation metrics. 
            def metric_fn(label_ids, predicted_labels):
                        accuracy = tf.metrics.accuracy(label_ids, predicted_labels)
                        f1_score = tf.contrib.metrics.f1_score(
                            label_ids,
                            predicted_labels)
                        auc = tf.metrics.auc(
                            label_ids,
                            predicted_labels)
                        recall = tf.metrics.recall(
                            label_ids,
                            predicted_labels)
                        precision = tf.metrics.precision(
                            label_ids,
                            predicted_labels) 
                        true_pos = tf.metrics.true_positives(
                            label_ids,
                            predicted_labels)
                        true_neg = tf.metrics.true_negatives(
                            label_ids,
                            predicted_labels)   
                        false_pos = tf.metrics.false_positives(
                            label_ids,
                            predicted_labels)  
                        false_neg = tf.metrics.false_negatives(
                            label_ids,
                            predicted_labels)
                        return {
                            "eval_accuracy": accuracy,
                            "f1_score": f1_score,
                            "auc": auc,
                            "precision": precision,
                            "recall": recall,
                            "true_positives": true_pos,
                            "true_negatives": true_neg,
                            "false_positives": false_pos,
                            "false_negatives": false_neg
                        }

                        eval_metrics = metric_fn(label_ids, predicted_labels)

                        if mode == tfe.estimator.ModeKeys.TRAIN:
                            return tfe.estimator.EstimatorSpec(mode=mode, loss=loss, train_op=train_op)
                        else:
                            return tfe.estimator.EstimatorSpec(mode=mode, loss=loss, eval_metric_ops=eval_metrics)
        else:
            (predicted_labels, log_probs) = create_model(is_predicting, input_ids, input_mask, segment_ids, label_ids, num_labels)
            predictions = {'probabilities': log_probs, 'labels': predicted_labels}
            return tfe.estimator.EstimatorSpec(mode, predictions=predictions)

# Return the actual model function in the closure
    return model_fn

# Compute train and warmup steps from batch size
# These hyperparameters are copied from this colab notebook (https://colab.sandbox.google.com/github/tensorflow/tpu/blob/master/tools/colab/bert_finetuning_with_cloud_tpus.ipynb)
BATCH_SIZE = 32
LEARNING_RATE = 2e-5
NUM_TRAIN_EPOCHS = 3.0
# Warmup is a period of time where hte learning rate 
# is small and gradually increases--usually helps training.
WARMUP_PROPORTION = 0.1
# Model configs
SAVE_CHECKPOINTS_STEPS = 500
SAVE_SUMMARY_STEPS = 100

# Compute # train and warmup steps from batch size
num_train_steps = int(len(train_features) / BATCH_SIZE * NUM_TRAIN_EPOCHS)
num_warmup_steps = int(num_train_steps * WARMUP_PROPORTION)
# Specify outpit directory and number of checkpoint steps to save
run_config = tfe.estimator.RunConfig(
    model_dir=OUTPUT_DIR,
    save_summary_steps=SAVE_SUMMARY_STEPS,
    save_checkpoints_steps=SAVE_CHECKPOINTS_STEPS)

model_fn = model_fn_builder(
  num_labels=len(label_list),
  learning_rate=LEARNING_RATE,
  num_train_steps=num_train_steps,
  num_warmup_steps=num_warmup_steps)

estimator = tfe.estimator.Estimator(
  model_fn=model_fn,
  config=run_config,
  params={"batch_size": BATCH_SIZE}
  )

# Create an input function for training. drop_remainder = True for using TPUs.
train_input_fn = bert.run_classifier.input_fn_builder(
    features=train_features,
    seq_length=MAX_SEQ_LENGTH,
    is_training=True,
    drop_remainder=False)

#Now we train our model! For me, using a Colab notebook running on Google's GPUs, my training time was about 14 minutes.
print(f'Beginning Training!')
current_time = datetime.now()
estimator.train(input_fn=train_input_fn, max_steps=num_train_steps)
print("Training took time ", datetime.now() - current_time)

#Now let's use our test data to see how well our model did:
test_input_fn = run_classifier.input_fn_builder(
    features=test_features,
    seq_length=MAX_SEQ_LENGTH,
    is_training=False,
    drop_remainder=False)

estimator.evaluate(input_fn=test_input_fn, steps=None)

def getPrediction(in_sentences):
    labels = ["Negative", "Positive"]
    input_examples = [run_classifier.InputExample(guid="", text_a = x, text_b = None, label = 0) for x in in_sentences] # here, "" is just a dummy label
    input_features = run_classifier.convert_examples_to_features(input_examples, label_list, MAX_SEQ_LENGTH, tokenizer)
    predict_input_fn = run_classifier.input_fn_builder(features=input_features, seq_length=MAX_SEQ_LENGTH, is_training=False, drop_remainder=False)
    predictions = estimator.predict(predict_input_fn)
    return [(sentence, prediction['probabilities'], labels[prediction['labels']]) for sentence, prediction in zip(in_sentences, predictions)]




pred_sentences = [
  "That movie was absolutely awful",
  "The acting was a bit lacking",
  "The film was creative and surprising",
  "Absolutely fantastic!"
]

predictions = getPrediction(pred_sentences)

predictions

Answer 1

可怕的是，这个问题的答案全在于缩进。在上面发布的 Google Colab 示例中有一个名为 def model_fn 的函数。这似乎是另一个函数的包装函数，该函数实际创建一个模型以传递给 TensorFlow Estimator。当我在 VS 代码中调试它时，我在函数中放置了一个断点来尝试理清发生了什么，并且它一直跳过检查“false pos，false_neg 等的中间位” .

显然我在 VS Code 中编辑时以某种方式破坏了缩进，并且函数是嵌套的，以至于 pylint 没有发现任何语法问题 - 它只是跳过了函数。

修复只是从 colab notebook 中重新复制整个 def model_fn 函数，voila 它起作用了。

estimator.train throws ValueError: model_fn should return an EstimatorSpec

estimator.train throws ValueError: model_fn should return an EstimatorSpec

tensorflow

tensorflow-estimator

bert-language-model