使用深度神经网络分类器与 Tensorflow 创建逻辑与

Create a Logical AND with Tensorflow using a Deep Neural Network classifier

我正在尝试制作一个基本的 Tensorflow 模型来预测逻辑与的输出:

+---------+---------+--------+
| Input A | Input B | Output |
+---------+---------+--------+
|    0    |    0    |   0    |
|    0    |    1    |   0    |
|    1    |    0    |   0    |
|    1    |    1    |   1    |
+---------+---------+--------+

我知道其他估算器更适合这项特定任务,但我坚持使用 DNNClassifier,因为我将在未来的项目中需要它。这是我用于训练和预测输出的代码:

from matplotlib import pyplot as plt
import numpy as np
import pandas as pd
from sklearn import metrics
import tensorflow as tf
from tensorflow.python.data import Dataset
import seaborn as sns
import glob
import os

def input(features, targets, batch_size=1, shuffle=True, num_epochs=None):
    """Input function for a tf.estimator: yields (features, labels) batches.

    NOTE(review): this function shadows the builtin `input()`; the name is
    kept unchanged for compatibility with existing callers.

    Args:
        features: pandas DataFrame of input features.
        targets: pandas DataFrame (or Series) of labels.
        batch_size: number of examples per batch.
        shuffle: whether to shuffle examples before batching.
        num_epochs: number of epochs to repeat the data; None repeats forever.

    Returns:
        A (features, labels) pair of tensors for the next batch.
    """
    # Convert pandas data into a dict of np arrays.
    features = {key: np.array(value) for key, value in dict(features).items()}

    # Construct a dataset. Warning: from_tensor_slices embeds the data in
    # the graph and is therefore subject to the 2GB graph limit.
    ds = Dataset.from_tensor_slices((features, targets))

    # Shuffle *before* batching: shuffling after .batch() would only
    # reorder whole batches, not individual examples.
    if shuffle:
        ds = ds.shuffle(10000)

    # Configure batching/repeating.
    ds = ds.batch(batch_size).repeat(num_epochs)

    # Return the next batch of data (TF1-style one-shot iterator).
    features, labels = ds.make_one_shot_iterator().get_next()
    return features, labels


def train_model(learning_rate, steps, batch_size, training_examples, training_targets, validation_examples, validation_targets):
    """Train a DNNClassifier on the given data and report progress.

    Trains in 10 periods, printing validation LogLoss after each period,
    then plots the loss curves and a row-normalized confusion matrix.

    Args:
        learning_rate: learning rate for the Adagrad optimizer.
        steps: total number of training steps, spread over the periods.
        batch_size: batch size used during training.
        training_examples: pandas DataFrame of training features.
        training_targets: pandas DataFrame of training labels (0/1).
        validation_examples: pandas DataFrame of validation features.
        validation_targets: pandas DataFrame of validation labels (0/1).
    """
    periods = 10

    # NOTE(review): float division -- estimator.train(steps=...) expects an
    # integer; fine when `steps` is divisible by `periods`, confirm otherwise.
    steps_per_period = steps / periods

    # Input functions: shuffled for training; fixed order and a single
    # epoch for prediction so outputs line up with the targets.
    training_input_fn = lambda: input(training_examples, training_targets, batch_size=batch_size)
    predict_training_input_fn = lambda: input(training_examples, training_targets, num_epochs=1, shuffle=False)
    predict_validation_input_fn = lambda: input(validation_examples, validation_targets, num_epochs=1, shuffle=False)

    # One numeric feature column per input DataFrame column.
    tensorflow_features = set([tf.feature_column.numeric_column(my_feature) for my_feature in training_examples])
    optimizer = tf.train.AdagradOptimizer(learning_rate=learning_rate)
    # Clip gradients to a max global norm of 5.0 to avoid exploding updates.
    optimizer = tf.contrib.estimator.clip_gradients_by_norm(optimizer, 5.0)
    classifier = tf.estimator.DNNClassifier(
        feature_columns=tensorflow_features,
        n_classes=2,
        hidden_units=[1],
        optimizer=optimizer,
        activation_fn=tf.nn.sigmoid,
        config=tf.contrib.learn.RunConfig(keep_checkpoint_max=1)
    )

    # Train the model, but do so inside a loop so that we can periodically assess
    # loss metrics.
    print("Training model...")
    print("LogLoss error (on validation data):")
    training_errors = []
    validation_errors = []
    # The model will be trained in 10 periods:
    for period in range(0, periods):
        # Train the model, starting from the prior state.
        classifier.train(
            input_fn=training_input_fn,
            steps=steps_per_period
        )

        # Take a break and compute probabilities.
        training_predictions = list(classifier.predict(input_fn=predict_training_input_fn))
        training_probabilities = np.array([item['probabilities'] for item in training_predictions])
        training_pred_class_id = np.array([item['class_ids'][0] for item in training_predictions])
        training_pred_one_hot = tf.keras.utils.to_categorical(training_pred_class_id, num_classes=2)

        validation_predictions = list(classifier.predict(input_fn=predict_validation_input_fn))
        validation_probabilities = np.array([item['probabilities'] for item in validation_predictions])
        validation_pred_class_id = np.array([item['class_ids'][0] for item in validation_predictions])
        validation_pred_one_hot = tf.keras.utils.to_categorical(validation_pred_class_id, num_classes=2)

        # Compute training and validation errors.
        # NOTE(review): log_loss is computed on hard one-hot class ids, not the
        # predicted probabilities -- so the reported "LogLoss" is saturated
        # (0 or very large), which matches the all-zero periods in the output.
        training_log_loss = metrics.log_loss(training_targets, training_pred_one_hot)
        validation_log_loss = metrics.log_loss(validation_targets, validation_pred_one_hot)
        # Occasionally print the current loss.
        print("  period %02d : %0.2f" % (period, validation_log_loss))
        # Add the loss metrics from this period to our list.
        training_errors.append(training_log_loss)
        validation_errors.append(validation_log_loss)
    print("Model training finished.")
    # Remove event files to save disk space.
    # Bug fix: a bare `map()` is lazy in Python 3 and was never consumed,
    # so os.remove never ran; iterate explicitly instead.
    for event_file in glob.glob(os.path.join(classifier.model_dir, 'events.out.tfevents*')):
        os.remove(event_file)

    # Calculate final predictions (not probabilities, as above).
    final_predictions = classifier.predict(input_fn=predict_validation_input_fn)
    final_predictions = np.array([item['class_ids'][0] for item in final_predictions])

    accuracy = metrics.accuracy_score(validation_targets, final_predictions)
    print("Final accuracy (on validation data): %0.2f" % accuracy)
    # Output a graph of loss metrics over periods.
    plt.ylabel("LogLoss")
    plt.xlabel("Periods")
    plt.title("LogLoss vs. Periods")
    plt.plot(training_errors, label="training")
    plt.plot(validation_errors, label="validation")
    plt.legend()
    plt.show()

    # Plot the confusion matrix.
    cm = metrics.confusion_matrix(validation_targets, final_predictions)
    # Normalize the confusion matrix by row (i.e by the number of samples
    # in each class).
    cm_normalized = cm.astype("float") / cm.sum(axis=1)[:, np.newaxis]
    ax = sns.heatmap(cm_normalized, cmap="bone_r")
    ax.set_aspect(1)
    plt.title("Confusion matrix")
    plt.ylabel("True label")
    plt.xlabel("Predicted label")
    plt.show()

为了训练模型并预测输出,我只需运行以下内容:

# Truth table for logical AND as training data (all four input combinations).
training_examples = pd.DataFrame([[0,0],[0,1],[1,0],[1,1]], columns=['input_a', 'input_b'])
training_targets = pd.DataFrame([[0],[0],[0],[1]], columns=['output'])
# Validation set reuses AND rows (some repeated); each label is a AND b.
validation_examples = pd.DataFrame([[1,0],[0,1],[1,1],[0,0],[0,1],[0,0],[1,1]], columns=['input_a', 'input_b'])
validation_targets = pd.DataFrame([[0],[0],[1],[0],[0],[0],[1]], columns=['output'])

# Train with a tiny learning rate and only 10 steps of batch size 1 --
# very little training, which likely explains the poor accuracy reported.
train_model(
  learning_rate=0.001,
  steps=10,
  batch_size=1,
  training_examples=training_examples,
  training_targets=training_targets,
  validation_examples=validation_examples,
  validation_targets=validation_targets,
)

在多次调整超参数并反复运行代码后,我总是得到非常糟糕的准确率和明显错误的混淆矩阵:

有 Tensorflow 专家可以解释一下吗?

尝试 sigmoid 激活。

经典的单神经元 AND 示例在使用 sigmoid 激活函数时存在解;但在使用 ReLU 时不存在单神经元解——不存在能拟合该函数的权重。

如果你想使用 ReLU,那么你将需要一个额外的层和更多的神经元来获得这个函数的行为。

我终于成功了!

我删除了 hidden_units:

# Same classifier setup as before, but with no hidden layers
# (hidden_units=[]), reducing the DNNClassifier to a single
# logistic-regression unit -- enough, since AND is linearly separable.
optimizer = tf.train.AdagradOptimizer(learning_rate=learning_rate)
# Clip gradients to a max global norm of 5.0.
optimizer = tf.contrib.estimator.clip_gradients_by_norm(optimizer, 5.0)
classifier = tf.estimator.DNNClassifier(
    feature_columns=tensorflow_features,
    n_classes=2,  # binary output: 0 or 1
    hidden_units=[],  # no hidden layers: a purely linear (logistic) model
    optimizer=optimizer,
    activation_fn=tf.nn.sigmoid,
    config=tf.contrib.learn.RunConfig(keep_checkpoint_max=1)
)

并更新超参数如下:

# Re-run training with a much larger learning rate (1 vs 0.001) and
# 10x the steps.
# NOTE(review): train_model() has no return statement, so `classifier`
# is assigned None here -- the assignment is presumably a leftover.
classifier = train_model(
  learning_rate=1,
  steps=100,
  batch_size=1,
  training_examples=training_examples,
  training_targets=training_targets,
  validation_examples=validation_examples,
  validation_targets=validation_targets,
)

最终输出:

Training model...
LogLoss error (on validation data):
  period 00 : 9.87
  period 01 : 0.00
  period 02 : 0.00
  period 03 : 0.00
  period 04 : 0.00
  period 05 : 0.00
  period 06 : 0.00
  period 07 : 0.00
  period 08 : 0.00
  period 09 : 0.00
Model training finished.
Final accuracy (on validation data): 1.00