Neural Network bad convergence

For the past two weeks I have been reading a lot about neural networks, and I think I have seen nearly every "XOR" tutorial on the internet. However, I can't get my own to work. I started with a simple "OR" neuron approach and got good results. I think my problem lies in the backpropagation implementation. I took an object-oriented approach, so here are the main classes.

Three classes:

Neuron

public class Neuron {

/*
 * Attributes
 */

double[] inputs;
double[] weights;

double output;
double error;

double delta;
double deltaWeight;

/*
 * Constructors
 */

public Neuron(int nInputs) 
{
    inputs = new double[nInputs + 1];
    inputs[inputs.length - 1] = 1; // bias
    weights = new double[nInputs + 1];
}

/*
 * Methods
 */

/**
 * Reset all weights of the neuron to random values between -0.5 and 0.5
 */
public void reset()
{       
    Random random = new Random();
    for (int i = 0; i < weights.length; i++)
        weights[i] = random.nextDouble() - 0.5; // uniform in [-0.5, 0.5)
}

/**
 * Compute output for given inputs
 * @param inputs
 */
public void computeOutput(double[] inputs)
{
    setInputs(inputs);
    output = Sigmoid.activation(getDotProduct());
}

/**
 * Compute error for given ideal
 * @param ideal
 */
public void computeError(double ideal)
{
    error = ideal - output;
    delta = error;
}

/**
 * Compute error for hidden neurons
 * @param previousLayer the layer n+1, i.e. the one processed just
 *                      before this one in backpropagation order
 * @param position index of this neuron within its own layer
 */
public void computeError(FeedForwardLayer previousLayer, int position)
{
    double sum = 0;
    for (int i = 0; i < previousLayer.neurons.length; i++)
        sum += (previousLayer.neurons[i].delta * previousLayer.neurons[i].weights[position]);

    delta = Sigmoid.derivative(getDotProduct()) * sum;
    error = delta;
}

/**
 * Adjust every weight of the neuron
 */
public void adjustWeights(double lambda, double momentum)
{
    for (int i = 0; i < weights.length; i++) 
    {
        double lastDeltaWeight = deltaWeight;
        deltaWeight = lambda * (delta * inputs[i]) + momentum * lastDeltaWeight;
        weights[i] += deltaWeight;
    }
}

@Override
public String toString() 
{
    String str = "";
    for (int i = 0; i < weights.length; i++)
        str = str.concat(String.format("IN|W --> %.6f | %.6f \n", (float) inputs[i], (float) weights[i]));

    str = str.concat("Output = " + output + "\n");
    str = str.concat("Error = " + error + "\n");
    return str;
}

/*
 * Getters & Setters
 */

/**
 * @return the dot product of weights and inputs (the bias term is included as the last input)
 */
public double getDotProduct()
{
    double sum = 0;
    for (int i = 0; i < inputs.length; i++)
        sum += (weights[i] * inputs[i]);

    return sum;
}

/**
 * Set inputs (keep bias input)
 * @param inputs
 */
public void setInputs(double[] inputs)
{
    for (int i = 0; i < inputs.length; i++)
        this.inputs[i] = inputs[i];
}

/**
 * Set every weight to a single value
 * @param weight
 */
public void setWeights(double weight)
{
    for (int i = 0; i < weights.length; i++)
        this.weights[i] = weight;
}
}
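
The Neuron class calls into a Sigmoid helper that isn't shown above. For completeness, here is a minimal sketch of what it presumably looks like, assuming derivative() takes the pre-activation sum (which is what getDotProduct() returns at the call sites above):

public class Sigmoid {

    /**
     * Logistic activation: 1 / (1 + e^-x)
     */
    public static double activation(double x)
    {
        return 1.0 / (1.0 + Math.exp(-x));
    }

    /**
     * Derivative of the logistic function, expressed in terms of the
     * pre-activation value x (not in terms of the output)
     */
    public static double derivative(double x)
    {
        double s = activation(x);
        return s * (1.0 - s);
    }
}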

FeedForwardLayer (contains Neurons)

public class FeedForwardLayer {

/*
 * Attributes
 */

Neuron[] neurons;
LayerTypes type;

/*
 * Constructors
 */

/**
 * Layer constructor
 * @param nInputs
 * @param nNeurons
 * @param type
 */
public FeedForwardLayer(int nInputs, int nNeurons, LayerTypes type) 
{
    neurons = new Neuron[nNeurons];
    for (int i = 0; i < neurons.length; i++)
        neurons[i] = new Neuron(nInputs);

    this.type = type;
}

/*
 * Methods
 */

/**
 * Reset all weights of the layer's neurons to random values between -0.5 and 0.5
 */
public void reset()
{
    for (Neuron neuron : neurons) 
        neuron.reset();
}

/**
 * Compute the layer's outputs for the given inputs
 * @param inputs
 */
public void computeOutputs(double[] inputs)
{
    for (int i = 0; i < neurons.length; i++) 
        neurons[i].computeOutput(inputs);
}

/**
 * Compute errors, when this layer is the output one
 * @param ideals
 */
public void computeErrors(double[] ideals)
{
    for (int i = 0; i < neurons.length; i++)
        neurons[i].computeError(ideals[i]);
}

/**
 * Compute errors, when this layer isn't the output one
 * @param next the layer n+1
 */
public void computeErrors(FeedForwardLayer next)
{
    for (int i = 0; i < neurons.length; i++)
        neurons[i].computeError(next, i);
}

/**
 * Adjust weights for every neuron
 */
public void adjustWeights(double lambda, double momentum)
{
    for (Neuron neuron : neurons) 
        neuron.adjustWeights(lambda, momentum);
}

@Override
public String toString()
{
    String str = "";
    for (int i = 0; i < neurons.length; i++)
        str = str.concat("Neuron " + i + "\n" + neurons[i]);
    return str;
}

/*
 * Getters - Setters
 */

/**
 * @return true if layer is input, false otherwise
 */
public boolean isInput()
{
    return type == LayerTypes.INPUT;
}

/**
 * @return true if layer is output, false otherwise
 */
public boolean isOutput()
{
    return type == LayerTypes.OUTPUT;
}

/**
 * @return an array of layer's outputs
 */
public double[] getOutputs()
{
    double[] outputs = new double[neurons.length];

    for (int i = 0; i < neurons.length; i++) 
        outputs[i] = neurons[i].output;

    return outputs;
}

/**
 * @return array of layer's errors
 */
public double[] getErrors()
{
    double[] errors = new double[neurons.length];

    for (int i = 0; i < neurons.length; i++)
        errors[i] = neurons[i].error;

    return errors;
}

/**
 * Set all the weights of the layer to given weight
 * @param weight
 */
public void setWeights(double weight)
{
    for (int i = 0; i < neurons.length; i++)
        neurons[i].setWeights(weight);
}
}
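
The LayerTypes enum isn't shown either; given the INPUT and OUTPUT checks above and the values()[i] lookup in FeedForwardNetwork.toString() below, it is presumably declared in network order, along the lines of:

public enum LayerTypes {
    INPUT, HIDDEN, OUTPUT
}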

FeedForwardNetwork (contains FeedForwardLayers)

public class FeedForwardNetwork {

static final double lambda = 0.1;
static final double momentum = 0;

/*
 * Attributes
 */

private ArrayList<FeedForwardLayer> layers;

/*
 * Constructors
 */

public FeedForwardNetwork() 
{
    layers = new ArrayList<FeedForwardLayer>();
}

/*
 * Methods
 */

/**
 * Init all the weights to random values
 */
public void reset()
{       
    for (int i = 0; i < layers.size(); i++)
layers.get(i).reset();
}

/**
 * Compute output for all the neurons of all the layers for given inputs
 * @param inputs
 */
public void feedForward(double[] inputs)
{
    //System.err.println("FeedForwardNetwork.feedForward(" + inputs[0] + ", " + inputs[1] +")");
    for (int i = 0; i < layers.size(); i++) 
    {
        //System.err.println("\n*** COMPUTING OUTPUT FOR LAYER " + i + "***\n");
        if (layers.get(i).isInput())
            layers.get(i).computeOutputs(inputs);
        else
            layers.get(i).computeOutputs(layers.get(i - 1).getOutputs());
    }
}

/**
 * Compute errors for all the neurons of all the layers, starting from the output layer
 * @param ideals
 */
public void feedBackward(double[] ideals)
{
    //System.err.println("FeedForwardNetwork.feedBackward(" + ideals[0] + ")");
// For each layer, starting from the output one
    for (int i = layers.size() - 1; i > 0; i--) 
    {
        //System.err.println("*** COMPUTING ERROR FOR LAYER " + i + "***");
        if (layers.get(i).isOutput())
            layers.get(i).computeErrors(ideals);
        else
            layers.get(i).computeErrors(layers.get(i + 1));
    }
}

/**
 * Adjust weights of every layer
 */
public void adjustWeights()
{
    for (FeedForwardLayer feedForwardLayer : layers) 
        feedForwardLayer.adjustWeights(lambda, momentum);
}

/**
 * Train the nn with given inputs and outputs
 * @param inputs
 * @param outputs
 */
public void train(double[] inputs, double... outputs)
{
    feedForward(inputs);
    feedBackward(outputs);
    adjustWeights();
}

/**
 * Add a layer to the network
 * @param layer
 */
public void addLayer(FeedForwardLayer layer)
{
    layers.add(layer);
}

@Override
public String toString() 
{
    String str = "";
    for (int i = 0; i < layers.size(); i++)
        str = str.concat("Layer " + LayerTypes.values()[i] + "\n" + layers.get(i));

    str = str.concat("\n");
    str = str.concat("OUTPUT = " + getOutputs()[0] + "\n");
    str = str.concat("ERROR = "  + getError(false) + "\n");
    return str;
}

/*
 * Getters & Setters
 */

public FeedForwardLayer getInputLayer()
{
    return layers.get(0);
}

public FeedForwardLayer getOutputLayer()
{
    return layers.get(layers.size() - 1);
}

public FeedForwardLayer getLayer(int index)
{
    return layers.get(index);
}

public double getError(boolean abs)
{
    if (abs)
        return Math.abs(getOutputLayer().neurons[0].error);

    return getOutputLayer().neurons[0].error;
}

public double[] getOutputs()
{
    return getOutputLayer().getOutputs();
}
}

So I train the network by feeding it epochs of the XOR table:

X | Y | S
0   0   0
0   1   1
1   0   1
1   1   0
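
For context, the training loop is driven roughly like this (a sketch; the 2-2-1 topology and epoch count are illustrative assumptions, not necessarily the exact values I used):

FeedForwardNetwork nn = new FeedForwardNetwork();
nn.addLayer(new FeedForwardLayer(2, 2, LayerTypes.INPUT));
nn.addLayer(new FeedForwardLayer(2, 2, LayerTypes.HIDDEN));
nn.addLayer(new FeedForwardLayer(2, 1, LayerTypes.OUTPUT));
nn.reset(); // randomize all weights

double[][] inputs = { {0, 0}, {0, 1}, {1, 0}, {1, 1} };
double[]   ideals = {  0,      1,      1,      0     };

for (int epoch = 0; epoch < 10000; epoch++)
    for (int i = 0; i < inputs.length; i++)
        nn.train(inputs[i], ideals[i]);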

After thousands of epochs, the network outputs roughly 0.5... The funny thing is that if I replace the training set with the AND table, the OR table, or the NAND table, the NN outputs the proportion of 1s in the S column of the training set (it outputs 0.25 for the AND and NAND tables and 0.75 for the OR table).

I just want to know whether my implementation is good enough to make this work. Thanks!

So, after some research, I realized that my implementation was fine; I just didn't understand how the input layer works. That's it: the input layer works like In = Out.
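
In other words, the input layer must not squash its values through the sigmoid; each of its neurons should simply forward its input unchanged. A sketch of the corresponding change in FeedForwardLayer.computeOutputs() (one way to express the fix, not necessarily the exact patch I applied):

/**
 * Compute the layer's outputs; the input layer passes its inputs
 * through unchanged (In = Out), only deeper layers apply the sigmoid.
 */
public void computeOutputs(double[] inputs)
{
    for (int i = 0; i < neurons.length; i++)
    {
        if (isInput())
            neurons[i].output = inputs[i]; // identity: one neuron per input
        else
            neurons[i].computeOutput(inputs);
    }
}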