为什么我的基因 AI 无法解决 XOR 问题?

Why is my genetic AI unable to solve the XOR problem?

我是 AI 的新手,但我已经用 Python 编写了遗传 AI,因此想挑战自己用 C 从头开始​​编写 AI。为了利用面向对象的优势,我目前仍在用 C++ 编写它,但如果它真的有效,那么我想重写结构中的所有 类,但这就是为什么很多是用 C 而不是 C++ 风格编写的(输出,随机数.. .).我想从 XOR 问题开始,但我很难得到正确的结果,但我不知道为什么。所有 4 个选项的输出都相同。

例如

0 0 -> 0.34

0 1 -> 0.34

1 0 -> 0.34

1 1 -> 0.34

谁能找到解决办法?

我也很感激 C/C++ 中有关遗传神经网络的资源,如果有人能找到的话。

#include <vector>
#include <cstdlib>
#include <ctime>
#include <cstdio>
#include <cmath>

using std::vector;

double random_double(double min, double max) {
    double f = (double) rand() / RAND_MAX;
    return min + f * (max - min);
}

int random_int(int min, int max) {
    return (int) random_double((float) min, (float) max + 1);
}

double sig(double x) {
    return 1 / (1 + exp(-x));
}

class Settings {
public:
    int n_pop = 100;
    int n_iter = 500;
    int n_selection = 5;
    double r_cross = 0.9;
    double r_mut = 0.2;

    int inputs;
    int outputs;
    int hidden_layers;
    int *n_hidden;

    Settings(int inputs, int outputs, int hidden_layers, int *n_hidden) : inputs(inputs), outputs(outputs),
                                                                          hidden_layers(hidden_layers),
                                                                          n_hidden(n_hidden) {}
};

class Network {
    vector<double> _nodes;

    int inputs, hidden_layers, outputs, n_nodes;
    int *hidden_nodes;
    vector<int> layers;

public:
    double fitness = 0;
    vector<double> edges;
    vector<double> biases;

    Network(int inputs, int hidden_layers, int *hidden_nodes, int outputs) : inputs(inputs),
                                                                             hidden_layers(hidden_layers),
                                                                             hidden_nodes(hidden_nodes),
                                                                             outputs(outputs) {
        n_nodes = inputs + outputs;
        for (int i = 0; i < hidden_layers; i++) {
            n_nodes += hidden_nodes[i];
        }
    }

    void print() {
        printf("Nodes:\n");
        for (int i = 0; i < n_nodes; i++) {
            printf("%.2lf ", _nodes[i]);
        }
        printf("\n\nEdges:\n");
        for (double edge : edges) {
            printf("%.2lf ", edge);
        }
        printf("\n\nBiases:\n");
        for (double bias : biases) {
            printf("%.2lf ", bias);
        }
        printf("\n");
    }

    void output() {
        double in[2] = {0, 0};
        printf("\n0 : 0 -> %.2lf\n", forward(in)[0]);
        in[1] = 1;
        printf("0 : 1 -> %.2lf\n", forward(in)[0]);
        in[0] = 1;
        in[1] = 0;
        printf("1 : 0 -> %.2lf\n", forward(in)[0]);
        in[1] = 1;
        printf("1 : 1 -> %.2lf\n", forward(in)[0]);
    }

    void generate() {
        layers.push_back(0);
        for (int i = 0; i < inputs; i++) {
            layers[0]++;
            _nodes.emplace_back(0);
        }

        for (int l = 0; l < hidden_layers; l++) {
            layers.push_back(0);
            for (int i = 0; i < hidden_nodes[l]; i++) {
                layers[l + 1]++;
                _nodes.emplace_back(0);
                biases.push_back(0.1);
            }
        }

        layers.push_back(0);
        for (int i = 0; i < outputs; i++) {
            _nodes.emplace_back(0);
            biases.push_back(0.1);
            layers[hidden_layers + 1]++;
        }

        for (int layer = 0; layer < layers.size() - 1; layer++) {
            for (int l = 0; l < layers[layer]; l++) {
                for (int r = 0; r < layers[layer + 1]; r++) {
                    edges.push_back(random_double(-1, 1));
                }
            }
        }
    }

    vector<double> forward(const double *in) {
        for (int i = 0; i < inputs; i++)
            _nodes[i] = in[i];

        double x;
        int edge = 0, node = 0;

        for (int l = 0; l < layers.size() - 1; l++) {
            for (int i = 0; i < layers[l + 1]; i++) {
                x = 0;
                for (int j = 0; j < layers[l]; j++) {
                    x += edges[edge] * _nodes[node];
                    edge++;
                }
                _nodes[node + inputs] = sig(x + biases[node]);
                node++;
            }
        }

        vector<double> ret = {};
        for (int i = n_nodes - outputs - 1; i < n_nodes; i++)
            ret.push_back(_nodes[i]);
        return ret;
    }

    void change_weight() {
        int type = random_int(0, 1);
        if (type == 0) {
            for (int e = 0; e < edges.size(); e++) {
                if (random_int(0, 1) == 1)
                    edges[e] += random_double(-1, 1);
            }
        } else {
            for (int e = 0; e < edges.size(); e++) {
                if (random_int(0, 1) == 1)
                    edges[e] = random_double(-1, 1);
            }
        }
    }

    void change_bias() {
        int type = random_int(0, 1);
        if (type == 0) {
            for (int b = 0; b < biases.size(); b++) {
                if (random_int(0, 1) == 1)
                    biases[b] += random_double(-1, 1);
            }
        } else {
            for (int b = 0; b < biases.size(); b++) {
                if (random_int(0, 1) == 1)
                    biases[b] = random_double(-1, 1);
            }
        }
    }
};

Network selection(vector<Network> pop, Settings *settings) {
    int x = rand() % (settings->n_pop - settings->n_selection);
    int selected = x;
    for (int i = x + 1; i < x + settings->n_selection; i++) {
        if (pop[i].fitness < pop[selected].fitness)
            selected = i;
    }
    return pop[selected];
}

vector<Network> crossover(Network *p1, Network *p2, Settings *settings) {
    Network c1(*p1);
    Network c2(*p2);

    double x = random_double(0, 1);
    if (x <= settings->r_cross) {
        int edge_size = (int) p1->edges.size();
        int s = random_int(0, edge_size - 1);
        if (random_int(1, 2) == 1) {
            for (int i = 0; i < s; i++) {
                c1.edges[i] = p1->edges[i];
                c2.edges[i] = p2->edges[i];
            }
            for (int i = s; i < edge_size; i++) {
                c1.edges[i] = p2->edges[i];
                c2.edges[i] = p1->edges[i];
            }
        } else {
            for (int i = 0; i < s; i++) {
                c1.edges[i] = p2->edges[i];
                c2.edges[i] = p1->edges[i];
            }
            for (int i = s; i < edge_size; i++) {
                c1.edges[i] = p1->edges[i];
                c2.edges[i] = p2->edges[i];
            }
        }
    }
    vector<Network> ret;
    ret.emplace_back(c1);
    ret.emplace_back(c2);
    return ret;
}

void mutate(Network *n, Settings *settings) {
    double x = random_double(0, 1);
    if (x <= settings->r_mut) {
        switch (random_int(1, 2)) {
            case 1:
                n->change_weight();
                break;
            case 2:
                n->change_bias();
        }
    }
}

double distance(double x1, double x2) {
    if (x1 > x2)
        return x1 - x2;
    else
        return x2 - x1;
}

double x_or(double r1, double r2, double r3, double r4) {
    double ret = 0;
    ret += distance(0, r1);
    ret += distance(1, r2);
    ret += distance(1, r3);
    ret += distance(0, r4);
    return ret;
}

void evolve(Network *n) {
    double in[2] = {0.0, 1.0};
    double r1 = n->forward(in)[0]; //0,1
    in[0] = 1.0;
    double r2 = n->forward(in)[0]; //1,1
    in[1] = 0.0;
    double r3 = n->forward(in)[0]; //1,0
    in[0] = 0.0;
    double r4 = n->forward(in)[0]; //0,0
    n->fitness = x_or(r4, r1, r3, r2);
}

Network genetic_algorithm(Settings *settings) {
    vector<Network> pop;
    vector<Network> selected;
    vector<Network> children;
    for (int i = 0; i < settings->n_pop; i++) {
        Network n = Network(settings->inputs, settings->hidden_layers, settings->n_hidden, settings->outputs);
        n.generate();
        pop.emplace_back(n);
        selected.emplace_back(n);
        children.emplace_back(n);
    }

    Network best = pop[0];
    evolve(&best);

    for (int gen = 0; gen < settings->n_iter; gen++) {
        printf("GEN: %d - %0.3lf\n", gen, best.fitness);
        for (int i = 0; i < settings->n_pop; i++) {
            evolve(&pop[i]);
            if (pop[i].fitness < best.fitness)
                best = pop[i];
        }

        for (int i = 0; i < settings->n_pop; i++) {
            selected[i] = selection(pop, settings);
        }

        for (int i = 0; i < settings->n_pop; i += 2) {
            auto ch = crossover(&selected[i], &selected[i + 1], settings);
            for (int c = 0; c < 2; c++) {
                mutate(&ch[c], settings);
                children[i + c] = ch[c];
            }
            children[i] = ch[0];
            children[i + 1] = ch[1];
        }

        pop = children;
    }
    return best;
}

int main() {
    srand(time(nullptr));
    int hidden[2] = {5, 5};
    Settings s = Settings(2, 1, 2, hidden);
    auto best = genetic_algorithm(&s);
    best.print();
    best.output();
}

forward函数似乎有误。检查这部分:

double x;
int edge = 0, node = 0;

for (int l = 0; l < layers.size() - 1; l++) {
    for (int i = 0; i < layers[l + 1]; i++) {
        x = 0;
        for (int j = 0; j < layers[l]; j++) {
            x += edges[edge] * _nodes[node];
            edge++;
        }
        _nodes[node + inputs] = sig(x + biases[node]);
        node++;
    }
}

它只使用_nodes[node](和一些边权重)来计算x。然后将结果赋值给_nodes[node + inputs]。所以,_nodes[node + inputs]的值只是来自_nodes[node]的值,而不是来自上一层的所有节点。

似乎对 node 变量的内容存在混淆。如果它包含正在计算其新值的节点的索引(node + inputs),那么您应该更改

x += edges[edge] * _nodes[node];

使用最后一层的所有值的行。

您可以这样做,例如,通过保留上一层的第一个节点的索引并在循环内向其添加 j