为什么我的基因 AI 无法解决 XOR 问题?
Why is my genetic AI unable to solve the XOR problem?
我是 AI 的新手,但我已经用 Python 编写了遗传 AI,因此想挑战自己用 C 从头开始编写 AI。为了利用面向对象的优势,我目前仍在用 C++ 编写它,但如果它真的有效,那么我想重写结构中的所有 类,但这就是为什么很多是用 C 而不是 C++ 风格编写的(输出,随机数.. .).我想从 XOR 问题开始,但我很难得到正确的结果,但我不知道为什么。所有 4 个选项的输出都相同。
例如
0 0 -> 0.34
0 1 -> 0.34
1 0 -> 0.34
1 1 -> 0.34
谁能找到解决办法?
我也很感激 C/C++ 中有关遗传神经网络的资源,如果有人能找到的话。
#include <vector>
#include <cstdlib>
#include <ctime>
#include <cstdio>
#include <cmath>
using std::vector;
double random_double(double min, double max) {
double f = (double) rand() / RAND_MAX;
return min + f * (max - min);
}
int random_int(int min, int max) {
return (int) random_double((float) min, (float) max + 1);
}
double sig(double x) {
return 1 / (1 + exp(-x));
}
class Settings {
public:
int n_pop = 100;
int n_iter = 500;
int n_selection = 5;
double r_cross = 0.9;
double r_mut = 0.2;
int inputs;
int outputs;
int hidden_layers;
int *n_hidden;
Settings(int inputs, int outputs, int hidden_layers, int *n_hidden) : inputs(inputs), outputs(outputs),
hidden_layers(hidden_layers),
n_hidden(n_hidden) {}
};
class Network {
vector<double> _nodes;
int inputs, hidden_layers, outputs, n_nodes;
int *hidden_nodes;
vector<int> layers;
public:
double fitness = 0;
vector<double> edges;
vector<double> biases;
Network(int inputs, int hidden_layers, int *hidden_nodes, int outputs) : inputs(inputs),
hidden_layers(hidden_layers),
hidden_nodes(hidden_nodes),
outputs(outputs) {
n_nodes = inputs + outputs;
for (int i = 0; i < hidden_layers; i++) {
n_nodes += hidden_nodes[i];
}
}
void print() {
printf("Nodes:\n");
for (int i = 0; i < n_nodes; i++) {
printf("%.2lf ", _nodes[i]);
}
printf("\n\nEdges:\n");
for (double edge : edges) {
printf("%.2lf ", edge);
}
printf("\n\nBiases:\n");
for (double bias : biases) {
printf("%.2lf ", bias);
}
printf("\n");
}
void output() {
double in[2] = {0, 0};
printf("\n0 : 0 -> %.2lf\n", forward(in)[0]);
in[1] = 1;
printf("0 : 1 -> %.2lf\n", forward(in)[0]);
in[0] = 1;
in[1] = 0;
printf("1 : 0 -> %.2lf\n", forward(in)[0]);
in[1] = 1;
printf("1 : 1 -> %.2lf\n", forward(in)[0]);
}
void generate() {
layers.push_back(0);
for (int i = 0; i < inputs; i++) {
layers[0]++;
_nodes.emplace_back(0);
}
for (int l = 0; l < hidden_layers; l++) {
layers.push_back(0);
for (int i = 0; i < hidden_nodes[l]; i++) {
layers[l + 1]++;
_nodes.emplace_back(0);
biases.push_back(0.1);
}
}
layers.push_back(0);
for (int i = 0; i < outputs; i++) {
_nodes.emplace_back(0);
biases.push_back(0.1);
layers[hidden_layers + 1]++;
}
for (int layer = 0; layer < layers.size() - 1; layer++) {
for (int l = 0; l < layers[layer]; l++) {
for (int r = 0; r < layers[layer + 1]; r++) {
edges.push_back(random_double(-1, 1));
}
}
}
}
vector<double> forward(const double *in) {
for (int i = 0; i < inputs; i++)
_nodes[i] = in[i];
double x;
int edge = 0, node = 0;
for (int l = 0; l < layers.size() - 1; l++) {
for (int i = 0; i < layers[l + 1]; i++) {
x = 0;
for (int j = 0; j < layers[l]; j++) {
x += edges[edge] * _nodes[node];
edge++;
}
_nodes[node + inputs] = sig(x + biases[node]);
node++;
}
}
vector<double> ret = {};
for (int i = n_nodes - outputs - 1; i < n_nodes; i++)
ret.push_back(_nodes[i]);
return ret;
}
void change_weight() {
int type = random_int(0, 1);
if (type == 0) {
for (int e = 0; e < edges.size(); e++) {
if (random_int(0, 1) == 1)
edges[e] += random_double(-1, 1);
}
} else {
for (int e = 0; e < edges.size(); e++) {
if (random_int(0, 1) == 1)
edges[e] = random_double(-1, 1);
}
}
}
void change_bias() {
int type = random_int(0, 1);
if (type == 0) {
for (int b = 0; b < biases.size(); b++) {
if (random_int(0, 1) == 1)
biases[b] += random_double(-1, 1);
}
} else {
for (int b = 0; b < biases.size(); b++) {
if (random_int(0, 1) == 1)
biases[b] = random_double(-1, 1);
}
}
}
};
Network selection(vector<Network> pop, Settings *settings) {
int x = rand() % (settings->n_pop - settings->n_selection);
int selected = x;
for (int i = x + 1; i < x + settings->n_selection; i++) {
if (pop[i].fitness < pop[selected].fitness)
selected = i;
}
return pop[selected];
}
vector<Network> crossover(Network *p1, Network *p2, Settings *settings) {
Network c1(*p1);
Network c2(*p2);
double x = random_double(0, 1);
if (x <= settings->r_cross) {
int edge_size = (int) p1->edges.size();
int s = random_int(0, edge_size - 1);
if (random_int(1, 2) == 1) {
for (int i = 0; i < s; i++) {
c1.edges[i] = p1->edges[i];
c2.edges[i] = p2->edges[i];
}
for (int i = s; i < edge_size; i++) {
c1.edges[i] = p2->edges[i];
c2.edges[i] = p1->edges[i];
}
} else {
for (int i = 0; i < s; i++) {
c1.edges[i] = p2->edges[i];
c2.edges[i] = p1->edges[i];
}
for (int i = s; i < edge_size; i++) {
c1.edges[i] = p1->edges[i];
c2.edges[i] = p2->edges[i];
}
}
}
vector<Network> ret;
ret.emplace_back(c1);
ret.emplace_back(c2);
return ret;
}
void mutate(Network *n, Settings *settings) {
double x = random_double(0, 1);
if (x <= settings->r_mut) {
switch (random_int(1, 2)) {
case 1:
n->change_weight();
break;
case 2:
n->change_bias();
}
}
}
double distance(double x1, double x2) {
if (x1 > x2)
return x1 - x2;
else
return x2 - x1;
}
double x_or(double r1, double r2, double r3, double r4) {
double ret = 0;
ret += distance(0, r1);
ret += distance(1, r2);
ret += distance(1, r3);
ret += distance(0, r4);
return ret;
}
void evolve(Network *n) {
double in[2] = {0.0, 1.0};
double r1 = n->forward(in)[0]; //0,1
in[0] = 1.0;
double r2 = n->forward(in)[0]; //1,1
in[1] = 0.0;
double r3 = n->forward(in)[0]; //1,0
in[0] = 0.0;
double r4 = n->forward(in)[0]; //0,0
n->fitness = x_or(r4, r1, r3, r2);
}
Network genetic_algorithm(Settings *settings) {
vector<Network> pop;
vector<Network> selected;
vector<Network> children;
for (int i = 0; i < settings->n_pop; i++) {
Network n = Network(settings->inputs, settings->hidden_layers, settings->n_hidden, settings->outputs);
n.generate();
pop.emplace_back(n);
selected.emplace_back(n);
children.emplace_back(n);
}
Network best = pop[0];
evolve(&best);
for (int gen = 0; gen < settings->n_iter; gen++) {
printf("GEN: %d - %0.3lf\n", gen, best.fitness);
for (int i = 0; i < settings->n_pop; i++) {
evolve(&pop[i]);
if (pop[i].fitness < best.fitness)
best = pop[i];
}
for (int i = 0; i < settings->n_pop; i++) {
selected[i] = selection(pop, settings);
}
for (int i = 0; i < settings->n_pop; i += 2) {
auto ch = crossover(&selected[i], &selected[i + 1], settings);
for (int c = 0; c < 2; c++) {
mutate(&ch[c], settings);
children[i + c] = ch[c];
}
children[i] = ch[0];
children[i + 1] = ch[1];
}
pop = children;
}
return best;
}
int main() {
srand(time(nullptr));
int hidden[2] = {5, 5};
Settings s = Settings(2, 1, 2, hidden);
auto best = genetic_algorithm(&s);
best.print();
best.output();
}
forward
函数似乎有误。检查这部分:
double x;
int edge = 0, node = 0;
for (int l = 0; l < layers.size() - 1; l++) {
for (int i = 0; i < layers[l + 1]; i++) {
x = 0;
for (int j = 0; j < layers[l]; j++) {
x += edges[edge] * _nodes[node];
edge++;
}
_nodes[node + inputs] = sig(x + biases[node]);
node++;
}
}
它只使用_nodes[node]
(和一些边权重)来计算x
。然后将结果赋值给_nodes[node + inputs]
。所以,_nodes[node + inputs]
的值只是来自_nodes[node]
的值,而不是来自上一层的所有节点。
似乎对 node
变量的内容存在混淆。如果它包含正在计算其新值的节点的索引(node + inputs
),那么您应该更改
x += edges[edge] * _nodes[node];
使用最后一层的所有值的行。
您可以这样做,例如,通过保留上一层的第一个节点的索引并在循环内向其添加 j
。
我是 AI 的新手,但我已经用 Python 编写了遗传 AI,因此想挑战自己用 C 从头开始编写 AI。为了利用面向对象的优势,我目前仍在用 C++ 编写它,但如果它真的有效,那么我想重写结构中的所有 类,但这就是为什么很多是用 C 而不是 C++ 风格编写的(输出,随机数.. .).我想从 XOR 问题开始,但我很难得到正确的结果,但我不知道为什么。所有 4 个选项的输出都相同。
例如
0 0 -> 0.34
0 1 -> 0.34
1 0 -> 0.34
1 1 -> 0.34
谁能找到解决办法?
我也很感激 C/C++ 中有关遗传神经网络的资源,如果有人能找到的话。
#include <vector>
#include <cstdlib>
#include <ctime>
#include <cstdio>
#include <cmath>
using std::vector;
double random_double(double min, double max) {
double f = (double) rand() / RAND_MAX;
return min + f * (max - min);
}
int random_int(int min, int max) {
return (int) random_double((float) min, (float) max + 1);
}
double sig(double x) {
return 1 / (1 + exp(-x));
}
class Settings {
public:
int n_pop = 100;
int n_iter = 500;
int n_selection = 5;
double r_cross = 0.9;
double r_mut = 0.2;
int inputs;
int outputs;
int hidden_layers;
int *n_hidden;
Settings(int inputs, int outputs, int hidden_layers, int *n_hidden) : inputs(inputs), outputs(outputs),
hidden_layers(hidden_layers),
n_hidden(n_hidden) {}
};
class Network {
vector<double> _nodes;
int inputs, hidden_layers, outputs, n_nodes;
int *hidden_nodes;
vector<int> layers;
public:
double fitness = 0;
vector<double> edges;
vector<double> biases;
Network(int inputs, int hidden_layers, int *hidden_nodes, int outputs) : inputs(inputs),
hidden_layers(hidden_layers),
hidden_nodes(hidden_nodes),
outputs(outputs) {
n_nodes = inputs + outputs;
for (int i = 0; i < hidden_layers; i++) {
n_nodes += hidden_nodes[i];
}
}
void print() {
printf("Nodes:\n");
for (int i = 0; i < n_nodes; i++) {
printf("%.2lf ", _nodes[i]);
}
printf("\n\nEdges:\n");
for (double edge : edges) {
printf("%.2lf ", edge);
}
printf("\n\nBiases:\n");
for (double bias : biases) {
printf("%.2lf ", bias);
}
printf("\n");
}
void output() {
double in[2] = {0, 0};
printf("\n0 : 0 -> %.2lf\n", forward(in)[0]);
in[1] = 1;
printf("0 : 1 -> %.2lf\n", forward(in)[0]);
in[0] = 1;
in[1] = 0;
printf("1 : 0 -> %.2lf\n", forward(in)[0]);
in[1] = 1;
printf("1 : 1 -> %.2lf\n", forward(in)[0]);
}
void generate() {
layers.push_back(0);
for (int i = 0; i < inputs; i++) {
layers[0]++;
_nodes.emplace_back(0);
}
for (int l = 0; l < hidden_layers; l++) {
layers.push_back(0);
for (int i = 0; i < hidden_nodes[l]; i++) {
layers[l + 1]++;
_nodes.emplace_back(0);
biases.push_back(0.1);
}
}
layers.push_back(0);
for (int i = 0; i < outputs; i++) {
_nodes.emplace_back(0);
biases.push_back(0.1);
layers[hidden_layers + 1]++;
}
for (int layer = 0; layer < layers.size() - 1; layer++) {
for (int l = 0; l < layers[layer]; l++) {
for (int r = 0; r < layers[layer + 1]; r++) {
edges.push_back(random_double(-1, 1));
}
}
}
}
vector<double> forward(const double *in) {
for (int i = 0; i < inputs; i++)
_nodes[i] = in[i];
double x;
int edge = 0, node = 0;
for (int l = 0; l < layers.size() - 1; l++) {
for (int i = 0; i < layers[l + 1]; i++) {
x = 0;
for (int j = 0; j < layers[l]; j++) {
x += edges[edge] * _nodes[node];
edge++;
}
_nodes[node + inputs] = sig(x + biases[node]);
node++;
}
}
vector<double> ret = {};
for (int i = n_nodes - outputs - 1; i < n_nodes; i++)
ret.push_back(_nodes[i]);
return ret;
}
void change_weight() {
int type = random_int(0, 1);
if (type == 0) {
for (int e = 0; e < edges.size(); e++) {
if (random_int(0, 1) == 1)
edges[e] += random_double(-1, 1);
}
} else {
for (int e = 0; e < edges.size(); e++) {
if (random_int(0, 1) == 1)
edges[e] = random_double(-1, 1);
}
}
}
void change_bias() {
int type = random_int(0, 1);
if (type == 0) {
for (int b = 0; b < biases.size(); b++) {
if (random_int(0, 1) == 1)
biases[b] += random_double(-1, 1);
}
} else {
for (int b = 0; b < biases.size(); b++) {
if (random_int(0, 1) == 1)
biases[b] = random_double(-1, 1);
}
}
}
};
Network selection(vector<Network> pop, Settings *settings) {
int x = rand() % (settings->n_pop - settings->n_selection);
int selected = x;
for (int i = x + 1; i < x + settings->n_selection; i++) {
if (pop[i].fitness < pop[selected].fitness)
selected = i;
}
return pop[selected];
}
vector<Network> crossover(Network *p1, Network *p2, Settings *settings) {
Network c1(*p1);
Network c2(*p2);
double x = random_double(0, 1);
if (x <= settings->r_cross) {
int edge_size = (int) p1->edges.size();
int s = random_int(0, edge_size - 1);
if (random_int(1, 2) == 1) {
for (int i = 0; i < s; i++) {
c1.edges[i] = p1->edges[i];
c2.edges[i] = p2->edges[i];
}
for (int i = s; i < edge_size; i++) {
c1.edges[i] = p2->edges[i];
c2.edges[i] = p1->edges[i];
}
} else {
for (int i = 0; i < s; i++) {
c1.edges[i] = p2->edges[i];
c2.edges[i] = p1->edges[i];
}
for (int i = s; i < edge_size; i++) {
c1.edges[i] = p1->edges[i];
c2.edges[i] = p2->edges[i];
}
}
}
vector<Network> ret;
ret.emplace_back(c1);
ret.emplace_back(c2);
return ret;
}
void mutate(Network *n, Settings *settings) {
double x = random_double(0, 1);
if (x <= settings->r_mut) {
switch (random_int(1, 2)) {
case 1:
n->change_weight();
break;
case 2:
n->change_bias();
}
}
}
double distance(double x1, double x2) {
if (x1 > x2)
return x1 - x2;
else
return x2 - x1;
}
double x_or(double r1, double r2, double r3, double r4) {
double ret = 0;
ret += distance(0, r1);
ret += distance(1, r2);
ret += distance(1, r3);
ret += distance(0, r4);
return ret;
}
void evolve(Network *n) {
double in[2] = {0.0, 1.0};
double r1 = n->forward(in)[0]; //0,1
in[0] = 1.0;
double r2 = n->forward(in)[0]; //1,1
in[1] = 0.0;
double r3 = n->forward(in)[0]; //1,0
in[0] = 0.0;
double r4 = n->forward(in)[0]; //0,0
n->fitness = x_or(r4, r1, r3, r2);
}
Network genetic_algorithm(Settings *settings) {
vector<Network> pop;
vector<Network> selected;
vector<Network> children;
for (int i = 0; i < settings->n_pop; i++) {
Network n = Network(settings->inputs, settings->hidden_layers, settings->n_hidden, settings->outputs);
n.generate();
pop.emplace_back(n);
selected.emplace_back(n);
children.emplace_back(n);
}
Network best = pop[0];
evolve(&best);
for (int gen = 0; gen < settings->n_iter; gen++) {
printf("GEN: %d - %0.3lf\n", gen, best.fitness);
for (int i = 0; i < settings->n_pop; i++) {
evolve(&pop[i]);
if (pop[i].fitness < best.fitness)
best = pop[i];
}
for (int i = 0; i < settings->n_pop; i++) {
selected[i] = selection(pop, settings);
}
for (int i = 0; i < settings->n_pop; i += 2) {
auto ch = crossover(&selected[i], &selected[i + 1], settings);
for (int c = 0; c < 2; c++) {
mutate(&ch[c], settings);
children[i + c] = ch[c];
}
children[i] = ch[0];
children[i + 1] = ch[1];
}
pop = children;
}
return best;
}
int main() {
srand(time(nullptr));
int hidden[2] = {5, 5};
Settings s = Settings(2, 1, 2, hidden);
auto best = genetic_algorithm(&s);
best.print();
best.output();
}
forward
函数似乎有误。检查这部分:
double x;
int edge = 0, node = 0;
for (int l = 0; l < layers.size() - 1; l++) {
for (int i = 0; i < layers[l + 1]; i++) {
x = 0;
for (int j = 0; j < layers[l]; j++) {
x += edges[edge] * _nodes[node];
edge++;
}
_nodes[node + inputs] = sig(x + biases[node]);
node++;
}
}
它只使用_nodes[node]
(和一些边权重)来计算x
。然后将结果赋值给_nodes[node + inputs]
。所以,_nodes[node + inputs]
的值只是来自_nodes[node]
的值,而不是来自上一层的所有节点。
似乎对 node
变量的内容存在混淆。如果它包含正在计算其新值的节点的索引(node + inputs
),那么您应该更改
x += edges[edge] * _nodes[node];
使用最后一层的所有值的行。
您可以这样做,例如,通过保留上一层的第一个节点的索引并在循环内向其添加 j
。