How do I copy weights and biases from a larger layer to a smaller layer (or vice versa)?
I'm working on a genetic algorithm. I want the model's size to be able to change through mutation: adding or removing layers and changing the number of neurons. But that runs me into the problem of how to perform crossover between models of different sizes.
I do have a (terrible) solution, but I wanted to ask whether any publicly known methods have already been developed for this kind of thing.
By the way, I'm doing this in Keras.
child is the Keras model to be returned; the two parent models are the ones being bred.
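To make the question concrete, here is a minimal sketch of what I mean by copying between two Dense layers of different widths; the layer sizes and the copy_overlap helper are just for illustration (slice out the overlapping block of the kernel and bias and leave the rest alone):

from tensorflow import keras

def copy_overlap(src_layer, dst_layer):
    # Copy the overlapping block of kernel and bias from src_layer into dst_layer.
    src_w, src_b = src_layer.get_weights()
    dst_w, dst_b = dst_layer.get_weights()
    rows = min(src_w.shape[0], dst_w.shape[0])   # overlap along the input dimension
    cols = min(src_w.shape[1], dst_w.shape[1])   # overlap along the unit (output) dimension
    dst_w[:rows, :cols] = src_w[:rows, :cols]
    dst_b[:cols] = src_b[:cols]
    dst_layer.set_weights([dst_w, dst_b])

# Two stand-alone Dense layers of different widths (example sizes only).
big = keras.layers.Dense(8)
small = keras.layers.Dense(5)
big.build((None, 10))
small.build((None, 10))

copy_overlap(big, small)   # larger -> smaller: the extra units are simply dropped
copy_overlap(small, big)   # smaller -> larger: the extra units keep their old values

My actual crossover/mutation function is below.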
from random import random, randint, uniform  # needed for the random(), randint() and uniform() calls below

def slow_crossover(self, parent1, parent2, child_num):
    '''A crossover/mutation function designed to work with models that can change sizes.'''
    if self.additional_info: print(f"================================\nChild {child_num}:")
    #
    # Crossover
    #
    # Get all genes from parent2
    # This will prevent the model from trying to take a gene section from parent2 but it being the wrong size
    p2_genes = []  # [weights, biases]
    for layer in parent2.layers:
        # Get weight/bias data and empty lists to store genes
        p2_data = layer.get_weights()
        weight, bias = [], []
        # Get the weight genes
        for x in range(p2_data[0].shape[0]):
            for y in range(self.gene_size, p2_data[0].shape[1], self.gene_size):
                weight.append(p2_data[0][x][(y-self.gene_size):y])
        # Get the bias genes
        for x in range(self.gene_size, p2_data[1].shape[0], self.gene_size):
            bias.append(p2_data[1][(x-self.gene_size):x])
        p2_genes.append([weight, bias])
    # Crossover genes
    child_crossover = []
    for i in range(len(parent1.layers)):
        # Get weights and biases of the parents
        # p1_data acts as the base for the child
        p1_data = parent1.layers[i].get_weights()
        # The layer we use for p2, since they might have different numbers of layers
        p2_layer = int(i * len(parent2.layers) / len(parent1.layers))
        # Handle the weights
        for x in range(p1_data[0].shape[0]):
            for y in range(self.gene_size, p1_data[0].shape[1], self.gene_size):
                # Check to see if crossover should occur
                # Make sure there are genes available to be used
                try:
                    if len(p2_genes[p2_layer][0]) and (random() < self.crossover_rate):
                        p1_data[0][x][(y-self.gene_size):y] = p2_genes[p2_layer][0][int((y / p1_data[0].shape[1]) * len(p2_genes[p2_layer][0]))]
                except:
                    print(f"\nFailed to crossover weight. (list index out of range? -> {p2_layer}, {len(p2_genes)}, {i}, {len(parent1.layers)}, {len(parent2.layers)})\n")
        # Handle the biases
        # Check to see if crossover should occur
        try:
            if len(p2_genes[p2_layer][1]) and (random() < self.crossover_rate):
                p1_data[1][(y-self.gene_size):y] = p2_genes[p2_layer][1][int((y / p1_data[1].shape[0]) * len(p2_genes[p2_layer][1]))]
        except:
            print(f"\nFailed to crossover bias. (list index out of range? -> {p2_layer}, {len(p2_genes)}, {i}, {len(parent1.layers)}, {len(parent2.layers)})\n")
        # Collect the layer data after crossover
        child_crossover.append(p1_data)
    #
    # Mutate
    #
    # Value lists
    modded_layer = [False for i in range(len(child_crossover))]
    hidden_layers = []
    #
    # Mutate number of neurons
    for i in range(len(child_crossover) - 1):
        num_neurons = child_crossover[i][0].shape[1]
        # Check to see if the size of this layer will mutate
        if (random() < self.mutation_rate):
            num_neurons += 1 if (random() > 0.5) else -1
            if self.additional_info: print("Neuron count changed!")
        hidden_layers.append(num_neurons)
    #
    # Mutate number of hidden layers
    if (random() < self.mutation_rate):
        # Remove layer
        if len(hidden_layers) and (random() > 0.5):
            # Choose layer to remove
            location = randint(0, len(hidden_layers)-1)
            del hidden_layers[location]
            # We've removed it, so we don't want to try to copy it
            modded_layer.insert(location, True)
            if self.additional_info: print("Removed hidden layer!")
        # Add layer
        else:
            # Choose where to insert the new layer and how many neurons it should have
            location = randint(0, len(hidden_layers))
            num_neurons = randint(1, 10)
            # Insert layer
            hidden_layers.insert(location, num_neurons)
            modded_layer.insert(location, True)
            if self.additional_info: print("Added hidden layer!")
    #
    # Copy weights and biases, then mutate individual weights and biases
    # LinearNet.linear_QNet is my own helper that builds a new Keras model with the given input/output sizes and hidden layers
    child = LinearNet.linear_QNet(child_crossover[0][0].shape[0], child_crossover[-1][0].shape[1], hidden_layers=hidden_layers, random_model=False)
    p_counter = 0
    for i in range(len(child.layers)):
        # Copy old weight and bias values over to new model and mutate them, if it's not a new layer
        child_data = child.layers[i].get_weights()
        if not modded_layer[i]:
            # Copy only the overlapping region, whichever side is smaller in each dimension
            _x = child_data[0].shape[0] if child_data[0].shape[0] < child_crossover[p_counter][0].shape[0] else child_crossover[p_counter][0].shape[0]
            _y = child_data[0].shape[1] if child_data[0].shape[1] < child_crossover[p_counter][0].shape[1] else child_crossover[p_counter][0].shape[1]
            child_data[0][0:_x, 0:_y] = child_crossover[p_counter][0][0:_x, 0:_y]
            child_data[1][0:_y] = child_crossover[p_counter][1][0:_y]
            for x in range(_x):
                # Check for weight mutation
                for y in range(_y):
                    if (random() < self.mutation_rate):
                        child_data[0][x][y] += uniform(-self.mutation_degree, self.mutation_degree)
                    # Check for bias mutation
                    if ((len(child.layers) - i) - 1) and (random() < self.mutation_rate):
                        child_data[1][y] += uniform(-self.mutation_degree, self.mutation_degree)
            p_counter += 1
        # Set weights and biases in child
        child.layers[i].build(input_shape=child_data[0].shape[0])
        child.layers[i].set_weights(child_data)
    print(f"Agent {i}")
    [print(f"Layer {j}: {layer.get_weights()[0].shape}") for j, layer in enumerate(child.layers)]
    print("")
    return child
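For reference, the proportional layer-index mapping I use above (so that parents with different numbers of layers can still be paired up) behaves like this; the layer counts here are made-up examples:

# Toy check of the layer mapping: parent1 with 5 layers, parent2 with 3 (hypothetical sizes).
p1_layers, p2_layers = 5, 3
for i in range(p1_layers):
    p2_layer = int(i * p2_layers / p1_layers)
    print(f"parent1 layer {i} -> parent2 layer {p2_layer}")
# Prints: 0->0, 1->0, 2->1, 3->1, 4->2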