How do I copy weights and biases from a larger layer to a smaller layer (or vice versa)?

I'm working on genetic algorithms. I want the model's size to be able to change through mutation, adding or removing layers and changing the number of neurons. But that runs me into the problem of how to perform crossover between models of different sizes.

I do have a (rather ugly) working solution, but I wanted to ask whether any public methods have already been developed for this sort of thing.

By the way, I'm doing this in Keras.
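
To make the core question concrete: what I'm after boils down to copying the overlapping block of two weight matrices. A minimal sketch of that idea on two standalone Dense layers (the sizes here are made up, and this is just the slice-copy, not my full solution):

from tensorflow import keras

# Two stand-in Dense layers of different widths (sizes are hypothetical)
big = keras.layers.Dense(8)
small = keras.layers.Dense(4)
big.build((None, 6))
small.build((None, 6))

# Copy the overlapping region: slice both the kernel and the bias down to
# the smaller of each dimension, then write the source block into the target
w_src, b_src = big.get_weights()
w_dst, b_dst = small.get_weights()
rows = min(w_src.shape[0], w_dst.shape[0])
cols = min(w_src.shape[1], w_dst.shape[1])
w_dst[:rows, :cols] = w_src[:rows, :cols]
b_dst[:cols] = b_src[:cols]
small.set_weights([w_dst, b_dst])

Swapping big and small copies in the other direction; min() keeps the slice inside both matrices either way.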

child is the Keras model to be returned. The two parent models are the ones being bred.

from random import random, randint, uniform

def slow_crossover(self, parent1, parent2, child_num):
        '''A crossover/mutation function designed to work with models that can change sizes.'''
        if self.additional_info: print(f"================================\nChild {child_num}:")
        # 
        # Crossover
        #

        # Get all genes from parent2 up front
        # This prevents the child from trying to take a gene section from parent2 that is the wrong size
        p2_genes = [] # [weights, biases]
        for layer in parent2.layers:
            # Get weight/bias data and empty lists to store genes
            p2_data = layer.get_weights()
            weight, bias = [], []
            # Get the weight genes
            for x in range(p2_data[0].shape[0]):
                for y in range(self.gene_size, p2_data[0].shape[1], self.gene_size):
                    weight.append(p2_data[0][x][(y-self.gene_size):y])
            # Get the bias genes
            for x in range(self.gene_size, p2_data[1].shape[0], self.gene_size):
                bias.append(p2_data[1][(x-self.gene_size):x])
            p2_genes.append([weight, bias])

        # Crossover genes
        child_crossover = []
        for i in range(len(parent1.layers)):
            # Get weights and biases of the parents
            # p1_data acts as the base for the child
            p1_data = parent1.layers[i].get_weights()

            # The layer we use for p2, since they might have different numbers of layers
            p2_layer = int(i * len(parent2.layers) / len(parent1.layers))

            # Handle the weights
            for x in range(p1_data[0].shape[0]):
                for y in range(self.gene_size, p1_data[0].shape[1], self.gene_size):
                    # Check to see if crossover should occur
                    # Make sure there are genes available to be used
                    try:
                        if len(p2_genes[p2_layer][0]) and (random() < self.crossover_rate):
                            p1_data[0][x][(y-self.gene_size):y] = p2_genes[p2_layer][0][int((y / p1_data[0].shape[1]) * len(p2_genes[p2_layer][0]))]
                    except IndexError:
                        print(f"\nFailed to crossover weight. (list index out of range? -> {p2_layer}, {len(p2_genes)}, {i}, {len(parent1.layers)}, {len(parent2.layers)})\n")

            # Handle the biases
            # This sits outside the weight loops so each bias section is only
            # considered for crossover once per layer
            for y in range(self.gene_size, p1_data[1].shape[0], self.gene_size):
                # Check to see if crossover should occur
                try:
                    if len(p2_genes[p2_layer][1]) and (random() < self.crossover_rate):
                        p1_data[1][(y-self.gene_size):y] = p2_genes[p2_layer][1][int((y / p1_data[1].shape[0]) * len(p2_genes[p2_layer][1]))]
                except IndexError:
                    print(f"\nFailed to crossover bias. (list index out of range? -> {p2_layer}, {len(p2_genes)}, {i}, {len(parent1.layers)}, {len(parent2.layers)})\n")
            
            # Collect the layer data after crossover
            child_crossover.append(p1_data)

        # 
        # Mutate
        #

        # Value lists
        modded_layer = [False for i in range(len(child_crossover))]
        hidden_layers = []

        #
        # Mutate number of neurons
        for i in range(len(child_crossover) - 1):
            num_neurons = child_crossover[i][0].shape[1]
            # Check to see if the size of this layer will mutate
            if (random() < self.mutation_rate):
                num_neurons = max(1, num_neurons + (1 if (random() > 0.5) else -1))  # never drop below one neuron
                if self.additional_info: print("Neuron count changed!")
            hidden_layers.append(num_neurons)

        #
        # Mutate number of hidden layers
        if (random() < self.mutation_rate):
            # Remove layer
            if len(hidden_layers) and (random() > 0.5):
                # Choose layer to remove
                location = randint(0, len(hidden_layers)-1)
                del hidden_layers[location]
                # We've removed it, so we don't want to try to copy it
                modded_layer.insert(location, True)
                if self.additional_info: print("Removed hidden layer!")
            # Add layer
            else:
                # Choose where to insert the new layer and how many neurons it should have
                location = randint(0, len(hidden_layers))
                num_neurons = randint(1, 10)
                # Insert layer
                hidden_layers.insert(location, num_neurons)
                modded_layer.insert(location, True)
                if self.additional_info: print("Added hidden layer!")

        #
        # Copy weights and biases, then mutate individual weights and biases
        child = LinearNet.linear_QNet(child_crossover[0][0].shape[0], child_crossover[-1][0].shape[1], hidden_layers=hidden_layers, random_model=False)
        p_counter = 0
        for i in range(len(child.layers)):
            # Copy old weight and bias values over to new model and mutate them, if it's not a new layer
            child_data = child.layers[i].get_weights()
            if not modded_layer[i]:
                _x = min(child_data[0].shape[0], child_crossover[p_counter][0].shape[0])
                _y = min(child_data[0].shape[1], child_crossover[p_counter][0].shape[1])
                child_data[0][0:_x, 0:_y] = child_crossover[p_counter][0][0:_x, 0:_y]
                child_data[1][0:_y] = child_crossover[p_counter][1][0:_y]

                for x in range(_x):
                    # Check for weight mutation
                    for y in range(_y):
                        if (random() < self.mutation_rate):
                            child_data[0][x][y] += uniform(-self.mutation_degree, self.mutation_degree)
                        
                        # Check for bias mutation
                        if ((len(child.layers) - i) - 1) and (random() < self.mutation_rate):
                            child_data[1][y] += uniform(-self.mutation_degree, self.mutation_degree)
                
                p_counter += 1
            # Set weights and biases in child
            # The layer is already built (get_weights() above returned arrays),
            # and Layer.build() expects a shape tuple rather than an int anyway
            child.layers[i].set_weights(child_data)
        
        print(f"Agent {i}")
        [print(f"Layer {j}: {layer.get_weights()[0].shape}") for j, layer in enumerate(child.layers)]
        print("")

        return child
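
And for reference, here is the copy step pulled out on its own, the way I'd apply it to two whole models layer by layer. This is a sketch under my own assumptions: dense_stack is a hypothetical builder, and copy_overlap assumes every entry in model.layers carries a kernel and a bias (plain Dense stacks):

from tensorflow import keras

def copy_overlap(src_model, dst_model):
    '''Copy each layer's overlapping weight/bias block from src into dst.
    Layers are paired by index; zip() ignores extra layers in the deeper model.'''
    for src_layer, dst_layer in zip(src_model.layers, dst_model.layers):
        w_src, b_src = src_layer.get_weights()
        w_dst, b_dst = dst_layer.get_weights()
        rows = min(w_src.shape[0], w_dst.shape[0])
        cols = min(w_src.shape[1], w_dst.shape[1])
        w_dst[:rows, :cols] = w_src[:rows, :cols]
        b_dst[:cols] = b_src[:cols]
        dst_layer.set_weights([w_dst, b_dst])

def dense_stack(sizes, input_dim):
    '''Hypothetical helper: a plain stack of Dense layers.'''
    model = keras.Sequential()
    model.add(keras.layers.InputLayer(input_shape=(input_dim,)))
    for n in sizes:
        model.add(keras.layers.Dense(n, activation='relu'))
    return model

bigger = dense_stack([16, 16, 4], input_dim=8)
smaller = dense_stack([8, 4], input_dim=8)
copy_overlap(bigger, smaller)  # smaller keeps its shape but inherits the overlapping values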