我不知道如何在 python 中为我的神经网络选择权重

I don't know how to pick the weights for my neural network in python

我正在做家庭作业,我已经花了好几个小时寻找如何为神经网络的随机数生成的权重正确设置维度。无论我阅读了多少文章或进行了 Google 次搜索,我都找不到解决方案。每次我根据传入 X_train 集的尺寸更改尺寸时,程序最终都会出现“ValueError:操作数无法与形状 (X,X) (y,y) 一起广播” .主要问题是点积与二维数组进行数学运算的复杂方式。我不知道还能去哪里,所以我就在这里。我将 post 成本和样本输出以提供尽可能多的信息,并每小时检查一次,看看是否有人能解决这个问题。我真正需要的是一种千篇一律的说法……如果你推一个 n 维数组,权重一应该是这些维度,权重二这些维度,权重三,等等……所以有不是计算中的兼容性错误。

我试过在互联网上寻找一种方法来根据数据结构的传入维度来破译权重。 IE。行和列。

到目前为止,这是完整的以下代码:

import numpy as np
import pandas as pd
from numpy import tanh
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split

class NeuralNetwork():
    
    def __init__(self):
        print('constructor working...')
        self.inputsize = 4
        self.outputsize = 4
        self.hiddensize = 1

        self.W1 = np.random.randn(self.inputsize, self.hiddensize)
        self.W2 = np.random.randn(self.hiddensize, self.outputsize)
        
    def forward(self, X):
        #print('forward - X:\n', X)
        self.z = np.dot(X, self.W1)
        #print('forward - self.z:\n', self.z)
        self.z2 = self.sigmoid(self.z)
        #print('forward - self.z2:\n', self.z2)
        self.z3 = np.dot(self.z2, self.W2)
        #print('forward - self.z3:\n', self.z3)
        o = self.sigmoid(self.z3)
        print('forward - o:\n', o)
        print('forward shape of o:\n', o.shape)
        print('forward shape of X:\n', X.shape)
        return o
    
    def sigmoid(self, s):
        #print('sigmoid:\n', (1/(1+np.exp(-s))))
        return(1/(1+np.exp(-s)))
        
    def sigmoidPrime(self, s):
        return(s * (1 - s))
        
    def backward(self, X, y, o):
        print('backward - X:\n',X,'\ny:\n',y,'\no:\n',o)
        self.o_error = y - o
        print('backward - o_error:\n', self.o_error)
        self.o_delta = self.o_error * self.sigmoidPrime(o)
        print('backward - o_delta:\n', self.o_delta)
        self.z2_error = self.o_delta.dot(self.W2.T)
        print('backward - z2_error:\n', self.z2_error)
        self.z2_delta = self.z2_error * self.sigmoidPrime(self.z2)
        print('backward - z2_delta:\n', self.z2_delta)
        self.W1 += X.T.dot(self.z2_delta)
        print('backward - W1:\n', self.W1)
        self.W2 += self.z2.T.dot(self.o_delta)
        print('backward - W2:\n', self.W2)
        
    def train(self, X, y):
        o = self.forward(X)
        self.backward(X, y, o)
        
    def saveWeights(self):
        np.savetxt('w1.txt', self.W1, fmt='%s')
        np.savetxt('w2.txt', self.W2, fmt='%s')
        
    def predict(self):
        print("Predicted data based on trained weights: ")
        print("Input (scaled): \n" + str(X_test))
        print("Output: \n" + str(self.forward(X_test)))
        
if __name__ == "__main__":
    
    nn = NeuralNetwork()
    titanic_original_df = pd.read_csv(r'./titanic_data.csv')
    titanic_df = titanic_original_df.copy()
    print('titanic data shape:', titanic_df.shape)
    #print('titanic data head:\n', titanic_df.head(3))
    '''
    for col in titanic_df:
        print(col,': ',titanic_df[col].dtypes)
    for col in titanic_df:
        print(col,'- value counts:\n', titanic_df[col].value_counts())
    for col in titanic_df:
        print(col,'- null data:', titanic_df[col].isnull().sum())
    '''
    titanic_df['Age'] = titanic_df['Age'].interpolate().round()
    #print('after interpolation, Age null counts:\n', titanic_df['Age'].isnull().sum())
    titanic_df['Sex'] = pd.get_dummies(titanic_df['Sex'])
    #print('after dummy encoding Sex:\n', titanic_df['Sex'].value_counts())
    for col in titanic_df:
        print(col,'- null data:', titanic_df[col].dtypes)
    titanic_df[['Pclass','Sex']] = titanic_df[['Pclass','Sex']].astype(np.float64)
    sc = StandardScaler()
    #scaled_data = sc.fit(titanic_df[['Age','Fare']])
    #titanic_df[['Age','Fare']] = sc.transform(titanic_df[['Age','Fare']])
    #print('after scaling, Age column:\n', titanic_df['Age'].value_counts())
    #print('after scaling, Fare column:\n', titanic_df['Fare'].value_counts())
    y = titanic_df.Survived
    X = titanic_df.drop(['PassengerId','Survived','Name','SibSp','Parch','Ticket','Cabin','Embarked'], axis=1)
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=124)
    print('X_train shape:', X_train.shape)
    print('X_test shape:', X_test.shape)
    print('y_train shape:', y_train.shape)
    print('y_test shape:', y_test.shape)
    #print('X_train:\n', X_train['Sex'].value_counts())
    for i in range(1):
        print('# '+str(i)+'\n')
        #print('Input (scaled):\n'+str(X_train))
        #print('Actual output:\n'+str(y_train))
        print('Predicted output:\n'+str(nn.forward(X_train)))
        print('shape of X_train:',X_train.shape)
        print('shape of y_train:',y_train.shape)
        print('Loss:\n'+str(np.mean(np.square(y_train - nn.forward(X_train)))))
        print('\n')
        nn.train(X_train, y_train)
        
    nn.saveWeights()
    nn.predict()

In [55]: runfile('C:/Users/John/.spyder-py3/ProgrammingAssignment#9.py', wdir='C:/Users/John/.spyder-py3')
constructor working...
titanic data shape: (891, 12)
PassengerId - null data: int64
Survived - null data: int64
Pclass - null data: int64
Name - null data: object
Sex - null data: uint8
Age - null data: float64
SibSp - null data: int64
Parch - null data: int64
Ticket - null data: object
Fare - null data: float64
Cabin - null data: object
Embarked - null data: object
X_train shape: (623, 4)
X_test shape: (268, 4)
y_train shape: (623,)
y_test shape: (268,)
# 0

forward - o:
 [[0.50384373 0.4961504  0.50183024 0.49790133]
 [0.5001908  0.49980891 0.50009085 0.49989583]
 [0.51753819 0.48243502 0.50835355 0.49042155]
 ...
 [0.51554828 0.48442797 0.50740524 0.49150886]
 [0.50025489 0.49974472 0.50012137 0.49986083]
 [0.50000075 0.49999925 0.50000036 0.49999959]]
forward shape of o:
 (623, 4)
forward shape of X:
 (623, 4)
Predicted output:
[[0.50384373 0.4961504  0.50183024 0.49790133]
 [0.5001908  0.49980891 0.50009085 0.49989583]
 [0.51753819 0.48243502 0.50835355 0.49042155]
 ...
 [0.51554828 0.48442797 0.50740524 0.49150886]
 [0.50025489 0.49974472 0.50012137 0.49986083]
 [0.50000075 0.49999925 0.50000036 0.49999959]]
shape of X_train: (623, 4)
shape of y_train: (623,)
forward - o:
 [[0.50384373 0.4961504  0.50183024 0.49790133]
 [0.5001908  0.49980891 0.50009085 0.49989583]
 [0.51753819 0.48243502 0.50835355 0.49042155]
 ...
 [0.51554828 0.48442797 0.50740524 0.49150886]
 [0.50025489 0.49974472 0.50012137 0.49986083]
 [0.50000075 0.49999925 0.50000036 0.49999959]]
forward shape of o:
 (623, 4)
forward shape of X:
 (623, 4)
Traceback (most recent call last):

  File "<ipython-input-55-52d7c067a2dd>", line 1, in <module>
    runfile('C:/Users/John/.spyder-py3/ProgrammingAssignment#9.py', wdir='C:/Users/John/.spyder-py3')

  File "C:\Users\John\Anaconda3\lib\site-packages\spyder_kernels\customize\spydercustomize.py", line 786, in runfile
    execfile(filename, namespace)

  File "C:\Users\John\Anaconda3\lib\site-packages\spyder_kernels\customize\spydercustomize.py", line 110, in execfile
    exec(compile(f.read(), filename, 'exec'), namespace)

  File "C:/Users/John/.spyder-py3/ProgrammingAssignment#9.py", line 117, in <module>
    print('Loss:\n'+str(np.mean(np.square(y_train - nn.forward(X_train)))))

  File "C:\Users\John\Anaconda3\lib\site-packages\pandas\core\ops.py", line 1583, in wrapper
    result = safe_na_op(lvalues, rvalues)

  File "C:\Users\John\Anaconda3\lib\site-packages\pandas\core\ops.py", line 1529, in safe_na_op
    return na_op(lvalues, rvalues)

  File "C:\Users\John\Anaconda3\lib\site-packages\pandas\core\ops.py", line 1505, in na_op
    result = expressions.evaluate(op, str_rep, x, y, **eval_kwargs)

  File "C:\Users\John\Anaconda3\lib\site-packages\pandas\core\computation\expressions.py", line 208, in evaluate
    return _evaluate(op, op_str, a, b, **eval_kwargs)

  File "C:\Users\John\Anaconda3\lib\site-packages\pandas\core\computation\expressions.py", line 123, in _evaluate_numexpr
    result = _evaluate_standard(op, op_str, a, b)

  File "C:\Users\John\Anaconda3\lib\site-packages\pandas\core\computation\expressions.py", line 68, in _evaluate_standard
    return op(a, b)

ValueError: operands could not be broadcast together with shapes (623,) (623,4)

无需过多地参与 Python 编程。 转发数据最主要的是权重必须"fit"进入神经元

在数学表达式中,您可以表示一个简单的点积,例如: [2 x 3] 矩阵点乘 [3 x 1] 结果是 [2 x 1] 矩阵,请注意矩阵乘法的方向很重要。

然后您可以将其拆分为一个矩阵 A,其中 n 行和 m 列点乘以矩阵 B,该矩阵 B 必须具有 m 列 (!!) 和可选的列数,比方说 z-列。结果 A x B >> 产生形状为 [n x z].

的矩阵 C

查看您的代码,您可能在数组大小、缺少转置等方面存在拼写错误。