PyTorch NN not as good as sklearn MLP

I am comparing the accuracy of sklearn's MLPRegressor against an equivalent network in PyTorch, but the PyTorch model always performs much worse, and I can't figure out why. My code is below.

from sklearn.preprocessing import PolynomialFeatures
from sklearn.model_selection import train_test_split
from sklearn.neural_network import MLPRegressor
from sklearn.metrics import r2_score
import torch
import torch.nn as nn
from torch.utils.data import DataLoader
from tqdm import tqdm

poly = PolynomialFeatures(2, interaction_only=True)
X_train, X_test, y_train, y_test = train_test_split(poly.fit_transform(X), y.ravel(),
                                                    test_size=0.15,
                                                    random_state=0, shuffle=True)
layers = (78, 22, 8, 3, 3, 1)
regr_nn = MLPRegressor(hidden_layer_sizes=layers, random_state=0, max_iter=20000,
                       solver='lbfgs',
                       activation='tanh', alpha=1e-5)
regr_nn.fit(X_train, y_train)
y_predict_test_nn = regr_nn.predict(X_test)    # predict on the features, not the targets
y_predict_train_nn = regr_nn.predict(X_train)
test_score = regr_nn.score(X_test, y_test)
train_score = regr_nn.score(X_train, y_train)
poly = PolynomialFeatures(2,interaction_only=True)
X_train, X_test, y_train, y_test = train_test_split(poly.fit_transform(X), 
                                                    y.ravel(),test_size=0.15,
                                                    random_state=0)
# convert the numpy arrays to torch tensors
x_test, y_test = torch.from_numpy(X_test.astype('float')), torch.from_numpy(y_test)
y_test = y_test.reshape((y_test.shape[0], 1))
x_train, y_train = torch.from_numpy(X_train.astype('float')), torch.from_numpy(y_train)
y_train = y_train.reshape((y_train.shape[0], 1))
class Train_set(torch.utils.data.Dataset):
  def __init__(self, X, y):
    if not torch.is_tensor(X) and not torch.is_tensor(y):
      self.X = torch.from_numpy(X)
      self.y = torch.from_numpy(y)
    else:
      self.X = X
      self.y = y

  def __len__(self):
      return len(self.X)

  def __getitem__(self, i):
      return self.X[i], self.y[i]


class Net(torch.nn.Module):
    def __init__(self, n_feature):
      super(Net, self).__init__()
      self.regress = nn.Sequential(nn.Linear(n_feature,78),nn.Tanh(),
          nn.Linear(78, 22),nn.Tanh(), 
          nn.Linear(22, 8),nn.Tanh(), nn.Linear(8, 3),nn.Tanh(),
          nn.Linear(3,3), nn.Tanh(),nn.Linear(3, 1))

    def forward(self, x):
      return self.regress(x.float())      # cast inputs to float32 and run the layers


cuda = torch.device('cuda' if torch.cuda.is_available() else 'cpu')  # device used below
net = Net(n_feature=x_train.size(1))
net.to(cuda)

# print(net)  # net architecture
optimizer = torch.optim.LBFGS(net.parameters(), max_iter=20000, lr=1e-5,
                              tolerance_grad=1e-07,tolerance_change=1e-05)
loss_func = torch.nn.MSELoss()  # this is for regression mean squared loss    
train_set = Train_set(x_train,y_train)
trainloader = DataLoader(train_set, batch_size=10, shuffle=True)
CL = []
# train the network
for t in tqdm(range(10)):
  for i, data in enumerate(trainloader, 0):
    def closure():
      # Get and prepare inputs
      inputs, targets = data
      inputs, targets = inputs.float(), targets.float()
      inputs, targets = inputs.to(cuda), targets.to(cuda)
      targets = targets.reshape((targets.shape[0], 1))
      # Zero the gradients
      optimizer.zero_grad()
      # Perform forward pass
      outputs = net(inputs)
      # Compute loss
      loss = loss_func(outputs, targets)
      # Perform backward pass
      loss.backward()
      return loss
    optimizer.step(closure)     # apply gradients
with torch.no_grad():
  prediction_train = net(x_train.to(cuda)).cpu()
  prediction_test = net(x_test.to(cuda)).cpu()
train_score = r2_score(y_train.numpy(), prediction_train.numpy())
test_score = r2_score(y_test.numpy(), prediction_test.numpy())

The R^2 score from sklearn is above 0.9 and its parity plot looks like a straight line, but the PyTorch score is close to zero and its parity plot looks terrible: [Sklearn result] [PyTorch result]. I would really appreciate any help. Thank you!
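
In case it matters, the parity plots are just predicted vs. true values; here is a minimal sketch of how I make them, assuming matplotlib and the numpy y_test / y_predict_test_nn from the sklearn block above (before the tensor conversion):

import matplotlib.pyplot as plt

# Parity plot: predicted vs. true values; a perfect model lies on the y = x line
plt.scatter(y_test, y_predict_test_nn, s=10, alpha=0.6)
lims = [min(y_test.min(), y_predict_test_nn.min()),
        max(y_test.max(), y_predict_test_nn.max())]
plt.plot(lims, lims, 'k--')   # y = x reference line
plt.xlabel('True value')
plt.ylabel('Predicted value')
plt.title('sklearn MLPRegressor parity plot')
plt.show()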

I think your closure function needs to be inside the trainloader loop:

for t in tqdm(range(10)):
  for i, data in enumerate(trainloader, 0):
    def closure():
      # Get and prepare inputs
      inputs, targets = data
      inputs, targets = inputs.float(), targets.float()
      inputs, targets = inputs.to(cuda), targets.to(cuda)
      targets = targets.reshape((targets.shape[0], 1))
      # Zero the gradients
      optimizer.zero_grad()
      # Perform forward pass
      outputs = net(inputs)
      # Compute loss
      loss = loss_func(outputs, targets)
      # Perform backward pass
      loss.backward()
      return loss
    optimizer.step(closure)     # <<< this goes right after the END of `closure`, still inside the trainloader loop

I can't say for sure since I haven't used LBFGS much, but I believe your current approach would only execute once per epoch.
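
If it helps, LBFGS is usually run on the full training set rather than on small mini-batches, since it is a deterministic quasi-Newton method. Here is a minimal sketch of that variant, assuming the net, loss_func, x_train, y_train, and cuda from your code; the lr and max_iter values are only illustrative:

# Full-batch LBFGS sketch (assumes net, loss_func, x_train, y_train, cuda from the question)
inputs = x_train.float().to(cuda)
targets = y_train.float().to(cuda)            # already reshaped to (n, 1) above

optimizer = torch.optim.LBFGS(net.parameters(), lr=0.1, max_iter=500)  # illustrative values

def closure():
    optimizer.zero_grad()                     # reset gradients
    loss = loss_func(net(inputs), targets)    # forward pass on the whole training set
    loss.backward()                           # backward pass
    return loss                               # LBFGS may call closure several times per step

optimizer.step(closure)                       # one step runs up to max_iter internal iterations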