PyTorch NN not as good as sklearn MLP
I am comparing the accuracy of sklearn's MLPRegressor against what I believe is an equivalent network in PyTorch, but the PyTorch model always does much worse and I can't figure out why. My code is below.
from sklearn.preprocessing import PolynomialFeatures
from sklearn.model_selection import train_test_split
from sklearn.neural_network import MLPRegressor

poly = PolynomialFeatures(2, interaction_only=True)
X_train, X_test, y_train, y_test = train_test_split(poly.fit_transform(X), y.ravel(),
                                                    test_size=0.15,
                                                    random_state=0, shuffle=True)
layers = (78, 22, 8, 3, 3, 1)
regr_nn = MLPRegressor(hidden_layer_sizes=layers, random_state=0, max_iter=20000,
                       solver='lbfgs',
                       activation='tanh', alpha=1e-5)
regr_nn.fit(X_train, y_train)
y_predict_test_nn = regr_nn.predict(X_test)    # predict on the features, not the targets
y_predict_train_nn = regr_nn.predict(X_train)
test_score = regr_nn.score(X_test, y_test)
train_score = regr_nn.score(X_train, y_train)
import torch
import torch.nn as nn
from torch.utils.data import DataLoader
from sklearn.metrics import r2_score
from tqdm import tqdm

poly = PolynomialFeatures(2, interaction_only=True)
X_train, X_test, y_train, y_test = train_test_split(poly.fit_transform(X),
                                                    y.ravel(), test_size=0.15,
                                                    random_state=0)
# convert the numpy arrays to torch tensors
x_test, y_test = torch.from_numpy(X_test.astype('float')), torch.from_numpy(y_test)
y_test = y_test.reshape((y_test.shape[0], 1))
x_train, y_train = torch.from_numpy(X_train.astype('float')), torch.from_numpy(y_train)
y_train = y_train.reshape((y_train.shape[0], 1))
class Train_set(torch.utils.data.Dataset):
    def __init__(self, X, y):
        if not torch.is_tensor(X) and not torch.is_tensor(y):
            self.X = torch.from_numpy(X)
            self.y = torch.from_numpy(y)
        else:
            self.X = X
            self.y = y

    def __len__(self):
        return len(self.X)

    def __getitem__(self, i):
        return self.X[i], self.y[i]
class Net(torch.nn.Module):
    def __init__(self, n_feature):
        super(Net, self).__init__()
        # same layer sizes and tanh activations as the sklearn MLP above
        self.regress = nn.Sequential(nn.Linear(n_feature, 78), nn.Tanh(),
                                     nn.Linear(78, 22), nn.Tanh(),
                                     nn.Linear(22, 8), nn.Tanh(),
                                     nn.Linear(8, 3), nn.Tanh(),
                                     nn.Linear(3, 3), nn.Tanh(),
                                     nn.Linear(3, 1))

    def forward(self, x):
        return self.regress(x.float())
# `cuda` is used below but was never defined in the snippet; define the device here
cuda = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

net = Net(n_feature=x_train.size(1))
net.to(cuda)
optimizer = torch.optim.LBFGS(net.parameters(), max_iter=20000, lr=1e-5,
                              tolerance_grad=1e-07, tolerance_change=1e-05)
loss_func = torch.nn.MSELoss()   # mean squared error loss for regression
train_set = Train_set(x_train, y_train)
trainloader = DataLoader(train_set, batch_size=10, shuffle=True)
CL = []
# train the network
for t in tqdm(range(10)):
    for i, data in enumerate(trainloader, 0):
        def closure():
            # Get and prepare inputs
            inputs, targets = data
            inputs, targets = inputs.float(), targets.float()
            inputs, targets = inputs.to(cuda), targets.to(cuda)
            targets = targets.reshape((targets.shape[0], 1))
            # Zero the gradients
            optimizer.zero_grad()
            # Perform forward pass
            outputs = net(inputs)
            # Compute loss
            loss = loss_func(outputs, targets)
            # Perform backward pass
            loss.backward()
            return loss
        optimizer.step(closure)  # apply gradients
with torch.no_grad():
    prediction_train = net(x_train.to(cuda)).cpu()
    prediction_test = net(x_test.to(cuda)).cpu()
train_score = r2_score(y_train.numpy(), prediction_train.numpy())
test_score = r2_score(y_test.numpy(), prediction_test.numpy())
The R^2 score from sklearn is above 0.9 and its parity plot looks like a straight line, but the score from PyTorch is close to zero and its parity plot looks terrible (a sketch of how such a parity plot can be drawn follows below).
[Figures: sklearn result / PyTorch result]
I would really appreciate any help. Thank you!
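For reference, a parity plot of this kind can be produced with something like the following. This is a minimal sketch that assumes matplotlib is available; parity_plot is a hypothetical helper, and the arrays come from the sklearn snippet above.

import matplotlib.pyplot as plt
import numpy as np

def parity_plot(y_true, y_pred, title):
    # Predicted vs. true values; a good model's points hug the y = x line.
    y_true = np.asarray(y_true).ravel()
    y_pred = np.asarray(y_pred).ravel()
    plt.scatter(y_true, y_pred, s=10)
    lims = [min(y_true.min(), y_pred.min()), max(y_true.max(), y_pred.max())]
    plt.plot(lims, lims, 'k--')      # the y = x reference line
    plt.xlabel('true value')
    plt.ylabel('predicted value')
    plt.title(title)
    plt.show()

parity_plot(y_test, y_predict_test_nn, 'sklearn MLPRegressor (test)')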
I think your closure function needs to be inside the trainloader loop:
for t in tqdm(range(10)):
    for i, data in enumerate(trainloader, 0):
        def closure():
            # Get and prepare inputs
            inputs, targets = data
            inputs, targets = inputs.float(), targets.float()
            inputs, targets = inputs.to(cuda), targets.to(cuda)
            targets = targets.reshape((targets.shape[0], 1))
            # Zero the gradients
            optimizer.zero_grad()
            # Perform forward pass
            outputs = net(inputs)
            # Compute loss
            loss = loss_func(outputs, targets)
            # Perform backward pass
            loss.backward()
            return loss
        optimizer.step(closure)  # <<< this is applied to the END of `closure`
I can't say for sure, since I haven't used LBFGS much, but I believe your current approach will only execute once per epoch.
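One way to check what LBFGS is actually doing is to count how many times it evaluates the closure in a single step() call. In torch.optim.LBFGS, max_iter is the maximum number of iterations per optimizer step, so one step() may run the closure many times. Here is a standalone sketch on toy data (not code from the question):

import torch

torch.manual_seed(0)
x = torch.randn(32, 4)
y = x.sum(dim=1, keepdim=True)      # toy regression target
model = torch.nn.Linear(4, 1)
loss_func = torch.nn.MSELoss()
optimizer = torch.optim.LBFGS(model.parameters(), lr=1.0, max_iter=20)

calls = 0

def closure():
    global calls
    calls += 1                      # count every closure evaluation
    optimizer.zero_grad()
    loss = loss_func(model(x), y)
    loss.backward()
    return loss

optimizer.step(closure)             # a single optimizer step
print(f'closure was evaluated {calls} times in one step()')

With max_iter=20000 as in the question, a single step() on one 10-sample batch can therefore evaluate the closure a very large number of times before the next batch is seen.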