在 MXNet 中定义简单神经网络时出错
Defining a simple neural network in MXNet: error
我正在使用 MXNet 构建一个简单的神经网络,但在调用 step() 方法时遇到了问题:
x1.shape=(64, 1, 1000)
y1.shape=(64, 1, 10)
# Question snippet: a three-layer dense network run for 500 iterations on the
# whole (x1, y1) batch.
# NOTE(review): `H`, `D_out`, `x1`, `y1`, `loss_fn` and `trainer` are defined
# outside this snippet. Nothing shown here initializes the parameters or
# builds `trainer` from *this* net's parameters -- presumably the source of
# the stale-gradient warning reported for this code; confirm against the
# asker's full script.
net = nn.Sequential()  # was `nm.Sequential()`; the layers below use `nn`
net.add(
    nn.Dense(H, activation='relu'),
    nn.Dense(90, activation='relu'),
    nn.Dense(D_out),
)
for t in range(500):
    # y_pred = net(x1)
    # loss = loss_fn(y_pred, y)
    # for i in range(len(x1)):
    # Record the forward pass so that backward() can compute gradients.
    with autograd.record():
        output = net(x1)
        loss = loss_fn(output, y1)
    loss.backward()
    trainer.step(64)  # 64 matches the leading (batch) dimension of x1
    if t % 100 == 99:
        print(t, loss)
    # optimizer.zero_grad()  # PyTorch leftover, kept commented out by the asker
UserWarning: Gradient of Parameter `dense30_weight` on context cpu(0) has not been updated by backward since last `step`. This could mean a bug in your model that made it only use a subset of the Parameters (Blocks) for this iteration. If you are intentionally only using a subset, call step with ignore_stale_grad=True to suppress this warning and skip updating of Parameters with stale gradient
该警告表明您传递给训练器(Trainer)的参数不在当前的计算图中。
您需要先初始化模型的参数,再用这些参数定义训练器。与 PyTorch 不同,在 MXNet 中不需要调用 zero_grad,因为默认情况下新的梯度会覆盖旧的梯度,而不是累加。以下代码展示了使用 MXNet 的 Gluon API 实现的简单神经网络:
# Setup: imports, compute context and problem sizes used by the example.
import mxnet as mx
from mxnet import autograd, gluon, nd

model_ctx = mx.cpu()
num_inputs = 2  # real_fn reads exactly two feature columns
num_examples = 10000  # example size chosen for this demo; adjust freely

# Define model
net = gluon.nn.Dense(1)
# Initialize the parameters before the network is used.
net.collect_params().initialize(mx.init.Normal(sigma=1.), ctx=model_ctx)
square_loss = gluon.loss.L2Loss()
# Build the trainer from this net's own parameters so that step() updates
# the same parameters that backward() computes gradients for.
trainer = gluon.Trainer(net.collect_params(), 'sgd', {'learning_rate': 0.0001})


# Create random input and labels
def real_fn(X):
    """Return the noise-free target 2*x0 - 3.4*x1 + 4.2 for each row of X."""
    return 2 * X[:, 0] - 3.4 * X[:, 1] + 4.2


X = nd.random_normal(shape=(num_examples, num_inputs))
noise = 0.01 * nd.random_normal(shape=(num_examples,))
y = real_fn(X) + noise

# Define Dataloader
batch_size = 4
train_data = gluon.data.DataLoader(gluon.data.ArrayDataset(X, y), batch_size=batch_size, shuffle=True)
num_batches = num_examples / batch_size

for e in range(10):
    # Reset the running loss each epoch; the original never initialized it,
    # so the first `+=` raised NameError.
    cumulative_loss = 0
    # Iterate over training batches
    for i, (data, label) in enumerate(train_data):
        # Load data on the same context the parameters were initialized on
        data = data.as_in_context(model_ctx)
        label = label.as_in_context(model_ctx)
        with autograd.record():
            output = net(data)
            loss = square_loss(output, label)
        # Backpropagation
        loss.backward()
        trainer.step(batch_size)
        cumulative_loss += nd.mean(loss).asscalar()
    # NOTE(review): cumulative_loss sums per-batch mean losses, so dividing by
    # num_batches would give the average batch loss; the original
    # normalization by num_examples is kept unchanged here.
    print("Epoch %s, loss: %s" % (e, cumulative_loss / num_examples))
我正在使用 MXNet 构建一个简单的神经网络,但在调用 step() 方法时遇到了问题:
x1.shape=(64, 1, 1000)
y1.shape=(64, 1, 10)
# Question snippet: a three-layer dense network run for 500 iterations on the
# whole (x1, y1) batch.
# NOTE(review): `H`, `D_out`, `x1`, `y1`, `loss_fn` and `trainer` are defined
# outside this snippet. Nothing shown here initializes the parameters or
# builds `trainer` from *this* net's parameters -- presumably the source of
# the stale-gradient warning reported for this code; confirm against the
# asker's full script.
net = nn.Sequential()  # was `nm.Sequential()`; the layers below use `nn`
net.add(
    nn.Dense(H, activation='relu'),
    nn.Dense(90, activation='relu'),
    nn.Dense(D_out),
)
for t in range(500):
    # y_pred = net(x1)
    # loss = loss_fn(y_pred, y)
    # for i in range(len(x1)):
    # Record the forward pass so that backward() can compute gradients.
    with autograd.record():
        output = net(x1)
        loss = loss_fn(output, y1)
    loss.backward()
    trainer.step(64)  # 64 matches the leading (batch) dimension of x1
    if t % 100 == 99:
        print(t, loss)
    # optimizer.zero_grad()  # PyTorch leftover, kept commented out by the asker
UserWarning: Gradient of Parameter `dense30_weight` on context cpu(0) has not been updated by backward since last `step`. This could mean a bug in your model that made it only use a subset of the Parameters (Blocks) for this iteration. If you are intentionally only using a subset, call step with ignore_stale_grad=True to suppress this warning and skip updating of Parameters with stale gradient
该警告表明您传递给训练器(Trainer)的参数不在当前的计算图中。您需要先初始化模型的参数,再用这些参数定义训练器。与 PyTorch 不同,在 MXNet 中不需要调用 zero_grad,因为默认情况下新的梯度会覆盖旧的梯度,而不是累加。以下代码展示了使用 MXNet 的 Gluon API 实现的简单神经网络:
# Define model
# Setup: imports, compute context and problem sizes used by the example.
import mxnet as mx
from mxnet import autograd, gluon, nd

model_ctx = mx.cpu()
num_inputs = 2  # real_fn reads exactly two feature columns
num_examples = 10000  # example size chosen for this demo; adjust freely

# Model, loss and trainer
net = gluon.nn.Dense(1)
# Initialize the parameters before the network is used.
net.collect_params().initialize(mx.init.Normal(sigma=1.), ctx=model_ctx)
square_loss = gluon.loss.L2Loss()
# Build the trainer from this net's own parameters so that step() updates
# the same parameters that backward() computes gradients for.
trainer = gluon.Trainer(net.collect_params(), 'sgd', {'learning_rate': 0.0001})


# Create random input and labels
def real_fn(X):
    """Return the noise-free target 2*x0 - 3.4*x1 + 4.2 for each row of X."""
    return 2 * X[:, 0] - 3.4 * X[:, 1] + 4.2


X = nd.random_normal(shape=(num_examples, num_inputs))
noise = 0.01 * nd.random_normal(shape=(num_examples,))
y = real_fn(X) + noise

# Define Dataloader
batch_size = 4
train_data = gluon.data.DataLoader(gluon.data.ArrayDataset(X, y), batch_size=batch_size, shuffle=True)
num_batches = num_examples / batch_size

for e in range(10):
    # Reset the running loss each epoch; the original never initialized it,
    # so the first `+=` raised NameError.
    cumulative_loss = 0
    # Iterate over training batches
    for i, (data, label) in enumerate(train_data):
        # Load data on the same context the parameters were initialized on
        data = data.as_in_context(model_ctx)
        label = label.as_in_context(model_ctx)
        with autograd.record():
            output = net(data)
            loss = square_loss(output, label)
        # Backpropagation
        loss.backward()
        trainer.step(batch_size)
        cumulative_loss += nd.mean(loss).asscalar()
    # NOTE(review): cumulative_loss sums per-batch mean losses, so dividing by
    # num_batches would give the average batch loss; the original
    # normalization by num_examples is kept unchanged here.
    print("Epoch %s, loss: %s" % (e, cumulative_loss / num_examples))