TypeError: 'numpy.float32' object is not iterable when logging in mlflow

TypeError: 'numpy.float32' object is not iterable when logging in mlflow

我正在尝试机器学习模型并使用 mlflow 记录指标。但是我得到 TypeError: 'numpy.float32' object is not iterable。我试过使用 .tolist()dict() 但似乎没有任何效果。

def train(max_epochs, model, optimizer, scheduler, train_loader, valid_loader, project_name):
    best_val_loss = 100
    for epoch in range(max_epochs):
        model.train()
        running_loss = []
        tq_loader = tqdm(train_loader)
        o = {}
        for samples in tq_loader:
            optimizer.zero_grad()
            outputs, interaction_map = model(
                [samples[0].to(device), samples[1].to(device), torch.tensor(samples[2]).to(device),
                 torch.tensor(samples[3]).to(device)])
            l1_norm = torch.norm(interaction_map, p=2) * 1e-4
            loss = loss_fn(outputs, torch.tensor(samples[4]).to(device).float()) + l1_norm
            loss.backward()
            optimizer.step()
            loss = loss - l1_norm
            running_loss.append(loss.cpu().detach())
            tq_loader.set_description(
                "Epoch: " + str(epoch + 1) + "  Training loss: " + str(np.mean(np.array(running_loss))))
        model.eval()
        val_loss, mae_loss = get_metrics(model, valid_loader)
        scheduler.step(val_loss)
        
        #metrics mlflow
        mlflow.log_metrics('train_loss',(np.mean(np.array(running_loss))).tolist())
        mlflow.log_metrics('validation_loss',(val_loss).tolist())
        mlflow.log_metrics('MAE Val_loss', (mae_loss).tolist())

        print("Epoch: " + str(epoch + 1) + "  train_loss " + str(np.mean(np.array(running_loss))) + " Val_loss " + str(
            val_loss) + " MAE Val_loss " + str(mae_loss))
        if val_loss < best_val_loss:
            best_val_loss = val_loss
            torch.save(model.state_dict(), "./runs/run-" + str(project_name) + "/models/best_model.tar")

mlflow.set_experiment('CIGIN_V2')
mlflow.start_run(nested=True)
train(max_epochs, model, optimizer, scheduler, train_loader, valid_loader, project_name)
mlflow.end_run()

错误

Epoch: 1  Training loss: 6770.575: 100%|██████████| 1/1 [00:04<00:00,  4.35s/it]
100%|██████████| 1/1 [00:03<00:00,  3.86s/it]

---------------------------------------------------------------------------

TypeError                                 Traceback (most recent call last)

<ipython-input-96-8c3a6eb822c3> in <module>()
      1 mlflow.set_experiment('CIGIN_V2')
      2 mlflow.start_run(nested=True)
----> 3 train(max_epochs, model, optimizer, scheduler, train_loader, valid_loader, project_name)
      4 mlflow.end_run()

<ipython-input-95-ab0a6c80b65b> in train(max_epochs, model, optimizer, scheduler, train_loader, valid_loader, project_name)
     55 
     56         #metrics mlflow
---> 57         mlflow.log_metrics('train_loss',dict(np.mean(np.array(running_loss))).tolist())
     58         mlflow.log_metrics('validation_loss',dict(val_loss).tolist())
     59         mlflow.log_metrics('MAE Val_loss', dict(mae_loss).tolist())

TypeError: 'numpy.float32' object is not iterable

您将单个值记录到 log_metrics 中,根据文档中 log_metric 和 log_metrics 的实现,我认为这是不正确的:

https://www.mlflow.org/docs/latest/python_api/mlflow.html#mlflow.log_metrichttps://www.mlflow.org/docs/latest/python_api/mlflow.html#mlflow.log_metrics

所以我建议也许将“log_metrics”更改为“log_metric”并保留 tolist