Dataloader throwing error TypeError: new(): data must be a sequence (got map)
I am trying to implement a bidirectional LSTM on time series data. The main file calls the data loader to load the data for the model.
Main.py
import copy
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.optim.lr_scheduler import StepLR
import numpy as np
import time
import utils
import models
from models import rits_i
from models import brits_i
from models import rits
from models import brits
import argparse
import data_loader
import pandas as pd
import ujson as json
from sklearn import metrics
from ipdb import set_trace
parser = argparse.ArgumentParser()
parser.add_argument('--epochs', type = int, default = 1000)
parser.add_argument('--batch_size', type = int, default = 32)
parser.add_argument('--model', type = str)
args = parser.parse_args()
def train(model):
    optimizer = optim.Adam(model.parameters(), lr = 1e-3)
    data_iter = data_loader.get_loader(batch_size = args.batch_size)
    for epoch in range(args.epochs):
        model.train()
        run_loss = 0.0
        for idx, data in enumerate(data_iter):
            data = utils.to_var(data)
            ret = model.run_on_batch(data, optimizer)
            run_loss += ret['loss'].data[0]
            print('\r Progress epoch {}, {:.2f}%, average loss {}'.format(epoch, (idx + 1) * 100.0 / len(data_iter), run_loss / (idx + 1.0)),)
        if epoch % 1 == 0:
            evaluate(model, data_iter)
def evaluate(model, val_iter):
    model.eval()
    labels = []
    preds = []
    evals = []
    imputations = []
    for idx, data in enumerate(val_iter):
        data = utils.to_var(data)
        ret = model.run_on_batch(data, None)
        pred = ret['predictions'].data.cpu().numpy()
        label = ret['labels'].data.cpu().numpy()
        is_train = ret['is_train'].data.cpu().numpy()
        eval_masks = ret['eval_masks'].data.cpu().numpy()
        eval_ = ret['evals'].data.cpu().numpy()
        imputation = ret['imputations'].data.cpu().numpy()
        evals += eval_[np.where(eval_masks == 1)].tolist()
        imputations += imputation[np.where(eval_masks == 1)].tolist()
        # collect test label & prediction
        pred = pred[np.where(is_train == 0)]
        label = label[np.where(is_train == 0)]
        labels += label.tolist()
        preds += pred.tolist()
    labels = np.asarray(labels).astype('int32')
    preds = np.asarray(preds)
    print('AUC {}'.format(metrics.roc_auc_score(labels, preds)))
    evals = np.asarray(evals)
    imputations = np.asarray(imputations)
    print('MAE', np.abs(evals - imputations).mean())
    print('MRE', np.abs(evals - imputations).sum() / np.abs(evals).sum())
def run():
    model = getattr(models, args.model).Model()
    if torch.cuda.is_available():
        model = model.cuda()
    train(model)

if __name__ == '__main__':
    run()
data_loader.py
import os
import time
import ujson as json
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
class MySet(Dataset):
    def __init__(self):
        super(MySet, self).__init__()
        self.content = open('./json/json').readlines()
        indices = np.arange(len(self.content))
        val_indices = np.random.choice(indices, len(self.content) // 5)
        self.val_indices = set(val_indices.tolist())

    def __len__(self):
        return len(self.content)

    def __getitem__(self, idx):
        rec = json.loads(self.content[idx])
        if idx in self.val_indices:
            rec['is_train'] = 0
        else:
            rec['is_train'] = 1
        return rec
def collate_fn(recs):
    forward = map(lambda x: x['forward'], recs)
    backward = map(lambda x: x['backward'], recs)

    def to_tensor_dict(recs):
        values = torch.FloatTensor(map(lambda r: map(lambda x: x['values'], r), recs))
        masks = torch.FloatTensor(map(lambda r: map(lambda x: x['masks'], r), recs))
        deltas = torch.FloatTensor(map(lambda r: map(lambda x: x['deltas'], r), recs))
        forwards = torch.FloatTensor(map(lambda r: map(lambda x: x['forwards'], r), recs))
        evals = torch.FloatTensor(map(lambda r: map(lambda x: x['evals'], r), recs))
        eval_masks = torch.FloatTensor(map(lambda r: map(lambda x: x['eval_masks'], r), recs))
        return {'values': values, 'forwards': forwards, 'masks': masks, 'deltas': deltas, 'evals': evals, 'eval_masks': eval_masks}

    ret_dict = {'forward': to_tensor_dict(forward), 'backward': to_tensor_dict(backward)}
    ret_dict['labels'] = torch.FloatTensor(map(lambda x: x['label'], recs))
    ret_dict['is_train'] = torch.FloatTensor(map(lambda x: x['is_train'], recs))
    return ret_dict
def get_loader(batch_size = 64, shuffle = True):
    data_set = MySet()
    data_iter = DataLoader(dataset = data_set, \
                           batch_size = batch_size, \
                           num_workers = 4, \
                           shuffle = shuffle, \
                           pin_memory = True, \
                           collate_fn = collate_fn
    )
    return data_iter
But I am unable to resolve the error TypeError: new(): data must be a sequence (got map).
The terminal shows the following output:
C:\Users\ankit\anaconda3\python.exe "C:\Program Files\JetBrains\PyCharm Community Edition 2021.2\plugins\python-ce\helpers\pydev\pydevd.py" --multiproc --qt-support=auto --client 127.0.0.1 --port 61292 --file C:/Users/ankit/PycharmProjects/BRITS/main.py --epochs 1000 --batch_size 32 --model brits
Connected to pydev debugger (build 212.4746.96)
Traceback (most recent call last):
File "C:\Users\ankit\anaconda3\lib\site-packages\torch\utils\data\dataloader.py", line 521, in __next__
data = self._next_data()
File "C:\Users\ankit\anaconda3\lib\site-packages\torch\utils\data\dataloader.py", line 1203, in _next_data
return self._process_data(data)
File "C:\Users\ankit\anaconda3\lib\site-packages\torch\utils\data\dataloader.py", line 1229, in _process_data
data.reraise()
File "C:\Users\ankit\anaconda3\lib\site-packages\torch\_utils.py", line 425, in reraise
raise self.exc_type(msg)
TypeError: Caught TypeError in DataLoader worker process 0.
Original Traceback (most recent call last):
File "C:\Users\ankit\anaconda3\lib\site-packages\torch\utils\data\_utils\worker.py", line 287, in _worker_loop
data = fetcher.fetch(index)
File "C:\Users\ankit\anaconda3\lib\site-packages\torch\utils\data\_utils\fetch.py", line 47, in fetch
return self.collate_fn(data)
File "C:\Users\ankit\PycharmProjects\BRITS\data_loader.py", line 48, in collate_fn
ret_dict = {'forward': to_tensor_dict(forward), 'backward': to_tensor_dict(backward)}
File "C:\Users\ankit\PycharmProjects\BRITS\data_loader.py", line 38, in to_tensor_dict
values = torch.FloatTensor(map(lambda r: map(lambda x: x['values'], r), recs))
TypeError: new(): data must be a sequence (got map)
The input data is in JSON format (a partial sample is shown below):
{"forward":[{"evals":[0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.9056552797,0.0,0.0,0.0,-0.6644796629,0.0,0.0,0.0,0.0,0.0,0.2233221525,0.9872348884,1.5032897407,0.0,0.0,0.0,0.0,-0.1197213235,0.0,0.0,-1.2017103673,0.0,0.0,2.16039189,0.0],"deltas":[0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0],"forwards":[0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.9056552797,0.0,0.0,0.0,-0.6644796629,0.0,0.0,0.0,0.0,0.0,0.2233221525,0.9872348884,1.5032897407,0.0,0.0,0.0,0.0,-0.1197213235,0.0,0.0,-1.2017103673,0.0,0.0,2.16039189,0.0],"masks":[0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,1,1,1,0,0,0,0,1,0,0,1,0,0,1,0],"values":[0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.9056552797,0.0,0.0,0.0,-0.6644796629,0.0,0.0,0.0,0.0,0.0,0.2233221525,0.9872348884,1.5032897407,0.0,0.0,0.0,0.0,-0.1197213235,0.0,0.0,-1.2017103673,0.0,0.0,2.16039189,0.0],"eval_masks":[0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0]},{"evals":[0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-1.5117099666,0.0,0.0,0.0,0.0,0.0,0.2569139277,0.6703649195,0.8156657086,0.0,0.0,0.0,0.0,-0.3034796866,0.0,0.0,0.0,0.0,0.0,-0.5174302535,0.0],"deltas":[1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.0,1.0,1.0,1.0,1.0,1.0,0.0,0.0,0.0,1.0,1.0,1.0,1.0,0.0,1.0,1.0,1.0,1.0,1.0,0.0,1.0],"forwards":[0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.9056552797,0.0,0.0,0.0,-1.5117099666,0.0,0.0,0.0,0.0,0.0,0.2569139277,0.6703649195,0.8156657086,0.0,0.0,0.0,0.0,-0.3034796866,0.0,0.0,-1.2017103673,0.0,0.0,-0.5174302535,0.0],"masks":[0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,1,1,0,0,0,0,1,0,0,0,0,0,1,0],"values":[0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-1.5117099666,0.0,0.0,0.0,0.0,0.0,0.2569139277,0.6703649195,0.8156657086,0.0,0.0,0.0,0.0,-0.3034796866,0.0,0.0,0.0,0.0,0.0,-0.5174302535,0.0],"eval_masks":[0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0]},{"evals":[0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-1.3987459261,0.0,0.0,0.0,0.0,0.0,-0.4149215779,-0.0992249514,0.1738832786,0.0,0.0,0.0,0.0,-0.1197213235,0.0,0.0,0.0,0.0,0.0,0.3156699689,0.0],"deltas":[2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,0.0,2.0,2.0,2.0,2.0,2.0,0.0,0.0,0.0,2.0,2.0,2.0,2.0,0.0,2.0,2.0,2.0,2.0,2.0,0.0,2.0]
I did some research and found that this may be a Python 3 issue, but I still face the same problem when running on Python 2. Please help me resolve it.
I haven't read or tried to run all of your code, but at first glance this line is clearly wrong:
torch.FloatTensor(map(lambda r: map(lambda x: x['values'], r), recs))
In Python 3, map() returns a map object, not a list. FloatTensor(..) expects the latter, i.e. a list. Simply cast all the map objects with list():
torch.FloatTensor(list(map(lambda r: map(lambda x: x['values'], r), recs)))
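To illustrate the point in isolation (a minimal sketch, not part of the original answer), the same error can be reproduced and fixed on a toy input:

import torch

batch = map(float, [1, 2, 3])            # Python 3: a lazy map object, not a sequence
# torch.FloatTensor(batch)               # raises: new(): data must be a sequence (got map)
values = torch.FloatTensor(list(batch))  # materializing it with list() works
print(values)                            # tensor([1., 2., 3.])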
Passing a map to torch.*Tensor won't work; you have to convert it to a list first. The problem is that your maps themselves contain map objects. An alternative solution is to use a single list comprehension instead, which is quite straightforward:
>>> torch.FloatTensor([[x['values'] for x in r] for r in recs])
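Applied to the collate function from the question, a possible rewrite of to_tensor_dict could look like the sketch below. This assumes each record field ('values', 'masks', etc.) is a list of per-timestep lists, as in the JSON sample; the field() helper is introduced here only for brevity and is not part of the original code.

import torch

def to_tensor_dict(recs):
    recs = list(recs)  # materialize the outer map produced in collate_fn

    def field(name):
        # nested list comprehension: one inner list per time step, one outer list per record
        return torch.FloatTensor([[x[name] for x in r] for r in recs])

    return {'values': field('values'), 'forwards': field('forwards'),
            'masks': field('masks'), 'deltas': field('deltas'),
            'evals': field('evals'), 'eval_masks': field('eval_masks')}

The labels and is_train entries in collate_fn would need the same treatment, e.g. torch.FloatTensor([x['label'] for x in recs]).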