Create custom convolutional Loss function that only takes parts of the tensor
边框是在 DataLoader 的 `__getitem__` 方法中添加的:
def __getitem__(self, index):
    """Load all PNG frames of one sample directory as a stacked float tensor.

    Every frame gets a 6 px border whose colour is selected by the matching
    entry of ``lightConditions.json`` (black when the entry equals ``False``,
    orange otherwise) and is then resized with ``cv2.resize`` to
    ``dsize=(256, 448)``.

    Args:
        index: Position in ``self.input_data``; the entry is used as the
            path of the sample directory.

    Returns:
        The per-frame tensors stacked along a new leading dimension, in the
        sorted order of the PNG file names.

    Raises:
        IndexError: If the JSON list has fewer entries than there are frames.
    """
    path = self.input_data[index]
    imgs_path = sorted(glob.glob(path + '/*.png'))

    # Read the per-frame light conditions that select the border colour.
    with open(path + "/lightConditions.json", 'r') as file:
        lightConditions = json.load(file)

    # NOTE(review): the original snippet carries a "shift light conditions"
    # comment here with no code after it - confirm whether a shifting step
    # was lost.

    imgs = []
    for frameNumber, img_path in enumerate(imgs_path):
        img = cv2.imread(img_path)
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        im_pil = Image.fromarray(img)

        # Exactly one colour per frame; without the else branch the black
        # border would always be overwritten by the orange one.
        if lightConditions[frameNumber] == False:
            imgBorder = ImageOps.expand(im_pil, border=6, fill='black')
        else:
            imgBorder = ImageOps.expand(im_pil, border=6, fill='orange')

        # The border is added before the resize, so its width in the final
        # tensor is scaled along with the image and is generally not 6 px.
        img = np.asarray(imgBorder)
        img = cv2.resize(img, (256, 448))
        #img = cv2.resize(img, (0, 0), fx=0.5, fy=0.5, interpolation=cv2.INTER_CUBIC) #has been 0.5 for official data, new is fx = 2.63 and fy = 2.84

        # Collect the frame; torch.stack below fails on an empty list.
        imgs.append(ToTensor()(img).float())

    return torch.stack(imgs, dim=0)
# Training loop fragment: for every epoch, move each batch to the GPU, split
# its frames into model inputs and ground-truth frames, and run the model.
for idx_epoch in range(startEpoch, nEpochs):
    # set epoch in dataloader for right shuffle -> set seed really random
    # NOTE(review): no statement follows the comment above in this snippet -
    # confirm whether the set-epoch call was lost.

    # Remember the start time so the duration of the epoch can be displayed.
    startTimeEpoch = datetime.now()
    i = 0

    # NOTE(review): the snippet lost its indentation; both assignments are
    # assumed to belong to this branch - confirm against the full script.
    if processGPU == 0:
        running_loss = 0
        beenValuated = False

    for index, data_sr in enumerate(train_loader):
        # Transfer the batch to this process's GPU without blocking, since
        # the copy only affects this single process.
        data_sr = data_sr.cuda(processGPU, non_blocking=True)
        startTimeIteration = time.time()

        # Remove all dimensions of size 1.
        data_sr = data_sr.squeeze()

        # Calculate the indices of the input frames and the GT frames.
        num_f = len(data_sr)
        if cfg.model_type == 0:
            # Only the frame at the middle position is the ground truth; the
            # window is shifted by a random offset and clipped to valid
            # indices.
            idx_start = random.randint(-2, 2)
            idx_all = list(np.arange(idx_start, idx_start + num_f).clip(0, num_f - 1))
            idx_gt = [idx_all.pop(num_f // 2)]
            idx_input = idx_all
        else:
            # Otherwise the first four frames are the input and the next
            # cfg.dec_frames frames are the ground truth to be predicted.
            # Without this else branch the indices computed above would
            # always be overwritten.
            idx_all = np.arange(0, num_f)
            idx_input = list(idx_all[0:4])
            idx_gt = list(idx_all[4:4 + cfg.dec_frames])

        imgs_input = data_sr[idx_input]
        imgs_gt = data_sr[idx_gt]

        # Get the predicted result.
        imgs_pred = model(imgs_input)
我使用的是 cfg.model_type = 1。模型输出的新图像同样带有彩色边框。通常在这里计算损失:
# Loss over the complete tensors: the border pixels of imgs_pred and imgs_gt
# are passed to the criterion together with the image content.
loss = criterion_mse(imgs_pred, imgs_gt)
你可以像在 NumPy 中一样对张量进行切片。图像批次的维度顺序是 NCHW。如果 b 是边框宽度(b > 0),并且四条边的边框宽度相同,那么只需裁剪张量:
# Drop b pixels on every side of the last two axes so the border does not
# contribute to the loss (assumes an N x C x H x W layout - TODO confirm).
# b is not defined in this snippet and must be > 0: for b == 0 the slice
# 0:-0 is empty.
# NOTE(review): __getitem__ adds the 6 px border before cv2.resize, so the
# border width inside the tensor is scaled and may differ between height and
# width - confirm the value(s) of b against the resized images.
loss = criterion_mse(imgs_pred[:, :, b:-b, b:-b] , imgs_gt[:, :, b:-b, b:-b])
我有一个以图像为输入的卷积网络,每张图像外面还加了一圈彩色边框,用来向网络传递额外信息。现在我想计算损失,但通常的损失函数会把预测图像的边框也算进去。边框是完全随机的,只是系统的输入。我不希望模型因为预测错了边框颜色而被认为表现不佳。边框是在 DataLoader 的 `__getitem__` 方法中添加的:
def __getitem__(self, index):
    """Load all PNG frames of one sample directory as a stacked float tensor.

    Every frame gets a 6 px border whose colour is selected by the matching
    entry of ``lightConditions.json`` (black when the entry equals ``False``,
    orange otherwise) and is then resized with ``cv2.resize`` to
    ``dsize=(256, 448)``.

    Args:
        index: Position in ``self.input_data``; the entry is used as the
            path of the sample directory.

    Returns:
        The per-frame tensors stacked along a new leading dimension, in the
        sorted order of the PNG file names.

    Raises:
        IndexError: If the JSON list has fewer entries than there are frames.
    """
    path = self.input_data[index]
    imgs_path = sorted(glob.glob(path + '/*.png'))

    # Read the per-frame light conditions that select the border colour.
    with open(path + "/lightConditions.json", 'r') as file:
        lightConditions = json.load(file)

    # NOTE(review): the original snippet carries a "shift light conditions"
    # comment here with no code after it - confirm whether a shifting step
    # was lost.

    imgs = []
    for frameNumber, img_path in enumerate(imgs_path):
        img = cv2.imread(img_path)
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        im_pil = Image.fromarray(img)

        # Exactly one colour per frame; without the else branch the black
        # border would always be overwritten by the orange one.
        if lightConditions[frameNumber] == False:
            imgBorder = ImageOps.expand(im_pil, border=6, fill='black')
        else:
            imgBorder = ImageOps.expand(im_pil, border=6, fill='orange')

        # The border is added before the resize, so its width in the final
        # tensor is scaled along with the image and is generally not 6 px.
        img = np.asarray(imgBorder)
        img = cv2.resize(img, (256, 448))
        #img = cv2.resize(img, (0, 0), fx=0.5, fy=0.5, interpolation=cv2.INTER_CUBIC) #has been 0.5 for official data, new is fx = 2.63 and fy = 2.84

        # Collect the frame; torch.stack below fails on an empty list.
        imgs.append(ToTensor()(img).float())

    return torch.stack(imgs, dim=0)
# Training loop fragment: for every epoch, move each batch to the GPU, split
# its frames into model inputs and ground-truth frames, and run the model.
for idx_epoch in range(startEpoch, nEpochs):
    # set epoch in dataloader for right shuffle -> set seed really random
    # NOTE(review): no statement follows the comment above in this snippet -
    # confirm whether the set-epoch call was lost.

    # Remember the start time so the duration of the epoch can be displayed.
    startTimeEpoch = datetime.now()
    i = 0

    # NOTE(review): the snippet lost its indentation; both assignments are
    # assumed to belong to this branch - confirm against the full script.
    if processGPU == 0:
        running_loss = 0
        beenValuated = False

    for index, data_sr in enumerate(train_loader):
        # Transfer the batch to this process's GPU without blocking, since
        # the copy only affects this single process.
        data_sr = data_sr.cuda(processGPU, non_blocking=True)
        startTimeIteration = time.time()

        # Remove all dimensions of size 1.
        data_sr = data_sr.squeeze()

        # Calculate the indices of the input frames and the GT frames.
        num_f = len(data_sr)
        if cfg.model_type == 0:
            # Only the frame at the middle position is the ground truth; the
            # window is shifted by a random offset and clipped to valid
            # indices.
            idx_start = random.randint(-2, 2)
            idx_all = list(np.arange(idx_start, idx_start + num_f).clip(0, num_f - 1))
            idx_gt = [idx_all.pop(num_f // 2)]
            idx_input = idx_all
        else:
            # Otherwise the first four frames are the input and the next
            # cfg.dec_frames frames are the ground truth to be predicted.
            # Without this else branch the indices computed above would
            # always be overwritten.
            idx_all = np.arange(0, num_f)
            idx_input = list(idx_all[0:4])
            idx_gt = list(idx_all[4:4 + cfg.dec_frames])

        imgs_input = data_sr[idx_input]
        imgs_gt = data_sr[idx_gt]

        # Get the predicted result.
        imgs_pred = model(imgs_input)
我使用的是 cfg.model_type = 1。模型输出的新图像同样带有彩色边框。通常在这里计算损失:
# Loss over the complete tensors: the border pixels of imgs_pred and imgs_gt
# are passed to the criterion together with the image content.
loss = criterion_mse(imgs_pred, imgs_gt)
你可以像在 NumPy 中一样对张量进行切片。图像批次的维度顺序是 NCHW。如果 b 是边框宽度(b > 0),并且四条边的边框宽度相同,那么只需裁剪张量:
# Drop b pixels on every side of the last two axes so the border does not
# contribute to the loss (assumes an N x C x H x W layout - TODO confirm).
# b is not defined in this snippet and must be > 0: for b == 0 the slice
# 0:-0 is empty.
# NOTE(review): __getitem__ adds the 6 px border before cv2.resize, so the
# border width inside the tensor is scaled and may differ between height and
# width - confirm the value(s) of b against the resized images.
loss = criterion_mse(imgs_pred[:, :, b:-b, b:-b] , imgs_gt[:, :, b:-b, b:-b])