Tensorflow - 范围明智的回归损失

Tensorflow - range wise regression loss

我正在尝试为以下问题创建一个有效的损失函数: 损失是为红线之间的每个范围计算的 MAE 的总和。蓝线是 ground truth,橙色线是预测,红点标记了 ground truth 值从一个变化到另一个并关闭当前值范围的索引。输入值在 [0,1] 范围内。值范围的数量各不相同;它可以介于 2-12 之间。

之前,我用 TF map_fn 编写了一个代码,但是速度很慢:

def rwmae_old(y_true, y_pred):
    y_pred = tf.convert_to_tensor(y_pred)
    y_true = tf.cast(y_true, y_pred.dtype)

    # prepare array
    yt_tmp = tf.concat(
        [tf.ones([len(y_true), 1], dtype=y_pred.dtype) * tf.cast(len(y_true), dtype=y_true.dtype), y_true], axis=-1)
    yt_tmp = tf.concat([yt_tmp, tf.ones([len(y_true), 1]) * tf.cast(len(y_true), dtype=y_true.dtype)], axis=-1)

    # find where there is a change of values between consecutive indices
    ranges = tf.transpose(tf.where(yt_tmp[:, :-1] != yt_tmp[:, 1:]))
    ranges_cols = tf.concat(
        [[0], tf.transpose(tf.where(ranges[1][1:] == 0))[0] + 1, [tf.cast(len(ranges[1]), dtype=y_true.dtype)]], axis=0)
    ranges_rows = tf.range(len(y_true))

    losses = tf.map_fn(
        # loop through every row in the array
        lambda ii:
        tf.reduce_mean(
            tf.map_fn(
                # loop through every range within the example and calculate the loss
                lambda jj:
                tf.reduce_mean(
                    tf.abs(
                        y_true[ii][ranges[1][ranges_cols[ii] + jj]: ranges[1][ranges_cols[ii] + jj + 1]] -
                        y_pred[ii][ranges[1][ranges_cols[ii] + jj]: ranges[1][ranges_cols[ii] + jj + 1]]
                    ),
                ),
                tf.range(ranges_cols[ii + 1] - ranges_cols[ii] - 1),
                fn_output_signature=y_pred.dtype
            )
        ),
        ranges_rows,
        fn_output_signature=y_pred.dtype
    )

    return losses

今天,我创建了一个惰性代码,它只遍历批处理中的每个示例,并检查索引之间的值是否发生变化,如果发生变化,则计算当前范围的 MAE:

def rwmae(y_true, y_pred):
    (batch_size, length) = y_pred.shape
    losses = tf.zeros(batch_size, dtype=y_pred.dtype)

    for ii in range(batch_size):
        # reset loss for the current row
        loss = tf.constant(0, dtype=y_pred.dtype)
        # set current range start index to 0
        ris = 0

        for jj in range(length - 1):
            if y_true[ii][jj] != y_true[ii][jj + 1]:
                # we found a point of change, calculate the loss in the current range and ...
                loss = tf.add(loss, tf.reduce_mean(tf.abs(y_true[ii][ris: jj + 1] - y_pred[ii][ris: jj + 1])))
                # ... update the new range starting point
                ris = jj + 1
        if ris != length - 1:
            # we need to calculate the loss for the rest of the vector
            loss = tf.add(loss, tf.reduce_mean(tf.abs(y_true[ii][ris: length] - y_pred[ii][ris: length])))
        #replace loss in the proper row
        losses = tf.tensor_scatter_nd_update(losses, [[ii]], [loss])

    return losses

你觉得有什么办法可以提高它的效率吗?或者你认为有更好的损失函数来解决这个问题?

您可以尝试这样的操作:

import numpy as np
import tensorflow as tf

def rwmae(y_true, y_pred):
    (batch_size, length) = tf.shape(y_pred)
    losses = tf.zeros(batch_size, dtype=y_pred.dtype)
    for ii in tf.range(batch_size):
        ris = 0
        indices= tf.concat([tf.where(y_true[ii][:-1] != y_true[ii][1:])[:, 0], [length-1]], axis=0)
        ragged_indices = tf.ragged.range(tf.concat([[ris], indices[:-1] + 1], axis=0), indices + 1)
        loss = tf.reduce_sum(tf.reduce_mean(tf.abs(tf.gather(y_true[ii], ragged_indices) - tf.gather(y_pred[ii], ragged_indices)), axis=-1, keepdims=True))
        losses = tf.tensor_scatter_nd_update(losses, [[ii]], [tf.math.divide_no_nan(loss, tf.cast(tf.shape(indices)[0], dtype=tf.float32))])
    return losses

data = np.load('/content/data.npy', allow_pickle=True)
y_pred = data[0:2][0]
y_true = data[0:2][1]

print(rwmae(y_true, y_pred), y_true.shape)