使用 TensorFlow 数据集(tf.data)解码 RLE(游程编码,run-length encoding)掩码
Decoding RLE (run-length encoding) mask with Tensorflow Datasets
我一直在尝试使用 TensorFlow 的数据集(tf.data),但不知道如何高效地生成 RLE 掩码。
仅供参考,我正在使用 Kaggle 中空中客车船舶检测挑战赛的数据:https://www.kaggle.com/c/airbus-ship-detection/data
我知道我的 RLE 解码函数本身是可以正常工作的(借用自某个 Kaggle kernel):
def rle_decode(mask_rle, shape=(768, 768)):
    '''
    Decode a run-length-encoded mask string into a binary mask.

    mask_rle: run-length string formatted as "start length start length ..."
              with 1-based starts; any non-string value yields an
              all-background mask
    shape: dimensions the flat mask is reshaped to before the final transpose
    Returns numpy uint8 array of shape (shape[1], shape[0]),
            1 - mask, 0 - background
    Raises ValueError if the string holds an odd number of values
    '''
    img = np.zeros(shape[0] * shape[1], dtype=np.uint8)
    if isinstance(mask_rle, str):
        tokens = mask_rle.split()
        # An unpaired value would otherwise be broadcast against the other
        # column and silently produce a wrong mask.
        if len(tokens) % 2:
            raise ValueError(
                'RLE string must hold start/length pairs, got %d values'
                % len(tokens))
        # Starts are 1-based in the encoding; shift them to 0-based offsets.
        starts = np.asarray(tokens[0::2], dtype=int) - 1
        ends = starts + np.asarray(tokens[1::2], dtype=int)
        for lo, hi in zip(starts, ends):
            img[lo:hi] = 1
    # The flat mask is reshaped and then transposed, so the returned array
    # has its two dimensions swapped relative to `shape`.
    return img.reshape(shape).T
……但它似乎无法在数据管道中正常工作:
# Build a dataset of file names from train_paths_abs (defined elsewhere).
list_ds = tf.data.Dataset.list_files(train_paths_abs)
# Apply parse_img to every element of the file-name dataset.
ds = list_ds.map(parse_img)
使用以下解析函数,一切正常:
def parse_img(file_path,new_size=[128,128]):
    '''
    Read the JPEG file at file_path and return it as a float32 image
    resized to new_size.
    '''
    raw_bytes = tf.io.read_file(file_path)
    # Decode, then switch to float32 before resizing.
    decoded = tf.image.convert_image_dtype(
        tf.image.decode_jpeg(raw_bytes), tf.float32)
    return tf.image.resize(decoded, new_size)
但是一旦加入掩码,情况就变得很糟糕:
def parse_img(file_path,new_size=[128,128]):
    '''
    Load one image and build the mask that belongs to it.

    file_path: path of the JPEG file to read
    new_size: size passed to tf.image.resize for both image and mask
    Returns the pair (img, mask)
    '''
    # Image
    img_content = tf.io.read_file(file_path)
    img = tf.image.decode_jpeg(img_content)
    img = tf.image.convert_image_dtype(img, tf.float32)
    img = tf.image.resize(img,new_size)
    # Mask
    # Last '/'-separated component of the path, used as the image id.
    file_id = tf.strings.split(file_path,'/')[-1]
    # NOTE(review): df and df2 are defined outside this block; this assumes
    # that iterating the selection yields one RLE string per object - confirm.
    # NOTE(review): rle_decode and the np.* calls below are plain Python/NumPy
    # while file_id is a TensorFlow value; presumably this mix is what breaks
    # inside Dataset.map - confirm.
    objects = [rle_decode(m) for m in df2[df.ImageId==file_id]]
    # Add the per-object masks together.
    mask = np.sum(objects,axis=0)
    # NOTE(review): meant to add a trailing channel axis for the resize step;
    # axis 3 looks out of range if mask is 2-D here - confirm (-1 may be intended).
    mask = np.expand_dims(mask,3) # Give the mask an extra axis, needed by the resize step
    mask = tf.image.convert_image_dtype(mask, tf.int8)
    # Clamp the summed mask values to the range [0, 1].
    mask = tf.clip_by_value(mask,0,1)
    mask = tf.image.resize(mask,new_size)
    mask = tf.squeeze(mask) # squeeze back
    mask = tf.image.convert_image_dtype(mask, tf.int8)
    return img, mask
虽然我的 parse_img 函数本身工作正常(我已在单个样本上验证过,每次运行耗时 271 µs ± 67.9 µs),但 list_ds.map 这一步会耗费很长时间(超过 5 分钟),最后挂起。
我不知道出了什么问题,这让我发疯!
有什么想法吗?
你可以像下面这样用 TensorFlow 重写 rle_decode 函数(为了更通用,这里没有做最后的转置,你可以在之后自行转置):
import tensorflow as tf
def rle_decode_tf(mask_rle, shape):
    '''
    Decode a run-length-encoded string into a uint8 mask with TensorFlow ops.

    mask_rle: string of "start length" pairs with 1-based starts
    shape: shape of the returned mask
    Returns uint8 tensor of the given shape with ones scattered at the
            encoded positions (no transpose is applied)
    '''
    shape = tf.convert_to_tensor(shape, tf.int64)
    num_pixels = tf.math.reduce_prod(shape)
    # Parse the string into a flat vector of int64 values
    values = tf.strings.to_number(tf.strings.split(mask_rle), tf.int64)
    # Even positions hold the 1-based starts, odd positions the run lengths
    run_starts = values[::2] - 1
    run_lens = values[1::2]
    # One flat index is generated for every encoded pixel
    n_ones = tf.reduce_sum(run_lens)
    pos = tf.range(n_ones)
    # run_of_pos[i] is the run that the i-th generated index belongs to
    run_ends = tf.math.cumsum(run_lens)
    run_of_pos = tf.searchsorted(run_ends, pos, 'right')
    # Per-run shift turning the running counter into a flat mask position
    run_offsets = run_starts - tf.pad(run_ends[:-1], [(1, 0)])
    flat_idx = pos + tf.gather(run_offsets, run_of_pos)
    # Scatter ones into the flattened mask, then restore the requested shape
    updates = tf.ones([n_ones], tf.uint8)
    mask_flat = tf.scatter_nd(tf.expand_dims(flat_idx, 1), updates, [num_pixels])
    return tf.reshape(mask_flat, shape)
一个小测试(TensorFlow 2.0):
# Four runs written as "start length" pairs, decoded against a 4 x 6 shape.
mask_rle = '1 2 4 3 9 4 15 5'
shape = [4, 6]
# Original NumPy function (its result is transposed, hence the 6 x 4 output)
print(rle_decode(mask_rle, shape))
# [[1 0 0 1]
# [1 0 0 0]
# [0 1 1 0]
# [1 1 1 0]
# [1 1 1 0]
# [1 1 1 0]]
# TensorFlow function (transposing is done out of the function)
tf.print(tf.transpose(rle_decode_tf(mask_rle, shape)))
# [[1 0 0 1]
# [1 0 0 0]
# [0 1 1 0]
# [1 1 1 0]
# [1 1 1 0]
# [1 1 1 0]]
我一直在尝试使用 TensorFlow 的数据集(tf.data),但不知道如何高效地生成 RLE 掩码。 仅供参考,我使用的是 Kaggle 上空中客车船舶检测挑战赛的数据:https://www.kaggle.com/c/airbus-ship-detection/data
我知道我的 RLE 解码函数本身是可以正常工作的(借用自某个 Kaggle kernel):
def rle_decode(mask_rle, shape=(768, 768)):
    '''
    Decode a run-length-encoded mask string into a binary mask.

    mask_rle: run-length string formatted as "start length start length ..."
              with 1-based starts; any non-string value yields an
              all-background mask
    shape: dimensions the flat mask is reshaped to before the final transpose
    Returns numpy uint8 array of shape (shape[1], shape[0]),
            1 - mask, 0 - background
    Raises ValueError if the string holds an odd number of values
    '''
    img = np.zeros(shape[0] * shape[1], dtype=np.uint8)
    if isinstance(mask_rle, str):
        tokens = mask_rle.split()
        # An unpaired value would otherwise be broadcast against the other
        # column and silently produce a wrong mask.
        if len(tokens) % 2:
            raise ValueError(
                'RLE string must hold start/length pairs, got %d values'
                % len(tokens))
        # Starts are 1-based in the encoding; shift them to 0-based offsets.
        starts = np.asarray(tokens[0::2], dtype=int) - 1
        ends = starts + np.asarray(tokens[1::2], dtype=int)
        for lo, hi in zip(starts, ends):
            img[lo:hi] = 1
    # The flat mask is reshaped and then transposed, so the returned array
    # has its two dimensions swapped relative to `shape`.
    return img.reshape(shape).T
……但它似乎无法在数据管道中正常工作:
# Build a dataset of file names from train_paths_abs (defined elsewhere).
list_ds = tf.data.Dataset.list_files(train_paths_abs)
# Apply parse_img to every element of the file-name dataset.
ds = list_ds.map(parse_img)
使用以下解析函数,一切正常:
def parse_img(file_path,new_size=[128,128]):
    '''
    Read the JPEG file at file_path and return it as a float32 image
    resized to new_size.
    '''
    raw_bytes = tf.io.read_file(file_path)
    # Decode, then switch to float32 before resizing.
    decoded = tf.image.convert_image_dtype(
        tf.image.decode_jpeg(raw_bytes), tf.float32)
    return tf.image.resize(decoded, new_size)
但是一旦加入掩码,情况就变得很糟糕:
def parse_img(file_path,new_size=[128,128]):
    '''
    Load one image and build the mask that belongs to it.

    file_path: path of the JPEG file to read
    new_size: size passed to tf.image.resize for both image and mask
    Returns the pair (img, mask)
    '''
    # Image
    img_content = tf.io.read_file(file_path)
    img = tf.image.decode_jpeg(img_content)
    img = tf.image.convert_image_dtype(img, tf.float32)
    img = tf.image.resize(img,new_size)
    # Mask
    # Last '/'-separated component of the path, used as the image id.
    file_id = tf.strings.split(file_path,'/')[-1]
    # NOTE(review): df and df2 are defined outside this block; this assumes
    # that iterating the selection yields one RLE string per object - confirm.
    # NOTE(review): rle_decode and the np.* calls below are plain Python/NumPy
    # while file_id is a TensorFlow value; presumably this mix is what breaks
    # inside Dataset.map - confirm.
    objects = [rle_decode(m) for m in df2[df.ImageId==file_id]]
    # Add the per-object masks together.
    mask = np.sum(objects,axis=0)
    # NOTE(review): meant to add a trailing channel axis for the resize step;
    # axis 3 looks out of range if mask is 2-D here - confirm (-1 may be intended).
    mask = np.expand_dims(mask,3) # Give the mask an extra axis, needed by the resize step
    mask = tf.image.convert_image_dtype(mask, tf.int8)
    # Clamp the summed mask values to the range [0, 1].
    mask = tf.clip_by_value(mask,0,1)
    mask = tf.image.resize(mask,new_size)
    mask = tf.squeeze(mask) # squeeze back
    mask = tf.image.convert_image_dtype(mask, tf.int8)
    return img, mask
虽然我的 parse_img 函数本身工作正常(我已在单个样本上验证过,每次运行耗时 271 µs ± 67.9 µs),但 list_ds.map 这一步会耗费很长时间(超过 5 分钟),最后挂起。
我不知道出了什么问题,这让我发疯!
有什么想法吗?
你可以像下面这样用 TensorFlow 重写 rle_decode 函数(为了更通用,这里没有做最后的转置,你可以在之后自行转置):
import tensorflow as tf
def rle_decode_tf(mask_rle, shape):
    '''
    Decode a run-length-encoded string into a uint8 mask with TensorFlow ops.

    mask_rle: string of "start length" pairs with 1-based starts
    shape: shape of the returned mask
    Returns uint8 tensor of the given shape with ones scattered at the
            encoded positions (no transpose is applied)
    '''
    shape = tf.convert_to_tensor(shape, tf.int64)
    num_pixels = tf.math.reduce_prod(shape)
    # Parse the string into a flat vector of int64 values
    values = tf.strings.to_number(tf.strings.split(mask_rle), tf.int64)
    # Even positions hold the 1-based starts, odd positions the run lengths
    run_starts = values[::2] - 1
    run_lens = values[1::2]
    # One flat index is generated for every encoded pixel
    n_ones = tf.reduce_sum(run_lens)
    pos = tf.range(n_ones)
    # run_of_pos[i] is the run that the i-th generated index belongs to
    run_ends = tf.math.cumsum(run_lens)
    run_of_pos = tf.searchsorted(run_ends, pos, 'right')
    # Per-run shift turning the running counter into a flat mask position
    run_offsets = run_starts - tf.pad(run_ends[:-1], [(1, 0)])
    flat_idx = pos + tf.gather(run_offsets, run_of_pos)
    # Scatter ones into the flattened mask, then restore the requested shape
    updates = tf.ones([n_ones], tf.uint8)
    mask_flat = tf.scatter_nd(tf.expand_dims(flat_idx, 1), updates, [num_pixels])
    return tf.reshape(mask_flat, shape)
一个小测试(TensorFlow 2.0):
# Four runs written as "start length" pairs, decoded against a 4 x 6 shape.
mask_rle = '1 2 4 3 9 4 15 5'
shape = [4, 6]
# Original NumPy function (its result is transposed, hence the 6 x 4 output)
print(rle_decode(mask_rle, shape))
# [[1 0 0 1]
# [1 0 0 0]
# [0 1 1 0]
# [1 1 1 0]
# [1 1 1 0]
# [1 1 1 0]]
# TensorFlow function (transposing is done out of the function)
tf.print(tf.transpose(rle_decode_tf(mask_rle, shape)))
# [[1 0 0 1]
# [1 0 0 0]
# [0 1 1 0]
# [1 1 1 0]
# [1 1 1 0]
# [1 1 1 0]]