Correct way of doing data augmentation in TensorFlow with the dataset api?
So, I've been using the TensorFlow Dataset API to load images and segmentation masks (for a semantic segmentation project), and I'd like to be able to generate batches of images and masks where each image has randomly gone through any combination of preprocessing functions such as brightness changes, contrast changes, cropping, saturation changes, etc. So the first image in my batch might have no preprocessing, the second might have a saturation change, the third brightness and saturation, and so on.
I tried the following:
import tensorflow as tf
from tensorflow.contrib.data import Dataset, Iterator
import random

def _resize_image(image, mask):
    image = tf.image.resize_bicubic(image, [480, 640], True)
    mask = tf.image.resize_bicubic(mask, [480, 640], True)
    return image, mask

def _corrupt_contrast(image, mask):
    image = tf.image.random_contrast(image, 0, 5)
    return image, mask

def _corrupt_saturation(image, mask):
    image = tf.image.random_saturation(image, 0, 5)
    return image, mask

def _corrupt_brightness(image, mask):
    image = tf.image.random_brightness(image, 5)
    return image, mask

def _random_crop(image, mask):
    seed = random.random()
    image = tf.random_crop(image, [240, 320, 3], seed=seed)
    mask = tf.random_crop(mask, [240, 320, 1], seed=seed)
    return image, mask

def _flip_image_horizontally(image, mask):
    seed = random.random()
    image = tf.image.random_flip_left_right(image, seed=seed)
    mask = tf.image.random_flip_left_right(mask, seed=seed)
    return image, mask

def _flip_image_vertically(image, mask):
    seed = random.random()
    image = tf.image.random_flip_up_down(image, seed=seed)
    mask = tf.image.random_flip_up_down(mask, seed=seed)
    return image, mask

def _normalize_data(image, mask):
    image = tf.cast(image, tf.float32)
    image = image / 255.0
    mask = tf.cast(mask, tf.float32)
    mask = mask / 255.0
    return image, mask

def _parse_data(image_paths, mask_paths):
    image_content = tf.read_file(image_paths)
    mask_content = tf.read_file(mask_paths)
    images = tf.image.decode_png(image_content, channels=3)
    masks = tf.image.decode_png(mask_content, channels=1)
    return images, masks

def data_batch(image_paths, mask_paths, params, batch_size=4, num_threads=2):
    # Convert lists of paths to tensors for tensorflow
    images_name_tensor = tf.constant(image_paths)
    mask_name_tensor = tf.constant(mask_paths)

    # Create dataset out of the 2 files:
    data = Dataset.from_tensor_slices(
        (images_name_tensor, mask_name_tensor))

    # Parse images and labels
    data = data.map(
        _parse_data, num_threads=num_threads, output_buffer_size=6 * batch_size)

    # Normalize images and masks for vals. between 0 and 1
    data = data.map(_normalize_data, num_threads=num_threads,
                    output_buffer_size=6 * batch_size)

    if params['crop'] and not random.randint(0, 1):
        data = data.map(_random_crop, num_threads=num_threads,
                        output_buffer_size=6 * batch_size)

    if params['brightness'] and not random.randint(0, 1):
        data = data.map(_corrupt_brightness, num_threads=num_threads,
                        output_buffer_size=6 * batch_size)

    if params['contrast'] and not random.randint(0, 1):
        data = data.map(_corrupt_contrast, num_threads=num_threads,
                        output_buffer_size=6 * batch_size)

    if params['saturation'] and not random.randint(0, 1):
        data = data.map(_corrupt_saturation, num_threads=num_threads,
                        output_buffer_size=6 * batch_size)

    if params['flip_horizontally'] and not random.randint(0, 1):
        data = data.map(_flip_image_horizontally,
                        num_threads=num_threads, output_buffer_size=6 * batch_size)

    if params['flip_vertically'] and not random.randint(0, 1):
        data = data.map(_flip_image_vertically, num_threads=num_threads,
                        output_buffer_size=6 * batch_size)

    # Shuffle the data queue
    data = data.shuffle(len(image_paths))

    # Create a batch of data
    data = data.batch(batch_size)
    data = data.map(_resize_image, num_threads=num_threads,
                    output_buffer_size=6 * batch_size)

    # Create iterator
    iterator = Iterator.from_structure(data.output_types, data.output_shapes)

    # Next element Op
    next_element = iterator.get_next()

    # Data set init. op
    init_op = iterator.make_initializer(data)

    return next_element, init_op
But every batch returned from this has the same transformations applied, rather than different combinations per image. My guess is that the random.randint results persist and are not actually re-evaluated for each batch; if that's the case, how do I fix this to get the intended behavior?
For an example of how I intend to use it (I feel this is irrelevant to the question, but people may still want to know), it can be found here.
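To make my suspicion concrete, here is a stripped-down toy sketch (the tensors and the single flip stage are made up for illustration, not taken from my actual pipeline). The `if` statements are ordinary Python, so they run exactly once, while the pipeline is being built, and whatever random.randint returned at that moment is frozen into the graph for every batch thereafter:

import random
import tensorflow as tf
from tensorflow.contrib.data import Dataset

# This coin flip happens ONCE, at graph-construction time, not per batch:
apply_flip = not random.randint(0, 1)

data = Dataset.from_tensor_slices(tf.zeros([8, 4, 4, 3]))
if apply_flip:
    # Either this map stage is part of the pipeline for ALL elements,
    # or it is absent for all of them; it is never decided per element.
    data = data.map(tf.image.flip_left_right)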
So the problem was indeed that the control flow in the if statements uses Python variables and is only executed once, when the graph is created. To do what I wanted, I had to define a placeholder holding booleans for whether to apply each function (and feed a new boolean tensor on each iteration to vary the augmentations), with the control flow handled by tf.cond. If anyone's interested, I pushed the new code to the GitHub link I posted in the question above.
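For anyone who wants the idea without digging through the repo, here is a minimal sketch of the tf.cond pattern. Note that this is not the repo's code: instead of the fed boolean placeholders described above, it draws the coin flip inside the graph with tf.random_uniform so every element gets its own independent decision, and the helper name _maybe_flip is invented for this example:

import tensorflow as tf

def _maybe_flip(image, mask):
    # The coin flip is a graph op, so it is re-evaluated for every element
    # the map function processes, unlike Python-level random.randint, which
    # runs once while the pipeline is being constructed.
    coin = tf.random_uniform([], 0.0, 1.0)

    # tf.cond builds BOTH branches into the graph and picks one at run time.
    # Flipping image and mask in the same branch keeps them aligned.
    image, mask = tf.cond(
        coin < 0.5,
        lambda: (tf.image.flip_left_right(image),
                 tf.image.flip_left_right(mask)),
        lambda: (image, mask))
    return image, mask

# Applied like any other stage in data_batch above:
# data = data.map(_maybe_flip, num_threads=num_threads,
#                 output_buffer_size=6 * batch_size)

Drawing the random decision inside the mapped function is what makes the choice per-image rather than per-pipeline, which is exactly the behavior the question asked for.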