使用 Tensorflow 进行图像增强,使所有类别都具有完全相同数量的图像
Image augmentation with Tensorflow so that all classes have the EXACT SAME number of images
我想对动物进行多类别图像分类。问题是我的数据集中每个类别的图像数量各不相同,而且差异非常大。例如:
In this example the dataset contains 320 images in 3 classes. Class A has 125 images, class B has 170 images, and class C has only 25 images. I wish to augment these classes so that there are 200 images for each class, which means 600 images uniformly distributed across those 3 classes.
但是,就我而言,我的数据集中有 60 个类别。我该如何对所有类别进行增强,使每个类别都具有完全相同数量的图像?
这需要编写不少代码,但您可以使用 ImageDataGenerator 生成增强图像并将它们存储在指定目录中(详见该生成器的官方文档)。或者,您可以使用像 cv2 或 PIL 这样提供图像转换功能的模块。下面是使用 cv2 的代码。请查阅 cv2 文档,了解如何实现代码注释中标出的图像转换。代码如下:
import os
import cv2
file_number = 130  # set this to the number of files you want in every class directory
sdir = r'C:\Temp\dummydogs\train'  # set this to the main directory that contains your class directories


def _augmented_name(file, i, label):
    """Build the file name for the i-th augmented copy made from `file`."""
    # splitext copes with names that have no extension, where slicing at
    # rfind('.') == -1 would chop the last character off the name.
    fname, ext = os.path.splitext(file)
    return fname + '-' + str(i) + '-' + label + ext


def _trim_class(class_path, filelist, surplus):
    """Delete the first `surplus` files of `filelist` from `class_path`."""
    for file in filelist[:surplus]:
        os.remove(os.path.join(class_path, file))


def _augment_class(class_path, filelist, shortfall, label):
    """Write up to `shortfall` augmented images into `class_path`.

    Source images are taken from `filelist` in a cycle, so a class can grow
    by more images than it currently holds. Files cv2 cannot decode are
    skipped. Returns the number of images actually written.
    """
    unreadable = set()
    written = 0
    attempt = 0
    # Stop early when every candidate turned out to be unreadable (this also
    # covers an empty class directory) instead of looping forever.
    while written < shortfall and len(unreadable) < len(filelist):
        file = filelist[attempt % len(filelist)]
        attempt += 1
        if file in unreadable:
            continue
        img = cv2.imread(os.path.join(class_path, file))
        if img is None:  # cv2.imread returns None for files it cannot decode
            unreadable.add(file)
            continue
        # look up cv2 documentation and apply image transformation code here
        # NOTE: keep the image in BGR order; cv2.imwrite expects BGR, so
        # converting to RGB before saving would swap red and blue on disk.
        dest_path = os.path.join(class_path, _augmented_name(file, written, label))
        cv2.imwrite(dest_path, img)
        written += 1
    return written


def balance_classes(sdir, file_number, label='-aug'):
    """Make every class directory under `sdir` hold `file_number` files.

    Args:
        sdir: directory whose sub-directories each hold one class.
        file_number: number of files wanted in every class directory.
        label: string made part of each augmented image's file name.

    Classes with too many files have the surplus deleted (first files in
    sorted order); classes with too few get augmented copies written next
    to the originals. Entries of `sdir` that are not directories are ignored.
    """
    for klass in sorted(os.listdir(sdir)):
        class_path = os.path.join(sdir, klass)
        if not os.path.isdir(class_path):
            continue
        # Sorted so that the files removed / used as sources are deterministic.
        filelist = sorted(
            name for name in os.listdir(class_path)
            if os.path.isfile(os.path.join(class_path, name))
        )
        file_count = len(filelist)
        if file_count > file_number:
            # delete files from the klass directory because you have more than you need
            _trim_class(class_path, filelist, file_count - file_number)
        elif file_count < file_number:
            # need to add files to this klass so do augmentation using cv2 image transforms
            shortfall = file_number - file_count
            written = _augment_class(class_path, filelist, shortfall, label)
            if written < shortfall:
                print(f'{klass}: only {written} of {shortfall} augmented images could be created')


if __name__ == '__main__':
    balance_classes(sdir, file_number)
def dataGenerator(type_, number, src_root='train', dst_root='generate_data', extensions=('.jpeg',)):
    """Save `number` randomly augmented copies of every image of one class.

    Args:
        type_: class sub-directory name, e.g. 'CAT' or 'DOG'.
        number: how many augmented copies to generate per source image.
        src_root: directory containing the per-class source directories.
        dst_root: directory under which `<dst_root>/<type_>` receives the
            generated images; it is created when missing.
        extensions: file-name suffixes (case-sensitive) treated as images.
    """
    if number <= 0:
        # Nothing to generate. Checked first because the Keras flow iterator
        # is endless, so a count that is never reached would loop forever.
        return

    from keras.preprocessing.image import ImageDataGenerator, img_to_array, load_img

    src_dir = os.path.join(src_root, type_)
    out_dir = os.path.join(dst_root, type_)
    os.makedirs(out_dir, exist_ok=True)

    # Use the name imported above; `tf` is never imported in this snippet.
    datagen = ImageDataGenerator(
        rotation_range=40,
        width_shift_range=0.2,
        height_shift_range=0.2,
        shear_range=0.2,
        zoom_range=0.2,
        fill_mode='nearest',
        horizontal_flip=True,
    )
    for filename in os.listdir(src_dir):
        if not filename.endswith(tuple(extensions)):
            continue
        img = load_img(os.path.join(src_dir, filename))
        x = img_to_array(img)
        x = x.reshape((1,) + x.shape)  # add the leading batch axis flow() expects
        # save_format is given without a leading dot; '.jpeg' would yield
        # file names ending in '..jpeg'.
        # NOTE(review): every output shares the 'IMG' prefix and Keras appears
        # to add only a small random suffix, so name clashes between outputs
        # may be possible -- confirm if the exact file count matters.
        flow = datagen.flow(
            x,
            batch_size=1,
            save_to_dir=out_dir,
            save_prefix='IMG',
            save_format='jpeg',
        )
        # Each batch drawn from the iterator writes one image to out_dir;
        # zip stops after exactly `number` batches.
        for _ in zip(range(number), flow):
            pass
我正在使用上面这个 dataGenerator 函数。
我想对动物进行多类别图像分类。问题是我的数据集中每个类别的图像数量各不相同,而且差异非常大。例如:
In this example the dataset contains 320 images in 3 classes. Class A has 125 images, class B has 170 images, and class C has only 25 images. I wish to augment these classes so that there are 200 images for each class, which means 600 images uniformly distributed across those 3 classes.
但是,就我而言,我的数据集中有 60 个类别。我该如何对所有类别进行增强,使每个类别都具有完全相同数量的图像?
这需要编写不少代码,但您可以使用 ImageDataGenerator 生成增强图像并将它们存储在指定目录中(详见该生成器的官方文档)。或者,您可以使用像 cv2 或 PIL 这样提供图像转换功能的模块。下面是使用 cv2 的代码。请查阅 cv2 文档,了解如何实现代码注释中标出的图像转换。代码如下:
import os
import cv2
file_number = 130  # set this to the number of files you want in every class directory
sdir = r'C:\Temp\dummydogs\train'  # set this to the main directory that contains your class directories


def _augmented_name(file, i, label):
    """Build the file name for the i-th augmented copy made from `file`."""
    # splitext copes with names that have no extension, where slicing at
    # rfind('.') == -1 would chop the last character off the name.
    fname, ext = os.path.splitext(file)
    return fname + '-' + str(i) + '-' + label + ext


def _trim_class(class_path, filelist, surplus):
    """Delete the first `surplus` files of `filelist` from `class_path`."""
    for file in filelist[:surplus]:
        os.remove(os.path.join(class_path, file))


def _augment_class(class_path, filelist, shortfall, label):
    """Write up to `shortfall` augmented images into `class_path`.

    Source images are taken from `filelist` in a cycle, so a class can grow
    by more images than it currently holds. Files cv2 cannot decode are
    skipped. Returns the number of images actually written.
    """
    unreadable = set()
    written = 0
    attempt = 0
    # Stop early when every candidate turned out to be unreadable (this also
    # covers an empty class directory) instead of looping forever.
    while written < shortfall and len(unreadable) < len(filelist):
        file = filelist[attempt % len(filelist)]
        attempt += 1
        if file in unreadable:
            continue
        img = cv2.imread(os.path.join(class_path, file))
        if img is None:  # cv2.imread returns None for files it cannot decode
            unreadable.add(file)
            continue
        # look up cv2 documentation and apply image transformation code here
        # NOTE: keep the image in BGR order; cv2.imwrite expects BGR, so
        # converting to RGB before saving would swap red and blue on disk.
        dest_path = os.path.join(class_path, _augmented_name(file, written, label))
        cv2.imwrite(dest_path, img)
        written += 1
    return written


def balance_classes(sdir, file_number, label='-aug'):
    """Make every class directory under `sdir` hold `file_number` files.

    Args:
        sdir: directory whose sub-directories each hold one class.
        file_number: number of files wanted in every class directory.
        label: string made part of each augmented image's file name.

    Classes with too many files have the surplus deleted (first files in
    sorted order); classes with too few get augmented copies written next
    to the originals. Entries of `sdir` that are not directories are ignored.
    """
    for klass in sorted(os.listdir(sdir)):
        class_path = os.path.join(sdir, klass)
        if not os.path.isdir(class_path):
            continue
        # Sorted so that the files removed / used as sources are deterministic.
        filelist = sorted(
            name for name in os.listdir(class_path)
            if os.path.isfile(os.path.join(class_path, name))
        )
        file_count = len(filelist)
        if file_count > file_number:
            # delete files from the klass directory because you have more than you need
            _trim_class(class_path, filelist, file_count - file_number)
        elif file_count < file_number:
            # need to add files to this klass so do augmentation using cv2 image transforms
            shortfall = file_number - file_count
            written = _augment_class(class_path, filelist, shortfall, label)
            if written < shortfall:
                print(f'{klass}: only {written} of {shortfall} augmented images could be created')


if __name__ == '__main__':
    balance_classes(sdir, file_number)
def dataGenerator(type_, number, src_root='train', dst_root='generate_data', extensions=('.jpeg',)):
    """Save `number` randomly augmented copies of every image of one class.

    Args:
        type_: class sub-directory name, e.g. 'CAT' or 'DOG'.
        number: how many augmented copies to generate per source image.
        src_root: directory containing the per-class source directories.
        dst_root: directory under which `<dst_root>/<type_>` receives the
            generated images; it is created when missing.
        extensions: file-name suffixes (case-sensitive) treated as images.
    """
    if number <= 0:
        # Nothing to generate. Checked first because the Keras flow iterator
        # is endless, so a count that is never reached would loop forever.
        return

    from keras.preprocessing.image import ImageDataGenerator, img_to_array, load_img

    src_dir = os.path.join(src_root, type_)
    out_dir = os.path.join(dst_root, type_)
    os.makedirs(out_dir, exist_ok=True)

    # Use the name imported above; `tf` is never imported in this snippet.
    datagen = ImageDataGenerator(
        rotation_range=40,
        width_shift_range=0.2,
        height_shift_range=0.2,
        shear_range=0.2,
        zoom_range=0.2,
        fill_mode='nearest',
        horizontal_flip=True,
    )
    for filename in os.listdir(src_dir):
        if not filename.endswith(tuple(extensions)):
            continue
        img = load_img(os.path.join(src_dir, filename))
        x = img_to_array(img)
        x = x.reshape((1,) + x.shape)  # add the leading batch axis flow() expects
        # save_format is given without a leading dot; '.jpeg' would yield
        # file names ending in '..jpeg'.
        # NOTE(review): every output shares the 'IMG' prefix and Keras appears
        # to add only a small random suffix, so name clashes between outputs
        # may be possible -- confirm if the exact file count matters.
        flow = datagen.flow(
            x,
            batch_size=1,
            save_to_dir=out_dir,
            save_prefix='IMG',
            save_format='jpeg',
        )
        # Each batch drawn from the iterator writes one image to out_dir;
        # zip stops after exactly `number` batches.
        for _ in zip(range(number), flow):
            pass
我正在使用上面这个 dataGenerator 函数。