Tensorflow:从图像预测点,用点标签训练模型
TensorFlow: predicting a point from an image, training a model with point labels
我想创建一个可以从图像中预测点的模型。
我有一个包含训练图像的数据集。这些图像被分成 24 个目录。
我准备了一个 json 文件,其中包含每个图像的 (x, y) 值。
示例:
"dir22": {
"frame_00001_rgb": {
"x": 363.693829827852,
"y": 278.2191728859505
},
"frame_00002_rgb": {
"x": 330.9709780765119,
"y": 283.34142472069004
},
...
...
"dir23": {
"frame_00001_rgb": {
"x": 212.5232358000000,
"y": 156.3342191728855
},
"frame_00002_rgb": {
"x": 230.69497097807351,
"y": 253.75341424720690
},
我的模型是这样的:
img_width, img_height = 640, 480
train_data_dir = 'v_data/train'
epochs = 10
batch_size = 16

# Keras image tensors are laid out as (height, width, channels); this must
# agree with target_size=(img_height, img_width) used when loading the images.
input_tensor = tf.keras.Input(shape=(img_height, img_width, 3))
base_model = tf.keras.applications.ResNet50(
    weights='imagenet', include_top=False, input_tensor=input_tensor)

# Regression head: flatten the convolutional features and predict (x, y).
# Layer classes live in tf.keras.layers, not directly under tf.keras.
top_model = tf.keras.Sequential()
top_model.add(tf.keras.layers.Flatten(input_shape=base_model.output_shape[1:]))
top_model.add(tf.keras.layers.Dense(128, activation='relu'))
top_model.add(tf.keras.layers.Dense(128, activation='relu'))
top_model.add(tf.keras.layers.Dense(2))

# tf.keras.Model takes the plural keyword arguments `inputs` / `outputs`.
model = tf.keras.Model(inputs=base_model.input,
                       outputs=top_model(base_model.output))

# NOTE(review): this freezes the LAST 15 layers of `model`, and the last layer
# is `top_model` itself, so the regression head is frozen too. If the intent
# was to freeze the pretrained backbone instead, confirm and adjust the slice.
for layer in model.layers[-15:]:
    layer.trainable = False

optimizer = tf.keras.optimizers.RMSprop(0.001)
model.compile(loss='mse',
              optimizer=optimizer,
              metrics=['mae', 'mse'])
现在我已经从我的目录加载图像:
# Directory-based loader: every sub-directory of train_data_dir is treated as
# one class, so the labels it yields are class indices, not (x, y) points.
train_datagen = tf.keras.preprocessing.image.ImageDataGenerator()
train_generator = train_datagen.flow_from_directory(
    directory=train_data_dir,
    target_size=(img_height, img_width),
    batch_size=batch_size,
)
Found 15678 images belonging to 24 classes.
现在如何为每张图片分配标签并用它训练我的模型?
为此,您需要编写自定义数据生成器。
导入必要的库
import os
import pandas as pd
from skimage.io import imread # Used for image processing
from skimage.transform import resize # Used for image processing
import json
import numpy as np
定义我们自己的数据生成器
我跟着 this link 了解了如何做到这一点。并根据您的问题对其进行定制。
我们需要填写以下功能
class DataGenerator(tf.keras.utils.Sequence):
    """Generates data for Keras.

    Skeleton only: every `...` body below is a placeholder.
    """

    def __init__(self, directory, target_json, batch_size=32, target_size=(128, 128), shuffle=True):
        ...

    def __len__(self):
        """Denotes the number of batches per epoch."""
        ...

    def __getitem__(self, index):
        """Generate one batch of data."""
        ...

    def on_epoch_end(self):
        """Updates indexes after each epoch."""
        ...

    def __data_generation(self, list_paths, list_paths_wo_ext):
        """Generates data containing batch_size samples."""  # X : (n_samples, *dim, n_channels)
        ...
看看我们定义了哪些变量
self.target_size = # Final size of the images
self.batch_size = # Batch size
self.target_json = # Path to the json file
self.directory = # Where the training data is
self.img_paths = # Contains image paths with extension
self.img_paths_wo_ext = # Contains the image paths without extension
self.targets = # The dataframe containing targets loaded from the json
self.shuffle = # Shuffle data at start of each epoch?
JSON 文件
您的 JSON 文件需要完全 这种格式。这可能也是你所拥有的。但请确保它是 100% 这种格式。
{"dir20": {"frame_00001_rgb": {"x": 363.693829827852, "y": 278.2191728859505}, "frame_00002_rgb": {"x": 330.9709780765119, "y": 283.34142472069004}}, "dir21": {"frame_00001_rgb": {"x": 363.693829827852, "y": 278.2191728859505}, "frame_00002_rgb": {"x": 330.9709780765119, "y": 283.34142472069004}}, "dir22": {"frame_00001_rgb": {"x": 363.693829827852, "y": 278.2191728859505}, "frame_00002_rgb": {"x": 330.9709780765119, "y": 283.34142472069004}}, "dir23": {"frame_00001_rgb": {"x": 363.693829827852, "y": 278.2191728859505}, "frame_00002_rgb": {"x": 330.9709780765119, "y": 283.34142472069004}}, "dir24": {"frame_00001_rgb": {"x": 212.5232358, "y": 156.3342191728855}, "frame_00002_rgb": {"x": 230.6949709780735, "y": 253.7534142472069}}}
接下来我们需要将其转换为 pandas 数据帧。为此,我们定义了以下函数。由于文件的性质,它有点复杂。但这是正在发生的事情。
- 加载 json 并创建一个数据框,其中包含像 `dir20.frame_00002_rgb.x` 这样的列
- 通过将列拆分为 3 个级别(例如 dir20、frame_00002_rgb、x)来创建多索引
- 使用 `stack` 将 `dir*` 和 `frame_*` 都作为索引
- 重新格式化索引,使其包含每个图像的完整路径,并且每个记录都有两列(`x` 和 `y`)。
def json_to_df(json_path, directory):
    """Load the nested target JSON into a DataFrame indexed by image path.

    The JSON is expected to be nested as ``{dir: {frame: {"x": ..., "y": ...}}}``.

    Args:
        json_path: Path of the JSON file holding the targets.
        directory: Root directory of the images; it is prepended to every
            ``dir/frame`` pair to build the index labels.

    Returns:
        A DataFrame with one row per ``(dir, frame)`` pair, indexed by
        ``directory/dir/frame`` (no file extension), with columns ``x`` and ``y``.
    """
    with open(json_path, 'r') as f:
        s = json.load(f)
    # One-row frame whose columns look like "dir20.frame_00002_rgb.x".
    # (pd.io.json.json_normalize is deprecated; pd.json_normalize replaces it.)
    df = pd.json_normalize(s)
    # Split each flattened column name into its (dir, frame, coordinate) levels.
    df.columns = pd.MultiIndex.from_tuples([col.split('.') for col in df.columns])
    # Move the dir and frame levels into the row index; x / y stay as columns.
    df = df.stack(level=[0, 1])
    # Drop the original single-row index level left over from json_normalize.
    df = df.droplevel(0)
    # os.path.join (rather than joining on os.path.sep) avoids a doubled
    # separator when `directory` already ends with one.
    df.index = pd.Index([os.path.join(directory, *c) for c in df.index.values])
    return df
其余代码
我不会详细介绍其他部分发生的事情,因为它非常简单。但我们实际上是通过读取图像、调整大小并从我们生成的数据帧中获取正确的 `x`、`y` 值来获取单批数据。
完整代码
这是数据生成器的完整代码。
class DataGenerator(tf.keras.utils.Sequence):
    """Keras ``Sequence`` yielding batches of resized images and (x, y) targets.

    Images are discovered by walking ``directory`` for ``.jpg`` / ``.png``
    files. Targets come from a JSON file nested as
    ``{dir: {frame: {"x": ..., "y": ...}}}`` and are looked up by the image
    path without its extension. Target values are returned exactly as stored
    in the JSON; they are not rescaled when images are resized.
    """

    def __init__(self, directory, target_json, batch_size=32, target_size=(128, 128), shuffle=True):
        """Collect image paths, load the targets and build the first index order.

        Args:
            directory: Root directory that is walked recursively for images.
            target_json: Path to the JSON file containing the targets.
            batch_size: Number of samples per batch.
            target_size: Size every image is resized to before batching.
            shuffle: Whether to reshuffle the sample order at each epoch end.
        """
        # Initialize the Keras base class (newer Keras versions warn when
        # this call is skipped).
        super().__init__()
        self.target_size = target_size
        self.batch_size = batch_size
        self.target_json = target_json
        self.directory = directory

        self.img_paths = []          # image paths with extension
        self.img_paths_wo_ext = []   # same paths without extension (target keys)
        for root, _dirs, files in os.walk(directory):
            for file in files:
                if file.lower().endswith((".jpg", ".png")):
                    path = os.path.join(root, file)
                    self.img_paths.append(path)
                    self.img_paths_wo_ext.append(os.path.splitext(path)[0])

        self.targets = self._json_to_df(self.target_json, self.directory)
        self.shuffle = shuffle
        self.on_epoch_end()

    @staticmethod
    def _json_to_df(json_path, directory):
        """Load the target JSON into a DataFrame indexed by ``directory/dir/frame``."""
        with open(json_path, 'r') as f:
            s = json.load(f)
        # One-row frame whose columns look like "dir20.frame_00002_rgb.x".
        # (pd.io.json.json_normalize is deprecated; pd.json_normalize replaces it.)
        df = pd.json_normalize(s)
        df.columns = pd.MultiIndex.from_tuples([col.split('.') for col in df.columns])
        # Move the dir and frame levels into the row index; x / y stay as columns.
        df = df.stack(level=[0, 1])
        # Drop the original single-row index level left over from json_normalize.
        df = df.droplevel(0)
        # os.path.join avoids a doubled separator when `directory` ends with
        # one, so the labels match the paths produced by os.walk above.
        df.index = pd.Index([os.path.join(directory, *c) for c in df.index.values])
        return df

    def __len__(self):
        """Denotes the number of batches per epoch (a trailing partial batch is dropped)."""
        return len(self.img_paths) // self.batch_size

    def __getitem__(self, index):
        """Generate one batch of data."""
        # Sample indexes belonging to this batch
        indexes = self.indexes[index * self.batch_size:(index + 1) * self.batch_size]

        # Matching image paths, with and without extension
        list_paths = [self.img_paths[k] for k in indexes]
        list_paths_wo_ext = [self.img_paths_wo_ext[k] for k in indexes]

        return self.__data_generation(list_paths, list_paths_wo_ext)

    def on_epoch_end(self):
        """Updates indexes after each epoch."""
        self.indexes = np.arange(len(self.img_paths))
        if self.shuffle:
            np.random.shuffle(self.indexes)

    def __data_generation(self, list_paths, list_paths_wo_ext):
        """Generates one batch: X is (n_samples, *target_size, 3), y holds the targets."""
        # Size the batch by the paths actually supplied so a short batch
        # never contains uninitialized rows.
        X = np.empty((len(list_paths), *self.target_size, 3))
        # Raises KeyError if an image has no entry in the target JSON.
        y = self.targets.loc[list_paths_wo_ext].values

        for i, path in enumerate(list_paths):
            X[i,] = resize(imread(path), self.target_size)

        return X, y
使用数据生成器
以下是数据生成器的使用方法。
# Build the generator, then pull a single batch from it to inspect the output.
generator = DataGenerator(train_data_dir, './train/data.json', batch_size=2)
train_datagen = iter(generator)
x, y = next(train_datagen)
for batch_part in (x, y):
    print(batch_part)
这给出了,
[[0.01377145 0.01377145 0.01377145]
[0.00242393 0.00242393 0.00242393]
[0. 0. 0. ]
...
[0.0037837 0.0037837 0.0037837 ]
[0.0037837 0.0037837 0.0037837 ]
[0.0037837 0.0037837 0.0037837 ]]
...
[[0.37398897 0.3372549 0.17647059]
[0.38967525 0.35294118 0.19215686]
[0.42889093 0.39215686 0.23137255]
...
[0.72156863 0.62889093 0.33085172]
[0.71372549 0.61176471 0.31764706]
[0.70588235 0.59359681 0.30340074]]]]
[[363.69382983 278.21917289]
[330.97097808 283.34142472]]
我想创建一个可以从图像中预测点的模型。 我有一个包含训练图像的数据集。这些图像被分成 24 个目录。 我准备了一个 json 文件,其中包含每个图像的 (x, y) 值。
示例:
"dir22": {
"frame_00001_rgb": {
"x": 363.693829827852,
"y": 278.2191728859505
},
"frame_00002_rgb": {
"x": 330.9709780765119,
"y": 283.34142472069004
},
...
...
"dir23": {
"frame_00001_rgb": {
"x": 212.5232358000000,
"y": 156.3342191728855
},
"frame_00002_rgb": {
"x": 230.69497097807351,
"y": 253.75341424720690
},
我的模型是这样的:
img_width, img_height = 640, 480
train_data_dir = 'v_data/train'
epochs = 10
batch_size = 16

# Keras image tensors are laid out as (height, width, channels); this must
# agree with target_size=(img_height, img_width) used when loading the images.
input_tensor = tf.keras.Input(shape=(img_height, img_width, 3))
base_model = tf.keras.applications.ResNet50(
    weights='imagenet', include_top=False, input_tensor=input_tensor)

# Regression head: flatten the convolutional features and predict (x, y).
# Layer classes live in tf.keras.layers, not directly under tf.keras.
top_model = tf.keras.Sequential()
top_model.add(tf.keras.layers.Flatten(input_shape=base_model.output_shape[1:]))
top_model.add(tf.keras.layers.Dense(128, activation='relu'))
top_model.add(tf.keras.layers.Dense(128, activation='relu'))
top_model.add(tf.keras.layers.Dense(2))

# tf.keras.Model takes the plural keyword arguments `inputs` / `outputs`.
model = tf.keras.Model(inputs=base_model.input,
                       outputs=top_model(base_model.output))

# NOTE(review): this freezes the LAST 15 layers of `model`, and the last layer
# is `top_model` itself, so the regression head is frozen too. If the intent
# was to freeze the pretrained backbone instead, confirm and adjust the slice.
for layer in model.layers[-15:]:
    layer.trainable = False

optimizer = tf.keras.optimizers.RMSprop(0.001)
model.compile(loss='mse',
              optimizer=optimizer,
              metrics=['mae', 'mse'])
现在我已经从我的目录加载图像:
# Directory-based loader: every sub-directory of train_data_dir is treated as
# one class, so the labels it yields are class indices, not (x, y) points.
train_datagen = tf.keras.preprocessing.image.ImageDataGenerator()
train_generator = train_datagen.flow_from_directory(
    directory=train_data_dir,
    target_size=(img_height, img_width),
    batch_size=batch_size,
)
Found 15678 images belonging to 24 classes.
现在如何为每张图片分配标签并用它训练我的模型?
为此,您需要编写自定义数据生成器。
导入必要的库
import os
import pandas as pd
from skimage.io import imread # Used for image processing
from skimage.transform import resize # Used for image processing
import json
import numpy as np
定义我们自己的数据生成器
我跟着 this link 了解了如何做到这一点。并根据您的问题对其进行定制。
我们需要填写以下功能
class DataGenerator(tf.keras.utils.Sequence):
    """Generates data for Keras.

    Skeleton only: every `...` body below is a placeholder.
    """

    def __init__(self, directory, target_json, batch_size=32, target_size=(128, 128), shuffle=True):
        ...

    def __len__(self):
        """Denotes the number of batches per epoch."""
        ...

    def __getitem__(self, index):
        """Generate one batch of data."""
        ...

    def on_epoch_end(self):
        """Updates indexes after each epoch."""
        ...

    def __data_generation(self, list_paths, list_paths_wo_ext):
        """Generates data containing batch_size samples."""  # X : (n_samples, *dim, n_channels)
        ...
看看我们定义了哪些变量
self.target_size = # Final size of the images
self.batch_size = # Batch size
self.target_json = # Path to the json file
self.directory = # Where the training data is
self.img_paths = # Contains image paths with extension
self.img_paths_wo_ext = # Contains the image paths without extension
self.targets = # The dataframe containing targets loaded from the json
self.shuffle = # Shuffle data at start of each epoch?
JSON 文件
您的 JSON 文件需要完全 这种格式。这可能也是你所拥有的。但请确保它是 100% 这种格式。
{"dir20": {"frame_00001_rgb": {"x": 363.693829827852, "y": 278.2191728859505}, "frame_00002_rgb": {"x": 330.9709780765119, "y": 283.34142472069004}}, "dir21": {"frame_00001_rgb": {"x": 363.693829827852, "y": 278.2191728859505}, "frame_00002_rgb": {"x": 330.9709780765119, "y": 283.34142472069004}}, "dir22": {"frame_00001_rgb": {"x": 363.693829827852, "y": 278.2191728859505}, "frame_00002_rgb": {"x": 330.9709780765119, "y": 283.34142472069004}}, "dir23": {"frame_00001_rgb": {"x": 363.693829827852, "y": 278.2191728859505}, "frame_00002_rgb": {"x": 330.9709780765119, "y": 283.34142472069004}}, "dir24": {"frame_00001_rgb": {"x": 212.5232358, "y": 156.3342191728855}, "frame_00002_rgb": {"x": 230.6949709780735, "y": 253.7534142472069}}}
接下来我们需要将其转换为 pandas 数据帧。为此,我们定义了以下函数。由于文件的性质,它有点复杂。但这是正在发生的事情。
- 加载 json 并创建一个数据框,其中包含像 `dir20.frame_00002_rgb.x` 这样的列
- 通过将列拆分为 3 个级别(例如 dir20、frame_00002_rgb、x)来创建多索引
- 使用 `stack` 将 `dir*` 和 `frame_*` 都作为索引
- 重新格式化索引,使其包含每个图像的完整路径,并且每个记录都有两列(`x` 和 `y`)。
def json_to_df(json_path, directory):
    """Load the nested target JSON into a DataFrame indexed by image path.

    The JSON is expected to be nested as ``{dir: {frame: {"x": ..., "y": ...}}}``.

    Args:
        json_path: Path of the JSON file holding the targets.
        directory: Root directory of the images; it is prepended to every
            ``dir/frame`` pair to build the index labels.

    Returns:
        A DataFrame with one row per ``(dir, frame)`` pair, indexed by
        ``directory/dir/frame`` (no file extension), with columns ``x`` and ``y``.
    """
    with open(json_path, 'r') as f:
        s = json.load(f)
    # One-row frame whose columns look like "dir20.frame_00002_rgb.x".
    # (pd.io.json.json_normalize is deprecated; pd.json_normalize replaces it.)
    df = pd.json_normalize(s)
    # Split each flattened column name into its (dir, frame, coordinate) levels.
    df.columns = pd.MultiIndex.from_tuples([col.split('.') for col in df.columns])
    # Move the dir and frame levels into the row index; x / y stay as columns.
    df = df.stack(level=[0, 1])
    # Drop the original single-row index level left over from json_normalize.
    df = df.droplevel(0)
    # os.path.join (rather than joining on os.path.sep) avoids a doubled
    # separator when `directory` already ends with one.
    df.index = pd.Index([os.path.join(directory, *c) for c in df.index.values])
    return df
其余代码
我不会详细介绍其他部分发生的事情,因为它非常简单。但我们实际上是通过读取图像、调整大小并从我们生成的数据帧中获取正确的 `x`、`y` 值来获取单批数据。
完整代码
这是数据生成器的完整代码。
class DataGenerator(tf.keras.utils.Sequence):
    """Keras ``Sequence`` yielding batches of resized images and (x, y) targets.

    Images are discovered by walking ``directory`` for ``.jpg`` / ``.png``
    files. Targets come from a JSON file nested as
    ``{dir: {frame: {"x": ..., "y": ...}}}`` and are looked up by the image
    path without its extension. Target values are returned exactly as stored
    in the JSON; they are not rescaled when images are resized.
    """

    def __init__(self, directory, target_json, batch_size=32, target_size=(128, 128), shuffle=True):
        """Collect image paths, load the targets and build the first index order.

        Args:
            directory: Root directory that is walked recursively for images.
            target_json: Path to the JSON file containing the targets.
            batch_size: Number of samples per batch.
            target_size: Size every image is resized to before batching.
            shuffle: Whether to reshuffle the sample order at each epoch end.
        """
        # Initialize the Keras base class (newer Keras versions warn when
        # this call is skipped).
        super().__init__()
        self.target_size = target_size
        self.batch_size = batch_size
        self.target_json = target_json
        self.directory = directory

        self.img_paths = []          # image paths with extension
        self.img_paths_wo_ext = []   # same paths without extension (target keys)
        for root, _dirs, files in os.walk(directory):
            for file in files:
                if file.lower().endswith((".jpg", ".png")):
                    path = os.path.join(root, file)
                    self.img_paths.append(path)
                    self.img_paths_wo_ext.append(os.path.splitext(path)[0])

        self.targets = self._json_to_df(self.target_json, self.directory)
        self.shuffle = shuffle
        self.on_epoch_end()

    @staticmethod
    def _json_to_df(json_path, directory):
        """Load the target JSON into a DataFrame indexed by ``directory/dir/frame``."""
        with open(json_path, 'r') as f:
            s = json.load(f)
        # One-row frame whose columns look like "dir20.frame_00002_rgb.x".
        # (pd.io.json.json_normalize is deprecated; pd.json_normalize replaces it.)
        df = pd.json_normalize(s)
        df.columns = pd.MultiIndex.from_tuples([col.split('.') for col in df.columns])
        # Move the dir and frame levels into the row index; x / y stay as columns.
        df = df.stack(level=[0, 1])
        # Drop the original single-row index level left over from json_normalize.
        df = df.droplevel(0)
        # os.path.join avoids a doubled separator when `directory` ends with
        # one, so the labels match the paths produced by os.walk above.
        df.index = pd.Index([os.path.join(directory, *c) for c in df.index.values])
        return df

    def __len__(self):
        """Denotes the number of batches per epoch (a trailing partial batch is dropped)."""
        return len(self.img_paths) // self.batch_size

    def __getitem__(self, index):
        """Generate one batch of data."""
        # Sample indexes belonging to this batch
        indexes = self.indexes[index * self.batch_size:(index + 1) * self.batch_size]

        # Matching image paths, with and without extension
        list_paths = [self.img_paths[k] for k in indexes]
        list_paths_wo_ext = [self.img_paths_wo_ext[k] for k in indexes]

        return self.__data_generation(list_paths, list_paths_wo_ext)

    def on_epoch_end(self):
        """Updates indexes after each epoch."""
        self.indexes = np.arange(len(self.img_paths))
        if self.shuffle:
            np.random.shuffle(self.indexes)

    def __data_generation(self, list_paths, list_paths_wo_ext):
        """Generates one batch: X is (n_samples, *target_size, 3), y holds the targets."""
        # Size the batch by the paths actually supplied so a short batch
        # never contains uninitialized rows.
        X = np.empty((len(list_paths), *self.target_size, 3))
        # Raises KeyError if an image has no entry in the target JSON.
        y = self.targets.loc[list_paths_wo_ext].values

        for i, path in enumerate(list_paths):
            X[i,] = resize(imread(path), self.target_size)

        return X, y
使用数据生成器
以下是数据生成器的使用方法。
# Build the generator, then pull a single batch from it to inspect the output.
generator = DataGenerator(train_data_dir, './train/data.json', batch_size=2)
train_datagen = iter(generator)
x, y = next(train_datagen)
for batch_part in (x, y):
    print(batch_part)
这给出了,
[[0.01377145 0.01377145 0.01377145]
[0.00242393 0.00242393 0.00242393]
[0. 0. 0. ]
...
[0.0037837 0.0037837 0.0037837 ]
[0.0037837 0.0037837 0.0037837 ]
[0.0037837 0.0037837 0.0037837 ]]
...
[[0.37398897 0.3372549 0.17647059]
[0.38967525 0.35294118 0.19215686]
[0.42889093 0.39215686 0.23137255]
...
[0.72156863 0.62889093 0.33085172]
[0.71372549 0.61176471 0.31764706]
[0.70588235 0.59359681 0.30340074]]]]
[[363.69382983 278.21917289]
[330.97097808 283.34142472]]