在 Tensorflow 中加载数据时处理文本和解码视频的最佳方式
Best way to process texts and decode videos while loading the data in Tensorflow
我有一个如下所示的数据框:
我正在构建一个同时以文本和视频作为输入的模型。所以,我的目标是从数据框中加载 Text 和 Media_location(其中包含视频文件路径)两列,使得 df['Text'] 与视频(从 df['Media_location'] 中的路径加载)能够作为一个可迭代对象一起输入模型。
我在 TensorFlow 中找不到任何可以做这种事情的现成实现,所以如果您有任何建议,请不吝提出。
您可以使用 tensorflow.keras.utils.Sequence:
import math
from tensorflow.keras.utils import Sequence
class Dataloader(Sequence):
    """Serve ``((text, media_path), label)`` samples in fixed-size batches.

    Args:
        df: DataFrame with ``'Text'`` and ``'Media_location'`` columns.
        y_array: Labels aligned row-for-row with ``df``; must support slicing.
        batch_size: Number of rows per batch (the last batch may be shorter).
    """

    def __init__(self, df, y_array, batch_size):
        # Newer Keras versions expect subclasses to run the base initializer.
        super().__init__()
        self.df, self.y_array = df, y_array
        self.batch_size = batch_size

    def __len__(self):
        """Return the number of batches, counting a final partial one."""
        return math.ceil(len(self.df) / self.batch_size)

    def __getitem__(self, idx):
        """Return batch ``idx`` as a list of ``((text, media_path), label)``."""
        rows = slice(idx * self.batch_size, (idx + 1) * self.batch_size)
        features = self.df[['Text', 'Media_location']].iloc[rows].values
        return [(tuple(a), b) for a, b in zip(features, self.y_array[rows])]
示例:
import numpy as np
loader = Dataloader(df, np.random.randint(0, 2, size=10), 3)
# Index the batches explicitly so the loop is bounded by len(loader) and does
# not depend on the base class supplying an iterator.
for batch_idx in range(len(loader)):
    for (text, video), label in loader[batch_idx]:
        print((text, video), label)
    # Blank line between batches.
    print()
输出:
('E DDC', 'Videos\17.mp4') 0
('CBAD ', 'Videos\80.mp4') 1
('EBBBBB E', 'Videos\07.mp4') 1
('ABB B ', 'Videos\68.mp4') 0
('BCDADDA A', 'Videos\73.mp4') 1
('CDECECADE', 'Videos\04.mp4') 1
('EADBDBC', 'Videos\85.mp4') 1
('ABCCBC AA', 'Videos\50.mp4') 1
('DEBCA', 'Videos\32.mp4') 1
('DD CCCB', 'Videos\24.mp4') 0
您也可以尝试使用 tensorflow-io,它可以在图模式(graph mode)下运行。只需运行 pip install tensorflow-io,然后尝试:
import tensorflow as tf
import tensorflow_io as tfio
import pandas as pd
# Two-row demo frame: a text column plus the path of the clip it belongs to.
df = pd.DataFrame(
    data={
        'Text': ['some text', 'some more text'],
        'Media_location': [
            '/content/sample-mp4-file.mp4',
            '/content/sample-mp4-file.mp4',
        ],
    }
)
# Slice both columns in lockstep so every element is a (text, path) pair.
dataset = tf.data.Dataset.from_tensor_slices(
    (df['Text'], df['Media_location'])
)
def decode_videos(x, y):
    """Replace the video path of a ``(text, path)`` element with its frames.

    Args:
        x: Text tensor, passed through untouched.
        y: String tensor holding the path of the video file.

    Returns:
        Tuple ``(x, video)`` where ``video`` is the decoded video tensor.
    """
    # decode_video is given the file contents read here, not the path itself.
    raw_bytes = tf.io.read_file(y)
    video = tfio.experimental.ffmpeg.decode_video(raw_bytes)
    return x, video
# Swap every path for its decoded frames, then show each text next to the
# shape of its video tensor.
dataset = dataset.map(decode_videos)
for x, y in dataset:
    print(x, y.shape)
tf.Tensor(b'some text', shape=(), dtype=string) (901, 270, 480, 3)
tf.Tensor(b'some more text', shape=(), dtype=string) (901, 270, 480, 3)
在此示例中,每个视频包含 901 帧。
如果您是 Windows 用户,可以像下面这样尝试使用 cv2:
import tensorflow as tf
import pandas as pd
from cv2 import cv2
import numpy as np
# Two-row demo frame: a text column plus the path of the clip it belongs to.
df = pd.DataFrame(
    data={
        'Text': ['some text', 'some more text'],
        'Media_location': [
            '/content/sample-mp4-file.mp4',
            '/content/sample-mp4-file.mp4',
        ],
    }
)
# Slice both columns in lockstep so every element is a (text, path) pair.
dataset = tf.data.Dataset.from_tensor_slices(
    (df['Text'], df['Media_location'])
)
def get_video_asarray(path):
    """Read every frame of the video at *path* into a single array.

    Args:
        path: Eager string tensor (as handed over by ``tf.py_function``)
            whose value is the UTF-8 encoded video file path.

    Returns:
        The frames, in read order, stacked along a new leading axis.

    Raises:
        ValueError: If not a single frame could be read from the file.
    """
    filename = path.numpy().decode("utf-8")
    cap = cv2.VideoCapture(filename)
    frames = []
    try:
        while True:
            read, img = cap.read()
            if not read:
                # No further frame could be read: stop collecting.
                break
            frames.append(img)
    finally:
        # Always free the capture handle, even if reading raised.
        cap.release()
    if not frames:
        # np.stack would fail on an empty list with a far less helpful message.
        raise ValueError(f"Could not read any frames from video: {filename!r}")
    return np.stack(frames, axis=0)
def decode_videos(x, y):
    """Replace the video path of a ``(text, path)`` element with its frames.

    Args:
        x: Text tensor, passed through untouched.
        y: String tensor holding the path of the video file.

    Returns:
        Tuple ``(x, video)`` where ``video`` holds the decoded frames as
        ``tf.float32``.
    """
    # get_video_asarray calls .numpy() on its argument, so it has to run
    # eagerly through tf.py_function.
    frames = tf.py_function(get_video_asarray, [y], Tout=[tf.float32])
    # Tout is a one-element list, so the result is a one-element list too;
    # squeezing axis 0 drops that wrapper dimension.
    return x, tf.squeeze(frames, axis=0)
# Swap every path for its decoded frames, then show each text next to the
# shape of its video tensor.
dataset = dataset.map(decode_videos)
for x, y in dataset:
    print(x, y.shape)
tf.Tensor(b'some text', shape=(), dtype=string) (901, 270, 480, 3)
tf.Tensor(b'some more text', shape=(), dtype=string) (901, 270, 480, 3)
我有一个如下所示的数据框:
我正在构建一个同时以文本和视频作为输入的模型。所以,我的目标是从数据框中加载 Text 和 Media_location(其中包含视频文件路径)两列,使得 df['Text'] 与视频(从 df['Media_location'] 中的路径加载)能够作为一个可迭代对象一起输入模型。
我在 TensorFlow 中找不到任何可以做这种事情的现成实现,所以如果您有任何建议,请不吝提出。
您可以使用 tensorflow.keras.utils.Sequence:
import math
from tensorflow.keras.utils import Sequence
class Dataloader(Sequence):
    """Serve ``((text, media_path), label)`` samples in fixed-size batches.

    Args:
        df: DataFrame with ``'Text'`` and ``'Media_location'`` columns.
        y_array: Labels aligned row-for-row with ``df``; must support slicing.
        batch_size: Number of rows per batch (the last batch may be shorter).
    """

    def __init__(self, df, y_array, batch_size):
        # Newer Keras versions expect subclasses to run the base initializer.
        super().__init__()
        self.df, self.y_array = df, y_array
        self.batch_size = batch_size

    def __len__(self):
        """Return the number of batches, counting a final partial one."""
        return math.ceil(len(self.df) / self.batch_size)

    def __getitem__(self, idx):
        """Return batch ``idx`` as a list of ``((text, media_path), label)``."""
        rows = slice(idx * self.batch_size, (idx + 1) * self.batch_size)
        features = self.df[['Text', 'Media_location']].iloc[rows].values
        return [(tuple(a), b) for a, b in zip(features, self.y_array[rows])]
示例:
import numpy as np
loader = Dataloader(df, np.random.randint(0, 2, size=10), 3)
# Index the batches explicitly so the loop is bounded by len(loader) and does
# not depend on the base class supplying an iterator.
for batch_idx in range(len(loader)):
    for (text, video), label in loader[batch_idx]:
        print((text, video), label)
    # Blank line between batches.
    print()
输出:
('E DDC', 'Videos\17.mp4') 0
('CBAD ', 'Videos\80.mp4') 1
('EBBBBB E', 'Videos\07.mp4') 1
('ABB B ', 'Videos\68.mp4') 0
('BCDADDA A', 'Videos\73.mp4') 1
('CDECECADE', 'Videos\04.mp4') 1
('EADBDBC', 'Videos\85.mp4') 1
('ABCCBC AA', 'Videos\50.mp4') 1
('DEBCA', 'Videos\32.mp4') 1
('DD CCCB', 'Videos\24.mp4') 0
您也可以尝试使用 tensorflow-io,它可以在图模式(graph mode)下运行。只需运行 pip install tensorflow-io,然后尝试:
import tensorflow as tf
import tensorflow_io as tfio
import pandas as pd
# Two-row demo frame: a text column plus the path of the clip it belongs to.
df = pd.DataFrame(
    data={
        'Text': ['some text', 'some more text'],
        'Media_location': [
            '/content/sample-mp4-file.mp4',
            '/content/sample-mp4-file.mp4',
        ],
    }
)
# Slice both columns in lockstep so every element is a (text, path) pair.
dataset = tf.data.Dataset.from_tensor_slices(
    (df['Text'], df['Media_location'])
)
def decode_videos(x, y):
    """Replace the video path of a ``(text, path)`` element with its frames.

    Args:
        x: Text tensor, passed through untouched.
        y: String tensor holding the path of the video file.

    Returns:
        Tuple ``(x, video)`` where ``video`` is the decoded video tensor.
    """
    # decode_video is given the file contents read here, not the path itself.
    raw_bytes = tf.io.read_file(y)
    video = tfio.experimental.ffmpeg.decode_video(raw_bytes)
    return x, video
# Swap every path for its decoded frames, then show each text next to the
# shape of its video tensor.
dataset = dataset.map(decode_videos)
for x, y in dataset:
    print(x, y.shape)
tf.Tensor(b'some text', shape=(), dtype=string) (901, 270, 480, 3)
tf.Tensor(b'some more text', shape=(), dtype=string) (901, 270, 480, 3)
在此示例中,每个视频包含 901 帧。
如果您是 Windows 用户,可以像下面这样尝试使用 cv2:
import tensorflow as tf
import pandas as pd
from cv2 import cv2
import numpy as np
# Two-row demo frame: a text column plus the path of the clip it belongs to.
df = pd.DataFrame(
    data={
        'Text': ['some text', 'some more text'],
        'Media_location': [
            '/content/sample-mp4-file.mp4',
            '/content/sample-mp4-file.mp4',
        ],
    }
)
# Slice both columns in lockstep so every element is a (text, path) pair.
dataset = tf.data.Dataset.from_tensor_slices(
    (df['Text'], df['Media_location'])
)
def get_video_asarray(path):
    """Read every frame of the video at *path* into a single array.

    Args:
        path: Eager string tensor (as handed over by ``tf.py_function``)
            whose value is the UTF-8 encoded video file path.

    Returns:
        The frames, in read order, stacked along a new leading axis.

    Raises:
        ValueError: If not a single frame could be read from the file.
    """
    filename = path.numpy().decode("utf-8")
    cap = cv2.VideoCapture(filename)
    frames = []
    try:
        while True:
            read, img = cap.read()
            if not read:
                # No further frame could be read: stop collecting.
                break
            frames.append(img)
    finally:
        # Always free the capture handle, even if reading raised.
        cap.release()
    if not frames:
        # np.stack would fail on an empty list with a far less helpful message.
        raise ValueError(f"Could not read any frames from video: {filename!r}")
    return np.stack(frames, axis=0)
def decode_videos(x, y):
    """Replace the video path of a ``(text, path)`` element with its frames.

    Args:
        x: Text tensor, passed through untouched.
        y: String tensor holding the path of the video file.

    Returns:
        Tuple ``(x, video)`` where ``video`` holds the decoded frames as
        ``tf.float32``.
    """
    # get_video_asarray calls .numpy() on its argument, so it has to run
    # eagerly through tf.py_function.
    frames = tf.py_function(get_video_asarray, [y], Tout=[tf.float32])
    # Tout is a one-element list, so the result is a one-element list too;
    # squeezing axis 0 drops that wrapper dimension.
    return x, tf.squeeze(frames, axis=0)
# Swap every path for its decoded frames, then show each text next to the
# shape of its video tensor.
dataset = dataset.map(decode_videos)
for x, y in dataset:
    print(x, y.shape)
tf.Tensor(b'some text', shape=(), dtype=string) (901, 270, 480, 3)
tf.Tensor(b'some more text', shape=(), dtype=string) (901, 270, 480, 3)