如何在 Darknet 架构上进行批量检测?
How to do Batch Detection on Darknet architecture?
我正在尝试使用 Darknet/YOLOv4 进行批量检测。第一批可以正常运行,但第二批会因 CUDA 错误而失败。我是否在下面的代码段中遗漏了什么?对于 RTX GPU 显卡,批处理的正确参数是什么,又该如何确定合适的批量大小?
我的系统配置如下-
System: Host: ubox Kernel: 5.4.0-42-generic x86_64 bits: 64
Desktop: Gnome 3.28.4 Distro: Ubuntu 18.04.4 LTS
Machine: Device: desktop System: Alienware product: Alienware Aurora R9 v: 1.0.7 serial: N/A
Mobo: Alienware model: 0T76PD v: A01 serial: N/A
UEFI: Alienware v: 1.0.7 date: 12/23/2019
CPU: 8 core Intel Core i7-9700K (-MCP-) cache: 12288 KB
clock speeds: max: 4900 MHz 1: 800 MHz 2: 800 MHz 3: 800 MHz
4: 800 MHz 5: 801 MHz 6: 803 MHz 7: 808 MHz 8: 810 MHz
Graphics: Card-1: Intel Device 3e98
Card-2: NVIDIA Device 1e84
Display Server: x11 (X.Org 1.20.8 )
drivers: modesetting,nvidia (unloaded: fbdev,vesa,nouveau)
Resolution: 2560x1440@59.95hz, 1920x1080@60.00hz
OpenGL: renderer: GeForce RTX 2070 SUPER/PCIe/SSE2
version: 4.6.0 NVIDIA 450.57
当我以批量大小 3 执行 performBatchDetectV2() 时,会得到 CUDA Error: out of memory。
如何在 YOLOv4 架构上正确地进行批处理(batching)?在我的用例中,我从相机获取帧,想把 10 帧合并成一个批次,然后调用下面的函数。如果只调用一次,下面的函数可以完美运行;也就是说,它会在第二批帧(second batch of frames)上抛出 CUDA 错误。
def performBatchDetectV2(image_list, thresh=0.25, configPath="./cfg/yolov4.cfg",
                         weightPath="yolov4.weights", metaPath="./cfg/coco.data",
                         hier_thresh=.5, nms=.45, batch_size=3):
    """Run one batched Darknet detection pass over ``image_list``.

    Each image is converted from BGR to RGB, resized to the network input
    size, transposed to channel-first order, and all images are packed into
    a single float32 buffer scaled by 1/255 before being handed to
    ``network_predict_batch``.

    Args:
        image_list: Sequence of images; the height, width and channel count
            are taken from ``image_list[0].shape``.
            # assumes every image has the same shape and that
            # len(image_list) matches batch_size - TODO confirm
        thresh: Score threshold; a detection is kept only if its best class
            probability is strictly greater than this value.
        configPath: Path to the network ``.cfg`` file.
        weightPath: Path to the network weights file.
        metaPath: Path to the ``.data`` metadata file.
        hier_thresh: Passed through to ``network_predict_batch``.
        nms: NMS threshold passed to ``do_nms_obj``; a falsy value skips NMS.
        batch_size: Number of batch entries read back from the prediction.

    Returns:
        A tuple ``(batch_boxes, batch_scores, batch_classes)``. Each is a
        list with one entry per batch index; boxes are
        ``(top, left, bottom, right)`` integer tuples, classes are the index
        of the highest-probability class.
    """
    # NOTE(review): the network is loaded on every call. Presumably each load
    # allocates GPU memory that is not released here, which would explain a
    # CUDA out-of-memory error on a second call - load once and reuse.
    net = load_net_custom(configPath.encode('utf-8'), weightPath.encode('utf-8'), 0, batch_size)
    meta = load_meta(metaPath.encode('utf-8'))
    pred_height, pred_width, c = image_list[0].shape
    net_width, net_height = (network_width(net), network_height(net))

    img_list = []
    for custom_image_bgr in image_list:
        custom_image = cv2.cvtColor(custom_image_bgr, cv2.COLOR_BGR2RGB)
        custom_image = cv2.resize(
            custom_image, (net_width, net_height), interpolation=cv2.INTER_NEAREST)
        # HWC -> CHW so that the channel planes are contiguous per image.
        custom_image = custom_image.transpose(2, 0, 1)
        img_list.append(custom_image)

    # Pack all images into one flat, contiguous float32 buffer in [0, 1].
    arr = np.concatenate(img_list, axis=0)
    arr = np.ascontiguousarray(arr.flat, dtype=np.float32) / 255.0
    data = arr.ctypes.data_as(POINTER(c_float))
    im = IMAGE(net_width, net_height, c, data)

    batch_dets = network_predict_batch(net, im, batch_size, pred_width,
                                       pred_height, thresh, hier_thresh, None, 0, 0)
    batch_boxes = []
    batch_scores = []
    batch_classes = []
    for b in range(batch_size):
        num = batch_dets[b].num
        dets = batch_dets[b].dets
        if nms:
            do_nms_obj(dets, num, meta.classes, nms)
        boxes = []
        scores = []
        classes = []
        for i in range(num):
            det = dets[i]
            # Pick the class with the highest probability for this detection.
            score = -1
            label = None
            # Named class_idx (not c) so the channel count above is not shadowed.
            for class_idx in range(det.classes):
                p = det.prob[class_idx]
                if p > score:
                    score = p
                    label = class_idx
            if score > thresh:
                box = det.bbox
                # bbox is centre/size; convert to integer corner coordinates.
                left, top, right, bottom = map(int, (box.x - box.w / 2, box.y - box.h / 2,
                                                     box.x + box.w / 2, box.y + box.h / 2))
                boxes.append((top, left, bottom, right))
                scores.append(score)
                classes.append(label)
                # boxColor = (int(255 * (1 - (score ** 2))), int(255 * (score ** 2)), 0)
                # cv2.rectangle(image_list[b], (left, top),
                # (right, bottom), boxColor, 2)
        # cv2.imwrite(os.path.basename(img_samples[b]),image_list[b])
        batch_boxes.append(boxes)
        batch_scores.append(scores)
        batch_classes.append(classes)
    # Release the detection structures returned by network_predict_batch.
    free_batch_detections(batch_dets, batch_size)
    return batch_boxes, batch_scores, batch_classes
问题在于,每次调用 performBatchDetectV2 方法时都会重新加载网络。因此,您必须以固定的批量大小只加载一次网络,并使用已加载的网络进行预测。
请将您的 network 变量设置为全局变量,因为您现在是在每次调用函数时都重新加载它。
这是您的函数:
def performBatchDetectV2(image_list, thresh=0.25, configPath="./cfg/yolov4.cfg",
                         weightPath="yolov4.weights", metaPath="./cfg/coco.data",
                         hier_thresh=.5, nms=.45, batch_size=3):
    # Excerpt of the original function: the network and metadata are loaded
    # inside the function body, i.e. once per call.
    net = load_net_custom(configPath.encode('utf-8'), weightPath.encode('utf-8'), 0, batch_size)
    meta = load_meta(metaPath.encode('utf-8'))
    # <your code>
改成这个
configPath = "./cfg/yolov4.cfg"
weightPath = "yolov4.weights"
# No trailing comma here: a trailing comma would make this a 1-tuple and
# metaPath.encode(...) below would raise AttributeError.
metaPath = "./cfg/coco.data"
# Must be defined before load_net_custom is called with it.
batch_size = 3

# Load the network and metadata once, at module level, and reuse them for
# every call to performBatchDetectV2.
net = load_net_custom(configPath.encode('utf-8'), weightPath.encode('utf-8'), 0, batch_size)
meta = load_meta(metaPath.encode('utf-8'))


def performBatchDetectV2(image_list, thresh=0.25, hier_thresh=.5, nms=.45, batch_size=batch_size):
    # The default batch_size is the value the module-level network was loaded with.
    # Use the module-level network and metadata instead of reloading them.
    global net, meta
    # <your code>
我正在尝试使用 Darknet/YOLOv4 进行批量检测。第一批可以正常运行,但第二批会因 CUDA 错误而失败。我是否在下面的代码段中遗漏了什么?对于 RTX GPU 显卡,批处理的正确参数是什么,又该如何确定合适的批量大小?
我的系统配置如下-
System: Host: ubox Kernel: 5.4.0-42-generic x86_64 bits: 64
Desktop: Gnome 3.28.4 Distro: Ubuntu 18.04.4 LTS
Machine: Device: desktop System: Alienware product: Alienware Aurora R9 v: 1.0.7 serial: N/A
Mobo: Alienware model: 0T76PD v: A01 serial: N/A
UEFI: Alienware v: 1.0.7 date: 12/23/2019
CPU: 8 core Intel Core i7-9700K (-MCP-) cache: 12288 KB
clock speeds: max: 4900 MHz 1: 800 MHz 2: 800 MHz 3: 800 MHz
4: 800 MHz 5: 801 MHz 6: 803 MHz 7: 808 MHz 8: 810 MHz
Graphics: Card-1: Intel Device 3e98
Card-2: NVIDIA Device 1e84
Display Server: x11 (X.Org 1.20.8 )
drivers: modesetting,nvidia (unloaded: fbdev,vesa,nouveau)
Resolution: 2560x1440@59.95hz, 1920x1080@60.00hz
OpenGL: renderer: GeForce RTX 2070 SUPER/PCIe/SSE2
version: 4.6.0 NVIDIA 450.57
当我以批量大小 3 执行 performBatchDetectV2() 时,会得到 CUDA Error: out of memory。
如何在 YOLOv4 架构上正确地进行批处理(batching)?在我的用例中,我从相机获取帧,想把 10 帧合并成一个批次,然后调用下面的函数。如果只调用一次,下面的函数可以完美运行;也就是说,它会在第二批帧(second batch of frames)上因 CUDA 错误而失败。
def performBatchDetectV2(image_list, thresh=0.25, configPath="./cfg/yolov4.cfg",
                         weightPath="yolov4.weights", metaPath="./cfg/coco.data",
                         hier_thresh=.5, nms=.45, batch_size=3):
    """Run one batched Darknet detection pass over ``image_list``.

    Each image is converted from BGR to RGB, resized to the network input
    size, transposed to channel-first order, and all images are packed into
    a single float32 buffer scaled by 1/255 before being handed to
    ``network_predict_batch``.

    Args:
        image_list: Sequence of images; the height, width and channel count
            are taken from ``image_list[0].shape``.
            # assumes every image has the same shape and that
            # len(image_list) matches batch_size - TODO confirm
        thresh: Score threshold; a detection is kept only if its best class
            probability is strictly greater than this value.
        configPath: Path to the network ``.cfg`` file.
        weightPath: Path to the network weights file.
        metaPath: Path to the ``.data`` metadata file.
        hier_thresh: Passed through to ``network_predict_batch``.
        nms: NMS threshold passed to ``do_nms_obj``; a falsy value skips NMS.
        batch_size: Number of batch entries read back from the prediction.

    Returns:
        A tuple ``(batch_boxes, batch_scores, batch_classes)``. Each is a
        list with one entry per batch index; boxes are
        ``(top, left, bottom, right)`` integer tuples, classes are the index
        of the highest-probability class.
    """
    # NOTE(review): the network is loaded on every call. Presumably each load
    # allocates GPU memory that is not released here, which would explain a
    # CUDA out-of-memory error on a second call - load once and reuse.
    net = load_net_custom(configPath.encode('utf-8'), weightPath.encode('utf-8'), 0, batch_size)
    meta = load_meta(metaPath.encode('utf-8'))
    pred_height, pred_width, c = image_list[0].shape
    net_width, net_height = (network_width(net), network_height(net))

    img_list = []
    for custom_image_bgr in image_list:
        custom_image = cv2.cvtColor(custom_image_bgr, cv2.COLOR_BGR2RGB)
        custom_image = cv2.resize(
            custom_image, (net_width, net_height), interpolation=cv2.INTER_NEAREST)
        # HWC -> CHW so that the channel planes are contiguous per image.
        custom_image = custom_image.transpose(2, 0, 1)
        img_list.append(custom_image)

    # Pack all images into one flat, contiguous float32 buffer in [0, 1].
    arr = np.concatenate(img_list, axis=0)
    arr = np.ascontiguousarray(arr.flat, dtype=np.float32) / 255.0
    data = arr.ctypes.data_as(POINTER(c_float))
    im = IMAGE(net_width, net_height, c, data)

    batch_dets = network_predict_batch(net, im, batch_size, pred_width,
                                       pred_height, thresh, hier_thresh, None, 0, 0)
    batch_boxes = []
    batch_scores = []
    batch_classes = []
    for b in range(batch_size):
        num = batch_dets[b].num
        dets = batch_dets[b].dets
        if nms:
            do_nms_obj(dets, num, meta.classes, nms)
        boxes = []
        scores = []
        classes = []
        for i in range(num):
            det = dets[i]
            # Pick the class with the highest probability for this detection.
            score = -1
            label = None
            # Named class_idx (not c) so the channel count above is not shadowed.
            for class_idx in range(det.classes):
                p = det.prob[class_idx]
                if p > score:
                    score = p
                    label = class_idx
            if score > thresh:
                box = det.bbox
                # bbox is centre/size; convert to integer corner coordinates.
                left, top, right, bottom = map(int, (box.x - box.w / 2, box.y - box.h / 2,
                                                     box.x + box.w / 2, box.y + box.h / 2))
                boxes.append((top, left, bottom, right))
                scores.append(score)
                classes.append(label)
                # boxColor = (int(255 * (1 - (score ** 2))), int(255 * (score ** 2)), 0)
                # cv2.rectangle(image_list[b], (left, top),
                # (right, bottom), boxColor, 2)
        # cv2.imwrite(os.path.basename(img_samples[b]),image_list[b])
        batch_boxes.append(boxes)
        batch_scores.append(scores)
        batch_classes.append(classes)
    # Release the detection structures returned by network_predict_batch.
    free_batch_detections(batch_dets, batch_size)
    return batch_boxes, batch_scores, batch_classes
问题在于,每次调用 performBatchDetectV2 方法时都会重新加载网络。因此,您必须以固定的批量大小只加载一次网络,并使用已加载的网络进行预测。请将您的 network 变量设置为全局变量,因为您现在是在每次调用函数时都重新加载它。
这是您的函数:
def performBatchDetectV2(image_list, thresh=0.25, configPath="./cfg/yolov4.cfg",
                         weightPath="yolov4.weights", metaPath="./cfg/coco.data",
                         hier_thresh=.5, nms=.45, batch_size=3):
    # Excerpt of the original function: the network and metadata are loaded
    # inside the function body, i.e. once per call.
    net = load_net_custom(configPath.encode('utf-8'), weightPath.encode('utf-8'), 0, batch_size)
    meta = load_meta(metaPath.encode('utf-8'))
    # <your code>
改成这个
configPath = "./cfg/yolov4.cfg"
weightPath = "yolov4.weights"
# No trailing comma here: a trailing comma would make this a 1-tuple and
# metaPath.encode(...) below would raise AttributeError.
metaPath = "./cfg/coco.data"
# Must be defined before load_net_custom is called with it.
batch_size = 3

# Load the network and metadata once, at module level, and reuse them for
# every call to performBatchDetectV2.
net = load_net_custom(configPath.encode('utf-8'), weightPath.encode('utf-8'), 0, batch_size)
meta = load_meta(metaPath.encode('utf-8'))


def performBatchDetectV2(image_list, thresh=0.25, hier_thresh=.5, nms=.45, batch_size=batch_size):
    # The default batch_size is the value the module-level network was loaded with.
    # Use the module-level network and metadata instead of reloading them.
    global net, meta
    # <your code>