Tensorflow pose estimation strange behaviour
I'm trying to detect body parts/landmarks in a picture, but I've run into a problem: for some reason it also prints knee points even when there are no knees in the picture.
Any idea why this happens and how to fix it? Or is there a better/faster way to detect body points? Thanks.
Here is my code:
import tensorflow as tf
import numpy as np
import cv2

# Load the image and build the 192x192 uint8 input the model expects.
image_path = "test3.jpg"
image = tf.io.read_file(image_path)
image = tf.image.decode_jpeg(image)
input_image = tf.expand_dims(image, axis=0)
input_image = tf.image.resize_with_pad(input_image, 192, 192)

# Load the MoveNet Lightning TFLite model.
model_path = "movenet_lightning_fp16.tflite"
interpreter = tf.lite.Interpreter(model_path)
interpreter.allocate_tensors()

input_image = tf.cast(input_image, dtype=tf.uint8)
input_details = interpreter.get_input_details()
output_details = interpreter.get_output_details()
interpreter.set_tensor(input_details[0]['index'], input_image.numpy())
interpreter.invoke()

# Output shape is [1, 1, 17, 3]: 17 keypoints as (y, x, confidence).
keypoints = interpreter.get_tensor(output_details[0]['index'])

width = 640
height = 640

# Pairs of keypoint indices to connect when drawing the skeleton.
KEYPOINT_EDGES = [(0, 1), (0, 2), (1, 3), (2, 4), (0, 5), (0, 6), (5, 7),
                  (7, 9), (6, 8), (8, 10), (5, 6), (5, 11), (6, 12), (11, 12), (11, 13),
                  (13, 15), (12, 14), (14, 16)]

# Prepare a larger copy of the original image for display.
input_image = tf.expand_dims(image, axis=0)
input_image = tf.image.resize_with_pad(input_image, width, height)
input_image = tf.cast(input_image, dtype=tf.uint8)
image_np = np.squeeze(input_image.numpy(), axis=0)
image_np = cv2.resize(image_np, (width, height))
image_np = cv2.cvtColor(image_np, cv2.COLOR_RGB2BGR)

# Draw each keypoint as a red dot.
for keypoint in keypoints[0][0]:
    x = int(keypoint[1] * width)
    y = int(keypoint[0] * height)
    cv2.circle(image_np, (x, y), 4, (0, 0, 255), -1)

# Draw the skeleton edges as green lines.
for edge in KEYPOINT_EDGES:
    x1 = int(keypoints[0][0][edge[0]][1] * width)
    y1 = int(keypoints[0][0][edge[0]][0] * height)
    x2 = int(keypoints[0][0][edge[1]][1] * width)
    y2 = int(keypoints[0][0][edge[1]][0] * height)
    cv2.line(image_np, (x1, y1), (x2, y2), (0, 255, 0), 2)

print(keypoints)
cv2.imshow("pose estimation", image_np)
cv2.waitKey()
These are the 17 points that get printed:
[[[[0.14580254 0.44932607 0.49171054]
[0.12085933 0.48325056 0.76345515]
[0.12439865 0.4332864 0.6319262 ]
[0.14748134 0.54644144 0.69355035]
[0.1498755 0.4215817 0.47992003]
[0.36506626 0.63139945 0.85730654]
[0.34724534 0.3317352 0.7910126 ]
[0.61043286 0.6646681 0.76448154]
[0.5989852 0.29230848 0.8800807 ]
[0.8311419 0.7306837 0.7297675 ]
[0.8425422 0.26081967 0.63438255]
[0.85355556 0.5752684 0.79087543]
[0.8471971 0.37801507 0.79199016]
[0.9836348 0.5910964 0.00867963]
[1.0096381 0.33657807 0.01041293]
[0.86401206 0.7281677 0.03190452]
[0.8798219 0.265369 0.01451936]]]]
Pose models always output every point they were trained to detect. If there is no knee in the picture, the model still estimates where a knee would most likely be and outputs that point, just with a very low confidence score; you can see this in your own output, where points 13-16 (the knees and ankles) all score below 0.04. So you can filter the points by their confidence score.
You can add a variable as a confidence threshold and use it to filter the points. I call it conf_thrs in the code below:
conf_thrs = 0.5
for keypoint in keypoints[0][0]:
    if keypoint[2] > conf_thrs:
        # Only draw the point if its confidence score is above the threshold.
        x = int(keypoint[1] * width)
        y = int(keypoint[0] * height)
        cv2.circle(image_np, (x, y), 4, (0, 0, 255), -1)
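If it helps to see which points survive the threshold, MoveNet's 17 keypoints follow the standard COCO ordering; here is a quick sketch (the KEYPOINT_NAMES list is my own addition, not part of the model output):

KEYPOINT_NAMES = [
    "nose", "left_eye", "right_eye", "left_ear", "right_ear",
    "left_shoulder", "right_shoulder", "left_elbow", "right_elbow",
    "left_wrist", "right_wrist", "left_hip", "right_hip",
    "left_knee", "right_knee", "left_ankle", "right_ankle"]

# Print each keypoint's name next to its confidence score.
for name, keypoint in zip(KEYPOINT_NAMES, keypoints[0][0]):
    print(f"{name}: confidence {keypoint[2]:.3f}")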
And do the same filtering in the loop over KEYPOINT_EDGES, as sketched below.
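A minimal sketch of that edge loop, assuming the keypoints, image_np, width, height, and conf_thrs defined above; an edge is only drawn when both of its endpoints pass the threshold:

for edge in KEYPOINT_EDGES:
    # Each keypoint row is (y, x, confidence).
    y1, x1, score1 = keypoints[0][0][edge[0]]
    y2, x2, score2 = keypoints[0][0][edge[1]]
    # Skip the edge unless both endpoints are confident detections.
    if score1 > conf_thrs and score2 > conf_thrs:
        p1 = (int(x1 * width), int(y1 * height))
        p2 = (int(x2 * width), int(y2 * height))
        cv2.line(image_np, p1, p2, (0, 255, 0), 2)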