When I run MoveNet on a live webcam feed, the keypoints sit too high on the shoulders when the subject shows only their face and the tops of their shoulders. When the subject moves back, the shoulder keypoints are fine, but the eye keypoints are too low and the arms are not tracked all the way to the wrists (they stop at the elbows). I am using a 13-inch MacBook Pro with the M2 chip. Here is my code:
import numpy as np
from matplotlib import pyplot as plt
import cv2
import tensorflow as tf
# EDGES defines the connections between keypoints: each (index, index)
# pair is an edge of the skeleton, mapped to a colour code
EDGES = {
    (0, 1): 'm',
    (0, 2): 'c',
    (1, 3): 'm',
    (2, 4): 'c',
    (0, 5): 'm',
    (0, 6): 'c',
    (5, 7): 'm',
    (7, 9): 'm',
    (6, 8): 'c',
    (8, 10): 'c',
    (5, 6): 'y',
    (5, 11): 'm',
    (6, 12): 'c',
    (11, 12): 'y',
    (11, 13): 'm',
    (13, 15): 'm',
    (12, 14): 'c',
    (14, 16): 'c'
}
def draw_keypoints(frame, keypoints, confidence_threshold):
    y, x, c = frame.shape
    shaped = np.squeeze(np.multiply(keypoints, [y, x, 1]))
    for kp in shaped:
        ky, kx, kp_conf = kp
        if kp_conf > confidence_threshold:
            cv2.circle(frame, (int(kx), int(ky)), 4, (0, 255, 0), -1)
def draw_connections(frame, keypoints, edges, confidence_threshold):
    y, x, c = frame.shape
    shaped = np.squeeze(np.multiply(keypoints, [y, x, 1]))
    for edge, color in edges.items():
        p1, p2 = edge
        y1, x1, c1 = shaped[p1]
        y2, x2, c2 = shaped[p2]
        if (c1 > confidence_threshold) and (c2 > confidence_threshold):
            cv2.line(frame, (int(x1), int(y1)), (int(x2), int(y2)), (0, 0, 255), 2)
def preprocess_image(frame):
    # Define the target size expected by MoveNet Thunder
    target_size = 256
    # Calculate the aspect ratio of the original frame
    orig_height, orig_width, _ = frame.shape
    aspect_ratio = orig_width / orig_height
    # Resize the frame so the longer side equals target_size
    if aspect_ratio >= 1:  # width >= height
        new_width = target_size
        new_height = round(target_size / aspect_ratio)
    else:  # height > width
        new_height = target_size
        new_width = round(target_size * aspect_ratio)
    frame = cv2.resize(frame, (new_width, new_height))
    # Pad the frame to a target_size x target_size square
    pad_top = (target_size - new_height) // 2
    pad_bottom = target_size - new_height - pad_top
    pad_left = (target_size - new_width) // 2
    pad_right = target_size - new_width - pad_left
    frame = cv2.copyMakeBorder(frame, pad_top, pad_bottom, pad_left, pad_right, cv2.BORDER_CONSTANT)
    return frame
interpreter = tf.lite.Interpreter(model_path='lite-model_movenet_singlepose_thunder_3.tflite')  # load the MoveNet Thunder model
interpreter.allocate_tensors()  # allocate memory for the model's tensors
img = None  # will hold the last preprocessed frame
cap = cv2.VideoCapture(0)
while cap.isOpened():
    ret, frame = cap.read()
    if not ret:
        break
    # Resize and pad the image for the model
    img = preprocess_image(frame.copy())
    # Convert to float32 and add a batch dimension
    input_image = np.expand_dims(img.astype(np.float32), axis=0)
    # Set up input and output details
    input_details = interpreter.get_input_details()
    output_details = interpreter.get_output_details()
    # Make predictions
    interpreter.set_tensor(input_details[0]['index'], input_image)
    interpreter.invoke()
    keypoints_with_scores = interpreter.get_tensor(output_details[0]['index'])
    # Rendering
    draw_connections(frame, keypoints_with_scores, EDGES, 0.1)
    draw_keypoints(frame, keypoints_with_scores, 0.1)
    cv2.imshow('MoveNet Thunder', frame)
    if cv2.waitKey(10) & 0xFF == ord('q'):
        break
cap.release()
cv2.destroyAllWindows()
plt.imshow(img)  # show the last preprocessed frame
print(img.shape)
left_wrist = keypoints_with_scores[0][0][9]  # keypoint 9 is the left wrist in MoveNet's ordering
right_wrist = keypoints_with_scores[0][0][10]  # keypoint 10 is the right wrist
px_coordinates = np.array(right_wrist[:2] * [720, 1280]).astype(int)  # scale normalised [y, x] by frame height and width to get pixel coordinates
I have tried changing the drawing functions and the preprocessing function, but that sometimes moves the points to the top left of the screen so they do not cover the body at all.
I suggest you draw on the preprocessed image rather than the original frame. preprocess_image resizes and letterboxes the frame to a 256x256 square, so the keypoints the model returns are normalised to that padded image; multiplying them by the original frame's dimensions shifts everything by the padding, which is exactly the vertical offset you are describing.
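If you would rather keep drawing on the full-resolution frame, you can undo the letterboxing instead. Here is a minimal sketch under that assumption; it mirrors the resize-and-pad arithmetic of your preprocess_image, and unletterbox_keypoints is a hypothetical helper name I chose, not part of MoveNet:

def unletterbox_keypoints(keypoints_with_scores, frame_shape, target_size=256):
    # Map keypoints normalised to the padded target_size square back to
    # coordinates normalised to the original frame, so draw_keypoints and
    # draw_connections can be reused unchanged.
    orig_height, orig_width, _ = frame_shape
    scale = target_size / max(orig_height, orig_width)  # same scale as preprocess_image
    new_height = round(orig_height * scale)
    new_width = round(orig_width * scale)
    pad_top = (target_size - new_height) // 2
    pad_left = (target_size - new_width) // 2
    shaped = np.squeeze(keypoints_with_scores).copy()  # (17, 3) rows of [y, x, score]
    # Undo the padding and the resize, then renormalise to the original frame
    shaped[:, 0] = (shaped[:, 0] * target_size - pad_top) / (scale * orig_height)
    shaped[:, 1] = (shaped[:, 1] * target_size - pad_left) / (scale * orig_width)
    return shaped.reshape(1, 1, 17, 3)

In the rendering step of your loop you would then pass the corrected keypoints to your existing drawing functions:

corrected = unletterbox_keypoints(keypoints_with_scores, frame.shape)
draw_connections(frame, corrected, EDGES, 0.1)
draw_keypoints(frame, corrected, 0.1)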