How to Find the Face Orientation Using MediaPipe?


I'm encountering an issue with determining the orientation of a person's face. I can successfully find the face orientation using MediaPipe Face Mesh when the person is within 1 meter of the camera, but I need this to work at distances greater than 3 meters. I attempted to use MediaPipe's pose estimation to address this, but it does not perform well when I focus on the face landmarks (e.g., the nose). Interestingly, it works effectively for estimating body orientation based on the hip points.

Is there a solution or workaround to enable accurate face orientation estimation at longer distances using MediaPipe or any other method?

Here is the code that I am using:

from cvzone.PoseModule import PoseDetector
import cv2
import numpy as np
import imutils

cap = cv2.VideoCapture(1)

detector = PoseDetector(staticMode=False,
                        modelComplexity=1,
                        smoothLandmarks=True,
                        enableSegmentation=False,
                        smoothSegmentation=True,
                        detectionCon=0.5,
                        trackCon=0.5)

while True:
    # Capture each frame from the webcam
    success, img = cap.read()
    if not success:
        break
    org_img = img.copy()
    img_h, img_w, _ = org_img.shape
    # Find the human pose in the frame
    img = detector.findPose(img, draw=False)

    # Find the landmarks, bounding box, and center of the body in the frame
    # Set draw=True to draw the landmarks and bounding box on the image
    lmList, bboxInfo = detector.findPosition(img, draw=False, bboxWithHands=False)

    # Check if any body landmarks are detected
    
    if lmList:
        body_2d = []
        body_3d = []
        for idx, landmark in enumerate(lmList):
            x, y, z = landmark
            # Keep only the face landmarks of the pose model
            # (indices 0-10: nose, eyes, ears, and mouth)
            if idx <= 10:
                if idx == 0:
                    nose_2d = (x, y)
                    nose_3d = (x, y, z)

                body_2d.append([x, y])
                body_3d.append([x, y, z / 3000])

                cv2.circle(img, (x, y), 5, (0, 0, 255), cv2.FILLED)

        body_2d = np.array(body_2d,dtype=np.float64)
        body_3d = np.array(body_3d,dtype=np.float64)

        focal_length = 1 * img_w
        # Approximate intrinsics: principal point (cx, cy) at the image center
        cameraMatrix = np.array([
            [focal_length, 0.0, img_w / 2],
            [0.0, focal_length, img_h / 2],
            [0.0, 0.0, 1.0]
        ], dtype=np.float64)

        dist_matrix = np.zeros((4,1),dtype=np.float64)
                
        success, rot_vec, trans_vec = cv2.solvePnP(body_3d, body_2d, cameraMatrix, dist_matrix)

        # Convert the rotation vector to a rotation matrix and decompose it into Euler angles
        r_mat, jac = cv2.Rodrigues(rot_vec)

        angles, mtxR, mtxQ, Qx, Qy, Qz = cv2.RQDecomp3x3(r_mat)

        x = angles[0] * 360
        y = angles[1] * 360
        z = angles[2] * 360

        if y < -10:
            text = "Turn Left"
        elif y > 10:
            text = "Turn Right"
        else:
            text = "Straight"

        # Project the 3D nose point back into image coordinates
        nose_3d_projection, jacobian = cv2.projectPoints(
            np.array([nose_3d], dtype=np.float64), rot_vec, trans_vec, cameraMatrix, dist_matrix)
        
        p1 = (int(nose_2d[0]), int(nose_2d[1]))
        p2 = (int(nose_2d[0] + (y * 10)), int(nose_2d[1] - (x * 10)))
        cv2.line(img, p1, p2, (255, 0, 0), 3)

        cv2.putText(img, text, (20, 50), 1, 2, (0, 255, 0), 2)
        cv2.putText(img, f"Y: {round(y, 2)}", (img_w - 100, 50), 1, 2, (0, 255, 0), 2)
    # Display the frame in a window
    cv2.imshow("Image", imutils.resize(img,height=480))

    # Wait for 1 millisecond between each frame
    key = cv2.waitKey(1)

    if key == ord('s'):  # press 's' to quit
        break

cap.release()
cv2.destroyAllWindows()

Any help with finding the face orientation would be appreciated.


1 Answer


To improve the accuracy of face orientation estimation with MediaPipe at longer distances, you can consider an alternative approach that combines face detection with body pose estimation. Here is a step-by-step walkthrough.

  1. Import the necessary libraries and initialize the MediaPipe Face Detection and Pose modules.
import cv2
import mediapipe as mp

# Initialize MediaPipe Face and Pose modules
mp_face = mp.solutions.face_detection
mp_pose = mp.solutions.pose

# Initialize the MediaPipe drawing utilities
mp_drawing = mp.solutions.drawing_utils

  2. Initialize the webcam capture and set up the detection models.
cap = cv2.VideoCapture(0)  # Use the desired camera index (e.g., 0 for the default camera)

# Initialize Face Detection and Pose Estimation models
face_detection = mp_face.FaceDetection(min_detection_confidence=0.5)
pose_estimation = mp_pose.Pose(min_detection_confidence=0.5, min_tracking_confidence=0.5)

  3. Create a loop to continuously capture frames and perform face detection followed by pose estimation.
while True:
    success, frame = cap.read()
    if not success:
        continue

    # Convert the frame to RGB format for MediaPipe
    frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)

    # Perform face detection
    face_results = face_detection.process(frame_rgb)

    if face_results.detections:
        for detection in face_results.detections:
            bboxC = detection.location_data.relative_bounding_box
            ih, iw, _ = frame.shape
            x, y, w, h = int(bboxC.xmin * iw), int(bboxC.ymin * ih), int(bboxC.width * iw), int(bboxC.height * ih)
            cv2.rectangle(frame, (x, y), (x + w, y + h), (0, 255, 0), 2)

            # Perform pose estimation using the face region
            face_center = (x + w // 2, y + h // 2)
            # Clamp the crop to the frame bounds so the slice is never empty
            x0, y0 = max(x, 0), max(y, 0)
            frame_pose = frame[y0:y0 + h, x0:x0 + w]
            if frame_pose.size == 0:
                continue
            frame_pose_rgb = cv2.cvtColor(frame_pose, cv2.COLOR_BGR2RGB)

            # Perform pose estimation on the face region
            pose_results = pose_estimation.process(frame_pose_rgb)

            if pose_results.pose_landmarks:
                # You can access pose landmarks and orientation information here
                landmarks = pose_results.pose_landmarks
                # Extract relevant pose landmarks and calculate face orientation
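                # A minimal sketch of that calculation (the nose/ear heuristic and
                # the 0.05 threshold are assumptions, not part of the original
                # answer): estimate yaw from where the nose sits relative to the
                # midpoint of the ears. Coordinates are normalized to the crop.
                nose = landmarks.landmark[mp_pose.PoseLandmark.NOSE]
                left_ear = landmarks.landmark[mp_pose.PoseLandmark.LEFT_EAR]
                right_ear = landmarks.landmark[mp_pose.PoseLandmark.RIGHT_EAR]
                ear_mid_x = (left_ear.x + right_ear.x) / 2
                yaw_offset = nose.x - ear_mid_x
                # A positive offset roughly means the head is turned toward the
                # person's own left; flip the labels if your preview is mirrored
                if yaw_offset > 0.05:
                    orientation = "Turn Left"
                elif yaw_offset < -0.05:
                    orientation = "Turn Right"
                else:
                    orientation = "Straight"
                cv2.putText(frame, orientation, (x, max(y - 10, 20)),
                            cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 255, 0), 2)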

                # Draw the landmarks on the cropped region; frame_pose is a
                # view into frame, so the drawing appears on the full image
                mp_drawing.draw_landmarks(frame_pose, pose_results.pose_landmarks, mp_pose.POSE_CONNECTIONS)

    cv2.imshow("Face Orientation Detection", frame)

    if cv2.waitKey(1) & 0xFF == ord('q'):
        break

cap.release()
cv2.destroyAllWindows()
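
One caveat about this design: MediaPipe Pose is trained on images of full or partial bodies, so running it on a tight face crop may often fail to return landmarks. If pose_results.pose_landmarks comes back empty, a reasonable fallback is to run pose_estimation on the full frame instead and use the face bounding box only to pick out the head landmarks (nose, eyes, ears).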