The problem of applying the homography matrix

150 Views Asked by At

I have a problem, I need to implement the display of the path traveled by a person on a video from a bird's-eye view using a homography matrix, take the lower middle point of the bounding box as a reference point. A reference to a dataset that http://www.santhoshsunderrajan.com/datasets.html#hfh_tracking . The description of the dataset says: All videos are stored in the video folder, and the corresponding ground plane homography files are stored in the homography folder. Also, the .mat (MATLAB) file information containing tomography is also provided. camera_placement.pdf shows the network location. When collecting a dataset, all videos are fully synchronized using global timestamps.

Here is a code snippet for projecting the location of the image plane onto the earth plane:

gp Temp = Homography * image position; // image position = [ x y 1 ]'

gp position = [gp Temp(1)/temp(3); gp Temp(2)/temp(3)]';

However, I don't understand how to write this in python and OpenCV, if I use the cv2.warpPerspective function and pass it the homography matrix and image dimensions, then the wrong view is obtained. In addition, there was a deque problem, namely how to make the heap be implemented for each person found separately.

I tried a lot to do everything myself, but I couldn't get the proper result. Here is my code:

` import cv2 import numpy as np import time import imutils from collections import deque from google.colab.patches import cv2_imshow

net = cv2.dnn.readNetFromDarknet("/content/YOLOv3-TensorFlow-2.x/yolov3.cfg",
                             "/content/YOLOv3-TensorFlow-2.x/model_data/yolov3.weights")

net.setPreferableBackend(cv2.dnn.DNN_BACKEND_CUDA)
net.setPreferableTarget(cv2.dnn.DNN_TARGET_CUDA)

labels = open("/content/YOLOv3-TensorFlow-2.x/coco.names").read().strip().split("\n")

cap = cv2.VideoCapture('/MyDrive/HallWayTracking/HallWayTracking/videos/001.avi')

layer_names = net.getLayerNames()
output_layers = [layer_names[i - 1] for i in net.getUnconnectedOutLayers()]

N = 200
pts = deque(maxlen=N)
counter = 0
(dX, dY) = (0, 0)
direction = ""

homography_path = "/MyDrive/HallWayTracking/HallWayTracking/homography/001.txt"
H = np.loadtxt(homography_path, delimiter=",")
H = H.astype(np.float32)
homography_matrix = H.reshape((3, 3))

colors = np.random.randint(0, 255, size=(len(labels), 3), dtype="uint8")
count = 0

while count < 20:
    count += 1
    ret, frame = cap.read()
    if ret:
        frame = imutils.resize(frame, width=600)
        blurred = cv2.GaussianBlur(frame, (11, 11), 0)
        hsv = cv2.cvtColor(blurred, cv2.COLOR_BGR2HSV)
        height, width = frame.shape[:2]
        blob = cv2.dnn.blobFromImage(frame, 0.00392, (416, 416), (0, 0, 0), True, crop=False)
        net.setInput(blob)
        start = time.perf_counter()
        outs = net.forward(output_layers)
        time_took = time.perf_counter() - start
        print("Time took:", time_took)
        confidences, boxes, class_ids = [], [], []

        for output in outs:
            for detection in output:
                scores = detection[5:]
                class_id = np.argmax(scores)
                if class_id != 0:
                    continue
                confidence = scores[class_id]
                if confidence > 0.3:
                    center_x = int(detection[0] * width)
                    center_y = int(detection[1] * height)

                    w = int(detection[2] * width)
                    h = int(detection[3] * height)
                    x = int(center_x - w / 2)
                    y = int(center_y - h / 2)
                    boxes.append([x, y, w, h])
                    confidences.append(float(confidence))
                    class_ids.append(class_id)

        indexes = cv2.dnn.NMSBoxes(boxes, confidences, 0.5, 0.4)

        for i in range(len(boxes)):
            if i in indexes:
                x, y, w, h = boxes[i]
                pts.appendleft([x + w // 2, y + h])

        font_scale = 1
        thickness = 1
        if len(indexes) > 0:
            for i in indexes.flatten():
                x, y = boxes[i][0], boxes[i][1]
                w, h = boxes[i][2], boxes[i][3]
                color = [int(c) for c in colors[class_ids[i]]]
                cv2.rectangle(frame, (x, y), (x + w, y + h), color=color, thickness=thickness)
                text = f"{labels[class_ids[i]]}: {confidences[i]:.2f}"
                (text_width, text_height) = \
                    cv2.getTextSize(text, cv2.FONT_HERSHEY_SIMPLEX, fontScale=font_scale, thickness=thickness)[0]
                text_offset_x = x
                text_offset_y = y - 5
                box_coords = (
                    (text_offset_x, text_offset_y), (text_offset_x + text_width + 2, text_offset_y - text_height))
                overlay = frame.copy()
                cv2.rectangle(overlay, box_coords[0], box_coords[1], color=color, thickness=cv2.FILLED)
                frame = cv2.addWeighted(overlay, 0.6, frame, 0.4, 0)
                cv2.putText(frame, text, (x, y - 5), cv2.FONT_HERSHEY_SIMPLEX,
                        fontScale=font_scale, color=(0, 0, 0), thickness=thickness)
                cv2.rectangle(frame, (x, y), (x + w, y + h), color, 2)
                cv2.circle(frame, (x + w // 2, y + h), 2, (0, 0, 255), 2)
            for i in np.arange(1, len(pts)):
                if pts[i - 1] is None or pts[i] is None:
                    continue
                if counter >= 10 and i == 1 and pts[-10] is not None:
                    dX = pts[-10][0] - pts[i][0]
                    dY = pts[-10][1] - pts[i][1]
                    (dirX, dirY) = ("", "")
                    if np.abs(dX) > 3:
                        dirX = "East" if np.sign(dX) == 1 else "West"
                    if np.abs(dY) > 3:
                        dirY = "North" if np.sign(dY) == 1 else "South"
                    if dirX != "" and dirY != "":
                        direction = "{}-{}".format(dirY, dirX)
                    else:
                        direction = dirX if dirX != "" else dirY
                thickness = int(2)  # np.sqrt(N / float(i + 1)) * 2.5)
                cv2.line(frame, pts[i - 1], pts[i], (0, 0, 255), thickness)
            cv2.putText(frame, direction, (10, 30), cv2.FONT_HERSHEY_SIMPLEX,
                    0.65, (0, 0, 255), 3)
            cv2.putText(frame, "dx: {}, dy: {}".format(dX, dY),
                    (10, frame.shape[0] - 10), cv2.FONT_HERSHEY_SIMPLEX,
                    0.35, (0, 0, 255), 1)

        mapped_im = cv2.warpPerspective(frame, homography_matrix, (width, height))

    cv2_imshow(frame)
    counter += 1
    if cv2.waitKey(1) == 27:
        break

cap.release()
cv2.destroyAllWindows()`
0

There are 0 best solutions below