I have a problem: I need to display the path traveled by a person in a video from a bird's-eye view, using a homography matrix and taking the bottom-center point of the bounding box as the reference point. The dataset is here: http://www.santhoshsunderrajan.com/datasets.html#hfh_tracking . The dataset description says: all videos are stored in the videos folder, and the corresponding ground-plane homography files are stored in the homography folder. The same homography information is also provided as .mat (MATLAB) files. camera_placement.pdf shows the camera network layout. All videos in the dataset are fully synchronized using global timestamps.
Here is the code snippet from the dataset description (MATLAB) for projecting an image-plane position onto the ground plane:

```matlab
gpTemp = Homography * image_position;  % image_position = [x y 1]'
gp_position = [gpTemp(1)/gpTemp(3); gpTemp(2)/gpTemp(3)]';
```
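If I understand it correctly, the per-point equivalent in Python would be something like the sketch below. The function name `project_to_ground` is mine, and I assume `H` is the 3x3 matrix loaded from the homography folder:

```python
import cv2
import numpy as np

def project_to_ground(H, x, y):
    """Project an image-plane point (x, y) onto the ground plane via homography H."""
    p = H @ np.array([x, y, 1.0], dtype=np.float32)  # gpTemp = Homography * image_position
    return p[0] / p[2], p[1] / p[2]                  # normalize by the homogeneous coordinate

# cv2.perspectiveTransform should give the same result for a batch of points:
pt = np.array([[[120.0, 340.0]]], dtype=np.float32)  # shape (1, 1, 2); values made up
ground = cv2.perspectiveTransform(pt, H)[0, 0]
```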
However, I don't understand how to write this in Python with OpenCV: if I use the cv2.warpPerspective function and pass it the homography matrix and the image dimensions, I get the wrong view, since it warps the whole frame while I only need to transform individual points. There is also a problem with the deque: I don't understand how to maintain a separate deque of trail points for each detected person.
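For the per-person histories, I imagine something like a dictionary of deques keyed by a track ID. This is a minimal sketch assuming some tracker already assigns a stable `person_id`, which YOLO alone does not provide, and that is part of my problem:

```python
from collections import deque

N = 200
tracks = {}  # hypothetical: person_id -> deque of bottom-center points

def add_point(person_id, point):
    # Each person gets their own fixed-length history on first appearance
    if person_id not in tracks:
        tracks[person_id] = deque(maxlen=N)
    tracks[person_id].appendleft(point)
```

But I don't know how to obtain such an ID from the detections.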
I tried hard to do everything myself, but I couldn't get a proper result. Here is my code:
```python
import cv2
import numpy as np
import time
import imutils
from collections import deque
from google.colab.patches import cv2_imshow

# Load the YOLOv3 network and the COCO class labels
net = cv2.dnn.readNetFromDarknet("/content/YOLOv3-TensorFlow-2.x/yolov3.cfg",
                                 "/content/YOLOv3-TensorFlow-2.x/model_data/yolov3.weights")
net.setPreferableBackend(cv2.dnn.DNN_BACKEND_CUDA)
net.setPreferableTarget(cv2.dnn.DNN_TARGET_CUDA)
labels = open("/content/YOLOv3-TensorFlow-2.x/coco.names").read().strip().split("\n")

cap = cv2.VideoCapture('/MyDrive/HallWayTracking/HallWayTracking/videos/001.avi')
layer_names = net.getLayerNames()
output_layers = [layer_names[i - 1] for i in net.getUnconnectedOutLayers()]

# One deque shared by all detections -- this is exactly my problem:
# I need a separate history per person instead.
N = 200
pts = deque(maxlen=N)
counter = 0
(dX, dY) = (0, 0)
direction = ""

# Load the 3x3 ground-plane homography for this camera
homography_path = "/MyDrive/HallWayTracking/HallWayTracking/homography/001.txt"
H = np.loadtxt(homography_path, delimiter=",")
H = H.astype(np.float32)
homography_matrix = H.reshape((3, 3))

colors = np.random.randint(0, 255, size=(len(labels), 3), dtype="uint8")

count = 0
while count < 20:
    count += 1
    ret, frame = cap.read()
    if ret:
        frame = imutils.resize(frame, width=600)
        blurred = cv2.GaussianBlur(frame, (11, 11), 0)   # currently unused
        hsv = cv2.cvtColor(blurred, cv2.COLOR_BGR2HSV)   # currently unused
        height, width = frame.shape[:2]

        # Run YOLOv3 on the frame (scale 0.00392 is roughly 1/255)
        blob = cv2.dnn.blobFromImage(frame, 0.00392, (416, 416), (0, 0, 0), True, crop=False)
        net.setInput(blob)
        start = time.perf_counter()
        outs = net.forward(output_layers)
        time_took = time.perf_counter() - start
        print("Time took:", time_took)

        # Collect person detections (COCO class 0) above the confidence threshold
        confidences, boxes, class_ids = [], [], []
        for output in outs:
            for detection in output:
                scores = detection[5:]
                class_id = np.argmax(scores)
                if class_id != 0:
                    continue
                confidence = scores[class_id]
                if confidence > 0.3:
                    center_x = int(detection[0] * width)
                    center_y = int(detection[1] * height)
                    w = int(detection[2] * width)
                    h = int(detection[3] * height)
                    x = int(center_x - w / 2)
                    y = int(center_y - h / 2)
                    boxes.append([x, y, w, h])
                    confidences.append(float(confidence))
                    class_ids.append(class_id)

        indexes = cv2.dnn.NMSBoxes(boxes, confidences, 0.5, 0.4)

        # Reference point: bottom-center of each surviving box
        for i in range(len(boxes)):
            if i in indexes:
                x, y, w, h = boxes[i]
                pts.appendleft((x + w // 2, y + h))

        font_scale = 1
        thickness = 1
        if len(indexes) > 0:
            for i in indexes.flatten():
                x, y = boxes[i][0], boxes[i][1]
                w, h = boxes[i][2], boxes[i][3]
                color = [int(c) for c in colors[class_ids[i]]]
                cv2.rectangle(frame, (x, y), (x + w, y + h), color=color, thickness=thickness)
                text = f"{labels[class_ids[i]]}: {confidences[i]:.2f}"
                (text_width, text_height) = cv2.getTextSize(
                    text, cv2.FONT_HERSHEY_SIMPLEX, fontScale=font_scale, thickness=thickness)[0]
                text_offset_x = x
                text_offset_y = y - 5
                box_coords = ((text_offset_x, text_offset_y),
                              (text_offset_x + text_width + 2, text_offset_y - text_height))
                # Semi-transparent background behind the label
                overlay = frame.copy()
                cv2.rectangle(overlay, box_coords[0], box_coords[1], color=color, thickness=cv2.FILLED)
                frame = cv2.addWeighted(overlay, 0.6, frame, 0.4, 0)
                cv2.putText(frame, text, (x, y - 5), cv2.FONT_HERSHEY_SIMPLEX,
                            fontScale=font_scale, color=(0, 0, 0), thickness=thickness)
                cv2.rectangle(frame, (x, y), (x + w, y + h), color, 2)
                cv2.circle(frame, (x + w // 2, y + h), 2, (0, 0, 255), 2)

        # Draw the trail and estimate the movement direction
        for i in np.arange(1, len(pts)):
            if pts[i - 1] is None or pts[i] is None:
                continue
            if counter >= 10 and i == 1 and pts[-10] is not None:
                dX = pts[-10][0] - pts[i][0]
                dY = pts[-10][1] - pts[i][1]
                (dirX, dirY) = ("", "")
                if np.abs(dX) > 3:
                    dirX = "East" if np.sign(dX) == 1 else "West"
                if np.abs(dY) > 3:
                    dirY = "North" if np.sign(dY) == 1 else "South"
                if dirX != "" and dirY != "":
                    direction = "{}-{}".format(dirY, dirX)
                else:
                    direction = dirX if dirX != "" else dirY
            thickness = 2  # np.sqrt(N / float(i + 1)) * 2.5
            cv2.line(frame, pts[i - 1], pts[i], (0, 0, 255), thickness)

        cv2.putText(frame, direction, (10, 30), cv2.FONT_HERSHEY_SIMPLEX,
                    0.65, (0, 0, 255), 3)
        cv2.putText(frame, "dx: {}, dy: {}".format(dX, dY),
                    (10, frame.shape[0] - 10), cv2.FONT_HERSHEY_SIMPLEX,
                    0.35, (0, 0, 255), 1)

        # This warps the whole frame and produces the "wrong view" I described above
        mapped_im = cv2.warpPerspective(frame, homography_matrix, (width, height))

        cv2_imshow(frame)
        counter += 1
        if cv2.waitKey(1) == 27:
            break

cap.release()
cv2.destroyAllWindows()
```
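To show what I mean by the bird's-eye path, this is the direction I was trying to go instead of warping the whole frame: transform only the trail points with cv2.perspectiveTransform and draw them on a blank top-view canvas. The canvas size, scale, and offset below are placeholder values I made up:

```python
import cv2
import numpy as np

def draw_birdseye(points, H, size=(600, 600), scale=20.0, offset=(300.0, 50.0)):
    """Draw image-plane trail points on a blank top-view canvas via homography H."""
    canvas = np.zeros((size[1], size[0], 3), dtype=np.uint8)
    if len(points) < 2:
        return canvas
    src = np.array(points, dtype=np.float32).reshape(-1, 1, 2)
    ground = cv2.perspectiveTransform(src, H).reshape(-1, 2)  # image plane -> ground plane
    pix = ground * scale + np.array(offset)                   # ground units -> canvas pixels
    for a, b in zip(pix[:-1], pix[1:]):
        cv2.line(canvas, (int(a[0]), int(a[1])), (int(b[0]), int(b[1])), (0, 0, 255), 2)
    return canvas

# e.g. inside the loop: cv2_imshow(draw_birdseye(list(pts), homography_matrix))
```

Is this the right way to build the top view, or am I misusing the homography?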