I'm having trouble with the accuracy of my epipolar lines in stereo vision. Despite careful calibration and using established algorithms, the generated epipolar lines deviate significantly from what I expect. I'm not sure whether the root cause lies in the calibration process or in my code. This is my code:
#Importing necessary libraries
import cv2
import numpy as np
import matplotlib.pyplot as plt
import math
import copy
from tqdm import *
#Function to perform preprocessing operations on the images
def preprocessing(img):
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    processed_img = cv2.GaussianBlur(gray, (15,15), 0)
    return processed_img
#Function to find and match features between the two images
def feature_detection(img1, img2, th):
    processed_img1, processed_img2 = preprocessing(img1), preprocessing(img2)
    sift = cv2.SIFT_create()
    kp1, desc1 = sift.detectAndCompute(processed_img1, None)
    kp2, desc2 = sift.detectAndCompute(processed_img2, None)
    bf = cv2.BFMatcher()
    matches = bf.knnMatch(desc1, desc2, k=2)
    #Keeping good matches using Lowe's ratio test
    good = []
    for m, n in matches:
        if m.distance < th*n.distance:
            good.append([m])
    left_pts = np.float32([kp1[m[0].queryIdx].pt for m in good])
    right_pts = np.float32([kp2[m[0].trainIdx].pt for m in good])
    return left_pts, right_pts
#Function to find start and end coordinates given the equation of an epiline
def epiline_coordinates(lines, img):
    lines = lines.reshape(-1, 3)
    c = img.shape[1]
    co_ordinates = []
    for line in lines:
        x0, y0 = map(int, [0, -line[2] / line[1]])
        x1, y1 = map(int, [c, -(line[2] + line[0] * c) / line[1]])
        co_ordinates.append([[x0, y0], [x1, y1]])
    return co_ordinates
#Function to draw epilines on the two corresponding images
def draw_epilines(l_epiline_coords, r_epiline_coords, left_pts, right_pts, img1, img2):
    img1_copy = np.copy(img1)
    img2_copy = np.copy(img2)
    for l_epiline_coord, r_epiline_coord, left_pt, right_pt in zip(l_epiline_coords, r_epiline_coords, np.int32(left_pts), np.int32(right_pts)):
        color = tuple(np.random.randint(0, 255, 3).tolist())
        # Draw epilines
        img1_copy = cv2.line(img1_copy, tuple(l_epiline_coord[0]), tuple(l_epiline_coord[1]), color, 2)
        img2_copy = cv2.line(img2_copy, tuple(r_epiline_coord[0]), tuple(r_epiline_coord[1]), color, 2)
        # Draw keypoints
        img1_copy = cv2.circle(img1_copy, tuple(left_pt[0]), 7, color, -1)
        img2_copy = cv2.circle(img2_copy, tuple(right_pt[0]), 7, color, -1)
    return img1_copy, img2_copy
#Function to calculate the sum of absolute differences (SAD) between two blocks
def sum_of_abs_diff(pixel_vals_1, pixel_vals_2):
    if pixel_vals_1.shape != pixel_vals_2.shape:
        return -1
    # Cast to int to avoid uint8 wrap-around when subtracting
    return np.sum(np.abs(pixel_vals_1.astype(np.int32) - pixel_vals_2.astype(np.int32)))
#Function to find the corresponding block in the other image using SAD
def compare_blocks(y, x, block_left, right_array, window, search_range):
    x_min = max(0, x - search_range)
    x_max = min(right_array.shape[1], x + search_range)
    first = True
    min_sad = None
    min_index = None
    for x in range(x_min, x_max):
        block_right = right_array[y: y+window, x: x+window]
        sad = sum_of_abs_diff(block_left, block_right)
        if first:
            min_sad = sad
            min_index = (y, x)
            first = False
        elif sad < min_sad:
            min_sad = sad
            min_index = (y, x)
    return min_index
def stereoRectify(img1, img2, K1, d1, K2, d2, R, T):
    h, w = img1.shape[:2]
    imgSize = (w, h)
    # Calculating rectification parameters
    flags = cv2.CALIB_ZERO_DISPARITY
    alpha = 0
    R1, R2, P1, P2, Q, validROI1, validROI2 = cv2.stereoRectify(K1, d1, K2, d2, imgSize, R, T, flags=flags, alpha=alpha)
    # Performing rectification
    map1x, map1y = cv2.initUndistortRectifyMap(K1, d1, R1, P1, imgSize, cv2.CV_32FC1)
    map2x, map2y = cv2.initUndistortRectifyMap(K2, d2, R2, P2, imgSize, cv2.CV_32FC1)
    img1_rectified = cv2.remap(img1, map1x, map1y, cv2.INTER_LINEAR)
    img2_rectified = cv2.remap(img2, map2x, map2y, cv2.INTER_LINEAR)
    return img1_rectified, img2_rectified, R1, R2, P1, P2
# Load calibration matrices
calibration_data = np.load('calibration_matrices.npz')
K1, d1, K2, d2, R, T, E, F = [calibration_data[i] for i in calibration_data.files]
img1 = cv2.imread("../assets/notused5/l.jpg")
img2 = cv2.imread("../assets/notused5/r.jpg")
# Use the original images for feature detection
left_pts_orig, right_pts_orig = feature_detection(img1, img2, 0.4)
# Rectify images
img1_rectified, img2_rectified, R1, R2, P1, P2 = stereoRectify(img1, img2, K1, d1, K2, d2, R, T)
# Apply rectification transformation to keypoints
left_pts_rectified = cv2.undistortPoints(np.expand_dims(left_pts_orig, axis=1), K1, d1, R=R1, P=P1)
right_pts_rectified = cv2.undistortPoints(np.expand_dims(right_pts_orig, axis=1), K2, d2, R=R2, P=P2)
# Convert keypoints to numpy arrays
left_pts_rectified = np.squeeze(left_pts_rectified)
right_pts_rectified = np.squeeze(right_pts_rectified)
# Compute epilines for the rectified keypoints using the fundamental matrix
l_epilines = cv2.computeCorrespondEpilines(right_pts_rectified.reshape(-1, 1, 2), 2, F)
r_epilines = cv2.computeCorrespondEpilines(left_pts_rectified.reshape(-1, 1, 2), 1, F)
# Convert epilines to coordinates
l_epiline_coords = epiline_coordinates(l_epilines, img1_rectified)
r_epiline_coords = epiline_coordinates(r_epilines, img2_rectified)
# Draw epilines on rectified images using keypoints from original images
img1_with_epilines, img2_with_epilines = draw_epilines(l_epiline_coords, r_epiline_coords, left_pts_rectified.reshape(-1, 1, 2), right_pts_rectified.reshape(-1, 1, 2), img1_rectified, img2_rectified)
# Save or display the images with epilines
cv2.imwrite('img1_with_epilines.png', img1_with_epilines)
cv2.imwrite('img2_with_epilines.png', img2_with_epilines)
out = np.hstack((img1_with_epilines, img2_with_epilines))
cv2.imwrite('epilines.png', out)
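For reference, a quick sanity check of F against the raw SIFT matches might look like the sketch below. It only reuses F, left_pts_orig and right_pts_orig from above, and evaluates the epipolar constraint x2^T F x1 = 0 together with the point-to-epiline distance in pixels:

# Sketch: how well do the raw matches satisfy the epipolar constraint x2^T F x1 = 0?
ones = np.ones((left_pts_orig.shape[0], 1), dtype=np.float32)
x1 = np.hstack([left_pts_orig, ones])   # homogeneous points in the left image
x2 = np.hstack([right_pts_orig, ones])  # homogeneous points in the right image
lines2 = (F @ x1.T).T                   # epilines in the right image for the left points
residuals = np.abs(np.sum(x2 * lines2, axis=1))
# Dividing by the norm of the line normal gives the point-to-epiline distance in pixels
dists = residuals / np.linalg.norm(lines2[:, :2], axis=1)
print("mean |x2^T F x1|:", residuals.mean())
print("mean point-to-epiline distance (px):", dists.mean())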
And this is my calibration code:
import numpy as np
import cv2
def preprocessing(img):
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    processed_img = cv2.GaussianBlur(gray, (15,15), 0)
    return processed_img
# Define the number of corners in the chessboard pattern
# Change these values according to your chessboard
chessboard_size = (9, 6)
# Prepare object points, like (0,0,0), (1,0,0), (2,0,0) ....,(6,5,0)
objp = np.zeros((chessboard_size[0]*chessboard_size[1], 3), np.float32)
objp[:, :2] = np.mgrid[0:chessboard_size[0], 0:chessboard_size[1]].T.reshape(-1, 2)
# Arrays to store object points and image points from all the images.
objpoints = [] # 3d points in real world space
imgpoints1 = [] # 2d points in image plane.
imgpoints2 = []
# Load the videos
# Video capture from the two files
cap1 = cv2.VideoCapture('../assets/notused5/vleft.mp4')
cap2 = cv2.VideoCapture('../assets/notused5/vright.mp4')
frame_counter = 0
while True:
    ret1, frame1 = cap1.read()
    ret2, frame2 = cap2.read()
    if not (ret1 and ret2):
        break
    frame_counter += 1
    if frame_counter % 30 != 0: # Process every 30th frame
        continue
    # Convert the frames to grayscale
    gray1 = preprocessing(frame1)
    gray2 = preprocessing(frame2)
    # Find the chessboard corners
    ret1, corners1 = cv2.findChessboardCorners(gray1, chessboard_size, None)
    ret2, corners2 = cv2.findChessboardCorners(gray2, chessboard_size, None)
    # If found, add object points and image points
    if ret1 and ret2:
        objpoints.append(objp)
        imgpoints1.append(corners1)
        imgpoints2.append(corners2)
cv2.destroyAllWindows()
# Perform camera calibration
ret1, mtx1, dist1, rvecs1, tvecs1 = cv2.calibrateCamera(objpoints, imgpoints1, gray1.shape[::-1], None, None)
ret2, mtx2, dist2, rvecs2, tvecs2 = cv2.calibrateCamera(objpoints, imgpoints2, gray2.shape[::-1], None, None)
# Stereo calibration
flags = 0
criteria_stereo = (cv2.TERM_CRITERIA_EPS + cv2.TERM_CRITERIA_MAX_ITER, 100, 0.0001)
ret, K1, d1, K2, d2, R, T, E, F = cv2.stereoCalibrate(objpoints, imgpoints1, imgpoints2, mtx1, dist1, mtx2, dist2, gray1.shape[::-1], criteria=criteria_stereo, flags=flags)
print(K1)
# Save matrices
np.savez('calibration_matrices.npz', K1=K1, d1=d1, K2=K2, d2=d2, R=R, T=T, E=E, F=F)
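For completeness, a small calibration-quality check could go right after cv2.stereoCalibrate. This is only a sketch that reuses ret, objpoints, imgpoints1, mtx1, dist1, rvecs1 and tvecs1 from above (per_view_err is just an illustrative name):

# Sketch: report the stereo RMS error and the left camera's per-view reprojection error
print("stereoCalibrate RMS reprojection error:", ret)
per_view_err = []
for objp_i, imgp_i, rvec, tvec in zip(objpoints, imgpoints1, rvecs1, tvecs1):
    proj, _ = cv2.projectPoints(objp_i, rvec, tvec, mtx1, dist1)
    diff = imgp_i.reshape(-1, 2) - proj.reshape(-1, 2)
    err = np.sqrt((diff ** 2).sum(axis=1).mean())   # RMS error for this view, in pixels
    per_view_err.append(err)
print("left camera mean per-view reprojection error (px):", np.mean(per_view_err))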
This is the resulting image with the drawn epilines:
I have tried many things to fix this, including using different video inputs and extensively modifying the code, but I still cannot pin down the root cause of the problem in my implementation.
