Hope that everyone is doing well.
I'm working on visual SLAM. The video I'm using right now is of a car driving down a road. The rotation matrix seems right, but the translation vector I get from decomposeEssentialMat has very high x and y values and a near-zero z value, even though the camera should mostly be moving along the z axis as the car drives forward.
Here is the code and some sample output:
TRANSLATION: [-0.9247823026950661;
-0.3804958988176681;
-0.0007507362692929707]
TRANSLATION: [0.8785245421613305;
0.4776968154779486;
0.0006174969242414622]
TRANSLATION: [-0.8762639669385905;
-0.4808546299239073;
0.03066406896983919]
TRANSLATION: [0.8820850695684039;
0.4710895097519138;
0.000777075418202299]
TRANSLATION: [-0.838846170104902;
-0.5443683104889697;
-0.000495414697272841]
#include "readProcess.h"

int readProcess(char *videoPath) {
    VideoCapture cap;
    cap.open(videoPath);
    render();

    Ptr<ORB> orb = ORB::create();
    BFMatcher matcher(NORM_HAMMING);
    Mat descriptors, prevDescriptors;
    bool descriptorInit = false;
    Mat prevFrame;
    std::vector<KeyPoint> prevKeypoints;
    int totalPositive = 0, maxPositive = 0;
    int index = 0;
    Mat essentialMatrix;
    Mat rotation, translation;

    while (true) {
        Mat frame;
        cap >> frame;
        if (frame.empty()) {
            break;
        }
        Mat grey;
        cvtColor(frame, grey, COLOR_BGR2GRAY);

        // Detect corners, then compute ORB descriptors at those locations
        std::vector<Point2f> corners;
        goodFeaturesToTrack(grey, corners, 3000, 0.01, 3);
        std::vector<KeyPoint> keypoints;
        for (const Point2f& corner : corners) {
            keypoints.emplace_back(corner, 1.0f);
        }
        orb->compute(grey, keypoints, descriptors);
        if (descriptorInit) {
            std::vector<std::vector<DMatch>> matches;
            matcher.knnMatch(prevDescriptors, descriptors, matches, 2);

            // Lowe's ratio test to keep only distinctive matches
            std::vector<DMatch> threshMatches;
            for (size_t i = 0; i < matches.size(); ++i) {
                if (matches[i].size() == 2 && matches[i][0].distance < 0.7 * matches[i][1].distance) {
                    threshMatches.push_back(matches[i][0]);
                }
            }

            std::vector<Point2f> prevMatches, currMatches;
            for (const DMatch& match : threshMatches) {
                prevMatches.push_back(prevKeypoints[match.queryIdx].pt);
                currMatches.push_back(keypoints[match.trainIdx].pt);
            }

            // Fix coefficients somehow after; focal = 1.0 and pp = (0, 0)
            // are placeholders, but the points are in pixel coordinates.
            essentialMatrix = findEssentialMat(prevMatches, currMatches, 1.0, Point2d(0, 0), RANSAC);
            Mat R1, R2, t;
            decomposeEssentialMat(essentialMatrix, R1, R2, t);
            std::cout << "TRANSLATION: " << t << std::endl;

            Mat imgMatches;
            drawMatches(prevFrame, prevKeypoints, frame, keypoints, threshMatches, imgMatches);
            imshow("ORB Matches", imgMatches);
        }
        prevDescriptors = descriptors;
        prevFrame = frame;
        prevKeypoints = keypoints;
        descriptorInit = true;
        if (waitKey(1) == 'q') {
            break;
        }
    }
    cap.release();
    destroyAllWindows();
    return 0;
}
I have tried looking through all the code and can't find anything that would cause this.
The pose returned by decomposeEssentialMat is only determined up to scale: the translation part is a unit vector, so it encodes direction only, even though it is a 3D vector. To recover scale, you will need additional sensors; either a stereo pair or an IMU would help.
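Separately, the direction you are printing is likely skewed because findEssentialMat is being given focal = 1.0 and a principal point of (0, 0) while the matched points are in pixel coordinates. Here is a minimal sketch of the two-view step with calibrated intrinsics, using recoverPose to pick the physically valid decomposition; the fx, fy, cx, cy values are placeholders, not your actual calibration:

#include <opencv2/opencv.hpp>
#include <iostream>

using namespace cv;

// Sketch: relative pose between two frames from matched pixel coordinates.
// fx, fy, cx, cy are assumed placeholder values; substitute the intrinsics
// from your own camera calibration.
void relativePose(const std::vector<Point2f>& prevPts,
                  const std::vector<Point2f>& currPts) {
    double fx = 700.0, fy = 700.0, cx = 640.0, cy = 360.0; // placeholders
    Mat K = (Mat_<double>(3, 3) << fx, 0, cx,
                                    0, fy, cy,
                                    0,  0,  1);
    Mat mask;
    Mat E = findEssentialMat(prevPts, currPts, K, RANSAC, 0.999, 1.0, mask);
    Mat R, t;
    // recoverPose applies the cheirality check, selecting the one of the
    // four (R, t) candidates from the essential matrix that places the
    // triangulated points in front of both cameras.
    recoverPose(E, prevPts, currPts, K, R, t, mask);
    // t always comes back with unit norm: only its direction is meaningful.
    std::cout << "direction: " << Mat(t.t()) << ", |t| = " << norm(t) << std::endl;
}

Even with correct intrinsics, |t| will always be 1 here; the magnitude of the motion between frames has to come from elsewhere (a stereo baseline, an IMU, or a known-scale reference in the scene).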