I'm trying to test a custom 3D reconstruction pipeline that receives ground truth depth maps as input:
To see the full code you can proceed to the end of the question, now I will just outline my approach without getting into the details.
Suppose I have a mesh
bunny_mesh = o3d.data.BunnyMesh()
mesh = o3d.io.read_triangle_mesh(bunny_mesh.path)
And I extract some poses using the p
key while visualizing the mesh with o3d.visualization.draw_geometries([mesh])
.
Then I create a trajectory by linearly interpolating subsequent poses.
Then, at each pose I extract the depth information through depth = np.array(vis.capture_depth_float_buffer(True))
and I save it to create a dataset that looks like this:
(this is just a portion of the dataset)
Finally, the code to reconstruct the mesh looks like the following
VOLUME_SIZE = 0.5
N_VOXELS_PER_SIDE = 1440
SDF_TRUNC = 3 * VOLUME_SIZE / N_VOXELS_PER_SIDE # 3
volume = o3d.pipelines.integration.ScalableTSDFVolume(
voxel_length= VOLUME_SIZE / N_VOXELS_PER_SIDE,
sdf_trunc=SDF_TRUNC,
color_type=o3d.pipelines.integration.TSDFVolumeColorType.RGB8
)
for depth,pose in zip(depth_maps, poses):
color = ... # Create dummy rgb image
rgbd = o3d.geometry.RGBDImage.create_from_color_and_depth(
o3d.geometry.Image(color),
o3d.geometry.Image(depth),
depth_trunc=1.0,
convert_rgb_to_intensity=False,
depth_scale = 1.0
)
volume.integrate(
rgbd,
intrinsics, # the intrinsics were retrieved from the previously loaded poses
pose,
)
And this is what I obtain:
My question is: How can I obtain an output mesh that is as similar as possible to the original one? Why the poses extracted from the visualizer seem to not work when trying to integrate the TSDF volume?
FULL CODE
- Load mesh and create key poses
bunny_mesh = o3d.data.BunnyMesh()
mesh = o3d.io.read_triangle_mesh(bunny_mesh.path)
o3d.visualization.draw_geometries([mesh])
# create poses by pressing 'p' in correspondence of different views of the mesh
- Create trajectory from poses
TRAJECTORY_PATH = "path/to/output/trajectory.json"
FPS = 30
NUM_FRAMES_BETWEEN_POSES = FPS * 5
poses = ... # Ordered list of poses paths (obtained with the 'p' key in the visualizer)
fx = 935.3074360871938
fy = 935.3074360871938
cx = 959.5
cy = 539.5
width = 1920
height = 1080
intrinsic = o3d.camera.PinholeCameraIntrinsic(
width,
height,
fx,
fy,
cx,
cy
)
for i in tqdm(range(1, len(poses))):
pose_a = poses[i-1]
pose_b = poses[i]
with open(pose_a) as f_a:
x = json.load(f_a)
extrinsic_matrix_a = np.array(x["extrinsic"])
with open(pose_b) as f_b:
x = json.load(f_b)
extrinsic_matrix_b = np.array(x["extrinsic"])
extrinsic_matrix_list = np.linspace(extrinsic_matrix_a,extrinsic_matrix_b,N_FRAMES_BETWEEN_POSES)
for j in range(len(extrinsic_matrix_list)):
p = o3d.camera.PinholeCameraParameters()
p.intrinsic = intrinsic
p.extrinsic = np.array(extrinsic_matrix_list[j]).reshape((4,4)).T
params.append(p)
trajectory.parameters = params
if os.path.isfile(TRAJECTORY_PATH):
os.remove(TRAJECTORY_PATH)
o3d.io.write_pinhole_camera_trajectory(TRAJECTORY_PATH, trajectory)
print(f"Traejctory saved @ {TRAJECTORY_PATH}")
- Create depth maps
EXTENSIONS = "pfm"
OUTPUT_DEPTH_MAPS_FOLDER = "path/to/an/empty/folder/"
MINIMUM_DEPTH_VALUE = 0.0008
MAXIMUM_DEPTH_VALUE = 0.25
def custom_draw_geometry_with_camera_trajectory(pcd):
custom_draw_geometry_with_camera_trajectory.index = -1
custom_draw_geometry_with_camera_trajectory.trajectory = o3d.io.read_pinhole_camera_trajectory(TRAJECTORY_PATH)
custom_draw_geometry_with_camera_trajectory.vis = o3d.visualization.Visualizer()
if os.path.isdir(OUTPUT_DEPTH_MAPS_FOLDER):
print("Removing old depth maps...")
for pth in tqdm(glob.glob(os.path.join(OUTPUT_DEPTH_MAPS_FOLDER,f"*.{EXTENSION}"))):
os.remove(pth)
else:
os.makedirs(OUTPUT_DEPTH_MAPS_FOLDER)
def move_forward(vis):
ctr = vis.get_view_control()
glb = custom_draw_geometry_with_camera_trajectory
if glb.index >= 0:
depth = np.array(vis.capture_depth_float_buffer(True))
depth[depth < MINIMUM_DEPTH_VALUE] = 0.0
depth[depth > MAXIMUM_DEPTH_VALUE] = 0.0
f_name = os.path.join(OUTPUT_DEPTH_MAPS_FOLDER,"{:06d}_d".format(glb.index) + f".{EXTENSION}")
res = cv2.imwrite(f_name,depth.astype(np.float32))
assert res
glb.index = glb.index + 1
if glb.index < len(glb.trajectory.parameters):
ctr.convert_from_pinhole_camera_parameters(
glb.trajectory.parameters[glb.index]
)
else:
custom_draw_geometry_with_camera_trajectory.vis.\
register_animation_callback(None)
return False
vis = custom_draw_geometry_with_camera_trajectory.vis
vis.create_window(width = width, height = height)
vis.add_geometry(pcd)
vis.register_animation_callback(move_forward)
print("Running...")
vis.run()
vis.destroy_window()
custom_draw_geometry_with_camera_trajectory(mesh)
- Create a video for a visual check
def create_video(paths: List[str], output_path: str, fps: int = 24):
# paths should be already sorted.
assert "." in output_path and output_path.split(".")[-1] == "mp4"
shape = cv2.imread(paths[0], cv2.IMREAD_UNCHANGED).shape
if len(shape) == 3:
height, width, channels = shape
elif len(shape) == 2:
height, width = shape
channels = 1
else:
raise ValueError("Wrong shape")
# choose codec according to format needed
fourcc = cv2.VideoWriter_fourcc('m', 'p', '4', 'v')
video = cv2.VideoWriter(output_path, fourcc, fps, (width, height))
for path in tqdm(paths):
img = cv2.imread(path,cv2.IMREAD_UNCHANGED)/SCALING_FACTOR
img = img / MAXIMUM_DEPTH_VALUE
img = img.reshape(HEIGHT,WIDTH,1)*255
img = img.astype(np.uint8)
if channels==1:
img = cv2.cvtColor(img,cv2.COLOR_GRAY2RGB)
video.write(img)
video.release()
paths = sorted(glob.glob(os.path.join(OUTPUT_DEPTH_MAPS_FOLDER,f"*.{EXTENSION}")))
create_video(paths,OUTPUT_VIDEO_PATH,FPS)
- Create TSDF Volume and extract mesh
# Retrieve GT poses
with open(TRAJECTORY_PATH) as f:
gt_json = json.load(f)
poses_gt = []
for param in tqdm(gt_json["parameters"]):
p = np.array(param["extrinsic"]).reshape((4,4)).T
poses_gt.append(p)
print(f"Retrieved {len(poses_gt)} poses for {TARGET_NAME}")
# Initialize TSDF Volume
VOLUME_SIZE = 0.5
N_VOXELS_PER_SIDE = 1440
SDF_TRUNC = 3 * VOLUME_SIZE / N_VOXELS_PER_SIDE
volume = o3d.pipelines.integration.ScalableTSDFVolume(
voxel_length= VOLUME_SIZE / N_VOXELS_PER_SIDE,
sdf_trunc=SDF_TRUNC,
color_type=o3d.pipelines.integration.TSDFVolumeColorType.RGB8
)
n = len(poses_gt)
assert len(depth_map_paths) == n
for i in tqdm(range(n)):
path = depth_map_paths[i]
depth = cv2.imread(path,cv2.IMREAD_UNCHANGED)
pose = poses_gt[i]
color = np.zeros((depth.shape[0], depth.shape[1], 3)).astype(np.uint8)
color[depth > 0.0 ] = 100
rgbd = o3d.geometry.RGBDImage.create_from_color_and_depth(
o3d.geometry.Image(color),
o3d.geometry.Image(depth),
depth_trunc=1.0,
convert_rgb_to_intensity=False,
depth_scale = 1.0
)
if USE_GT_POSES:
extr = pose
else:
extr = np.linalg.inv(pose)
volume.integrate(
rgbd,
INTRINSICS, # o3d.camera.PinholeCameraIntrinsic(o3d.camera.PinholeCameraIntrinsicParameters.PrimeSenseDefault),
extr,
)
old_depth = depth
# Extract mesh
output_mesh = volume.extract_triangle_mesh()
output_mesh.compute_vertex_normals()
o3d.io.write_triangle_mesh("output.ply", output_mesh)