I am working with a large dataset of videos that have been preprocessed and converted into frames, so I have the following folder tree:
- data
  - training_set
    - class1
      - video1
        - Frame0.png, ..., FrameN.png
      - videoN
        - Frame0.png, ..., FrameN.png
    - class2
      - video1
        - Frame0.png, ..., FrameN.png
      - videoN
        - Frame0.png, ..., FrameN.png
  - validation_set
    - class1
      - video1
        - Frame0.png, ..., FrameN.png
      - videoN
        - Frame0.png, ..., FrameN.png
    - class2
      - video1
        - Frame0.png, ..., FrameN.png
      - videoN
        - Frame0.png, ..., FrameN.png
I need to load all of this data: every time I read a video from the class1 folder I append a 0 to the labels list, and every time I read a video from the class2 folder I append a 1.
Since there are thousands of videos, I would like to use multiprocessing, but I don't understand how to do that while keeping the videos and their labels aligned.
def _frame_sort_key(file_name):
    """Sort key that orders frame files by the number embedded in their name.

    Plain string sorting would put "Frame10.png" before "Frame2.png"; comparing
    the digits as an integer keeps the frames in numeric order. Names without
    any digit sort first, and the name itself breaks ties.
    """
    stem = os.path.splitext(file_name)[0]
    digits = "".join(ch for ch in stem if ch.isdigit())
    return (int(digits) if digits else -1, file_name)


def _load_video_frames(video_path, frames_wanted, image_size):
    """Load exactly `frames_wanted` resized frames from one video folder.

    Frames are taken at a regular interval across the (numerically sorted)
    folder content; if fewer than `frames_wanted` frames are available the
    last loaded frame is repeated to pad the list.

    Args:
        video_path: directory containing the frame images of one video.
        frames_wanted: number of frames to return; must be positive
            (a value of 0 raises ZeroDivisionError, as before).
        image_size: side length passed to cv2.resize as (image_size, image_size).

    Returns:
        A list of `frames_wanted` images, or None when no frame in the folder
        could be read (empty folder or only unreadable files).
    """
    # os.listdir returns entries in arbitrary order, so sort before sampling.
    frame_names = sorted(os.listdir(video_path), key=_frame_sort_key)
    step = len(frame_names) // frames_wanted
    if step > 0:
        frame_names = frame_names[::step]

    frames = []
    for frame_name in frame_names:
        if len(frames) >= frames_wanted:
            break
        image = cv2.imread(os.path.join(video_path, frame_name))
        if image is None:
            # cv2.imread signals an unreadable file by returning None.
            continue
        frames.append(cv2.resize(image, (image_size, image_size)))

    if not frames:
        # Nothing to pad from; let the caller skip this video.
        return None
    while len(frames) < frames_wanted:
        frames.append(frames[-1])
    return frames


sets = [TRAINING_DIR, VALIDATION_DIR]
for dataset in sets:
    # Choose the destination lists once per dataset; a video and its label are
    # always appended together, so the two lists stay aligned by position.
    if dataset == TRAINING_DIR:
        target_videos, target_labels = videos, labels
    else:
        target_videos, target_labels = validation_videos, validation_labels

    for folder in classes:
        subfolder = os.path.join(dataset, folder)
        # List the directory once, in a deterministic order.
        video_folder_names = sorted(os.listdir(subfolder))
        if max_videos > -1:
            video_folder_names = video_folder_names[:max_videos]
        label = 0 if "class1" in folder else 1

        bar = ChargingBar('Loading ' + subfolder, max=len(video_folder_names))
        for video_folder_name in video_folder_names:
            video_path = os.path.join(subfolder, video_folder_name)
            frames = _load_video_frames(video_path, min_video_frames, IMAGE_SIZE)
            if frames is not None:
                target_videos.append(frames)
                target_labels.append(label)
            bar.next()
        bar.finish()