I'm trying to load my custom data for this tutorial. Basically, it's an object detector built with RetinaNet. The problem starts with the format of the data. The tutorial states:
KerasCV has a predefined specification for bounding boxes. To comply with this, you should package your bounding boxes into a dictionary matching the specification below:
bounding_boxes = {
# num_boxes may be a Ragged dimension
'boxes': Tensor(shape=[batch, num_boxes, 4]),
'classes': Tensor(shape=[batch, num_boxes])}
My data consists of images and the .xml
files that contain the bounding-box coordinates. I put the bounding boxes in the format recommended by the tutorial (xywh). The problem is how to attach the class to each box.
My code so far:
import tensorflow as tf
import xml.etree.ElementTree as et
import os
import numpy as np
# Root folder that list_creation() walks for .png images and .xml annotations.
img_path = '/home/joaquin/TFM/Doom_KerasCV/IA_training_data_reduced_640/'

# Module-level accumulators; the functions defined in this file append to
# them in place and also return them.
img_list, xml_list, box_list, img_norm = [], [], [], []

# Kept for compatibility. box_extraction() assigns its own local `box_dict`,
# so this module-level dict is not modified by the code in this file.
box_dict = {}
def list_creation(img_path):
    """Collect the image and annotation file paths found under *img_path*.

    Walks the directory tree rooted at ``img_path`` and appends every file
    ending in ``.png`` to the module-level ``img_list`` and every file ending
    in ``.xml`` to the module-level ``xml_list``. Both lists are then sorted
    in place.

    Args:
        img_path: Root directory to search recursively.

    Returns:
        The tuple ``(img_list, xml_list)`` of the module-level lists.
    """
    for subdir, _dirs, files in os.walk(img_path):
        for file in files:
            # os.path.join avoids the doubled separator that string
            # concatenation produces when the root already ends with "/".
            full_path = os.path.join(subdir, file)
            if file.endswith('.png'):
                img_list.append(full_path)
            elif file.endswith('.xml'):
                xml_list.append(full_path)
    # Sort once after the walk instead of re-sorting after every append.
    img_list.sort()
    xml_list.sort()
    return img_list, xml_list
def box_extraction(xml_list, class_mapping=None):
    """Parse XML annotation files into per-box dictionaries.

    Every ``<object>`` element found in each file contributes one entry of
    the form ``{'boxes': np.array([x, y, width, height]), 'classes': label}``
    which is appended to the module-level ``box_list``.

    Args:
        xml_list: Iterable of paths to XML files whose ``<object>`` elements
            contain ``<name>`` and ``<bndbox>`` (``xmin``/``ymin``/``xmax``/
            ``ymax``) children.
        class_mapping: Optional mapping from the ``<name>`` text to the value
            stored under ``'classes'`` (for example an integer class id).
            When ``None`` (the default) the raw label text is stored.

    Returns:
        The module-level ``box_list`` with the new entries appended.

    Raises:
        KeyError: If ``class_mapping`` is given and a label is missing from it.
    """
    # NOTE(review): entries are per box, not per image. Pairing them with one
    # image each (e.g. via Dataset.zip) would need grouping per file -- confirm
    # what the consumer expects.
    for element in xml_list:
        root = et.parse(element)
        for box in root.findall('.//object'):
            label = box.find('name').text
            if class_mapping is not None:
                label = class_mapping[label]
            xmin = int(box.find('./bndbox/xmin').text)
            ymin = int(box.find('./bndbox/ymin').text)
            xmax = int(box.find('./bndbox/xmax').text)
            ymax = int(box.find('./bndbox/ymax').text)
            width = xmax - xmin
            height = ymax - ymin
            # xywh is anchored at the top-left corner, so x pairs with ymin
            # (the previous code stored ymax here).
            data = np.array([xmin, ymin, width, height])
            box_list.append({'boxes': data, 'classes': label})
    return box_list
# Fill the module-level img_list / xml_list with the file paths.
list_creation(img_path)

# from_tensor_slices slices along the first axis of each tensor in a nested
# structure; a Python *list of dicts* is treated as one value to convert and
# cannot become a tensor. Transpose the entries into a *dict of arrays* so
# each dataset element is {'boxes': (4,), 'classes': scalar}.
box_entries = box_extraction(xml_list)
boxes_dataset = tf.data.Dataset.from_tensor_slices({
    # reshape(-1, 4) keeps a valid (0, 4) shape when no boxes were found.
    'boxes': np.array(
        [entry['boxes'] for entry in box_entries], dtype=np.float32
    ).reshape(-1, 4),
    'classes': [entry['classes'] for entry in box_entries],
})
def loader(img_list):
    """Load and standardize every image listed in *img_list*.

    Each path is opened with ``tf.keras.utils.load_img``, passed through
    ``tf.image.per_image_standardization`` and converted with
    ``tf.keras.utils.img_to_array``; the result is appended to the
    module-level ``img_norm`` list.

    Args:
        img_list: Iterable of image file paths.

    Returns:
        The module-level ``img_norm`` list.
    """
    for path in img_list:
        loaded = tf.keras.utils.load_img(path)
        # per_image_standardization rescales each image to zero mean and unit
        # variance (per the TensorFlow docs); it does not map pixels to [0, 1].
        standardized = tf.image.per_image_standardization(loaded)
        img_norm.append(tf.keras.utils.img_to_array(standardized))
    return img_norm
# Turn the loaded images into a dataset with one element per image.
# NOTE(review): presumably all images share the same shape, otherwise they
# cannot be stacked into a single tensor -- confirm.
normalized_images = loader(img_list)
img_dataset = tf.data.Dataset.from_tensor_slices(normalized_images)

# Pair images with annotations positionally.
# NOTE(review): zip matches elements by index, so boxes_dataset should hold
# exactly one element per image -- confirm against box_extraction().
dataset = tf.data.Dataset.zip((img_dataset, boxes_dataset))
def get_dataset_partitions_tf(ds, ds_size, train_split=0.8, val_split=0.1, test_split=0.1, shuffle=True, shuffle_size=10):
    """Split *ds* into train, validation and test partitions.

    Args:
        ds: Dataset object exposing ``shuffle``, ``take`` and ``skip``.
        ds_size: Number of elements in ``ds``.
        train_split: Fraction of elements for the training partition.
        val_split: Fraction of elements for the validation partition.
        test_split: Fraction for the test partition; it is only used in the
            sanity check, the test partition receives whatever remains after
            the training and validation elements.
        shuffle: Whether to shuffle ``ds`` before splitting.
        shuffle_size: Buffer size passed to ``ds.shuffle``.

    Returns:
        The tuple ``(train_ds, val_ds, test_ds)``.

    Raises:
        AssertionError: If the three fractions do not sum to 1.
    """
    import math

    # Compare with a tolerance: sums such as 0.7 + 0.1 + 0.2 are not exactly
    # 1.0 in binary floating point.
    assert math.isclose(train_split + test_split + val_split, 1.0), \
        "train_split, val_split and test_split must sum to 1"
    if shuffle:
        # Without reshuffle_each_iteration=False the order changes on every
        # pass over the data, so take/skip would select different elements
        # each epoch and the three partitions could overlap.
        ds = ds.shuffle(shuffle_size, seed=12, reshuffle_each_iteration=False)
    train_size = int(train_split * ds_size)
    val_size = int(val_split * ds_size)
    train_ds = ds.take(train_size)
    val_ds = ds.skip(train_size).take(val_size)
    test_ds = ds.skip(train_size).skip(val_size)
    return train_ds, val_ds, test_ds
# Split the zipped dataset using the function's default fractions; len() is
# used to supply the element count.
train, validation, test = get_dataset_partitions_tf(dataset, ds_size=len(dataset))
My main concern is how to convert the dict to a valid tensor. Thanks in advance.