I'm trying to build a data pipeline so that I can use my own custom data with this tutorial. My code so far is:
import random
import enum
import numpy as np
import cv2 as cv
import imageio as iio
import matplotlib.pyplot as plt
import xml.etree.ElementTree as et
import os
import tensorflow as tf
import tensorflow.keras.layers as layers
import tensorflow_datasets as tfds
import scipy.io
from tensorflow import keras
# Root directory that is searched for the training images (.png) and their annotations (.xml).
img_path = '/home/joaquin/TFM/Doom_KerasCV/IA_training_data_reduced_640/'
# Image and annotation file paths; populated through list_creation.
img_list = []
xml_list = []
# NOTE(review): presumably the side length in pixels of the (square) images;
# it is not referenced by the code shown here, so confirm before relying on it.
image_size = 640
def list_creation(img_path):
    """Collect the image and annotation file paths found under ``img_path``.

    The directory tree is walked recursively and every file is classified by
    its extension: ``.png`` files are images, ``.xml`` files are annotations,
    anything else is ignored.

    Args:
        img_path: Root directory to explore.

    Returns:
        A ``(img_list, xml_list)`` tuple with the full paths of the images
        and of the annotations, each list sorted lexicographically.
    """
    # Fresh local lists instead of appending to module-level ones, so calling
    # the function more than once never duplicates entries.
    found_images = []
    found_xmls = []
    # Explore the subdirectories and split the files into .png and .xml.
    for subdir, _dirs, files in os.walk(img_path):
        for file in files:
            if file.endswith('.png'):
                found_images.append(os.path.join(subdir, file))
            elif file.endswith('.xml'):
                found_xmls.append(os.path.join(subdir, file))
    # Sort once at the end rather than after every single append.
    found_images.sort()
    found_xmls.sort()
    return found_images, found_xmls
# Gather the sorted .png and .xml paths found under img_path.
img_list, xml_list = list_creation(img_path)
def box_extraction(xml_list):
    """Read bounding boxes and class labels from XML annotation files.

    Every ``<object>`` element of every file contributes one box and one
    label. Boxes from all files are appended to a single flat sequence, so
    row ``i`` of the result matches image ``i`` only when each file holds
    exactly one object.

    Args:
        xml_list: Iterable of paths to the ``.xml`` annotation files.

    Returns:
        A ``(boxes, labels)`` tuple. ``boxes`` is a ``tf.float32`` tensor
        with one ``[xmin, ymin, width, height]`` row per object; ``labels``
        is a list with the ``<name>`` text of each object, in the same order.
    """
    boxes = []
    labels = []
    for element in xml_list:
        root = et.parse(element)
        for box in root.findall('.//object'):
            # Keep this line in case the pipeline is extended to several
            # classes per image.
            label = box.find('name').text
            xmin = int(box.find('./bndbox/xmin').text)
            ymin = int(box.find('./bndbox/ymin').text)
            xmax = int(box.find('./bndbox/xmax').text)
            ymax = int(box.find('./bndbox/ymax').text)
            width = xmax - xmin
            height = ymax - ymin
            # An x/y/width/height box is anchored at its top-left corner,
            # i.e. (xmin, ymin); pairing ymax with the height would describe
            # a box that starts at the bottom edge of the object.
            data = np.array([xmin, ymin, width, height])
            labels.append(label)
            boxes.append(data)
    boxes = tf.cast(boxes, dtype=tf.float32)
    return boxes, labels
# The file lists were already built above, so only the annotations need to be
# parsed here; walking the directory again would be redundant work.
box_info, labels = box_extraction(xml_list)
def loader(img_list):
    """Load every image in ``img_list`` and stack them into one tensor.

    Each file is opened with ``tf.keras.utils.load_img``, passed through
    ``tf.image.per_image_standardization`` and converted to an array with
    ``tf.keras.utils.img_to_array``.

    NOTE(review): the original comment described this step as scaling the
    pixels to the [0, 1] range, but ``per_image_standardization`` rescales
    each image to zero mean and unit variance instead; confirm which of the
    two is actually wanted.

    Args:
        img_list: Iterable of image file paths.

    Returns:
        A ``tf.float32`` tensor built from the list of processed images.
    """
    processed = [
        tf.keras.utils.img_to_array(
            tf.image.per_image_standardization(
                tf.keras.utils.load_img(path)
            )
        )
        for path in img_list
    ]
    return tf.cast(processed, dtype=tf.float32)
images = loader(img_list)

# 80/20 split: the first 80 % of the samples go to training, the rest to
# testing. The cut index is computed separately for images and boxes.
image_cut = int(len(images) * 0.8)
box_cut = int(len(box_info) * 0.8)

x_train = images[:image_cut]
y_train = box_info[:box_cut]

x_test = images[image_cut:]
y_test = box_info[box_cut:]
In the tutorial mentioned above it says:
In KerasCV object detection tasks it is recommended that users use ragged batches. This is due to the fact that images may be of different sizes in PascalVOC and that there may be different numbers of bounding boxes per image. The easiest way to construct a ragged dataset in a tf.data pipeline is to use tf.data.experimental.dense_to_ragged_batch.
So basically I have two tensors: images, with shape [168, 640, 640, 3], containing the batch of images, and box_info, with shape [168, 4], containing the coordinates of the bounding boxes.
As far as I understand from the tutorial's code, I have to merge both tensors so that each image is paired with its bounding boxes. If I try:
# Fails: tf.concat requires all inputs to have the same rank, but images is
# 4-D ([168, 640, 640, 3]) while box_info is 2-D ([168, 4]).
c = tf.concat([images, box_info], axis=0)
I get this error:
InvalidArgumentError: {{function_node __wrapped__ConcatV2_N_2_device_/job:localhost/replica:0/task:0/device:GPU:0}} ConcatOp : Ranks of all input tensors should match: shape[0] = [168,640,640,3] vs. shape[1] = [168,4] [Op:ConcatV2] name: concat
So how can I concatenate them? I'm using TensorFlow 2.11.0.
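Meh, in this case you can't concatenate them.
In order to concatenate two tensors there are several rules you should follow: all tensors must have the same rank, and their shapes must match in every dimension except the one you concatenate along.
In this case you have one 4-D tensor and one 2-D tensor, so the ranks are not the same.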