I wanted to train ET-Track (a nice video object tracker), which is based on Ocean (another video object tracker).
The problem is that the authors never released training code. We now have our own code (adapted from Ocean's), but we have a big problem with it: there is a random shift when we crop the template/search images. Here is an example:
As you can see, the search2 image that the model will train on has a shift in it. I can't figure out why I have this shift. Would you help me, please? This is how those outputs are created:
import cv2
import numpy as np

template, search, out_label, reg_label, reg_weight, bbox = dataset[0]

# Draw the ground-truth box on the search image (CHW -> HWC uint8).
x1, y1, x2, y2 = map(int, bbox)
search = search.transpose((1, 2, 0)).astype(np.uint8)
search = cv2.rectangle(search, (x1, y1), (x2, y2), (200, 100, 150), 1)

# Upscale the 25x25 label maps to the search-image resolution for visualization.
reg_weight = cv2.cvtColor(reg_weight.astype(np.uint8), cv2.COLOR_GRAY2RGB)
reg_weight = cv2.resize(reg_weight, (search.shape[1], search.shape[0]))
out_label = cv2.cvtColor(out_label.astype(np.uint8) * 255, cv2.COLOR_GRAY2RGB)
out_label = cv2.resize(out_label, (search.shape[1], search.shape[0]))

# Mask the search image with the (0/1) regression weight and draw the box again.
search2 = cv2.rectangle(search * reg_weight, (x1, y1), (x2, y2), (200, 100, 150), 1)

cv2.imshow("search2", search2)
cv2.imshow("search", search)
cv2.imshow("out_label", out_label)
cv2.waitKey(0)
And this is my dataset's __getitem__:
if self.random_data:
    # choose two random frames for search and template
    template, search = self._get_pairs(index)

    template_image = cv2.imread(template[0].as_posix())
    search_image = cv2.imread(search[0].as_posix())

    # convert the bbox format and pick the first box
    template_target_bbox = self.yolo2ocean(template[1], template_image)
    search_target_bbox = self.yolo2ocean(search[1], search_image)

    _, template_image = crop_like_SiamFC(
        template_image,
        bbox=template_target_bbox,
        exemplar_size=self.template_size,
        instance_size=self.search_size,
    )
    _, search_image = crop_like_SiamFC(
        search_image,
        bbox=search_target_bbox,
        exemplar_size=self.template_size,
        instance_size=self.search_size + self.search_margin,
    )

    template_box = self._toBBox(template_image, template_target_bbox)
    search_box = self._toBBox(search_image, search_target_bbox)

    template, _, _ = self._augmentation(
        template_image, template_box, self.template_size
    )
    search, bbox, dag_param = self._augmentation(
        search_image, search_box, self.search_size, search=True
    )
    # No augmentation is applied: I have turned all of it off!

    # from PIL Image to numpy
    template = np.array(template)
    search = np.array(search)

    out_label = self._dynamic_label([self.size, self.size], dag_param.shift)
    reg_label, reg_weight = self.reg_label(bbox)
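A quick check right after those two _augmentation calls should rule the augmentation in or out (a sketch; it assumes dag_param.shift is the (x, y) offset that _augmentation applied):

# With all augmentation turned off, the applied shift should be (0, 0)
# and the bbox should stay centered in the search crop.
# NOTE: assumes dag_param.shift is the offset _augmentation applied.
print("augmentation shift:", dag_param.shift)
cx = (bbox[0] + bbox[2]) / 2
cy = (bbox[1] + bbox[3]) / 2
print("bbox center:", (cx, cy), "expected:", self.search_size // 2)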
I think self.reg_label is the actual problem, but I don't know why. This is the function:
def reg_label(self, bbox):
    """
    generate regression label
    :param bbox: [x1, y1, x2, y2]
    :return: [l, t, r, b]
    """
    x1, y1, x2, y2 = bbox
    l = self.grid_to_search_x - x1  # [25, 25]
    t = self.grid_to_search_y - y1
    r = x2 - self.grid_to_search_x
    b = y2 - self.grid_to_search_y
    l, t, r, b = map(lambda x: np.expand_dims(x, axis=-1), [l, t, r, b])
    reg_label = np.concatenate((l, t, r, b), axis=-1)  # [25, 25, 4]
    reg_label_min = np.min(reg_label, axis=-1)
    inds_nonzero = (reg_label_min > 0).astype(float)
    return reg_label, inds_nonzero
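As far as I understand it, inds_nonzero marks exactly the grid positions that fall strictly inside the bbox, so for a box centered in the search image the ones should form a centered block. Here is a standalone check of that math (a sketch, assuming size=25, stride=8, and search_size=255, matching grids() below):

import numpy as np

size, stride, search_size = 25, 8, 255  # assumed values, matching grids()
half = size // 2
x, y = np.meshgrid(np.arange(size) - half, np.arange(size) - half)
gx = x * stride + search_size // 2
gy = y * stride + search_size // 2

# A 64x64 box exactly centered on the grid center (127, 127):
x1, y1, x2, y2 = 95, 95, 159, 159
inside = (gx > x1) & (gx < x2) & (gy > y1) & (gy < y2)
print(inside.astype(int))  # should print a centered 7x7 block of ones

And this is grids() itself: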
def grids(self):
    """
    each element of feature map on input search image
    :return: H*W*2 (position for each element)
    """
    sz = self.size  # 25
    sz_x = sz // 2
    sz_y = sz // 2
    x, y = np.meshgrid(
        np.arange(0, sz) - np.floor(float(sz_x)),
        np.arange(0, sz) - np.floor(float(sz_y)),
    )
    self.grid_to_search = {}
    self.stride = 8
    self.grid_to_search_x = x * self.stride + self.search_size // 2
    self.grid_to_search_y = y * self.stride + self.search_size // 2
Any idea would be helpful. Thanks a lot!
