Problem with creating dataset for visual object tracker


I want to train ET-Track (a nice video object tracker), which is based on Ocean (another video object tracker).

The problem is that the authors did not share any training code. We now have training code (adapted from the Ocean code), but we have a big problem with it: we get a random shift when cropping the images for training. This is an example:

crop results

As you can see, the search2 image, which is what the model will actually train on, has a shift in it. I can't figure out why I have this shift. Could you help me, please?

    import cv2
    import numpy as np

    # Grab one sample from the dataset
    (template, search, out_label, reg_label, reg_weight, bbox) = dataset[0]

    # CHW -> HWC uint8 so OpenCV can draw on it
    search = search.transpose((1, 2, 0)).astype(np.uint8)
    x1, y1, x2, y2 = map(int, bbox)
    search = cv2.rectangle(search, (x1, y1), (x2, y2), (200, 100, 150), 1)

    # Upsample the 25x25 label/weight maps to the search-image resolution for display
    reg_weight = cv2.cvtColor(reg_weight.astype(np.uint8), cv2.COLOR_GRAY2RGB)
    reg_weight = cv2.resize(reg_weight, (search.shape[1], search.shape[0]))
    out_label = cv2.cvtColor(out_label.astype(np.uint8) * 255, cv2.COLOR_GRAY2RGB)
    out_label = cv2.resize(out_label, (search.shape[1], search.shape[0]))

    # Mask the search image with the (resized) regression weight and draw the bbox again
    search2 = cv2.rectangle(search * reg_weight, (x1, y1), (x2, y2), (200, 100, 150))
    cv2.imshow("search2", search2)
    cv2.imshow("search", search)
    cv2.imshow("out_label", out_label)
    cv2.waitKey(0)

This is how these outputs are created.
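
For reference, here is a sketch of an alternative way to visualize reg_weight, plotting each positive grid cell at its own pixel coordinate instead of resizing the whole 25x25 map. reg_weight_raw is just a placeholder name for the raw 25x25 map returned by the dataset, before the cvtColor/resize above, and it assumes grids() (shown further down) has already populated grid_to_search_x / grid_to_search_y on the dataset:

    # Sketch: mark each positive cell of the raw 25x25 weight map at the pixel
    # position given by grid_to_search_x / grid_to_search_y (assumed to exist
    # on the dataset after grids() has run). reg_weight_raw is a placeholder name.
    overlay = search.copy()
    ys, xs = np.nonzero(reg_weight_raw)
    for gy, gx in zip(ys, xs):
        cx = int(dataset.grid_to_search_x[gy, gx])
        cy = int(dataset.grid_to_search_y[gy, gx])
        cv2.circle(overlay, (cx, cy), 2, (0, 255, 0), -1)
    cv2.imshow("reg_weight at grid coords", overlay)
    cv2.waitKey(0)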

And this is my dataset's __getitem__:

        if self.random_data:
            # choose two random images for search and template
            template, search = self._get_pairs(index)
        
        template_image = cv2.imread(template[0].as_posix())
        search_image = cv2.imread(search[0].as_posix())
        # convert the bbox format and pick the first one
        template_target_bbox = self.yolo2ocean(template[1], template_image)
        search_target_bbox = self.yolo2ocean(search[1], search_image)
        _, template_image = crop_like_SiamFC(
            template_image,
            bbox=template_target_bbox,
            exemplar_size=self.template_size,
            instance_size=self.search_size,
        )
        _, search_image = crop_like_SiamFC(
            search_image,
            bbox=search_target_bbox,
            exemplar_size=self.template_size,
            instance_size=self.search_size + self.search_margin,
        )
        template_box = self._toBBox(template_image, template_target_bbox)
        search_box = self._toBBox(search_image, search_target_bbox)

        template, _, _ = self._augmentation(
            template_image, template_box, self.template_size
        )
        search, bbox, dag_param = self._augmentation(
            search_image, search_box, self.search_size, search=True
        )
        # Note: I have turned off all augmentations!
        # from PIL image to numpy
        template = np.array(template)
        search = np.array(search)
        out_label = self._dynamic_label([self.size, self.size], dag_param.shift)

        reg_label, reg_weight = self.reg_label(bbox)
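
To narrow down where the shift comes from, this is a debugging sketch that could go right after the crop inside __getitem__, drawing the converted box on the cropped search image before any label generation. It assumes search_box from _toBBox is [x1, y1, x2, y2] in the pixel coordinates of the cropped search_image, which I have not verified:

        # Debug sketch: check whether the shift already exists right after
        # crop_like_SiamFC / _toBBox, before augmentation and label generation.
        # Assumes search_box is [x1, y1, x2, y2] in cropped-image coordinates.
        dbg = search_image.copy()
        bx1, by1, bx2, by2 = map(int, search_box)
        cv2.rectangle(dbg, (bx1, by1), (bx2, by2), (0, 255, 0), 1)
        cv2.imshow("cropped search + search_box", dbg)
        cv2.waitKey(0)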

I think self.reg_label is the actual problem, but I don't know why. This is the function:

    def reg_label(self, bbox):
        """
        generate regression label
        :param bbox: [x1, y1, x2, y2]
        :return: [l, t, r, b]
        """
        x1, y1, x2, y2 = bbox
        l = self.grid_to_search_x - x1  # [25, 25]
        t = self.grid_to_search_y - y1
        r = x2 - self.grid_to_search_x
        b = y2 - self.grid_to_search_y
        l, t, r, b = map(lambda x: np.expand_dims(x, axis=-1), [l, t, r, b])

        reg_label = np.concatenate((l, t, r, b), axis=-1)  # [25, 25, 4]
        reg_label_min = np.min(reg_label, axis=-1)
        inds_nonzero = (reg_label_min > 0).astype(float)

        return reg_label, inds_nonzero

    def grids(self):
        """
        each element of feature map on input search image
        :return: H*W*2 (position for each element)
        """
        sz = self.size #25

        sz_x = sz // 2
        sz_y = sz // 2

        x, y = np.meshgrid(
            np.arange(0, sz) - np.floor(float(sz_x)),
            np.arange(0, sz) - np.floor(float(sz_y)),
        )
        self.grid_to_search = {}
        self.stride = 8
        self.grid_to_search_x = x * self.stride + self.search_size // 2
        self.grid_to_search_y = y * self.stride + self.search_size // 2
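
One thing I noticed while reading grids(): with sz = 25 and stride = 8 the grid centres only cover the middle (25 - 1) * 8 = 192 pixels around the centre of the search patch, so resizing the 25x25 maps to the full search resolution (as in my visualization above) stretches them beyond the region the grid actually covers. I am not sure whether that is related to the shift, but here is a tiny check of the coordinate range (search_size = 255 is only an assumption here, the Ocean default; substitute the real value):

    import numpy as np

    # Pixel coordinates of the 25x25 grid centres on the search patch.
    # search_size = 255 is an assumption (the Ocean default); use your own value.
    sz, stride, search_size = 25, 8, 255
    coords = (np.arange(sz) - sz // 2) * stride + search_size // 2
    print(coords.min(), coords.max())  # 31 223 -> the grid spans 192 px, not the full patch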

Any ideas would be helpful. Thanks a lot.
