AssertionError: Results do not correspond to current coco set - wrong types and sizes


I am struggling to get the torchvision MaskRCNN tutorial working with my own use case. I have images with [0, N] instances of each of 3 classes, and I am attempting to identify and classify each segment. This breaks on the evaluation step, at this assert in pycocotools/coco.py:

assert set(annsImgIds) == (set(annsImgIds) & set(self.getImgIds())), \
               'Results do not correspond to current coco set'
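
For context, this assert lives inside COCO.loadRes, and the ids on the left-hand side are collected from the "image_id" field of each result right before the check, so whatever the evaluation code passes along as an image id ends up in this set comparison (abridged from the same file):

# inside COCO.loadRes, pycocotools/coco.py (abridged)
annsImgIds = [ann['image_id'] for ann in anns]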

When I debugged it, I found that the two sets were not only drastically different in size but also different types:

>>> annsImgIds
[tensor([344])]
>>> self.getImgIds()
{0, 1, 2, 3, 4, 5, 6, ... }  # len: 1633, the length of my dataset

I'm sure this is a simple error in the DataLoader, but for the life of me I can't find it and could use help debugging the types and sizes. Other posts about this issue, here and on GitHub, often point to saving JSON result files to the same location, but I'm not doing that (or at least I'm not aware that I am).

Here is how I am loading the data:

ds_train = MyDataset(
    data_dir="train/",
    annotations_path="annotations.json",
    transforms=get_transform(train=True),
)
ds_test = MyDataset(
    data_dir="train/",
    annotations_path="annotations.json",
    transforms=get_transform(train=False),
)

dataset_size = len(ds_train)
indices = torch.randperm(dataset_size).tolist()
# 80-20 split
dataset = torch.utils.data.Subset(ds_train, indices[: -int(dataset_size * 0.2)])
dataset_test = torch.utils.data.Subset(
    ds_test, indices[-int(dataset_size * 0.2) :]
)
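
Using the dataset length from the debug output above (1633 images), the arithmetic of this split works out as:

dataset_size = 1633
n_test = int(dataset_size * 0.2)   # 326 images held out for evaluation
n_train = dataset_size - n_test    # 1307 images used for training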

data_loader = DataLoader(
    dataset,
    batch_size=2,
    shuffle=True,
    num_workers=1,
    collate_fn=my_collate,
    pin_memory=True,
)
data_loader_test = DataLoader(
    dataset_test,
    batch_size=1,
    shuffle=True,
    num_workers=1,
    collate_fn=my_collate,
    pin_memory=True,
)

Here is my collate function:

def my_collate(batch):
    data = [item[0] for item in batch]
    target = [item[1] for item in batch]
    data = torch.stack(data)
    # target = torch.LongTensor(target)  # threw errors until I removed it

    return [data, target]
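
For comparison, the collate function shipped with the torchvision detection tutorial (references/detection/utils.py) does not stack the images at all; it just regroups the batch into tuples:

def collate_fn(batch):
    # images become a tuple of tensors, targets a tuple of dicts
    return tuple(zip(*batch))

Since torch.stack requires every image in the batch to have the same shape, the tuple version is the safer default.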

and the MyDataset class:

class MyDataset(Dataset):
    def __init__(self, data_dir: str, annotations_path: str, transforms=None) -> None:
        super().__init__()
        self.data_dir = data_dir
        self.annotations = self._extract_annotations(annotations_path)
        self.transforms = transforms
        self._labels = {
            "cat": 0,
            "dog": 1,
            "unsure": 2,
        }
        self.image_list = [
            f for f in os.listdir(data_dir) if f[:-4] in self.annotations
        ]  # might need to filter this down

    def _calc_area(self, box):
        return (box[2] - box[0]) * (box[3] - box[1])

    def _extract_annotations(self, fp) -> Dict[str, Any]:
        # the annotations file is newline-delimited JSON: one record per line
        annotations = {}
        with open(fp) as polygon:
            lines = polygon.read().split("\n")
        for i, row in enumerate(lines):
            try:
                r = json.loads(row)
                annotations[r["id"]] = r["annotations"]
            except json.JSONDecodeError as jde:
                print(i, jde, row)
        return annotations

    def _get_bbox(self, coords: np.ndarray):
        xmin = int(np.min(coords[:, 1]))
        ymin = int(np.min(coords[:, 0]))
        xmax = int(np.max(coords[:, 1]))
        ymax = int(np.max(coords[:, 0]))
        return [xmin, ymin, xmax, ymax]

    def __len__(self):
        return len(self.image_list)

    def _valid_labels(self, labels):
        try:
            torch.where(labels > 0)[0]
            return True
        except Exception:
            return False

    def _convert_labels(self, labels: np.ndarray):
        """Convert labels to be from 0-2"""
        labels_ = labels.copy()

        if len(np.unique(labels)) == 1:
            labels_ = np.zeros(labels.shape)

        elif np.min(labels) == 2:
            labels_ = labels - 2

        elif np.min(labels) == 1:
            labels_ = labels - 1

        elif np.min(labels) == 0 and len(np.where(labels == 1)[0]) == 0:
            labels[np.where(labels == 2)[0]] = 1
            labels_ = labels

        assert len(np.unique(labels_)) - 1 == np.max(labels_)

        return labels_

    def __getitem__(self, index) -> Any:
        img_name = self.image_list[index]
        image_path = os.path.join(self.data_dir, img_name)
        image = Image.open(image_path)

        annotations = self.annotations[img_name[:-4]]
        num_objs = len(annotations)

        # create the masks, shape (num_objs, 512, 512)
        masks = np.zeros((num_objs, 512, 512), dtype=np.uint8)
        boxes = [None] * num_objs
        areas = [None] * num_objs
        labels = []

        # for each mask, add labels, boxes
        for i in range(num_objs):
            l_type = annotations[i]["type"]
            label_color = self._labels[l_type]
            coords = np.array(annotations[i]["coordinates"])[0]

            # set the mask coordinates equal to the label color
            m = np.zeros((512, 512))
            m[coords[:, 1], coords[:, 0]] = label_color
            cv2.fillPoly(m, pts=[coords], color=label_color)
            masks[i, :, :] = m

            # update the label
            labels.append(label_color)

            # create the bounding boxes
            bbox = self._get_bbox(coords)
            areas[i] = self._calc_area(bbox)
            boxes[i] = bbox

        labels = self._convert_labels(np.array(labels))

        # labels = np.array(list(np.unique(labels)))

        target = {}
        target["boxes"] = torch.as_tensor(boxes, dtype=torch.float32)
        target["area"] = torch.as_tensor(areas, dtype=torch.float32)
        target["labels"] = torch.as_tensor(labels, dtype=torch.int64) - 1
        target["masks"] = torch.as_tensor(masks, dtype=torch.uint8)
        assert target["masks"].shape[0] == num_objs
        target["image_id"] = torch.tensor([index])

        if self.transforms is not None:
            image, target = self.transforms(image, target)
        else:
            image = PILToTensor()(image)

        return image, target
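
As a side note, here is how the _convert_labels remapping above behaves on a few sample inputs. This is a standalone copy of the method, purely for illustration:

import numpy as np

def convert_labels(labels):
    # standalone copy of MyDataset._convert_labels, for illustration only
    labels_ = labels.copy()
    if len(np.unique(labels)) == 1:
        labels_ = np.zeros(labels.shape)
    elif np.min(labels) == 2:
        labels_ = labels - 2
    elif np.min(labels) == 1:
        labels_ = labels - 1
    elif np.min(labels) == 0 and len(np.where(labels == 1)[0]) == 0:
        labels[np.where(labels == 2)[0]] = 1
        labels_ = labels
    assert len(np.unique(labels_)) - 1 == np.max(labels_)
    return labels_

print(convert_labels(np.array([2, 2])))     # [0. 0.] -- a single class collapses to 0
print(convert_labels(np.array([1, 2, 1])))  # [0 1 0] -- shifted down by 1
print(convert_labels(np.array([0, 2, 2])))  # [0 1 1] -- 2 remapped to 1, since no 1s are present
print(convert_labels(np.array([0, 1, 2])))  # [0 1 2] -- already dense, unchanged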

Training Loop:

running_loss = 0
num_epochs = 10

for epoch in range(num_epochs):
    # train for one epoch, printing every 10 iterations
    train_one_epoch(model, optimizer, data_loader, device, epoch, print_freq=10)
    # update the learning rate
    lr_scheduler.step()
    # evaluate on the test dataset
    evaluate(model, data_loader_test, device=device)

print("Complete!")

There are 3 answers below.

Answer 1:

I met a similar issue.

Just changing the line

target["image_id"] = torch.tensor([index])

to

target["image_id"] = index

fixed the problem for me.

Answer 2:

Did you skip some of the samples in the original data file? Make sure the number of samples is consistent with the total number of image_id values. I encountered the same bug, and this fixed it for me.

Answer 3:

While following the tutorial and adapting the code to my dataset, I encountered the same assertion error.

The immediate cause of this error is that the intersection between annsImgIds and the result of self.getImgIds() is empty. This is because the image ID in annsImgIds is represented as a tensor:

import torch

# id tensor of a single image (batch_size=1) from the test dataloader
image_batch_ids = set(torch.tensor([344]))
image_dataset_ids = set(range(0, 1633))  # all ids of the dataset
intersection = image_batch_ids & image_dataset_ids
print(intersection)  # empty set
print(image_batch_ids == intersection)  # False

image_batch_ids = set([torch.tensor([344]).item()])  # convert the tensor to a plain Python number
intersection = image_batch_ids & image_dataset_ids
print(intersection)  # {344}
print(image_batch_ids == intersection)  # True

The assert statement checks whether the IDs from the predictions are contained within the IDs of the entire dataset.

In the engine.py script, within the evaluate function, a possible solution is to convert the ID to a number using the item() method:

res = {target["image_id"].item(): output for target, output in zip(targets, outputs)}

This approach worked for me, although I'm unsure whether the change has any unintended side effects. There may be a better way to handle this particular error, e.g. changing the image_id in your dataset class, but that might give rise to other errors.