Tensorflow 2 Object Detection API Low mAP

962 Views Asked by At

I am trying to train a Faster R-CNN model using the TensorFlow 2 Object Detection API; however, I am getting an extremely low mAP of 0.01.

I have had a look at the training images in TensorBoard, and either they are not being loaded correctly or I have done something wrong in the configuration file. I am following the RoboFlow tutorial using the Hardhat sample dataset. This is my Colab notebook (https://colab.research.google.com/drive/1cjHpLYq8NAEce36mJGGg0Lec31wSdtF9?usp=sharing).

The top image shows an image that was used in the training dataset that has been loaded in Tensorboard and the image below that is the original.

Training Image loaded in tensorboard

Original Image in Roboflow Hard Hat Sample

I am completely new to this and I am unsure where I am going wrong. Below is the configuration file that I am using.

# Model definition: a two-stage Faster R-CNN detector.
model {
  faster_rcnn {
    # Number of object classes the second stage predicts.
    num_classes: 3
    image_resizer {
      # Aspect-ratio-preserving resize with both bounds set to 640 and padding
      # enabled, so non-square inputs are padded out to the max dimension.
      # NOTE(review): this padding may be why images shown in TensorBoard look
      # different from the originals - confirm before treating it as a bug.
      keep_aspect_ratio_resizer {
        min_dimension: 640
        max_dimension: 640
        pad_to_max_dimension: true
      }
    }
    feature_extractor {
      type: 'faster_rcnn_resnet101_keras'
      # NOTE(review): batch norm is trainable while train_config uses
      # batch_size: 1 - presumably fine on the original multi-replica setup,
      # but worth verifying for single-device training.
      batch_norm_trainable: true
    }
    # First-stage (proposal) anchors: 4 scales x 3 aspect ratios = 12 anchors
    # per grid location, with a stride of 16 in both directions.
    first_stage_anchor_generator {
      grid_anchor_generator {
        scales: [0.25, 0.5, 1.0, 2.0]
        aspect_ratios: [0.5, 1.0, 2.0]
        height_stride: 16
        width_stride: 16
      }
    }
    first_stage_box_predictor_conv_hyperparams {
      op: CONV
      regularizer {
        l2_regularizer {
          # A weight of 0.0 means no L2 penalty is applied here.
          weight: 0.0
        }
      }
      initializer {
        truncated_normal_initializer {
          stddev: 0.01
        }
      }
    }
    # Proposal NMS: a score threshold of 0.0 filters nothing by score; at most
    # 300 proposals are kept.
    first_stage_nms_score_threshold: 0.0
    first_stage_nms_iou_threshold: 0.7
    first_stage_max_proposals: 300
    # First-stage loss weighting: localization is weighted 2x objectness.
    first_stage_localization_loss_weight: 2.0
    first_stage_objectness_loss_weight: 1.0
    initial_crop_size: 14
    maxpool_kernel_size: 2
    maxpool_stride: 2
    second_stage_box_predictor {
      mask_rcnn_box_predictor {
        # Dropout is disabled (keep probability 1.0).
        use_dropout: false
        dropout_keep_probability: 1.0
        fc_hyperparams {
          op: FC
          regularizer {
            l2_regularizer {
              # A weight of 0.0 means no L2 penalty is applied here.
              weight: 0.0
            }
          }
          initializer {
            variance_scaling_initializer {
              factor: 1.0
              uniform: true
              mode: FAN_AVG
            }
          }
        }
        share_box_across_classes: true
      }
    }
    # Final detection post-processing: no score filtering (threshold 0.0),
    # NMS at IoU 0.6, up to 100 detections per class and 300 in total.
    second_stage_post_processing {
      batch_non_max_suppression {
        score_threshold: 0.0
        iou_threshold: 0.6
        max_detections_per_class: 100
        max_total_detections: 300
      }
      score_converter: SOFTMAX
    }
    # Second-stage loss weighting: localization is weighted 2x classification.
    second_stage_localization_loss_weight: 2.0
    second_stage_classification_loss_weight: 1.0
    # NOTE(review): the static-shape / matmul flags below look like settings
    # carried over from the "tpu-8" base config named in fine_tune_checkpoint;
    # confirm they are wanted when not training on a TPU.
    use_static_shapes: true
    use_matmul_crop_and_resize: true
    clip_anchors_to_image: true
    use_static_balanced_label_sampler: true
    use_matmul_gather_in_matcher: true
  }
}

# Training settings: batch size, step budget, optimizer / learning-rate
# schedule, fine-tuning checkpoint and data augmentation.
train_config: {
  batch_size: 1
  # NOTE(review): sync_replicas / replicas_to_aggregate: 8 appear to be
  # multi-replica settings inherited from the base config; verify whether they
  # have any effect in this single-notebook run.
  sync_replicas: true
  startup_delay_steps: 0
  replicas_to_aggregate: 8
  # Training stops after 2000 steps.
  num_steps: 2000
  optimizer {
    momentum_optimizer: {
      learning_rate: {
        # NOTE(review): warmup_steps (2000) equals num_steps (2000) and
        # total_steps is 25000, so training ends as the warmup finishes and
        # the cosine decay portion of the schedule is never reached.
        # NOTE(review): learning_rate_base .04 with batch_size: 1 was
        # presumably tuned for a larger batch - confirm it is appropriate.
        cosine_decay_learning_rate {
          learning_rate_base: .04
          total_steps: 25000
          warmup_learning_rate: .013333
          warmup_steps: 2000
        }
      }
      momentum_optimizer_value: 0.9
    }
    use_moving_average: false
  }
  # Initialize from a V2-format detection checkpoint of the COCO-pretrained
  # Faster R-CNN ResNet-101 640x640 model.
  fine_tune_checkpoint_version: V2
  fine_tune_checkpoint: "/content/models/research/deploy/faster_rcnn_resnet101_v1_640x640_coco17_tpu-8/checkpoint/ckpt-0"
  fine_tune_checkpoint_type: "detection"
  # The only augmentation configured is a random horizontal flip.
  data_augmentation_options {
    random_horizontal_flip {
    }
  }

  max_number_of_boxes: 100
  unpad_groundtruth_tensors: false
  # NOTE(review): per the inline comment this is TPU-only, yet it is enabled;
  # confirm the hardware this notebook actually trains on.
  use_bfloat16: true  # works only on TPUs
}

# Training data: label map and TFRecord file under /content/train.
train_input_reader: {
  label_map_path: "/content/train/Workers_label_map.pbtxt"
  tf_record_input_reader {
    input_path: "/content/train/Workers.tfrecord"
  }
}

# Evaluation settings: COCO detection metrics (the source of the reported mAP),
# evaluated one image per batch without moving-average weights.
eval_config: {
  metrics_set: "coco_detection_metrics"
  use_moving_averages: false
  # NOTE(review): the trailing ';' is unlike every other field in this file;
  # protobuf text format is expected to accept it as a separator, but confirm.
  batch_size: 1;
}

# Evaluation data: a single unshuffled pass over the validation TFRecord.
eval_input_reader: {
  # Reuses the label map stored under /content/train, so training and
  # evaluation share the same class-id mapping.
  label_map_path: "/content/train/Workers_label_map.pbtxt"
  shuffle: false
  num_epochs: 1
  tf_record_input_reader {
    input_path: "/content/valid/Workers.tfrecord"
  }
}

Thank you in advance !

1

There is 1 best solution below

0
On

Looking at your training output, it looks like you should try one of a few things:

  1. The dataset you used was only 100 images. Try increasing its size via augmentation (or, in this case, using the full dataset instead, which has over 7,000 images).
  2. Each epoch is taking less than a second so your total training time was less than 5 minutes. You could try boosting num_steps from 2,000 to 20,000 or 100,000. From prior experience, these TF2 models tend to take quite a while to converge.
  3. Try a different model (I've seen YOLOv5 and YOLOv4 converge much more quickly on smaller datasets due to their built-in augmentations).