Auto-scaling group based on SQS depth

119 Views Asked by At

I am writing Terraform code that does the following: it sets up an S3 bucket with S3 event notifications, plus a Lambda function that is triggered by those notifications. It then sends the messages to SQS. Finally, there is an Auto Scaling group that scales based on the SQS queue depth (using the formula number of messages / number of instances) with a cap of 5 instances (so there are never more than 5 instances running in the ASG). I created the auto-scaling policy for that and read that this type of policy can trigger itself and doesn't need an alarm (?). `terraform apply` runs successfully, but when I test in AWS by sending messages to my SQS queue, I don't see any instances being added to the ASG. What am I doing wrong / missing?

# SQS queue whose name is also used as the QueueName dimension of the
# scaling policy's queue-depth metric.
resource "aws_sqs_queue" "file_checker_sqs_queue" {
  name = var.sqs_queue_name

  # Message size limit and timing settings.
  max_message_size           = 256000
  message_retention_seconds  = 86400 # 24 hours
  visibility_timeout_seconds = 30
  receive_wait_time_seconds  = 10
}

 # Availability zones that are in the "available" state; the names feed the
 # Auto Scaling group's availability_zones argument.
 data "aws_availability_zones" "available" {
   # Only retrieve the main AZs, no local zones.
   filter {
     name   = "opt-in-status"
     values = ["opt-in-not-required"]
   }

   state = "available"
 }

 # Looks up the AMI for the instances by filtering on a fixed image ID.
 data "aws_ami" "audio_ami" {
   filter {
     name = "image-id"
     # NOTE(review): this ID contains a non-hexadecimal character ("y"), so it
     # was presumably altered for posting — confirm the real AMI ID.
     values = ["ami-067d1e60475437ey2"]
   }

   most_recent = true
 }

 # Launch configuration referenced by the Auto Scaling group.
 resource "aws_launch_configuration" "audio_asg_launch_config" {
   image_id      = data.aws_ami.audio_ami.id
   instance_type = "t2.micro"

   # A prefix (rather than a fixed name) lets Terraform generate a unique name
   # for each replacement.
   name_prefix = var.launch_config_name_prefix

   # Create the replacement before destroying the old configuration.
   lifecycle {
     create_before_destroy = true
   }
 }

 # Auto Scaling group sized between 0 and 5 instances, launched from the
 # launch configuration above.
 resource "aws_autoscaling_group" "audio_auto_scaling_group" {
   name                 = var.asg_name
   availability_zones   = data.aws_availability_zones.available.names
   launch_configuration = aws_launch_configuration.audio_asg_launch_config.name

   # Capacity bounds: may scale in to zero, never above five.
   min_size = 0
   max_size = 5

   health_check_type         = "EC2"
   health_check_grace_period = 300

   force_delete = true

   # NOTE(review): enabled_metrics is not set, so this group publishes no
   # group metrics (e.g. GroupInServiceInstances) to CloudWatch.
 }

 # Target-tracking policy that aims for a backlog of 1 visible SQS message
 # per in-service instance, computed with metric math (m1, m2 -> e1).
 resource "aws_autoscaling_policy" "asg_audio_scale_up_policy" {
   name                   = var.asg_policy_name
   autoscaling_group_name = var.asg_name
   policy_type            = "TargetTrackingScaling"

   # The group is referenced through a variable rather than the resource, so
   # the dependency on the group is declared explicitly.
   depends_on = [aws_autoscaling_group.audio_auto_scaling_group]

   target_tracking_configuration {
     target_value = 1

     customized_metric_specification {
       # m1: queue depth (input only, not returned).
       metrics {
         id          = "m1"
         label       = "Get the queue size (the number of messages waiting to be processed)"
         return_data = false

         metric_stat {
           stat = "Sum"

           metric {
             namespace   = "AWS/SQS"
             metric_name = "ApproximateNumberOfMessagesVisible"

             dimensions {
               name  = "QueueName"
               value = var.sqs_queue_name
             }
           }
         }
       }

       # m2: number of in-service instances (input only, not returned).
       metrics {
         id          = "m2"
         label       = "Get the group size (the number of InService instances)"
         return_data = false

         metric_stat {
           stat = "Average"

           metric {
             namespace   = "AWS/AutoScaling"
             metric_name = "GroupInServiceInstances"

             dimensions {
               name  = "AutoScalingGroupName"
               value = var.asg_name
             }
           }
         }
       }

       # e1: the tracked value. Falls back to the raw queue depth when there
       # are no instances, avoiding a division by zero.
       metrics {
         id          = "e1"
         label       = "Calculate the backlog per instance"
         expression  = "IF(m2 == 0, m1, m1 / m2)"
         return_data = true
       }
     }
   }
 }

Update: I tried to address the comments, and here are the updated parts of my code (still nothing happens in my ASG after I send messages to SQS):

# Launch template used by the Auto Scaling group in place of the earlier
# launch configuration.
resource "aws_launch_template" "audio_asg_launch_template" {
  image_id      = data.aws_ami.audio_ami.id
  instance_type = "t2.micro"

  # Prefix from which Terraform generates a unique template name.
  name_prefix = var.launch_config_name_prefix
}
# Auto Scaling group (launch-template variant). It starts with zero instances;
# the target-tracking policy attached to this group adjusts capacity afterwards.
resource "aws_autoscaling_group" "audio_auto_scaling_group" {
  availability_zones        = data.aws_availability_zones.available.names
  name                      = var.asg_name
  max_size                  = var.asg_max_size
  min_size                  = var.asg_min_size
  desired_capacity          = 0 # initial size only; later changes are ignored below
  health_check_grace_period = var.asg_health_check_grace_period
  health_check_type         = "EC2"
  force_delete              = true

  launch_template {
    id      = aws_launch_template.audio_asg_launch_template.id
    version = "$Latest"
  }

  # The scaling policy changes the desired capacity at runtime. Ignore that
  # drift so a later `terraform apply` does not reset the group back to 0.
  lifecycle {
    ignore_changes = [desired_capacity]
  }

  # NOTE(review): enabled_metrics is not set, so this group publishes no
  # group metrics (e.g. GroupInServiceInstances) to CloudWatch.
}
# Target-tracking policy that aims for a backlog of 1 visible SQS message per
# in-service instance. The queue and the group are referenced through their
# resources, so Terraform infers the dependencies.
resource "aws_autoscaling_policy" "asg_audio_scale_up_policy" {
  name                   = var.asg_policy_name
  policy_type            = "TargetTrackingScaling"
  autoscaling_group_name = aws_autoscaling_group.audio_auto_scaling_group.name

  target_tracking_configuration {
    target_value = 1

    customized_metric_specification {
      # m1: queue depth (input only, not returned).
      metrics {
        id          = "m1"
        label       = "Get the queue size (the number of messages waiting to be processed)"
        return_data = false

        metric_stat {
          stat = "Sum"

          metric {
            metric_name = "ApproximateNumberOfMessagesVisible"
            namespace   = "AWS/SQS"

            dimensions {
              name  = "QueueName"
              value = aws_sqs_queue.file_checker_sqs_queue.name
            }
          }
        }
      }

      # m2: number of in-service instances (input only, not returned).
      metrics {
        id          = "m2"
        label       = "Get the group size (the number of InService instances)"
        return_data = false

        metric_stat {
          stat = "Average"

          metric {
            metric_name = "GroupInServiceInstances"
            namespace   = "AWS/AutoScaling"

            dimensions {
              name  = "AutoScalingGroupName"
              value = aws_autoscaling_group.audio_auto_scaling_group.name
            }
          }
        }
      }

      # e1: the tracked value. Falls back to the raw queue depth when there
      # are no instances, avoiding a division by zero.
      metrics {
        id          = "e1"
        label       = "Calculate the backlog per instance"
        return_data = true
        expression  = "IF(m2 == 0, m1, m1 / m2)"
      }
    }
  }
}
1

There is 1 best solution below

0
On

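Figured it out! Everything is okay with the policy itself; the issue was that an ASG doesn't enable group metrics by default. Without them, the GroupInServiceInstances metric (m2 in the policy) is never published to CloudWatch, so the backlog-per-instance expression has no data to evaluate. Once I enabled the metric in my code, everything started working. So the initial code in my question stays the same, and we're only adding the following: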

resource "aws_autoscaling_group" "audio_auto_scaling_group" {
  # ... every other argument of the group stays the same ...

  # Enable metrics collection for the Auto Scaling group. The scaling policy's
  # "m2" metric reads GroupInServiceInstances, so it must be listed here.
  enabled_metrics = [
    "GroupInServiceInstances",
  ]
}