adding multiple lookup in mongo query affects the performance though the fields are indexed?

319 Views Asked by At

I have a collections of department, student & student_attendance below.

department:

{
  "_id": "abc101",
  "department": {
    "title" : "CSE",
    "start_date" : "2011-06-02",
    "department_id" : "CSE01",
    "total_staffs" : "23",
  },
  "department_id" : "CSE01",
  "parent_department_id" : "CSE01",
  "college_code" : "IEC"
}

{
  "_id": "mno101",
  "department": {
    "title" : "ME",
    "start_date" : "2000-06-13",
    "department_id" : "ME01",
    "total_staffs" : "45",
  },
  "department_id" : "ME01",
  "parent_department_id" : "ME01",
  "college_code" : "SMEC"
}

{
  "_id": "abc102",
  "department": {
    "title" : "ECE",
    "start_date" : "2011-06-02",
    "department_id" : "ECE01",
    "total_staffs" : "16",
  },
  "department_id" : "ECE01",
  "parent_department_id" : "ECE01",
  "college_code" : "IEC"
}

student:

{
    "_id" : "abc103",
    "student" : {
        "first_name" : "Venu",
        "last_name"  : "Gopal",
        "department_id" : "CSE01",
        "student_id" : "20IECCSE45",
        "date_of_admission" : "2020-06-12"
    },
    "student_id" : "20IECCSE45",
    "parent_department_id" : "CSE01",
    "college_code" : "IEC"
}

student_attendance :

{
    "_id" : "abc104",
    "student_attendance" : {
        "attendance_id" : "20061201",
        "attendance_pct" : "100",
        "absence_days" : "0",
        "student_id" : "20IECCSE45"
        "submit_time" : {
            "$numberLong": "1497019230447"
          },
        "student_fee" : {
            "version" : "1",
            data : [
                {
                    "fee_amount" : "1000",
                    "fee_paid"   : "1000",
                    "fee_date"   : {
                        "$numberLong": "1497019230447"
                        },
                    "fee_type"   : "tuition fee"
                }
            ]
        }
    },
    "attendance_id" : "20061201",
    "parent_department_id" : "CSE01",
    "college_code" : "IEC"
}

{
    "_id" : "abc105",
    "student_attendance" : {
        "attendance_id" : "20061211",
        "attendance_pct" : "90",
        "absence_days" : "1",
        "student_id" : "20IECCSE45"
        "submit_time" : {
            "$numberLong": "1497019230447"
          },
        "student_fee" : {
            "version" : "1",
            data : [
                {
                    "fee_amount" : "1100",
                    "fee_paid"   : "1100",
                    "fee_date"   : {
                        "$numberLong": "1497019230447"
                        },
                    "fee_type"   : "tuition fee"
                }
            ]
        }
    },
    "attendance_id" : "20061211",
    "parent_department_id" : "CSE01",
    "college_code" : "IEC"
}

I've written the below aggregate query:

[
  {
    '$lookup': {
      'from': 'student', 
      'localField': 'student_attendance.student_id', 
      'foreignField': 'student_id', 
      'as': 'st'
    }
  }, {
    '$unwind': {
      'path': '$st', 
      'preserveNullAndEmptyArrays': true
    }
  }, {
    '$lookup': {
      'from': 'department', 
      'localField': 'parent_department_id', 
      'foreignField': 'department_id', 
      'as': 'dp'
    }
  }, {
    '$unwind': {
      'path': '$dp', 
      'preserveNullAndEmptyArrays': true
    }
  }, {
    '$project': {
      'student_attendance': '$student_attendance', 
      'first_name': '$st.student.first_name', 
      'last_name': '$st.student.last_name', 
      'title': '$dp.department.title'
    }
  }
]

All the fields used in the lookup are indexed. In my real time case each of these 3 collections will have 200k+ documents. Similarly, if I add 1 or more lookup the performance is going down. The $project fields are the expected result, how can I improve performance or how this query can be written better. Also, Is it not advisable to use lookup?

0

There are 0 best solutions below