I have a collections of department, student & student_attendance below.
department:
{
"_id": "abc101",
"department": {
"title" : "CSE",
"start_date" : "2011-06-02",
"department_id" : "CSE01",
"total_staffs" : "23",
},
"department_id" : "CSE01",
"parent_department_id" : "CSE01",
"college_code" : "IEC"
}
{
"_id": "mno101",
"department": {
"title" : "ME",
"start_date" : "2000-06-13",
"department_id" : "ME01",
"total_staffs" : "45",
},
"department_id" : "ME01",
"parent_department_id" : "ME01",
"college_code" : "SMEC"
}
{
"_id": "abc102",
"department": {
"title" : "ECE",
"start_date" : "2011-06-02",
"department_id" : "ECE01",
"total_staffs" : "16",
},
"department_id" : "ECE01",
"parent_department_id" : "ECE01",
"college_code" : "IEC"
}
student:
{
"_id" : "abc103",
"student" : {
"first_name" : "Venu",
"last_name" : "Gopal",
"department_id" : "CSE01",
"student_id" : "20IECCSE45",
"date_of_admission" : "2020-06-12"
},
"student_id" : "20IECCSE45",
"parent_department_id" : "CSE01",
"college_code" : "IEC"
}
student_attendance :
{
"_id" : "abc104",
"student_attendance" : {
"attendance_id" : "20061201",
"attendance_pct" : "100",
"absence_days" : "0",
"student_id" : "20IECCSE45"
"submit_time" : {
"$numberLong": "1497019230447"
},
"student_fee" : {
"version" : "1",
data : [
{
"fee_amount" : "1000",
"fee_paid" : "1000",
"fee_date" : {
"$numberLong": "1497019230447"
},
"fee_type" : "tuition fee"
}
]
}
},
"attendance_id" : "20061201",
"parent_department_id" : "CSE01",
"college_code" : "IEC"
}
{
"_id" : "abc105",
"student_attendance" : {
"attendance_id" : "20061211",
"attendance_pct" : "90",
"absence_days" : "1",
"student_id" : "20IECCSE45"
"submit_time" : {
"$numberLong": "1497019230447"
},
"student_fee" : {
"version" : "1",
data : [
{
"fee_amount" : "1100",
"fee_paid" : "1100",
"fee_date" : {
"$numberLong": "1497019230447"
},
"fee_type" : "tuition fee"
}
]
}
},
"attendance_id" : "20061211",
"parent_department_id" : "CSE01",
"college_code" : "IEC"
}
I've written the below aggregate query:
[
{
'$lookup': {
'from': 'student',
'localField': 'student_attendance.student_id',
'foreignField': 'student_id',
'as': 'st'
}
}, {
'$unwind': {
'path': '$st',
'preserveNullAndEmptyArrays': true
}
}, {
'$lookup': {
'from': 'department',
'localField': 'parent_department_id',
'foreignField': 'department_id',
'as': 'dp'
}
}, {
'$unwind': {
'path': '$dp',
'preserveNullAndEmptyArrays': true
}
}, {
'$project': {
'student_attendance': '$student_attendance',
'first_name': '$st.student.first_name',
'last_name': '$st.student.last_name',
'title': '$dp.department.title'
}
}
]
All the fields used in the lookup are indexed. In my real time case each of these 3 collections will have 200k+ documents. Similarly, if I add 1 or more lookup the performance is going down. The $project fields are the expected result, how can I improve performance or how this query can be written better. Also, Is it not advisable to use lookup?