MongoDB inconsistent aggregate call between queries

158 Views Asked by At

I have two tables. videos and youtubes. I want to do a $lookup on videos.youtube and match that to youtubes._id and then $match that data based on a youtubes field. Which is working fine, but there are some huge inconsistencies between queries that should be identical in nature, or at the very least close to.

Query 1: returns 8261 documents. Takes [40, 50]ms to execute

db.getCollection('videos').aggregate([
  { '$sort': { date: -1 } },
  {
    '$lookup': {
      from: 'youtubes',
      localField: 'youtube',
      foreignField: '_id',
      as: 'youtube'
    }
  },
  { '$match': { 'youtube.talent': true } },
])

Query 2: returns 760 documents. Takes [470, 500]ms to execute

db.getCollection('videos').aggregate([
  { '$sort': { date: -1 } },
  {
    '$lookup': {
      from: 'youtubes',
      localField: 'youtube',
      foreignField: '_id',
      as: 'youtube'
    }
  },
  { '$match': { 'youtube.id': 7 } },
])

Query 3: returns 760 documents. Takes [90, 100]ms to execute

db.getCollection('videos').aggregate([
  // { '$sort': { date: -1 } },
  {
    '$lookup': {
      from: 'youtubes',
      localField: 'youtube',
      foreignField: '_id',
      as: 'youtube'
    }
  },
  { '$match': { 'youtube.id': 7 } },
])

All fields used in the queries are indexed. What stands out is that the $sort statement in Query 2, apparently uses roughly 400ms to execute, yet in Query 1 that uses the same $sort statement in the same location in the pipeline and it only uses [40, 50]ms.

I've used the { explain: true } option to look for differences between Query 1 and Query 2 that could explain the speed differences, but they are identical except for the $match portion.

Any solution/suggestions for bringing Query 2 up to speed with Query 1? Or at the very least an explanation for the huge differences in speed?


Another weird thing discovered while making this post

Query 4: returns 9378 documents. Takes [25, 35]ms to execute

db.getCollection('videos').aggregate([
  { '$sort': { date: -1 } },
  {
    '$lookup': {
      from: 'youtubes',
      localField: 'youtube',
      foreignField: '_id',
      as: 'youtube'
    }
  },
  { '$match': { 'youtube.clipper': true } }
])

Query 5: returns 9378 documents. Takes [600, 680]ms to execute

db.getCollection('videos').aggregate([
  //{ '$sort': { date: -1 } },
  {
    '$lookup': {
      from: 'youtubes',
      localField: 'youtube',
      foreignField: '_id',
      as: 'youtube'
    }
  },
  { '$match': { 'youtube.clipper': true } }
])

At this point I'm stumped as to what is happening. Originally I thought it had to do with Number vs Boolean, but as Query 4 and Query 5 shows it clearly has 0 impact. And it seems random.

Indexes just in case (for youtubes)

[
    {
        "v" : 2,
        "key" : {
            "_id" : 1
        },
        "name" : "_id_",
        "ns" : "holo-watcher.youtubes"
    },
    {
        "v" : 2,
        "unique" : true,
        "key" : {
            "id" : 1
        },
        "name" : "id_1",
        "ns" : "holo-watcher.youtubes",
        "background" : true
    },
    {
        "v" : 2,
        "key" : {
            "name" : 1
        },
        "name" : "name_1",
        "ns" : "holo-watcher.youtubes",
        "background" : true
    },
    {
        "v" : 2,
        "unique" : true,
        "key" : {
            "channelId" : 1
        },
        "name" : "channelId_1",
        "ns" : "holo-watcher.youtubes",
        "background" : true
    },
    {
        "v" : 2,
        "key" : {
            "clipper" : 1
        },
        "name" : "clipper_1",
        "ns" : "holo-watcher.youtubes",
        "background" : true
    },
    {
        "v" : 2,
        "key" : {
            "talent" : 1
        },
        "name" : "talent_1",
        "ns" : "holo-watcher.youtubes",
        "background" : true
    },
    {
        "v" : 2,
        "key" : {
            "debut" : 1
        },
        "name" : "debut_1",
        "ns" : "holo-watcher.youtubes",
        "background" : true
    }
]

indexes (for videos)

[
    {
        "v" : 2,
        "key" : {
            "_id" : 1
        },
        "name" : "_id_",
        "ns" : "holo-watcher.videos"
    },
    {
        "v" : 2,
        "unique" : true,
        "key" : {
            "videoId" : 1
        },
        "name" : "videoId_1",
        "ns" : "holo-watcher.videos",
        "background" : true
    },
    {
        "v" : 2,
        "key" : {
            "title" : 1
        },
        "name" : "title_1",
        "ns" : "holo-watcher.videos",
        "background" : true
    },
    {
        "v" : 2,
        "key" : {
            "date" : 1
        },
        "name" : "date_1",
        "ns" : "holo-watcher.videos",
        "background" : true
    }
]

{ explain: true } output for Query 5 (nearly identical to Query 1 and Query 2):

{
    "stages" : [ 
        {
            "$cursor" : {
                "query" : {},
                "sort" : {
                    "date" : -1
                },
                "queryPlanner" : {
                    "plannerVersion" : 1,
                    "namespace" : "holo-watcher.videos",
                    "indexFilterSet" : false,
                    "parsedQuery" : {},
                    "winningPlan" : {
                        "stage" : "FETCH",
                        "inputStage" : {
                            "stage" : "IXSCAN",
                            "keyPattern" : {
                                "date" : 1
                            },
                            "indexName" : "date_1",
                            "isMultiKey" : false,
                            "multiKeyPaths" : {
                                "date" : []
                            },
                            "isUnique" : false,
                            "isSparse" : false,
                            "isPartial" : false,
                            "indexVersion" : 2,
                            "direction" : "backward",
                            "indexBounds" : {
                                "date" : [ 
                                    "[MaxKey, MinKey]"
                                ]
                            }
                        }
                    },
                    "rejectedPlans" : []
                }
            }
        }, 
        {
            "$lookup" : {
                "from" : "youtubes",
                "as" : "youtube",
                "localField" : "youtube",
                "foreignField" : "_id"
            }
        }, 
        {
            "$match" : {
                "youtube.clipper" : {
                    "$eq" : true
                }
            }
        }
    ],
    "ok" : 1.0
}
0

There are 0 best solutions below