Elasticsearch Filter Aggregation vs Query Filter

68 Views Asked by At

I'm trying to use filters inside aggregations in Elasticsearch, since I need different filters for different aggregations. As a first step, I took the filter I currently have in the query and put exactly the same filter in an aggregation filter, but I'm getting wrong results.

I have products that have 3 levels of categories, named categories, subcategories and subsubcategories. All of them are of type nested.

Case 1: Using query filter

Input:

{
    "size": 800,
    "sort": [],
    "query": {
        "bool": {
            "filter": [
                {
                    "nested": {
                        "query": {
                            "bool": {
                                "filter": [
                                    {
                                        "terms": {
                                            "categories.urlTitle.keyword": [
                                                "lingerie"
                                            ],
                                            "boost": 1.0
                                        }
                                    }
                                ],
                                "boost": 1.0
                            }
                        },
                        "path": "categories",
                        "ignore_unmapped": false,
                        "score_mode": "avg",
                        "boost": 1.0
                    }
                },
                {
                    "nested": {
                        "query": {
                            "nested": {
                                "query": {
                                    "bool": {
                                        "filter": [
                                            {
                                                "terms": {
                                                    "categories.subcategories.urlTitle.keyword": [
                                                        "panties"
                                                    ],
                                                    "boost": 1.0
                                                }
                                            }
                                        ],
                                        "boost": 1.0
                                    }
                                },
                                "path": "categories.subcategories",
                                "ignore_unmapped": false,
                                "score_mode": "avg",
                                "boost": 1.0
                            }
                        },
                        "path": "categories",
                        "ignore_unmapped": false,
                        "score_mode": "avg",
                        "boost": 1.0
                    }
                }
            ],
            "boost": 1.0
        }
    },
    "aggregations": {
        "subsubcategories": {
            "nested": {
                "path": "categories.subcategories.subsubcategories"
            },
            "aggregations": {
                "title": {
                    "terms": {
                        "field": "categories.subcategories.subsubcategories.title.keyword_normalized",
                        "size": 1000,
                        "min_doc_count": 1,
                        "shard_min_doc_count": 0,
                        "show_term_doc_count_error": false,
                        "order": [
                            {
                                "_count": "desc"
                            },
                            {
                                "_key": "asc"
                            }
                        ]
                    }
                },
                "urlTitle": {
                    "terms": {
                        "field": "categories.subcategories.subsubcategories.urlTitle.keyword",
                        "size": 1000,
                        "min_doc_count": 1,
                        "shard_min_doc_count": 0,
                        "show_term_doc_count_error": false,
                        "order": [
                            {
                                "_count": "desc"
                            },
                            {
                                "_key": "asc"
                            }
                        ]
                    }
                }
            }
        }
    }
}

Output:

"aggregations": {
        "subsubcategories": {
            "doc_count": 36,
            "urlTitle": {
                "doc_count_error_upper_bound": 0,
                "sum_other_doc_count": 0,
                "buckets": [
                    {
                        "key": "briefs",
                        "doc_count": 22
                    },
                    {
                        "key": "thongs-v-strings",
                        "doc_count": 6
                    },
                    {
                        "key": "brazilians",
                        "doc_count": 4
                    },
                    {
                        "key": "boxers",
                        "doc_count": 2
                    },
                    {
                        "key": "no-show",
                        "doc_count": 2
                    }
                ]
            }

Case 2: Using Aggregation filter

Input:

{
    "size": 800,
    "sort": [],
    "post_filter": {
        "bool": {
            "filter": [
                {
                    "nested": {
                        "query": {
                            "bool": {
                                "filter": [
                                    {
                                        "terms": {
                                            "categories.urlTitle.keyword": [
                                                "lingerie"
                                            ],
                                            "boost": 1.0
                                        }
                                    }
                                ],
                                "boost": 1.0
                            }
                        },
                        "path": "categories",
                        "ignore_unmapped": false,
                        "score_mode": "avg",
                        "boost": 1.0
                    }
                },
                {
                    "nested": {
                        "query": {
                            "nested": {
                                "query": {
                                    "bool": {
                                        "filter": [
                                            {
                                                "terms": {
                                                    "categories.subcategories.urlTitle.keyword": [
                                                        "panties"
                                                    ],
                                                    "boost": 1.0
                                                }
                                            }
                                        ],
                                        "boost": 1.0
                                    }
                                },
                                "path": "categories.subcategories",
                                "ignore_unmapped": false,
                                "score_mode": "avg",
                                "boost": 1.0
                            }
                        },
                        "path": "categories",
                        "ignore_unmapped": false,
                        "score_mode": "avg",
                        "boost": 1.0
                    }
                }
            ],
            "boost": 1.0
        }
    },
    "aggregations": {
        "subsubcategories": {
            "nested": {
                "path": "categories.subcategories.subsubcategories"
            },
            "aggregations": {
                "aggs": {
                    "filter": {
                        "bool": {
                            "filter": [
                                {
                                    "nested": {
                                        "query": {
                                            "bool": {
                                                "filter": [
                                                    {
                                                        "terms": {
                                                            "categories.urlTitle.keyword": [
                                                                "lingerie"
                                                            ]
                                                        }
                                                    }
                                                ]
                                            }
                                        },
                                        "path": "categories"
                                    }
                                }
                            ]
                        }
                    },
                    "aggs": {
                        "title": {
                            "terms": {
                                "field": "categories.subcategories.subsubcategories.title.keyword_normalized",
                                "size": 1000,
                                "min_doc_count": 1,
                                "shard_min_doc_count": 0,
                                "show_term_doc_count_error": false,
                                "order": [
                                    {
                                        "_count": "desc"
                                    },
                                    {
                                        "_key": "asc"
                                    }
                                ]
                            }
                        },
                        "urlTitle": {
                            "terms": {
                                "field": "categories.subcategories.subsubcategories.urlTitle.keyword",
                                "size": 1000,
                                "min_doc_count": 1,
                                "shard_min_doc_count": 0,
                                "show_term_doc_count_error": false,
                                "order": [
                                    {
                                        "_count": "desc"
                                    },
                                    {
                                        "_key": "asc"
                                    }
                                ]
                            }
                        }
                    }
                }
            }
        }
    }
}

Output (which is wrong and different from case 1):

    "aggregations": {
        "subsubcategories": {
            "doc_count": 1273,
            "aggs": {
                "doc_count": 319,
                "urlTitle": {
                    "doc_count_error_upper_bound": 0,
                    "sum_other_doc_count": 0,
                    "buckets": [
                        {
                            "key": "briefs",
                            "doc_count": 125
                        },
                        {
                            "key": "plus-size",
                            "doc_count": 36
                        },
                        {
                            "key": "push-ups",
                            "doc_count": 36
                        },
                        {
                            "key": "brazilians",
                            "doc_count": 18
                        },
                        {
                            "key": "strapless",
                            "doc_count": 17
                        },
                        {
                            "key": "super-push-ups",
                            "doc_count": 12
                        },
                        {
                            "key": "bra-accessories",
                            "doc_count": 10
                        },
                        {
                            "key": "bustiers-and-corsets",
                            "doc_count": 10
                        },
                        {
                            "key": "bralettes",
                            "doc_count": 7
                        },
                        {
                            "key": "singlets",
                            "doc_count": 6
                        },
                        {
                            "key": "women",
                            "doc_count": 6
                        },
                        {
                            "key": "thongs",
                            "doc_count": 5
                        },
                        {
                            "key": "boxers",
                            "doc_count": 4
                        },
                        {
                            "key": "thongs-v-strings",
                            "doc_count": 4
                        },
                        {
                            "key": "short",
                            "doc_count": 3
                        },
                        {
                            "key": "tops-bottoms",
                            "doc_count": 3
                        },
                        {
                            "key": "basic-collection",
                            "doc_count": 2
                        },
                        {
                            "key": "no-show",
                            "doc_count": 2
                        },
                        {
                            "key": "strappy-back",
                            "doc_count": 2
                        },
                        {
                            "key": "bottoms",
                            "doc_count": 1
                        },
                        {
                            "key": "cream",
                            "doc_count": 1
                        },
                        {
                            "key": "dress",
                            "doc_count": 1
                        },
                        {
                            "key": "full-length",
                            "doc_count": 1
                        },
                        {
                            "key": "girls",
                            "doc_count": 1
                        },
                        {
                            "key": "junior-girls",
                            "doc_count": 1
                        },
                        {
                            "key": "maternity",
                            "doc_count": 1
                        },
                        {
                            "key": "maternity-collection",
                            "doc_count": 1
                        },
                        {
                            "key": "skirt",
                            "doc_count": 1
                        },
                        {
                            "key": "sports-leggings",
                            "doc_count": 1
                        },
                        {
                            "key": "teens",
                            "doc_count": 1
                        }
                    ]
                },

I tried many different things, but never got the results I expected. What I need is to aggregate the 3rd-level categories based on terms of the 1st-level or 2nd-level categories, and I need the aggregation filter criteria to be different from the query filter criteria.

Mapping:

"categories": {
                    "type": "nested",
                    "properties": {
                        "id": {
                            "type": "long"
                        },
                        "subcategories": {
                            "type": "nested",
                            "properties": {
                                "id": {
                                    "type": "long"
                                },
                                "subsubcategories": {
                                    "type": "nested",
                                    "properties": {
                                        "id": {
                                            "type": "long"
                                        },
                                        "title": {
                                            "type": "text",
                                            "fields": {
                                                "keyword": {
                                                    "type": "keyword",
                                                    "ignore_above": 256
                                                },
                                                "keyword_normalized": {
                                                    "type": "keyword",
                                                    "normalizer": "case_insensitive"
                                                }
                                            }
                                        },
                                        "urlTitle": {
                                            "type": "text",
                                            "fields": {
                                                "keyword": {
                                                    "type": "keyword",
                                                    "ignore_above": 256
                                                }
                                            }
                                        }
                                    }
                                },
                                "title": {
                                    "type": "text",
                                    "fields": {
                                        "keyword": {
                                            "type": "keyword",
                                            "ignore_above": 256
                                        },
                                        "keyword_normalized": {
                                            "type": "keyword",
                                            "normalizer": "case_insensitive"
                                        }
                                    }
                                },
                                "urlTitle": {
                                    "type": "text",
                                    "fields": {
                                        "keyword": {
                                            "type": "keyword",
                                            "ignore_above": 256
                                        }
                                    }
                                }
                            }
                        },
                        "title": {
                            "type": "text",
                            "fields": {
                                "keyword": {
                                    "type": "keyword",
                                    "ignore_above": 256
                                },
                                "keyword_normalized": {
                                    "type": "keyword",
                                    "normalizer": "case_insensitive"
                                }
                            }
                        },
                        "urlTitle": {
                            "type": "text",
                            "fields": {
                                "keyword": {
                                    "type": "keyword",
                                    "ignore_above": 256
                                }
                            }
                        }
                    }
                }

Sample Data:

"categories": [
                        {
                            "id": 36,
                            "title": "Lingerie",
                            "urlTitle": "lingerie",
                            "subcategories": [
                                {
                                    "id": 46,
                                    "title": "Panties",
                                    "urlTitle": "panties",
                                    "subsubcategories": [
                                        {
                                            "id": 48,
                                            "title": "Briefs",
                                            "urlTitle": "briefs"
                                        }
                                    ]
                                }
                            ]
                        }
                    ]
0

There are 0 best solutions below