I'm trying to use filters inside aggregations in Elasticsearch, since I need different filters for different aggregations. As a first step, I took the filter I currently have in the query and put exactly the same filter into a filter aggregation, but I'm getting wrong results.
I have products with 3 levels of categories, named categories, subcategories and subsubcategories. All of them are of type nested.
Case 1: Using query filter
Input:
{
"size": 800,
"sort": [],
"query": {
"bool": {
"filter": [
{
"nested": {
"query": {
"bool": {
"filter": [
{
"terms": {
"categories.urlTitle.keyword": [
"lingerie"
],
"boost": 1.0
}
}
],
"boost": 1.0
}
},
"path": "categories",
"ignore_unmapped": false,
"score_mode": "avg",
"boost": 1.0
}
},
{
"nested": {
"query": {
"nested": {
"query": {
"bool": {
"filter": [
{
"terms": {
"categories.subcategories.urlTitle.keyword": [
"panties"
],
"boost": 1.0
}
}
],
"boost": 1.0
}
},
"path": "categories.subcategories",
"ignore_unmapped": false,
"score_mode": "avg",
"boost": 1.0
}
},
"path": "categories",
"ignore_unmapped": false,
"score_mode": "avg",
"boost": 1.0
}
}
],
"boost": 1.0
}
},
"aggregations": {
"subsubcategories": {
"nested": {
"path": "categories.subcategories.subsubcategories"
},
"aggregations": {
"title": {
"terms": {
"field": "categories.subcategories.subsubcategories.title.keyword_normalized",
"size": 1000,
"min_doc_count": 1,
"shard_min_doc_count": 0,
"show_term_doc_count_error": false,
"order": [
{
"_count": "desc"
},
{
"_key": "asc"
}
]
}
},
"urlTitle": {
"terms": {
"field": "categories.subcategories.subsubcategories.urlTitle.keyword",
"size": 1000,
"min_doc_count": 1,
"shard_min_doc_count": 0,
"show_term_doc_count_error": false,
"order": [
{
"_count": "desc"
},
{
"_key": "asc"
}
]
}
}
}
}
}
}
Output:
"aggregations": {
"subsubcategories": {
"doc_count": 36,
"urlTitle": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": "briefs",
"doc_count": 22
},
{
"key": "thongs-v-strings",
"doc_count": 6
},
{
"key": "brazilians",
"doc_count": 4
},
{
"key": "boxers",
"doc_count": 2
},
{
"key": "no-show",
"doc_count": 2
}
]
}
Case 2: Using Aggregation filter
Input:
{
"size": 800,
"sort": [],
"post_filter": {
"bool": {
"filter": [
{
"nested": {
"query": {
"bool": {
"filter": [
{
"terms": {
"categories.urlTitle.keyword": [
"lingerie"
],
"boost": 1.0
}
}
],
"boost": 1.0
}
},
"path": "categories",
"ignore_unmapped": false,
"score_mode": "avg",
"boost": 1.0
}
},
{
"nested": {
"query": {
"nested": {
"query": {
"bool": {
"filter": [
{
"terms": {
"categories.subcategories.urlTitle.keyword": [
"panties"
],
"boost": 1.0
}
}
],
"boost": 1.0
}
},
"path": "categories.subcategories",
"ignore_unmapped": false,
"score_mode": "avg",
"boost": 1.0
}
},
"path": "categories",
"ignore_unmapped": false,
"score_mode": "avg",
"boost": 1.0
}
}
],
"boost": 1.0
}
},
"aggregations": {
"subsubcategories": {
"nested": {
"path": "categories.subcategories.subsubcategories"
},
"aggregations": {
"aggs": {
"filter": {
"bool": {
"filter": [
{
"nested": {
"query": {
"bool": {
"filter": [
{
"terms": {
"categories.urlTitle.keyword": [
"lingerie"
]
}
}
]
}
},
"path": "categories"
}
}
]
}
},
"aggs": {
"title": {
"terms": {
"field": "categories.subcategories.subsubcategories.title.keyword_normalized",
"size": 1000,
"min_doc_count": 1,
"shard_min_doc_count": 0,
"show_term_doc_count_error": false,
"order": [
{
"_count": "desc"
},
{
"_key": "asc"
}
]
}
},
"urlTitle": {
"terms": {
"field": "categories.subcategories.subsubcategories.urlTitle.keyword",
"size": 1000,
"min_doc_count": 1,
"shard_min_doc_count": 0,
"show_term_doc_count_error": false,
"order": [
{
"_count": "desc"
},
{
"_key": "asc"
}
]
}
}
}
}
}
}
}
}
Output (which is wrong and different from Case 1):
"aggregations": {
"subsubcategories": {
"doc_count": 1273,
"aggs": {
"doc_count": 319,
"urlTitle": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": "briefs",
"doc_count": 125
},
{
"key": "plus-size",
"doc_count": 36
},
{
"key": "push-ups",
"doc_count": 36
},
{
"key": "brazilians",
"doc_count": 18
},
{
"key": "strapless",
"doc_count": 17
},
{
"key": "super-push-ups",
"doc_count": 12
},
{
"key": "bra-accessories",
"doc_count": 10
},
{
"key": "bustiers-and-corsets",
"doc_count": 10
},
{
"key": "bralettes",
"doc_count": 7
},
{
"key": "singlets",
"doc_count": 6
},
{
"key": "women",
"doc_count": 6
},
{
"key": "thongs",
"doc_count": 5
},
{
"key": "boxers",
"doc_count": 4
},
{
"key": "thongs-v-strings",
"doc_count": 4
},
{
"key": "short",
"doc_count": 3
},
{
"key": "tops-bottoms",
"doc_count": 3
},
{
"key": "basic-collection",
"doc_count": 2
},
{
"key": "no-show",
"doc_count": 2
},
{
"key": "strappy-back",
"doc_count": 2
},
{
"key": "bottoms",
"doc_count": 1
},
{
"key": "cream",
"doc_count": 1
},
{
"key": "dress",
"doc_count": 1
},
{
"key": "full-length",
"doc_count": 1
},
{
"key": "girls",
"doc_count": 1
},
{
"key": "junior-girls",
"doc_count": 1
},
{
"key": "maternity",
"doc_count": 1
},
{
"key": "maternity-collection",
"doc_count": 1
},
{
"key": "skirt",
"doc_count": 1
},
{
"key": "sports-leggings",
"doc_count": 1
},
{
"key": "teens",
"doc_count": 1
}
]
},
I tried many different things, but never got the results I expected. What I need is to aggregate the 3rd-level categories based on terms of the 1st- or 2nd-level categories, and I need the aggregation filter criteria to be different from the query filter criteria.
Mapping:
"categories": {
"type": "nested",
"properties": {
"id": {
"type": "long"
},
"subcategories": {
"type": "nested",
"properties": {
"id": {
"type": "long"
},
"subsubcategories": {
"type": "nested",
"properties": {
"id": {
"type": "long"
},
"title": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
},
"keyword_normalized": {
"type": "keyword",
"normalizer": "case_insensitive"
}
}
},
"urlTitle": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
}
}
},
"title": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
},
"keyword_normalized": {
"type": "keyword",
"normalizer": "case_insensitive"
}
}
},
"urlTitle": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
}
}
},
"title": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
},
"keyword_normalized": {
"type": "keyword",
"normalizer": "case_insensitive"
}
}
},
"urlTitle": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
}
}
}
Sample Data:
"categories": [
{
"id": 36,
"title": "Lingerie",
"urlTitle": "lingerie",
"subcategories": [
{
"id": 46,
"title": "Panties",
"urlTitle": "panties",
"subsubcategories": [
{
"id": 48,
"title": "Briefs",
"urlTitle": "briefs"
}
]
}
]
}
]