I'm using custom skills to OCR and chunk data from images. In parallel, I'm pulling additional context data from a CSV file and trying to add it to each search result as additional metadata. I can see the metadata on the parent item but not on the child items. Is there a way to map it so that the child items receive the metadata as well?
These are the output field mappings I'm using:
{
"outputFieldMappings": [
{
"sourceFieldName": "/document/metadata/special_code",
"targetFieldName": "metadata_special_code"
},
{
"sourceFieldName": "/document/metadata/document_type",
"targetFieldName": "metadata_document_type"
},
{
"sourceFieldName": "/document/metadata/location",
"targetFieldName": "metadata_location"
}
]
}
And this is the output I'm seeing in the search results, showing an example of a parent item and a child item. The metadata is absent from the child item, but I'd like it to be populated there too.
{
"@search.score": 0.01515151560306549,
"@search.rerankerScore": 0.8941482305526733,
"@search.captions": [
{
"text": "sample file.pdf.",
"highlights": "<em>sample</em> file.pdf."
}],
"chunk_id":"<parent id>",
"parent_id": null,
"chunk": null,
"title": "sample file.pdf",
"metadata_special_code": "12345678",
"metadata_document_type": "pdf",
"metadata_location": "test-store/sample file.pdf"
},
{
"@search.score": 0.032786883413791656,
"@search.rerankerScore": 0.9278492331504822,
"@search.captions": [
{
"text": "sample file.pdf. <text here>"
}],
"chunk_id":"<chunk id>",
"parent_id":"<parent id>",
"chunk": "<text here>",
"title": "sample file.pdf",
"metadata_special_code": null,
"metadata_document_type": null,
"metadata_location": null
}
Edit: Adding the index, indexer, and skillset details as requested.
Index Definition
{
"@odata.context": "https://search.windows.net/$metadata#indexes/$entity",
"@odata.etag": "",
"name": "test",
"defaultScoringProfile": null,
"fields": [
{
"name": "chunk_id",
"type": "Edm.String",
"searchable": true,
"filterable": true,
"retrievable": true,
"sortable": true,
"facetable": true,
"key": true,
"indexAnalyzer": null,
"searchAnalyzer": null,
"analyzer": "keyword",
"normalizer": null,
"dimensions": null,
"vectorSearchProfile": null,
"synonymMaps": []
},
{
"name": "parent_id",
"type": "Edm.String",
"searchable": true,
"filterable": true,
"retrievable": true,
"sortable": true,
"facetable": true,
"key": false,
"indexAnalyzer": null,
"searchAnalyzer": null,
"analyzer": null,
"normalizer": null,
"dimensions": null,
"vectorSearchProfile": null,
"synonymMaps": []
},
{
"name": "chunk",
"type": "Edm.String",
"searchable": true,
"filterable": false,
"retrievable": true,
"sortable": false,
"facetable": false,
"key": false,
"indexAnalyzer": null,
"searchAnalyzer": null,
"analyzer": null,
"normalizer": null,
"dimensions": null,
"vectorSearchProfile": null,
"synonymMaps": []
},
{
"name": "title",
"type": "Edm.String",
"searchable": true,
"filterable": true,
"retrievable": true,
"sortable": false,
"facetable": false,
"key": false,
"indexAnalyzer": null,
"searchAnalyzer": null,
"analyzer": null,
"normalizer": null,
"dimensions": null,
"vectorSearchProfile": null,
"synonymMaps": []
},
{
"name": "vector",
"type": "Collection(Edm.Single)",
"searchable": true,
"filterable": false,
"retrievable": true,
"sortable": false,
"facetable": false,
"key": false,
"indexAnalyzer": null,
"searchAnalyzer": null,
"analyzer": null,
"normalizer": null,
"dimensions": 1536,
"vectorSearchProfile": "full-skill-test-profile",
"synonymMaps": []
},
{
"name": "metadata_cutomer_code",
"type": "Edm.String",
"searchable": true,
"filterable": true,
"retrievable": true,
"sortable": true,
"facetable": false,
"key": false,
"indexAnalyzer": null,
"searchAnalyzer": null,
"analyzer": null,
"normalizer": null,
"dimensions": null,
"vectorSearchProfile": null,
"synonymMaps": []
},
{
"name": "metadata_document_type",
"type": "Edm.String",
"searchable": true,
"filterable": true,
"retrievable": true,
"sortable": true,
"facetable": false,
"key": false,
"indexAnalyzer": null,
"searchAnalyzer": null,
"analyzer": "standard.lucene",
"normalizer": null,
"dimensions": null,
"vectorSearchProfile": null,
"synonymMaps": []
},
{
"name": "metadata_content",
"type": "Edm.String",
"searchable": true,
"filterable": true,
"retrievable": true,
"sortable": true,
"facetable": false,
"key": false,
"indexAnalyzer": null,
"searchAnalyzer": null,
"analyzer": "standard.lucene",
"normalizer": null,
"dimensions": null,
"vectorSearchProfile": null,
"synonymMaps": []
},
{
"name": "metadata_customer_code",
"type": "Edm.String",
"searchable": true,
"filterable": true,
"retrievable": true,
"sortable": true,
"facetable": false,
"key": false,
"indexAnalyzer": null,
"searchAnalyzer": null,
"analyzer": "standard.lucene",
"normalizer": null,
"dimensions": null,
"vectorSearchProfile": null,
"synonymMaps": []
}
],
"scoringProfiles": [],
"corsOptions": null,
"suggesters": [],
"analyzers": [],
"normalizers": [],
"tokenizers": [],
"tokenFilters": [],
"charFilters": [],
"encryptionKey": null,
"similarity": {
"@odata.type": "#Microsoft.Azure.Search.BM25Similarity",
"k1": null,
"b": null
},
"semantic": {
"defaultConfiguration": "full-skill-test-semantic-configuration",
"configurations": [
{
"name": "full-skill-test-semantic-configuration",
"prioritizedFields": {
"titleField": {
"fieldName": "title"
},
"prioritizedContentFields": [
{
"fieldName": "chunk"
}
],
"prioritizedKeywordsFields": []
}
}
]
},
"vectorSearch": {
"algorithms": [
{
"name": "full-skill-test-algorithm",
"kind": "hnsw",
"hnswParameters": {
"metric": "cosine",
"m": 4,
"efConstruction": 400,
"efSearch": 500
},
"exhaustiveKnnParameters": null
}
],
"profiles": [
{
"name": "full-skill-test-profile",
"algorithm": "full-skill-test-algorithm",
"vectorizer": "full-skill-test-vectorizer"
}
],
"vectorizers": [
{
"name": "full-skill-test-vectorizer",
"kind": "azureOpenAI",
"azureOpenAIParameters": {
"resourceUri": "https://openai.azure.com",
"deploymentId": "text-embedding-ada-002",
"apiKey": "<redacted>",
"authIdentity": null
},
"customWebApiParameters": null
}
]
}
}
Indexer Definition
{
"@odata.context": "https://search.windows.net/$metadata#indexers/$entity",
"@odata.etag": "",
"name": "indexer",
"description": null,
"dataSourceName": "datasource",
"skillsetName": "skillset",
"targetIndexName": "index",
"disabled": null,
"schedule": null,
"parameters": {
"batchSize": null,
"maxFailedItems": null,
"maxFailedItemsPerBatch": null,
"base64EncodeKeys": null,
"configuration": {
"dataToExtract": "contentAndMetadata",
"parsingMode": "default",
"imageAction": "generateNormalizedImagePerPage",
"allowSkillsetToReadFileData": true
}
},
"fieldMappings": [
{
"sourceFieldName": "metadata_storage_name",
"targetFieldName": "title",
"mappingFunction": null
}
],
"outputFieldMappings": [
{
"sourceFieldName": "/document/ref_metadata/special_code",
"targetFieldName": "metadata_special_code"
},
{
"sourceFieldName": "/document/ref_metadata/document_type",
"targetFieldName": "metadata_document_type"
},
{
"sourceFieldName": "/document/ref_metadata/location",
"targetFieldName": "metadata_location"
}
],
"cache": null,
"encryptionKey": null
}
Skillset (the custom Web API skill that outputs the metadata)
{
"@odata.type": "#Microsoft.Skills.Custom.WebApiSkill",
"name": "#2",
"description": "",
"context": "/document",
"uri": "https://functionapp.azurewebsites.net/api/MetadataOutput?code=<code>",
"httpMethod": "POST",
"timeout": "PT3M50S",
"batchSize": 1,
"degreeOfParallelism": 1,
"inputs": [
{
"name": "document",
"source": "/document/metadata_storage_name"
}
],
"outputs": [
{
"name": "ref_metadata",
"targetName": "output_metadata"
}
],
"httpHeaders": {}
}