I ran the following code on Colab, but the receipt image that the response refers to was not displayed in the output.
from llama_index.core import SimpleDirectoryReader, GPTVectorStoreIndex
from llama_index.readers.file import ImageReader
from llama_index.core.response.notebook_utils import display_response
from llama_index.core.indices.query.query_transform.base import ImageOutputQueryTransform
from llama_index.core.query_engine import TransformQueryEngine
# A single ImageReader instance is shared by every image suffix registered below.
image_parser = ImageReader(keep_image=True, parse_text=True)

# Start from whatever SimpleDirectoryReader.supported_suffix_fn() returns
# (presumably the default suffix-to-reader mapping -- confirm against the
# library docs), then point the image suffixes at image_parser.
file_extractor = SimpleDirectoryReader.supported_suffix_fn()
for image_suffix in (".jpg", ".png", ".jpeg"):
    file_extractor[image_suffix] = image_parser


# NOTE: we add filename as metadata for all documents
def filename_fn(filename):
    """Return the metadata dict for a document: its name under 'file_name'."""
    return {'file_name': filename}


# Load everything under ./data/receipts and build a vector index over it.
receipt_reader = SimpleDirectoryReader(
    input_dir='./data/receipts',
    file_extractor=file_extractor,
    file_metadata=filename_fn,
)
receipt_documents = receipt_reader.load_data()
receipts_index = GPTVectorStoreIndex.from_documents(receipt_documents)

# Wrap the index's query engine with the image-output query transform.
# NOTE(review): similarity_top_k=1 presumably limits retrieval to the single
# best match, and width=400 presumably sets the requested image width --
# confirm both against the LlamaIndex documentation.
base_engine = receipts_index.as_query_engine(similarity_top_k=1)
image_transform = ImageOutputQueryTransform(width=400)
query_engine = TransformQueryEngine(
    query_engine=base_engine,
    query_transform=image_transform,
)

receipts_query = (
    "When was the last time I went to McDonald's and how much did I spend. "
    "Also show me the receipt from my visit."
)
receipts_response = query_engine.query(receipts_query)

# Debug output: the response type, its printed form, then selected attributes.
print(type(receipts_response))
print(receipts_response)
for label, attribute in (
    ("1. ", "response"),
    ("2. ", "source_nodes"),
    ("3. ", "metadata"),
):
    print(label, getattr(receipts_response, attribute))

# Render the response in the notebook.
display_response(receipts_response)
The official documentation does not include usage instructions for the ImageOutputQueryTransform class, so I am not sure whether I am using it correctly.
Can someone help me figure out why this piece of code is not displaying the image correctly? If my way of using the ImageOutputQueryTransform class is incorrect, what is the right way to use it?