I started using qdrant today, I am very new to this subject.
I am working on similarity search in images and I added my images to qdrant, but later when I want to add other images to my database, it deletes the existing one and creates a new database, so I lose my old images.
Can I add as many images as I want to my existing collection at any time, can you help me with this? I share my code with you. Thank you for your interest
class ImageEmbedding:
def image_to_database(self):
base_directory = "Images"
all_image_urls = os.listdir(base_directory)
sample_image_urls = all_image_urls
sample_image_urls = list(map(lambda item: f"{base_directory}/{item}",sample_image_urls))
payloads = DataFrame.from_records({"image_url": sample_image_urls})
payloads["model_id"] = 2
images = list(map(lambda el:Image.open(el),payloads["image_url"]))
target_width = 256
def resize_image(image_url):
pil_image = Image.open(image_url)
image_aspect_ratio = pil_image.width / pil_image.height
resized_pil_image = pil_image.resize([target_width,math.floor(target_width * image_aspect_ratio)])
return resized_pil_image
def convert_image_to_base64(pil_image):
image_data = BytesIO()
pil_image.save(image_data,format="JPEG")
base64_string = base64.b64encode(image_data.getvalue()).decode("utf-8")
return base64_string
resized_images = list(map(lambda el: resize_image(el), sample_image_urls))
base64_strings = list(map(lambda el: convert_image_to_base64(el), resized_images))
payloads["base64"] = base64_strings
processor = AutoImageProcessor.from_pretrained("microsoft/resnet-50")
model = ResNetForImageClassification.from_pretrained("microsoft/resnet-50")
inputs = processor(images,return_tensors="pt",)
outputs = model(**inputs)
embeddings = outputs.logits
embedding_length = len(embeddings[0])
load_dotenv()
qclient = QdrantClient(
url = os.getenv('QDRANT_DB_URL'),
api_key = os.getenv('QDRANT_API_KEY'),
)
collection_name = "die_models_images"
collection = qclient.recreate_collection(
collection_name=collection_name,
vectors_config=VectorParams(
size=embedding_length,
distance=Distance.COSINE
)
)
payload_dicts = payloads.to_dict(orient="records")
records = [
models.Record(
id=idx,
payload=payload_dicts[idx],
vector=embeddings[idx]
)
for idx, _ in enumerate(payload_dicts)
]
qclient.upload_records(
collection_name=collection_name,
records=records
)
You're using
recreate_collection
, which will delete and create the specified collection every time it is invoked.Instead, you probably want to use
create_collection
and only call it once:You must also make sure to use unique IDs for each record you insert.