pymongo.errors.OperationFailure: Error in specification

  • I want to set up semantic search with LangChain and MongoDB.
  • I created a free cluster, a database, a collection, and a search index (definition sketched below).
  • The params (DB_NAME, COLLECTION_NAME, ATLAS_VECTOR_SEARCH_INDEX_NAME) are properly set up.
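For reference, the search index follows the Atlas Vector Search definition from the LangChain integration guide linked in the code; it looks roughly like this (the "embedding" path matches the field used below, and 1536 dimensions is assumed for OpenAI's default embedding model):

{
  "fields": [
    {
      "type": "vector",
      "path": "embedding",
      "numDimensions": 1536,
      "similarity": "cosine"
    }
  ]
}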

I get this error : raise OperationFailure(errmsg, code, response, max_wire_version) pymongo.errors.OperationFailure: Error in specification { name: "embedding_SON([('$', '2dsphere')])", key: { embedding: { $: "2dsphere" } } } :: caused by :: Values in v:2 index key pattern cannot be of type object. Only numbers > 0, numbers < 0, and strings are allowed., full error: {'ok': 0.0, 'errmsg': 'Error in specification { name: "embedding_SON([('$', '2dsphere')])", key: { embedding: { $: "2dsphere" } } } :: caused by :: Values in v:2 index key pattern cannot be of type object. Only numbers > 0, numbers < 0, and strings are allowed.', 'code': 67, 'codeName': 'CannotCreateIndex', '$clusterTime': {'clusterTime': Timestamp(1711621244, 1), 'signature': {'hash': b'pV~@\xb12@\x03\x17d\xaf\x1d\xb1aq\xb7IS\xd3\xbe', 'keyId': 7302093767895416833}}, 'operationTime': Timestamp(1711621244, 1)}

I can't figure out what this error means. Any idea?
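The only pattern I can spot is that the index key value ends up as an object instead of a number or a string. Here is a minimal sketch of what I think the server is rejecting (made-up local connection, not my actual cluster):

from pymongo import MongoClient, IndexModel
from bson import SON

coll = MongoClient("mongodb://localhost:27017")["db"]["courses"]

# Accepted: key pattern values are plain numbers or strings (1, -1, "text", "2dsphere", ...)
coll.create_indexes([IndexModel([("embedding", 1)])])

# Rejected with code 67 (CannotCreateIndex): the SON value turns the key pattern
# entry into a sub-document, which is what the error above complains about
coll.create_indexes([IndexModel([("embedding", SON([("$**", "2dsphere")]))])])

Here is the full script: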

from pymongo import MongoClient, IndexModel, ASCENDING
from bson import SON
from langchain.chains import RetrievalQA
from langchain_community.document_loaders import TextLoader
from langchain_community.vectorstores import MongoDBAtlasVectorSearch
from langchain_openai import ChatOpenAI, OpenAI, OpenAIEmbeddings
from langchain.prompts import PromptTemplate
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.runnables import RunnablePassthrough
from langchain_core.output_parsers import StrOutputParser
from langchain.retrievers import ContextualCompressionRetriever
from langchain.retrievers.document_compressors import LLMChainExtractor
from dotenv import load_dotenv
import os
import argparse
import warnings

load_dotenv()

# Filter out the UserWarning from langchain
warnings.filterwarnings("ignore",
                        category=UserWarning,
                        module="langchain.chains.llm")

# https://python.langchain.com/docs/integrations/vectorstores/mongodb_atlas
# (Getting started with Atlas) https://www.mongodb.com/docs/atlas/getting-started/
# (Atlas - LangChain Integration) https://www.mongodb.com/docs/atlas/atlas-vector-search/ai-integrations/langchain/#create-the-atlas-vector-search-index


# Process arguments
parser = argparse.ArgumentParser(description='Atlas Vector Search Demo')
parser.add_argument('-q', '--question', help="The question to ask")
args = parser.parse_args()

if args.question is None:
    # Some questions to try...
    query = "How big is the telecom company?"
    query = "Who started AT&T?"
    #query = "Where is AT&T based?"
    #query = "What venues are AT&T branded?"
    #query = "How big is BofA?"
    #query = "When was the financial institution started?"
    #query = "Does the bank have an investment arm?"
    #query = "Where does the bank's revenue come from?"
    #query = "Tell me about charity."
    #query = "What buildings are BofA branded?"

else:
    query = args.question

DB_NAME = "db"
COLLECTION_NAME = "courses"
ATLAS_VECTOR_SEARCH_INDEX_NAME = "vector_search_index"
DB_PASSWORD = os.getenv("DB_PASSWORD")
ATLAS_CONNECTION_STRING = "connection string"

# initialize MongoDB python client
client = MongoClient(ATLAS_CONNECTION_STRING)

collection = client[DB_NAME][COLLECTION_NAME]

# Create the IndexModel for the knnVector
knn_vector_index = IndexModel([("embedding", SON([("$**", "2dsphere")]))])

# Add the index to the collection
collection.create_indexes([knn_vector_index])
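# Note: this create_indexes() call is what raises the OperationFailure quoted above.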

# Load the FAQ text file
loader = TextLoader("./docs/faq.txt")
data = loader.load()

# Split the text into document chunks
text_splitter = RecursiveCharacterTextSplitter(chunk_size=200, chunk_overlap=20)
docs = text_splitter.split_documents(data)
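# Note: with the from_documents() call below commented out, these chunks are never
# actually embedded or inserted into the collection.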


print("\nYour question:")
print("-------------")
print(query)


llm = OpenAI()
compressor = LLMChainExtractor.from_llm(llm)


# Alternative: create the vector store directly from the split documents
# vectorStore = MongoDBAtlasVectorSearch.from_documents(
#     documents=docs,
#     embedding=OpenAIEmbeddings(disallowed_special=()),
#     collection=collection,
#     index_name=ATLAS_VECTOR_SEARCH_INDEX_NAME
# )

vectorStore = MongoDBAtlasVectorSearch(
    collection,
    OpenAIEmbeddings(),
    index_name=ATLAS_VECTOR_SEARCH_INDEX_NAME
)
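# Note: index_name refers to the Atlas Vector Search index defined in the Atlas UI
# (see the docs linked above), not to a regular collection index.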

docs = vectorStore.max_marginal_relevance_search(query, k=1)  # lowercase k, not K
print(docs[0])


compression_retriever = ContextualCompressionRetriever(
    base_compressor=compressor,
    base_retriever=vectorStore.as_retriever()
)

print("\nAI Response:")
print("-----------")
compressed_docs = compression_retriever.get_relevant_documents(query)
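# Presumably the compressed documents are what should be printed as the response:
for doc in compressed_docs:
    print(doc.page_content)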