FAISS.from_documents(docs, embeddings) in for loop

749 Views Asked by At

I am having trouble using the FAISS.from_documents(docs, embeddings) function inside a for loop. The text document I want to load is one huge chunk, so I split it into separate text files (numbered 1-86) and planned to load and index them one at a time. However, I keep getting an error at the FAISS.from_documents step.

This is my code:

#export PYTORCH_CUDA_ALLOC_CONF=max_split_size_mb:120
#25 25 2 hr attempt

from langchain.document_loaders import TextLoader 
import textwrap
import os
import PyPDF2
from langchain.text_splitter import CharacterTextSplitter
import os
from langchain.embeddings import HuggingFaceEmbeddings 
from langchain.llms import HuggingFaceHub
import time
from langchain.document_loaders import TextLoader 
from langchain.vectorstores import FAISS
from langchain.chains.question_answering import load_qa_chain

# Build one FAISS vector index per input text file.
#
# Each file is loaded, split into ~1000-character chunks, embedded with the
# default HuggingFace embedding model and indexed with FAISS.  Every index is
# appended to ``faiss_objects`` and, for compatibility with the original
# script, also published as the module-level name ``variable_<i>``.

faiss_objects = []  # One FAISS index per successfully indexed text file
num = 4
queryText = "what is ISBN number"

# --- Loop-invariant setup --------------------------------------------------
# None of the objects below depend on the file being processed, so they are
# created once instead of being rebuilt on every iteration.
os.environ["HUGGINGFACEHUB_API_TOKEN"] = ' '

embeddings = HuggingFaceEmbeddings()
text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)

llm = HuggingFaceHub(repo_id="google/flan-t5-xxl", model_kwargs={"temperature":0.8, "max_length":512})
chain = load_qa_chain(llm, chain_type="stuff")

# --- Per-file indexing -----------------------------------------------------
# NOTE(review): range(0, 1) only processes file " 0.txt"; widen the range to
# cover all of the numbered text files once a single file works.
for i in range(0, 1):
    # The space before the file number is kept exactly as in the original
    # path; presumably the files on disk are named that way -- verify.
    v = "/home/r20/Downloads/Hugging_dace/pdf2txt/TXT/ " + str(i) + ".txt"
    var_name = f"variable_{i}"

    loader = TextLoader(v)
    document = loader.load()
    docs = text_splitter.split_documents(document)

    if not docs:
        # An empty file yields no chunks, and there is nothing to embed or
        # index; skip it rather than handing FAISS an empty document list.
        print(f"skipping {v}: no text chunks to index")
        continue

    # FAISS.from_documents is the classmethod that embeds the chunks and
    # builds the index (the original called a non-existent FAISS.add_item).
    index = FAISS.from_documents(docs, embeddings)

    faiss_objects.append(index)
    # Kept for backward compatibility: expose the index as ``variable_<i>``.
    globals()[var_name] = index

# Example query against the indexes (left disabled, as in the original).
# Separate indexes can also be combined into one via FAISS.merge_from.
#
# for index in faiss_objects:
#     docsResult = index.similarity_search(queryText)
#     print(chain.run(input_documents = docsResult, question = queryText))
#     time.sleep(2)
#
#queryText = (str(input("TYPE YOUR QUERY  "))+ "if don't know just answer: 0")

print('done')
0

There are 0 best solutions below