I have an issue in using the FAISS.from_documents(docs, embeddings)
function in a for loop as the text document I want to load is in a huge chunk so I thought I could upload separate chunks in txt file 1-86 but I keep getting error in the FAISS.from_document
part.
This is my code:
#export PYTORCH_CUDA_ALLOC_CONF=max_split_size_mb:120
#25 25 2 hr attempt
from langchain.document_loaders import TextLoader
import textwrap
import os
import PyPDF2
from langchain.text_splitter import CharacterTextSplitter
import os
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.llms import HuggingFaceHub
import time
from langchain.document_loaders import TextLoader
from langchain.vectorstores import FAISS
from langchain.chains.question_answering import load_qa_chain
faiss_objects = [] # Create an empty list to store FAISS objects
num = 4
queryText = "what is ISBN number"
for i in range(0,1):
#------------------------------------------------------------------------------------------------------------------------------------------
os.environ["HUGGINGFACEHUB_API_TOKEN"] = ' '
db = [] # Initialize an empty list
dir = "/home/r20/Downloads/Hugging_dace/pdf2txt/TXT/ 4.txt"
v = "/home/r20/Downloads/Hugging_dace/pdf2txt/TXT/ " + str(i) + ".txt"
var_name = f"variable_{i}"
globals()[var_name] = 1
loader = TextLoader(v)
document = loader.load()
text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
docs = text_splitter.split_documents(document)
embeddings = HuggingFaceEmbeddings()
#print(embeddings)
#print(FAISS.from_documents(docs, embeddings))
llm= HuggingFaceHub(repo_id="google/flan-t5-xxl", model_kwargs={"temperature":0.8, "max_length":512})
chain = load_qa_chain(llm, chain_type="stuff")
globals()[var_name] = FAISS.add_item(docs, embeddings)
'''docsResult = globals()[var_name].similarity_search(queryText)
print(chain.run(input_documents = docsResult, question = queryText))
time.sleep(2)
'''
#queryText = (str(input("TYPE YOUR QUERY "))+ "if don't know just answer: 0")
print('done')