I would like to implement something like this - https://github.com/wombyz/gpt4all_langchain_chatbots/blob/main/custom_knowledge_chatbot.py
But since GGML models are slower and less efficient, I have decided to go with a GGUF model (https://huggingface.co/TheBloke/airoboros-l2-7B-gpt4-2.0-GGUF), and instead of LlamaCppEmbeddings (https://huggingface.co/Pi3141/alpaca-native-7B-ggml/commit/397e872bf4c83f4c642317a5bf65ce84a105786e) I am using SentenceTransformer("all-mpnet-base-v2").
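For context, the SentenceTransformer model produces plain numpy vectors from encode(), e.g.:

from sentence_transformers import SentenceTransformer

embeddings_model = SentenceTransformer("all-mpnet-base-v2")
vectors = embeddings_model.encode(["example sentence"])
print(vectors.shape)  # (1, 768) for all-mpnet-base-v2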
This is my code -
#from pygpt4all.models.gpt4all import GPT4All  # not needed; GPT4All is imported from langchain.llms below
from sentence_transformers import SentenceTransformer
from pprint import pprint
#import streamlit as st
from langchain import PromptTemplate, LLMChain
from langchain.document_loaders import TextLoader
from langchain.embeddings import LlamaCppEmbeddings
from langchain.llms import GPT4All
from langchain.text_splitter import RecursiveCharacterTextSplitter
#from langchain.callbacks.base import CallbackManager
from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
from langchain.vectorstores.faiss import FAISS
from llama_index import download_loader
from langchain.document_loaders import UnstructuredURLLoader
from langchain.docstore.document import Document
from unstructured.cleaners.core import remove_punctuation,clean,clean_extra_whitespace
import PyPDF2
import re
from langchain.chains import ConversationalRetrievalChain
from pathlib import Path
from typing import List, Tuple
import requests
from bs4 import BeautifulSoup
gpt4all_path = './models/airoboros-l2-7B-gpt4-2.0.Q4_K_M.gguf'
llm = GPT4All(model=gpt4all_path, max_tokens=2048, verbose=True, temp=0.1)
# embedding model used to build and query the FAISS index
embeddings_model = SentenceTransformer("all-mpnet-base-v2")
def load_documents() -> List[str]:
    loader = TextLoader('./docs/cleaned_q_and_a.txt')
    documents = loader.load()
    texts = [doc.page_content for doc in documents]
    return texts

def load_meta_data_documents() -> List:
    loader = TextLoader('./docs/cleaned_q_and_a.txt')
    return loader.load()
def create_index(texts: List[str], embeddings_model: SentenceTransformer, chunks: List) -> FAISS:
    # Generate embeddings for the texts
    embeddings = embeddings_model.encode(texts, show_progress_bar=True)
    metadatas = [doc.metadata for doc in chunks]
    # Prepare text_embeddings as a list of tuples for FAISS.from_embeddings
    text_embeddings = [(text, embedding) for text, embedding in zip(texts, embeddings)]
    # Call FAISS.from_embeddings
    search_index = FAISS.from_embeddings(text_embeddings=text_embeddings, embedding=embeddings_model, metadatas=metadatas)
    return search_index
docs = load_documents()
metadata = load_meta_data_documents()
vector_store = create_index(docs, embeddings_model, metadata)
# Save Index (use this to save the index for later use)
# Comment the line below after running once successfully (IMPORTANT)
vector_store.save_local("q_and_a_index")
index = FAISS.load_local("./q_and_a_index/", embeddings_model)
qa = ConversationalRetrievalChain.from_llm(llm, index.as_retriever(), max_tokens_limit=500)
chat_history = []
print("Custom Knowledge ChatBot")
while True:
    query = input("Please enter your question: ")
    if query.lower() == 'exit':
        break
    #processed_query = embeddings_model.encode([query])
    result = qa({"question": query, "chat_history": chat_history})
    pattern = r'Helpful Answer:.*'
    match = re.search(pattern, result['answer'], re.DOTALL)
    if match:
        # Only display the matched part, which is the relevant answer
        print(match.group())
    else:
        print("Answer:", result['answer'])
This is the error I am getting.
I tried encoding the query:
processed_query = embeddings_model.encode([query])
result = qa({"question": processed_query, "chat_history": chat_history})
When I tried this, I received the following error -
trans_features = {'input_ids': features['input_ids'], 'attention_mask': features['attention_mask']}
if 'token_type_ids' in features:
    trans_features['token_type_ids'] = features['token_type_ids']
IndexError: only integers, slices (`:`), ellipsis (`...`), numpy.newaxis (`None`) and integer or boolean arrays are valid indices
packages in my environment - https://pastebin.com/L4wqnwyi