Here's my Python file — a Streamlit app where you can upload documents and then ask questions about them; basically a standard "chat with your document" chatbot.
How should I modify it so that even if I close the browser tab and open it again, the vector store is saved and the user doesn't have to re-upload the files? In fact, the user should be able to continuously upload more files over time.
import os
from apikey import apikey
import streamlit as st
from langchain.chat_models import ChatOpenAI
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.vectorstores import Chroma
from langchain.chains import ConversationalRetrievalChain
# Expose the API key via the environment so the OpenAI client used by
# langchain's ChatOpenAI / OpenAIEmbeddings picks it up automatically.
os.environ["OPENAI_API_KEY"] = apikey
# Define a function to clear the conversation history stored in Streamlit's session state.
def clear_history():
    """Drop any stored chat history from Streamlit's session state.

    Used as the 'Add File' button callback so a newly ingested document
    starts with a clean conversation.
    """
    # pop() with a default is a no-op when the key is absent, so no
    # membership check is needed.
    st.session_state.pop('history', None)
# Set the title of the web page displayed to the user.
st.title('Chat with Document')
# Create a file uploader widget allowing users to upload documents in PDF, DOCX, or TXT format.
uploaded_file = st.file_uploader('Upload file:',type=['pdf','docx', 'txt'])
# Create a button that, when clicked, triggers the clear_history function to reset the session.
add_file = st.button('Add File', on_click=clear_history)
# Check if a file has been uploaded and the 'Add File' button has been pressed.
if uploaded_file and add_file:
with st.spinner('Reading, chunking and embedding file...'):
# Read the uploaded file's content as bytes.
bytes_data = uploaded_file.read()
# Construct a file path to save the uploaded file temporarily.
file_name = os.path. join('./', uploaded_file.name)
# Save the uploaded file to the constructed path.
with open (file_name, 'wb') as f:
f.write(bytes_data)
# Determine the file's extension to decide on the appropriate loader.
name, extension = os.path.splitext(file_name)
# Select the loader based on the file extension.
if extension == '.pdf':
from langchain.document_loaders import PyPDFLoader
loader = PyPDFLoader(file_name)
elif extension == '.docx':
from langchain.document_loaders import Docx2txtLoader
loader = Docx2txtLoader(file_name)
elif extension == '.txt':
from langchain.document_loaders import TextLoader
loader = TextLoader(file_name)
else:
st.write('Document format is not supported!')
# Load the document using the selected loader.
documents = loader.load()
# Initialize the text splitter and split the loaded document into manageable chunks.
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
chunks = text_splitter.split_documents(documents)
# Initialize embeddings and vector store for the document chunks.
embeddings = OpenAIEmbeddings()
vector_store = Chroma.from_documents(chunks, embeddings)
# Initialize a ChatOpenAI instance with GPT-3.5 turbo model and temperature set to 0 for objective responses.
llm = ChatOpenAI(model='gpt-3.5-turbo', temperature=0)
# Create a retriever from the vector store for document retrieval.
retriever=vector_store.as_retriever()
# Initialize a conversational retrieval chain with the language model and the retriever.
crc = ConversationalRetrievalChain.from_llm(llm, retriever)
# Store the initialized conversational retrieval chain in Streamlit's session state.
st.session_state.crc = crc
# Display a success message once the file has been processed.
st.success('File uploaded, chunked and embedded successfully')
# Create an input widget for users to type in their questions.
# ---------------------------------------------------------------------------
# Question answering over the ingested documents.
# ---------------------------------------------------------------------------
question = st.text_input('Input your question')

if question:
    # Session state is wiped when the browser tab closes. If a persisted
    # vector store exists on disk, rebuild the retrieval chain from it so
    # the user does not have to re-upload their documents.
    if 'crc' not in st.session_state and os.path.isdir('./chroma_db'):
        embeddings = OpenAIEmbeddings()
        vector_store = Chroma(persist_directory='./chroma_db',
                              embedding_function=embeddings)
        llm = ChatOpenAI(model='gpt-3.5-turbo', temperature=0)
        st.session_state.crc = ConversationalRetrievalChain.from_llm(
            llm, vector_store.as_retriever())

    if 'crc' in st.session_state:
        crc = st.session_state.crc

        # Lazily initialise the chat history on first question.
        if 'history' not in st.session_state:
            st.session_state['history'] = []

        # Pass the accumulated history so follow-up questions have context.
        response = crc.run({
            'question': question,
            'chat_history': st.session_state['history']
        })

        # Record the exchange, then show the answer and the full transcript
        # (the transcript includes the pair just appended).
        st.session_state['history'].append((question, response))
        st.write(response)
        for prompts in st.session_state['history']:
            st.write("Question: " + prompts[0])
            st.write("Answer: " + prompts[1])
    else:
        # BUG FIX: the original silently ignored questions asked before any
        # document had been processed; tell the user what to do instead.
        st.warning('Please upload and add a document first.')