Problem initializing ChatGoogleGenerativeAI class with a 'NoneType' object error

88 Views Asked by At

I need to convert about 2 GB of research-paper PDFs into question (prompt) and answer pairs. For that I'm using the Gemini API with a RAG system.

But while running my script to generate them, I get this error:

ValidationError: 1 validation error for ChatGoogleGenerativeAI
__root__
  'NoneType' object does not support item assignment (type=type_error)

Details:

  • I have installed the required libraries including langchain, openai, python-dotenv, pypdf, faiss-cpu, chromadb, langchain_google_genai, and google-generativeai.
  • I have set up my Gemini API key using os.environ["GOOGLE_API_KEY"].
  • The directory path to my PDFs is correctly specified.
  • I'm using the ChatGoogleGenerativeAI class to initialize a chat model with the "gemini-pro" model.

Code:

import os
import json
import re
import google.generativeai as genai
import chromadb
from chromadb import Documents, EmbeddingFunction, Embeddings
from langchain_google_genai.chat_models import ChatGoogleGenerativeAI
from langchain.text_splitter import TokenTextSplitter
from langchain.docstore.document import Document
from langchain.document_loaders import PyPDFLoader
from langchain.prompts import PromptTemplate
from langchain.chains.summarize import load_summarize_chain

# Set your Gemini API key. A key that is already exported in the environment
# is kept; the placeholder below is only a fallback and must be replaced with
# a real key (preferably supplied via the environment, not committed in code).
os.environ.setdefault("GOOGLE_API_KEY", "MY_KEY")

# Set directory path to your PDFs
directory_path = '/kaggle/input/deepmind-research-papers'  # Replace with the actual path

# Set up initial Gemini Chat model (created once and shared by the
# question-generation chain built in generate_prompts_and_answers)
chat_gemini = ChatGoogleGenerativeAI(model="gemini-pro")


# Function to load a PDF file
def load_pdf(file_path):
    """Return the extracted text of every page of the PDF at ``file_path``.

    Page texts are concatenated in page order with no separator between them.
    """
    from pypdf import PdfReader

    pdf = PdfReader(file_path)
    return "".join(page.extract_text() for page in pdf.pages)

# Function to split text into chunks
def split_text(text: str):
    """Split ``text`` on the separator newline-space-newline.

    Returns the pieces in their original order with empty pieces removed.
    """
    pieces = re.split('\n \n', text)
    # filter(None, ...) drops exactly the empty strings, since every piece is a str.
    return list(filter(None, pieces))

# Class for the RAG embedding function (assuming chromadb is installed)
class RAGEmbeddingFunction(EmbeddingFunction):
    """Chroma embedding function that delegates to ``genai.embed_content``.

    Embeddings are requested from the "models/embedding-001" model with the
    "retrieval_document" task type.
    """

    def __call__(self, input: Documents) -> Embeddings:
        """Embed ``input`` and return the "embedding" entry of the response.

        Raises:
            KeyError: if the GOOGLE_API_KEY environment variable is not set.
        """
        # The original passed ``gemini_api_key``, a name this script never
        # defines (NameError on first call). The key is stored in the
        # GOOGLE_API_KEY environment variable, so read it from there.
        genai.configure(api_key=os.environ["GOOGLE_API_KEY"])
        model = "models/embedding-001"
        title = "Custom query"
        return genai.embed_content(model=model,
                                   content=input,
                                   task_type="retrieval_document",
                                   title=title)["embedding"]

# Function to create or load a Chroma DB
def create_chroma_db(documents, path, name):
    """Open the persistent Chroma collection ``name`` under ``path``, creating it if needed.

    A newly created (or still empty) collection is filled with ``documents``,
    one entry per document, using the document's position as its id. A
    collection that already holds entries is returned unchanged.

    Args:
        documents: iterable of text chunks to store.
        path: directory used by the persistent Chroma client.
        name: collection name.

    Returns:
        Tuple ``(collection, name)``.
    """
    chroma_client = chromadb.PersistentClient(path=path)
    # get_or_create_collection removes the dependency on which exception
    # get_collection raises for a missing collection. The original caught
    # ``chromadb.CollectionNotFoundError``, which does not appear to be part
    # of chromadb's public API, so the except clause itself would fail.
    db = chroma_client.get_or_create_collection(name=name, embedding_function=RAGEmbeddingFunction())
    if db.count() > 0:
        print(f"Loaded existing Chroma collection: {name}")
        return db, name

    for i, d in enumerate(documents):
        db.add(documents=d, ids=str(i))
    print(f"Created new Chroma collection: {name}")
    return db, name

# Function to load a Chroma collection
def load_chroma_collection(path, name):
    """Return the existing collection ``name`` from the persistent Chroma store at ``path``."""
    client = chromadb.PersistentClient(path=path)
    embedder = RAGEmbeddingFunction()
    return client.get_collection(name=name, embedding_function=embedder)

# Function to retrieve relevant passage
def get_relevant_passage(query, db, n_results):
    """Query ``db`` with the single text ``query`` and return its matched documents.

    ``db.query`` is called with a one-element ``query_texts`` list, so the
    first entry of the result's 'documents' field is the one for ``query``.
    """
    results = db.query(query_texts=[query], n_results=n_results)
    documents_per_query = results['documents']
    return documents_per_query[0]

# Function to construct the RAG prompt
def make_rag_prompt(query, relevant_passage):
    """Build the answer-generation prompt for ``query`` from ``relevant_passage``.

    Single and double quotes are removed from the passage and its newlines
    are turned into spaces before it is inserted into the template.
    """
    # One translate pass: drop both quote characters, map newline to space.
    cleanup_table = str.maketrans({"'": None, '"': None, "\n": " "})
    cleaned_passage = relevant_passage.translate(cleanup_table)
    template = """You are a helpful and informative bot that answers questions using text from the reference passage included below. \
    Be sure to respond in a complete sentence, being comprehensive, including all relevant background information. \
    However, you are talking to a non-technical audience, so be sure to break down complicated concepts and \
    strike a friendly and conversational tone. \
    If the passage is irrelevant to the answer, you may ignore it.
    QUESTION: '{query}'
    PASSAGE: '{relevant_passage}'
•
    ANSWER:
    """
    return template.format(query=query, relevant_passage=cleaned_passage)

# Function to generate the answer using Gemini
def generate_answer(prompt):
    """Send ``prompt`` to the 'gemini-pro' generative model and return the response text.

    Raises:
        KeyError: if the GOOGLE_API_KEY environment variable is not set.
    """
    # The original passed ``gemini_api_key``, a name this script never
    # defines (NameError on first call). The key is stored in the
    # GOOGLE_API_KEY environment variable, so read it from there.
    genai.configure(api_key=os.environ["GOOGLE_API_KEY"])
    model = genai.GenerativeModel('gemini-pro')
    answer = model.generate_content(prompt)
    return answer.text

# Function to generate prompts and answers for each PDF file
def generate_prompts_and_answers(file_path):
    """Generate question/answer pairs for the PDF at ``file_path``.

    The PDF text is split into chunks and stored in a per-file Chroma
    collection. For each chunk the question-generation chain produces
    newline-separated questions; each non-blank question is answered from
    the three most relevant stored passages.

    Returns:
        List of ``{"question": ..., "answer": ...}`` dicts.
    """
    pdf_text = load_pdf(file_path)
    chunked_text = split_text(pdf_text)

    # Create or load Chroma DB.
    # Each PDF gets its own collection: with a single shared name, every PDF
    # after the first would load the first PDF's collection and answer its
    # questions from the wrong document.
    db, _ = create_chroma_db(documents=chunked_text,
                             path="/path/to/persistent/directory",
                             name=_collection_name_for(file_path))

    ques_gen_chain = load_summarize_chain(llm=chat_gemini,
                                          chain_type="refine",
                                          verbose=True,
                                          question_prompt=PROMPT_QUESTIONS,
                                          refine_prompt=REFINE_PROMPT_QUESTIONS)

    prompts_and_answers = []
    for chunk in chunked_text:
        document_ques_gen = [Document(page_content=chunk)]
        ques = ques_gen_chain.run(document_ques_gen)

        for line in ques.split("\n"):
            question = line.strip()
            if not question:
                # Blank lines in the model output are not questions; skipping
                # them avoids pointless retrieval and generation calls.
                continue
            relevant_text = get_relevant_passage(query=question, db=db, n_results=3)
            prompt = make_rag_prompt(query=question, relevant_passage="".join(relevant_text))
            answer = generate_answer(prompt)
            prompts_and_answers.append({"question": question, "answer": answer})

    return prompts_and_answers


def _collection_name_for(file_path):
    """Return a stable collection name derived from the base name of ``file_path``."""
    import hashlib

    # A short hex digest keeps the name to letters, digits and one underscore,
    # which should satisfy Chroma's collection-name rules for any file name
    # (NOTE(review): confirm against the installed chromadb version).
    digest = hashlib.sha1(os.path.basename(file_path).encode("utf-8")).hexdigest()[:16]
    return f"rag_{digest}"

# Prepare prompts template
# Question-generation template; {text} is its only placeholder.
# The separator lines are plain dashes: the original "\-" was an invalid
# escape sequence that also put a literal backslash into the prompt.
prompt_template = """
You are an expert at creating questions based on coding materials and documentation.
Your goal is to prepare a coder or programmer for their exam and coding tests.
You do this by asking questions about the text below:
------------
{text}
------------
Create an array of questions and an array of corresponding answers that will prepare the coders or programmers for their tests.
Make sure not to lose any important information.

Remember you must only respond in array
QUESTIONS:
"""

PROMPT_QUESTIONS = PromptTemplate(template=prompt_template, input_variables=["text"])

# Refinement template; placeholders are {existing_answer} (the questions
# produced so far) and {text} (the additional context).
# The separator lines are plain dashes: the original "\-" was an invalid
# escape sequence that also put a literal backslash into the prompt.
refine_template = ("""
You are an expert at creating practice questions based on coding material and documentation.
Your goal is to help a coder or programmer prepare for a coding test.
We have received some practice questions to a certain extent: {existing_answer}.
We have the option to refine the existing questions or add new ones.
(only if necessary) with some more context below.
------------
{text}
------------
Given the new context, refine the original questions in English.
If the context is not helpful, please provide the original questions.
QUESTIONS:
"""
)

REFINE_PROMPT_QUESTIONS = PromptTemplate(
    input_variables=["existing_answer", "text"],
    template=refine_template,
)

# Process each PDF file in the directory
# Walk the input directory and handle every PDF independently: a failure on
# one file is reported and the loop moves on to the next file.
for file_name in os.listdir(directory_path):
    if not file_name.endswith('.pdf'):
        continue

    file_path = os.path.join(directory_path, file_name)
    try:
        prompts_and_answers_all = generate_prompts_and_answers(file_path)

        # Write this PDF's results to its own JSON file right away.
        payload = {"prompts_and_answers": prompts_and_answers_all}
        with open(f"{file_name}_prompts_and_answers.json", "w", encoding="utf-8") as out:
            json.dump(payload, out, indent=4)

        print(f"Processed {file_name} successfully.")
    except Exception as exc:
        print(f"Error processing {file_name}: {exc}")

Troubleshooting Steps Taken:

  1. I have double-checked the definition of the ChatGoogleGenerativeAI class and ensured that it can be instantiated with the specified arguments.
  2. I verified that the "gemini-pro" model is available and properly installed.
  3. I checked for any typos or incorrect imports in the code.
  4. I attempted to debug the issue by adding print statements but haven't been able to pinpoint the exact cause of the error.

Expected Outcome:

I expect to successfully initialize the ChatGoogleGenerativeAI class with the "gemini-pro" model argument without encountering the 'NoneType' object does not support item assignment error.

0

There are 0 best solutions below