I'm currently having trouble executing load_summarize_chain with a custom prompt on text that has been split into chunks, using chain_type='map_reduce'.
The code I wrote:
def chunk_data(docs, chunk_size=800, chunk_overlap=50) -> list:
    """Split ``docs`` into chunks with a RecursiveCharacterTextSplitter.

    Args:
        docs: Documents handed to the splitter's ``split_documents``.
        chunk_size: Forwarded to the splitter as ``chunk_size``.
        chunk_overlap: Forwarded to the splitter as ``chunk_overlap``.

    Returns:
        Whatever ``split_documents`` returns for ``docs``.
    """
    return RecursiveCharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
    ).split_documents(docs)
# Chunk the loaded document(s) using chunk_data's default size and overlap.
documents = chunk_data(docs=doc)

# Chat model that is handed to load_summarize_chain as ``llm``.
llm_summary = ChatOpenAI(
    model_name="gpt-3.5-turbo-0125",
    temperature=0.3,
)
# Prompt text for the map step; the chunk is injected at the `{documents}`
# placeholder.
mapreduce_prompt = """
You are an expert in Data Science and Data Analytics. You can easilty understand Data Science scientific papers.
Please summarize the following text:
Text: `{documents}`
Summary:
"""

# Template object passed to load_summarize_chain as ``map_prompt``.
map_prompt_template = PromptTemplate(
    template=mapreduce_prompt,
    input_variables=['documents'],
)
# Prompt text for the final combine step; the text to summarize is injected at
# the `{documents}` placeholder.
final_comb_prompt = """
You are an expert in Data Science and Data Analytics. You can easilty understand Data Science scientific papers.
Now I want you to take a deep breath and provide a final summary of the entire text with these important points.
Add a Generic Motivation Title.
Start with comprehensive summary. Limit yourself with 250 word. In the end add key takeaways in up to 5 bullit points.
Text: `{documents}`
"""

# Template object passed to load_summarize_chain as ``combine_prompt``.
final_comb_prompt_template = PromptTemplate(
    template=final_comb_prompt,
    input_variables=['documents'],
)
# The map_reduce loader looks for the placeholder name "text" in the prompts
# unless told otherwise. Both custom prompts here use "{documents}", so the
# placeholder name is passed explicitly for the map step and for the combine
# step. Without these two arguments, building the chain fails with
# "document_variable_name text was not found in llm_chain input_variables".
summary_chain = load_summarize_chain(
    llm=llm_summary,
    chain_type='map_reduce',
    map_prompt=map_prompt_template,
    combine_prompt=final_comb_prompt_template,
    map_reduce_document_variable_name='documents',
    combine_document_variable_name='documents',
    verbose=False,
)
The error which I get is:
---------------------------------------------------------------------------
ValidationError Traceback (most recent call last)
Cell In[80], line 1
----> 1 summary_chain = load_summarize_chain(
2 llm=llm_summary,
3 chain_type='map_reduce',
4 map_prompt=map_prompt_template,
5 combine_prompt=final_comb_prompt_template,
6 verbose=False
7 )
File c:\Users...\venv\Lib\site-packages\langchain\chains\summarize\__init__.py:160, in load_summarize_chain(llm, chain_type, verbose, **kwargs)
155 if chain_type not in loader_mapping:
156 raise ValueError(
157 f"Got unsupported chain type: {chain_type}. "
158 f"Should be one of {loader_mapping.keys()}"
159 )
--> 160 return loader_mapping[chain_type](llm, verbose=verbose, **kwargs)
File c:\Users\...\venv\Lib\site-packages\langchain\chains\summarize\__init__.py:67, in _load_map_reduce_chain(llm, map_prompt, combine_prompt, combine_document_variable_name, map_reduce_document_variable_name, collapse_prompt, reduce_llm, collapse_llm, verbose, token_max, callbacks, collapse_max_retries, **kwargs)
63 reduce_chain = LLMChain(
64 llm=_reduce_llm, prompt=combine_prompt, verbose=verbose, callbacks=callbacks
65 )
66 # TODO: document prompt
---> 67 combine_documents_chain = StuffDocumentsChain(
...
343 object_setattr(__pydantic_self__, '__dict__', values)
ValidationError: 1 validation error for StuffDocumentsChain
__root__
document_variable_name text was not found in llm_chain input_variables: ['documents'] (type=value_error)
I have spent quite some time on this but could not find any hint about what I did wrong. Thank you in advance.