I am getting a ValueError when running the RagEvaluatorPack from llama-index with ragas.
Below is the code
# Judge LLM used to grade RAG responses; temperature=0 for deterministic scoring.
judge_llm = OpenAI(temperature=0, model="gpt-3.5-turbo")
# Downloads the pack into ./pack and returns the pack class (instantiated below).
RagEvaluatorPack = download_llama_pack("RagEvaluatorPack", "./pack")
rag_evaluator = RagEvaluatorPack(
query_engine=query_engine,
rag_dataset=rag_dataset, # defined in 1A
judge_llm=judge_llm,
show_progress=True,
)
# NOTE(review): arun() batches the judge-LLM API calls; batch_size and
# sleep_time_in_seconds exist to stay under OpenAI rate limits. The
# ValueError in the trace below is raised while parsing the judge's reply,
# not by these arguments — presumably the judge returned an empty first
# line that default_parser could not convert to a float; verify the raw
# eval_response.
benchmark_df = await rag_evaluator.arun(
batch_size=2,
sleep_time_in_seconds=60,
)
Below is the stack trace
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
Cell In[23], line 30
16 rag_evaluator = RagEvaluatorPack(
17 query_engine=query_engine,
18 rag_dataset=rag_dataset, # defined in 1A
19 judge_llm=judge_llm,
20 show_progress=True,
21 )
23 ############################################################################
24 # NOTE: If have a lower tier subscription for OpenAI API like Usage Tier 1 #
25 # then you'll need to use different batch_size and sleep_time_in_seconds. #
26 # For Usage Tier 1, settings that seemed to work well were batch_size=5, #
27 # and sleep_time_in_seconds=15 (as of December 2023.) #
28 ############################################################################
---> 30 benchmark_df = await rag_evaluator.arun(
31 batch_size=2, # batches the number of openai api calls to make
32 sleep_time_in_seconds=60, # seconds to sleep before making an api call
33 )
File D:\documents\github\infinitejoy_courses\creating-gpt-chatbots-for-enterprise-useca-vt9QSr1Q-py3.10\lib\site-packages\llama_index\packs\rag_evaluator\base.py:442, in RagEvaluatorPack.arun(self, batch_size, sleep_time_in_seconds)
440 # which is heavily rate-limited
441 eval_batch_size = int(max(batch_size / 4, 1))
--> 442 return await self._amake_evaluations(
443 batch_size=eval_batch_size, sleep_time_in_seconds=eval_sleep_time_in_seconds
444 )
File D:\documents\github\infinitejoy_courses\creating-gpt-chatbots-for-enterprise-useca-vt9QSr1Q-py3.10\lib\site-packages\llama_index\packs\rag_evaluator\base.py:366, in RagEvaluatorPack._amake_evaluations(self, batch_size, sleep_time_in_seconds)
364 # do this in batches to avoid RateLimitError
365 try:
--> 366 eval_results: List[EvaluationResult] = await asyncio.gather(*tasks)
367 except RateLimitError as err:
368 if self.show_progress:
File D:\ProgramData\miniconda3\lib\asyncio\tasks.py:304, in Task.__wakeup(self, future)
302 def __wakeup(self, future):
303 try:
--> 304 future.result()
305 except BaseException as exc:
306 # This may also be a cancellation.
307 self.__step(exc)
File D:\ProgramData\miniconda3\lib\asyncio\tasks.py:232, in Task.__step(***failed resolving arguments***)
228 try:
229 if exc is None:
230 # We use the `send` method directly, because coroutines
231 # don't have `__iter__` and `__next__` methods.
--> 232 result = coro.send(None)
233 else:
234 result = coro.throw(exc)
File D:\documents\github\infinitejoy_courses\creating-gpt-chatbots-for-enterprise-useca-vt9QSr1Q-py3.10\lib\site-packages\llama_index\core\evaluation\correctness.py:146, in CorrectnessEvaluator.aevaluate(***failed resolving arguments***)
138 eval_response = await self._llm.apredict(
139 prompt=self._eval_template,
140 query=query,
141 generated_answer=response,
142 reference_answer=reference or "(NO REFERENCE ANSWER SUPPLIED)",
143 )
145 # Use the parser function
--> 146 score, reasoning = self.parser_function(eval_response)
148 return EvaluationResult(
149 query=query,
150 response=response,
(...)
153 feedback=reasoning,
154 )
File D:\documents\github\infinitejoy_courses\creating-gpt-chatbots-for-enterprise-useca-vt9QSr1Q-py3.10\lib\site-packages\llama_index\core\evaluation\eval_utils.py:183, in default_parser(eval_response)
173 """
174 Default parser function for evaluation response.
175
(...)
180 Tuple[float, str]: A tuple containing the score as a float and the reasoning as a string.
181 """
182 score_str, reasoning_str = eval_response.split("\n", 1)
--> 183 score = float(score_str)
184 reasoning = reasoning_str.lstrip("\n")
185 return score, reasoning
ValueError: could not convert string to float: ''
Below are my dependencies
python = ">=3.10,<3.12"
streamlit = "^1.31.1"
llama-index = "^0.10.9"
llama-index-embeddings-huggingface = "^0.1.1"
llama-index-llms-ollama = "^0.1.1"
ragas = "^0.1.2"
spacy = "^3.7.4"