from llama_index.core import SimpleDirectoryReader
from llama_index.core.llama_dataset.generator import RagDatasetGenerator
from llama_index.llms.openai import OpenAI

# Read every document found under ./data; the questions are generated from these.
documents = SimpleDirectoryReader("./data").load_data()
llm = OpenAI(model="gpt-3.5-turbo")

# num_questions_per_chunk=1 asks for a single question per chunk, which keeps
# the generated set small.
dataset_generator = RagDatasetGenerator.from_documents(
    documents,
    llm=llm,
    num_questions_per_chunk=1,
)

# Generate the questions and print them as a numbered list (1-based).
dataset = dataset_generator.generate_questions_from_nodes()
examples = dataset.examples
for i, example in enumerate(examples):
    print(f"{i + 1}. {example.query}")
# 显示结果 1. Question: How did Ultron initially come into existence and what was his ultimate goal? 2. Question: What event prompts the Avengers to devise a plan involving time travel to undo Thanos's actions in "Avengers: Endgame"? 3. Question: How does Thanos acquire the Power Stone and what events transpire after he obtains it? 4. Question: How does Thanos ultimately achieve his goal of completing the Gauntlet and causing half of all life across the universe to disintegrate in "Avengers: Infinity War"? 5. Question: How does Loki initially gain access to Earth and what is his ultimate goal upon arriving?
# Generate the dataset; each example carries the question (example.query) and
# a reference answer (example.reference_answer) that is printed as Ground Truth.
dataset = dataset_generator.generate_dataset_from_nodes()
examples = dataset.examples
for i, example in enumerate(examples):
    print(f"{i + 1}. {example.query}")
    # Only the first 100 characters of the reference answer are shown.
    print(f"Ground Truth: {example.reference_answer[:100]}...")
# 显示结果 1. Question: How did Ultron initially come into existence and what was his ultimate goal? Ground Truth: Ultron initially came into existence when Tony Stark and Bruce Banner discovered an artificial intel... 2. Question: What event prompts the Avengers to devise a plan involving time travel to undo Thanos's actions in "Avengers: Endgame"? Ground Truth: The event that prompts the Avengers to devise a plan involving time travel to undo Thanos's actions ... 3. Question: How does Thanos acquire the Power Stone and what events transpire after he obtains it? Ground Truth: Thanos acquires the Power Stone from the planet Xandar. After obtaining the Power Stone, Thanos and ... 4. Question: How does Thanos ultimately achieve his goal of completing the Gauntlet and causing half of all life across the universe to disintegrate in"Avengers: Infinity War"? Ground Truth: Thanos ultimately achieves his goal of completing the Gauntlet and causing half of all life across t... 5. Question: How does Loki initially gain access to Earth and what is his ultimate goal upon arriving? Ground Truth: Loki initially gains access to Earth by using the Tesseract to open a wormhole. His ultimate goal up...
这次使用数据生成器的generate_dataset_from_nodes方法来生成测试数据,生成的数据不仅包含 Question,还包含 Ground Truth,也就是代码中的example.reference_answer属性的值。其实除了 Question 和 Ground Truth 外,在生成的数据中还包含reference_contexts,这是数据生成器使用其内部检索器检索到的上下文,这个数据暂时对我们没有用处,我们只需要关注 Question 和 Ground Truth 即可。
from llama_index.core.evaluation import AnswerRelevancyEvaluator
from llama_index.core import VectorStoreIndex, Settings
from llama_index.core.node_parser import SentenceSplitter
# 显示结果 Question: How did Ultron initially come into existence and what was his ultimate goal? Answer: Ultron initially came into existence when Tony Stark and Bruce Banner discovered an artificial intelligence within Loki's scepter and decided to use it to complete Stark's "Ultron"global defense program. Ultron's ultimate goal was to eradicate humanity in order to save Earth. score: 1.0 feedback: 1. The provided response matches the subject matter of the user's query by explaining how Ultron initially came into existence and what his ultimate goal was. 2. The response directly addresses the focus and perspective of the user's query by detailing the specific events that led to Ultron's creation and his ultimate goal of eradicating humanity.
from llama_index.core.prompts import PromptTemplate
# Prompt for grading answer relevancy. It poses two criteria questions worth
# one point each and asks for the total after a "[RESULT]" marker. The
# {query} and {response} placeholders are left to be filled in at format time.
DEFAULT_EVAL_TEMPLATE = PromptTemplate(
    "Your task is to evaluate if the response is relevant to the query.\n"
    "The evaluation should be performed in a step-by-step manner "
    "by answering the following questions:\n"
    "1. Does the provided response match the subject matter of "
    "the user's query?\n"
    "2. Does the provided response attempt to address the focus "
    "or perspective on the subject matter taken on by the user's query?\n"
    "Each question above is worth 1 point. "
    "Provide detailed feedback on response according to "
    "the criteria questions above "
    "After your feedback provide a final result by strictly following "
    "this format: '[RESULT] followed by the integer number representing "
    "the total score assigned to the response'\n\n"
    "Query: \n {query}\n"
    "Response: \n {response}\n"
    "Feedback:"
)
from llama_index.core.evaluation import ContextRelevancyEvaluator
# Collect the content of every source node attached to the response; these
# are the contexts that get scored against the question.
contexts = [source_node.get_content() for source_node in response.source_nodes]

evaluator = ContextRelevancyEvaluator(llm)
result = evaluator.evaluate(
    query=question,
    contexts=contexts,
)

print(f"score: {result.score}")
print(f"feedback: {result.feedback}")
# 显示结果 score: 1.0 feedback: 1. The retrieved context matches the subject matter of the user's query. It provides a detailed explanation of how Ultron initially came into existence and what his ultimate goal was. 2. The retrieved context can be used exclusively to provide a full answer to the user's query. It covers all the necessary information about Ultron's creation and his goal to eradicate humanity. [RESULT] 4.0
from llama_index.core.query_pipeline import QueryPipeline, InputComponent
from llama_index.core.response_synthesizers.simple_summarize import SimpleSummarize
# 显示结果 Question: What event prompts the Avengers to devise a plan involving time travel to undo Thanos's actions in "Avengers: Endgame"? Answer: The event that prompts the Avengers to devise a plan involving time travel to undo Thanos's actions in"Avengers: Endgame" occurs when Scott Lang escapes from the Quantum Realm and reaches the Avengers Compound. He explains that he experienced only five hours while trapped, despite being there for five years. This leads to the realization that the Quantum Realm allows for time travel, prompting the Avengers to ask Tony Stark to help them retrieve the Infinity Stones from the past to reverse Thanos's actions in the present. Ground Truth: The event that prompts the Avengers to devise a plan involving time travel to undo Thanos's actions in"Avengers: Endgame"is the discovery that Thanos has already destroyed the Infinity Stones, preventing any further use to reverse his actions.
score: 4.0 feedback: The generated answer is relevant and mostly correct in detailing the events leading to the Avengers' decision to use time travel in "Avengers: Endgame." It accurately describes Scott Lang's escape from the Quantum Realm and his crucial role in introducing the concept of time manipulation via the Quantum Realm. However, it slightly deviates from the reference answer, which emphasizes the destruction of the Infinity Stones by Thanos as the critical event. The generated answer instead focuses on the discovery of time travel as a viable option, which is also a correct perspective but not the only one. Thus, the score reflects high relevance and correctness with a minor deviation in focus. passing: True
构建CorrectnessEvaluator评估器
使用我们之前创建的测试数据集中某条数据的reference_answer作为 Ground Truth
将 Question、Answer 和 Ground Truth 传递给评估器的evaluate方法进行评估
# 显示结果 Question: What event prompts the Avengers to devise a plan involving time travel to undo Thanos's actions in "Avengers: Endgame"? Answer: The event that prompts the Avengers to devise a plan involving time travel to undo Thanos's actions in"Avengers: Endgame"is the discovery that Thanos has already destroyed the Infinity Stones, preventing any further use to reverse his actions. Second Answer: The destruction of the Infinity Stones by Thanos prompts the Avengers to devise a plan involving time travel to undo Thanos's actions in "Avengers: Endgame". score: 1.0 feedback: Assistant A provides a more detailed and informative response by explaining that the Avengers discover that Thanos has already destroyed the Infinity Stones, which is the event that prompts them to devise a plan involving time travel to undo his actions in "Avengers: Endgame." This additional context enhances the understanding of the situation and the motivation behind the Avengers' plan. Assistant B, on the other hand, simply states that the destruction of the Infinity Stones by Thanos is the event that leads to the Avengers' plan without providing any further elaboration. Therefore, based on the level of detail and clarity provided in the responses, [[A]] Assistant A is better. pairwise source: EvaluationSource.ORIGINAL
构建PairwiseComparisonEvaluator评估器
将 Question、Answer 和 Second Answer 传递给评估器的evaluate方法进行评估
在显示结果中,我们打印了 Question、Answer 和 Second Answer,以及评估结果的几个属性,从评估结果中可以看到,第一个 Answer 比第二个 Answer 更好。在评估结果中还有一个pairwise_source属性,值是EvaluationSource.ORIGINAL,表示评估顺序是原始顺序。
在 PairwiseComparisonEvaluator 评估器中,有一个初始化参数enforce_consensus,默认值是 True。在评估器进行对比评估时,首先会将 Answer 和 Second Answer 进行对比,即evaluate(response=answer, second_response=second_answer),如果enforce_consensus为 True,则会将 Answer 和 Second Answer 反过来再进行对比,即evaluate(response=second_answer, second_response=answer),最后根据两次结果来产生最终的评估结果。如果最终结果使用的是反转后的结果,那么pairwise_source的值就是EvaluationSource.FLIPPED。
可以看下另外一种对比结果,在下面的评估结果中,2 个 Answer 的得分一样,评估结果是平局:
1 2 3 4 5
score: 0.5 feedback: Both Assistant A and Assistant B provided the same answer to the user's question, stating that Tony Stark and Bruce Banner are the two members of the Avengers who created Ultron. Since both responses are identical in terms of accuracy and relevance to the user's question, there is no significant difference between the two answers. Therefore, in this case, it is a tie between Assistant A and Assistant B.
Therefore, the final verdict is '[[C]]' for a tie. pairwise_source: EvaluationSource.ORIGINAL
# Inputs for the batch run: one question and one reference answer per example.
questions = [ex.query for ex in examples]
ground_truths = [ex.reference_answer for ex in examples]

# Register every evaluator under the metric name its results are reported by.
runner = BatchEvalRunner(
    evaluators={
        "answer_relevancy": answer_relevancy_evaluator,
        "context_relevancy": context_relevancy_evaluator,
        "relevancy": relevant_evaluator,
        "correctness": correctness_evaluator,
        "faithfulness": faithfulness_evaluator,
    },
    workers=8,  # presumably the number of concurrent evaluation workers — confirm
)

# NOTE(review): `reference` is passed as an extra keyword; presumably it is
# forwarded to the evaluators as the ground-truth answers — confirm.
metrics_results = runner.evaluate_queries(
    engine,
    queries=questions,
    reference=ground_truths,
)
# Print every evaluation result, grouped by metric name.
for metrics, eval_results in metrics_results.items():
    print(f"metrics: {metrics}")
    for eval_result in eval_results:
        print(f"score: {eval_result.score}")
        print(f"feedback: {eval_result.feedback}")
        # `passing` is only printed when it is set (not None).
        if eval_result.passing is not None:
            print(f"passing: {eval_result.passing}")
# 显示结果 metrics: answer_relevancy score: 1.0 feedback: 1. The provided response matches the subject matter of the user's query by explaining how Ultron initially came into existence and what his ultimate goal was. 2. The response directly addresses the focus and perspective of the user's query by detailing the specific events that led to Ultron's creation and his ultimate goal of eradicating humanity. [RESULT] 2 ......