-
Notifications
You must be signed in to change notification settings - Fork 385
/
Copy pathg.py
38 lines (35 loc) · 1.22 KB
/
g.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
from deepeval import evaluate
from deepeval.metrics import TaskCompletionMetric
from deepeval.test_case import LLMTestCase, ToolCall
# Task-completion metric shared by the test case(s) below: constructed with a
# 0.7 threshold, "gpt-4o" as the model argument, and reason output enabled.
metric = TaskCompletionMetric(
    model="gpt-4o",
    threshold=0.7,
    include_reason=True,
)
# Recorded call to the itinerary tool and the plan lines it returned.
itinerary_call = ToolCall(
    name="Itinerary Generator",
    description="Creates travel plans based on destination and duration.",
    input_parameters={"destination": "Paris", "days": 2},
    output=[
        "Day 1: Eiffel Tower, Le Jules Verne.",
        "Day 2: Louvre Museum, Angelina Paris.",
    ],
)

# Recorded call to the restaurant lookup tool and the names it returned.
restaurant_call = ToolCall(
    name="Restaurant Finder",
    description="Finds top restaurants in a city.",
    input_parameters={"city": "Paris"},
    output=["Le Jules Verne", "Angelina Paris"],
)

# Single test case: the user request, the final answer text, and the tool
# calls listed in the order they appear above.
# NOTE(review): the input asks for a 3-day itinerary while the tool call and
# the answer only cover 2 days -- presumably a deliberate incomplete-task
# example for the metric; confirm before "fixing" the data.
test_case = LLMTestCase(
    input="Plan a 3-day itinerary for Paris with cultural landmarks and local cuisine.",
    # Adjacent literals are concatenated into one string (trailing space kept).
    actual_output=(
        "Day 1: Eiffel Tower, dinner at Le Jules Verne. "
        "Day 2: Louvre Museum, lunch at Angelina Paris. "
    ),
    tools_called=[itinerary_call, restaurant_call],
)
# Alternative (left disabled): score this one case directly on the metric and
# inspect the result.
#     metric.measure(test_case)
#     print(metric.score)
#     print(metric.reason)

# Bulk evaluation: pass the list of test cases and the list of metrics.
evaluate(test_cases=[test_case], metrics=[metric])