main.orig
from fastapi import FastAPI, HTTPException
from pydantic import BaseModel
from deepeval.metrics import GEval
from deepeval.test_case import LLMTestCase, LLMTestCaseParams
from deepeval import assert_test
from typing import Optional
app = FastAPI()


class TestData(BaseModel):
    input: str
    actual_output: str
    expected_output: Optional[str] = None
    # LLMTestCase expects context and retrieval_context as lists of strings.
    context: Optional[list[str]] = None
    retrieval_context: Optional[list[str]] = None
    evaluation_criteria: Optional[list[str]] = None
@app.post("/evaluate/")
async def evaluate_llm_response(data: TestData):
    try:
        # Build the evaluation parameter list from the fields the caller actually supplied.
        user_fields = data.dict(exclude_none=True).keys()
        evaluation_params = [
            param
            for param in LLMTestCaseParams
            if param.value in user_fields
        ]

        correctness_metric = GEval(
            name="Correctness",
            evaluation_steps=data.evaluation_criteria or [
                "Check whether 'actual output' has all the files or code snippets (Controller, service, entity, repository) from 'expected output'",
                "The entity class import statements should use 'jakarta.persistence' instead of 'javax.persistence'",
                "Check if the correct dependencies are added in the 'pom.xml' file",
            ],
            evaluation_params=evaluation_params,
            threshold=0.8,
        )

        test_case = LLMTestCase(
            input=data.input,
            actual_output=data.actual_output,
            expected_output=data.expected_output,
            context=data.context,
            retrieval_context=data.retrieval_context,
        )

        correctness_metric.measure(test_case)
        # assert_test(test_case, [correctness_metric])

        result = {
            "score": correctness_metric.score,
            "reason": correctness_metric.reason,
        }
        print("Evaluation Result:", result)
        return result
    except Exception as e:
        print("Error:", str(e))
        raise HTTPException(status_code=500, detail=str(e))
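

# --- Usage sketch (not part of the original file) ---
# A minimal, hypothetical example of exercising the /evaluate/ endpoint with
# FastAPI's TestClient. The payload values are illustrative only; actually
# running GEval requires deepeval to be configured with an LLM judge (e.g. an
# OpenAI API key). To serve the app instead, save the file as main.py and run:
#   uvicorn main:app --reload
if __name__ == "__main__":
    from fastapi.testclient import TestClient

    client = TestClient(app)
    payload = {
        "input": "Generate a Spring Boot CRUD module for a Customer entity",
        "actual_output": "Controller, service, entity and repository classes using jakarta.persistence ...",
        "expected_output": "Reference answer containing controller, service, entity, repository and pom.xml dependencies",
    }
    response = client.post("/evaluate/", json=payload)
    print(response.status_code, response.json())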