Skip to content

Commit 52b87e2

Browse files
authored
Merge pull request #972 from souvik03-136/main
feat: enhance error handling and validation across utility modules
2 parents 304c166 + b552aa9 commit 52b87e2

File tree

3 files changed

+838
-203
lines changed

3 files changed

+838
-203
lines changed

scrapegraphai/utils/code_error_analysis.py

+236-48
Original file line numberDiff line numberDiff line change
@@ -12,8 +12,9 @@
1212
"""
1313

1414
import json
15-
from typing import Any, Dict
15+
from typing import Any, Dict, Optional
1616

17+
from pydantic import BaseModel, Field, validator
1718
from langchain.prompts import PromptTemplate
1819
from langchain_core.output_parsers import StrOutputParser
1920

@@ -25,7 +26,77 @@
2526
)
2627

2728

28-
def syntax_focused_analysis(state: dict, llm_model) -> str:
29+
class AnalysisError(Exception):
    """Base exception for code analysis errors.

    Catching this type also catches every more specific analysis exception
    that derives from it.
    """
32+
33+
34+
class InvalidStateError(AnalysisError):
    """Exception raised when state dictionary is missing required keys.

    Derives from ``AnalysisError``, so handlers written for the base type
    also receive this one.
    """
37+
38+
39+
class CodeAnalysisState(BaseModel):
    """Validated view of the state shared by every code-analysis routine."""

    # Source code that the analysis will inspect.
    generated_code: str = Field(..., description="The generated code to analyze")
    # Error details; the keys required depend on the specific analysis.
    errors: Dict[str, Any] = Field(..., description="Dictionary containing error information")

    @validator('errors')
    def validate_errors(cls, v):
        """Return ``v`` unchanged when it is a dict, otherwise reject it."""
        if isinstance(v, dict):
            return v
        raise ValueError("errors must be a dictionary")
50+
51+
52+
class ExecutionAnalysisState(CodeAnalysisState):
    """State accepted by the execution-error analysis."""

    # Both HTML fields are optional and default to None.
    html_code: Optional[str] = Field(None, description="HTML code if available")
    html_analysis: Optional[str] = Field(None, description="Analysis of HTML code")

    @validator('errors')
    def validate_execution_errors(cls, v):
        """Require an ``'execution'`` entry in the errors dictionary."""
        # Run the base-class structural check first.
        super().validate_errors(v)
        if 'execution' in v:
            return v
        raise ValueError("errors dictionary must contain 'execution' key")
64+
65+
66+
class ValidationAnalysisState(CodeAnalysisState):
    """State accepted by the schema-validation analysis."""

    # Schema the execution result is compared against.
    json_schema: Dict[str, Any] = Field(..., description="JSON schema for validation")
    # Whatever the generated code produced; the type is left open.
    execution_result: Any = Field(..., description="Result of code execution")

    @validator('errors')
    def validate_validation_errors(cls, v):
        """Require a ``'validation'`` entry in the errors dictionary."""
        # Run the base-class structural check first.
        super().validate_errors(v)
        if 'validation' in v:
            return v
        raise ValueError("errors dictionary must contain 'validation' key")
78+
79+
80+
def get_optimal_analysis_template(error_type: str) -> str:
    """
    Look up the prompt template that matches an error type.

    Args:
        error_type (str): One of "syntax", "execution", "validation" or
            "semantic". Any other value selects the syntax template.

    Returns:
        str: The prompt template text.
    """
    templates_by_type = {
        "syntax": TEMPLATE_SYNTAX_ANALYSIS,
        "execution": TEMPLATE_EXECUTION_ANALYSIS,
        "validation": TEMPLATE_VALIDATION_ANALYSIS,
        "semantic": TEMPLATE_SEMANTIC_ANALYSIS,
    }
    try:
        return templates_by_type[error_type]
    except KeyError:
        # Unrecognised types fall back to the syntax template.
        return TEMPLATE_SYNTAX_ANALYSIS
97+
98+
99+
def syntax_focused_analysis(state: Dict[str, Any], llm_model) -> str:
    """
    Analyzes the syntax errors in the generated code.

    Args:
        state (Dict[str, Any]): Analysis state. ``generated_code`` and
            ``errors`` are read from it, and ``errors`` must contain a
            ``"syntax"`` entry.
        llm_model: The model placed between the prompt and the string output
            parser in the chain.

    Returns:
        str: The result of the syntax error analysis.

    Raises:
        InvalidStateError: If the state fails validation or has no
            ``"syntax"`` entry under ``errors``.
        AnalysisError: If anything else fails while building or running the
            chain.

    Example:
        >>> state = {
        ...     'generated_code': 'print("Hello World")',
        ...     'errors': {'syntax': 'Missing parenthesis'}
        ... }
        >>> analysis = syntax_focused_analysis(state, mock_llm)  # doctest: +SKIP
    """
    # Validate the state on its own so that a malformed state is reported as
    # InvalidStateError and is not confused with a failure of the LLM chain.
    try:
        validated_state = CodeAnalysisState(
            generated_code=state.get("generated_code", ""),
            errors=state.get("errors", {}),
        )
    except ValueError as e:
        # pydantic's ValidationError is a ValueError subclass.
        raise InvalidStateError(f"Invalid state dictionary: {e}") from e
    except Exception as e:
        raise AnalysisError(f"Syntax analysis failed: {str(e)}") from e

    # Raised outside any broad handler so it is not re-wrapped as a plain
    # AnalysisError.
    if "syntax" not in validated_state.errors:
        raise InvalidStateError("No syntax errors found in state dictionary")

    try:
        prompt = PromptTemplate(
            template=get_optimal_analysis_template("syntax"),
            input_variables=["generated_code", "errors"],
        )
        chain = prompt | llm_model | StrOutputParser()

        return chain.invoke({
            "generated_code": validated_state.generated_code,
            "errors": validated_state.errors["syntax"],
        })
    except AnalysisError:
        # Already one of our own exception types; do not wrap it again.
        raise
    except KeyError as e:
        raise InvalidStateError(f"Missing required key in state dictionary: {e}") from e
    except Exception as e:
        raise AnalysisError(f"Syntax analysis failed: {str(e)}") from e
46148

47149

48-
def execution_focused_analysis(state: dict, llm_model) -> str:
150+
def execution_focused_analysis(state: Dict[str, Any], llm_model) -> str:
    """
    Analyzes the execution errors in the generated code and HTML code.

    Args:
        state (Dict[str, Any]): Analysis state. ``generated_code``,
            ``errors``, ``html_code`` and ``html_analysis`` are read from it,
            and ``errors`` must contain an ``"execution"`` entry.
        llm_model: The model placed between the prompt and the string output
            parser in the chain.

    Returns:
        str: The result of the execution error analysis.

    Raises:
        InvalidStateError: If the state fails validation (for example, no
            ``"execution"`` entry under ``errors``).
        AnalysisError: If anything else fails while building or running the
            chain.

    Example:
        >>> state = {
        ...     'generated_code': 'print(x)',
        ...     'errors': {'execution': 'NameError: name "x" is not defined'},
        ...     'html_code': '<div>Test</div>',
        ...     'html_analysis': 'Valid HTML'
        ... }
        >>> analysis = execution_focused_analysis(state, mock_llm)  # doctest: +SKIP
    """
    # Validate the state on its own so that a malformed state is reported as
    # InvalidStateError and is not confused with a failure of the LLM chain.
    try:
        validated_state = ExecutionAnalysisState(
            generated_code=state.get("generated_code", ""),
            errors=state.get("errors", {}),
            html_code=state.get("html_code", ""),
            html_analysis=state.get("html_analysis", ""),
        )
    except ValueError as e:
        # pydantic's ValidationError is a ValueError subclass.
        raise InvalidStateError(f"Invalid state dictionary: {e}") from e
    except Exception as e:
        raise AnalysisError(f"Execution analysis failed: {str(e)}") from e

    try:
        prompt = PromptTemplate(
            template=get_optimal_analysis_template("execution"),
            input_variables=["generated_code", "errors", "html_code", "html_analysis"],
        )
        chain = prompt | llm_model | StrOutputParser()

        return chain.invoke({
            "generated_code": validated_state.generated_code,
            "errors": validated_state.errors["execution"],
            "html_code": validated_state.html_code,
            "html_analysis": validated_state.html_analysis,
        })
    except AnalysisError:
        # Already one of our own exception types; do not wrap it again.
        raise
    except KeyError as e:
        raise InvalidStateError(f"Missing required key in state dictionary: {e}") from e
    except Exception as e:
        raise AnalysisError(f"Execution analysis failed: {str(e)}") from e
72201

73202

74-
def validation_focused_analysis(state: dict, llm_model) -> str:
203+
def validation_focused_analysis(state: Dict[str, Any], llm_model) -> str:
    """
    Analyzes the validation errors in the generated code based on a JSON schema.

    Args:
        state (Dict[str, Any]): Analysis state. ``generated_code``,
            ``errors``, ``json_schema`` and ``execution_result`` are read
            from it, and ``errors`` must contain a ``"validation"`` entry.
        llm_model: The model placed between the prompt and the string output
            parser in the chain.

    Returns:
        str: The result of the validation error analysis.

    Raises:
        InvalidStateError: If the state fails validation (for example, no
            ``"validation"`` entry under ``errors``).
        AnalysisError: If anything else fails while building or running the
            chain.

    Example:
        >>> state = {
        ...     'generated_code': 'return {"name": "John"}',
        ...     'errors': {'validation': 'Missing required field: age'},
        ...     'json_schema': {'required': ['name', 'age']},
        ...     'execution_result': {'name': 'John'}
        ... }
        >>> analysis = validation_focused_analysis(state, mock_llm)  # doctest: +SKIP
    """
    # Validate the state on its own so that a malformed state is reported as
    # InvalidStateError and is not confused with a failure of the LLM chain.
    try:
        validated_state = ValidationAnalysisState(
            generated_code=state.get("generated_code", ""),
            errors=state.get("errors", {}),
            json_schema=state.get("json_schema", {}),
            execution_result=state.get("execution_result", {}),
        )
    except ValueError as e:
        # pydantic's ValidationError is a ValueError subclass.
        raise InvalidStateError(f"Invalid state dictionary: {e}") from e
    except Exception as e:
        raise AnalysisError(f"Validation analysis failed: {str(e)}") from e

    try:
        prompt = PromptTemplate(
            template=get_optimal_analysis_template("validation"),
            input_variables=["generated_code", "errors", "json_schema", "execution_result"],
        )
        chain = prompt | llm_model | StrOutputParser()

        return chain.invoke({
            "generated_code": validated_state.generated_code,
            "errors": validated_state.errors["validation"],
            "json_schema": validated_state.json_schema,
            "execution_result": validated_state.execution_result,
        })
    except AnalysisError:
        # Already one of our own exception types; do not wrap it again.
        raise
    except KeyError as e:
        raise InvalidStateError(f"Missing required key in state dictionary: {e}") from e
    except Exception as e:
        raise AnalysisError(f"Validation analysis failed: {str(e)}") from e
99255

100256

101257
def semantic_focused_analysis(
    state: Dict[str, Any], comparison_result: Dict[str, Any], llm_model
) -> str:
    """
    Analyzes the semantic differences in the generated code based on a comparison result.

    Args:
        state (Dict[str, Any]): Analysis state. ``generated_code`` and
            (optionally) ``errors`` are read from it.
        comparison_result (Dict[str, Any]): Must contain ``"differences"``
            (serialised to JSON for the prompt) and ``"explanation"``.
        llm_model: The model placed between the prompt and the string output
            parser in the chain.

    Returns:
        str: The result of the semantic error analysis.

    Raises:
        InvalidStateError: If the state fails validation or
            ``comparison_result`` lacks a required key.
        AnalysisError: If anything else fails while building or running the
            chain.

    Example:
        >>> state = {
        ...     'generated_code': 'def add(a, b): return a + b'
        ... }
        >>> comparison_result = {
        ...     'differences': ['Missing docstring', 'No type hints'],
        ...     'explanation': 'The code is missing documentation'
        ... }
        >>> analysis = semantic_focused_analysis(state, comparison_result, mock_llm)  # doctest: +SKIP
    """
    # Validate the state on its own so that a malformed state is reported as
    # InvalidStateError and is not confused with a failure of the LLM chain.
    try:
        validated_state = CodeAnalysisState(
            generated_code=state.get("generated_code", ""),
            errors=state.get("errors", {}),
        )
    except ValueError as e:
        # pydantic's ValidationError is a ValueError subclass.
        raise InvalidStateError(f"Invalid state dictionary: {e}") from e
    except Exception as e:
        raise AnalysisError(f"Semantic analysis failed: {str(e)}") from e

    try:
        # Explicit checks give a precise message; the AnalysisError handler
        # below lets them propagate without being re-wrapped.
        if "differences" not in comparison_result:
            raise InvalidStateError("comparison_result missing 'differences' key")
        if "explanation" not in comparison_result:
            raise InvalidStateError("comparison_result missing 'explanation' key")

        prompt = PromptTemplate(
            template=get_optimal_analysis_template("semantic"),
            input_variables=["generated_code", "differences", "explanation"],
        )
        chain = prompt | llm_model | StrOutputParser()

        return chain.invoke({
            "generated_code": validated_state.generated_code,
            "differences": json.dumps(comparison_result["differences"], indent=2),
            "explanation": comparison_result["explanation"],
        })
    except AnalysisError:
        # Already one of our own exception types; do not wrap it again.
        raise
    except KeyError as e:
        raise InvalidStateError(f"Missing required key: {e}") from e
    except Exception as e:
        raise AnalysisError(f"Semantic analysis failed: {str(e)}") from e

0 commit comments

Comments
 (0)