Skip to content
This repository has been archived by the owner on Aug 2, 2023. It is now read-only.

Commit

Permalink
fix: Initialization of error monitor plugin (#439)
Browse files Browse the repository at this point in the history
* fix: Improve debug log messages for error collection

Backported-From: main
Backported-To: 21.03
  • Loading branch information
achimnol committed Jun 18, 2021
1 parent 2ecd253 commit 31cee9c
Show file tree
Hide file tree
Showing 2 changed files with 24 additions and 5 deletions.
1 change: 1 addition & 0 deletions changes/439.fix
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Handle a missing root context gracefully, with an explicit warning, during initialization of the intrinsic error monitor plugin.
28 changes: 23 additions & 5 deletions src/ai/backend/manager/plugin/error_monitor.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,13 +24,23 @@
class ErrorMonitor(AbstractErrorReporterPlugin):

async def init(self, context: Any = None) -> None:
    """Initialize the plugin from the given context.

    When ``context`` is ``None`` a warning is logged, ``self.enabled`` is
    set to ``False`` and nothing else is set up.  Otherwise the plugin is
    marked enabled, the root context is read from
    ``context['_root.context']``, ``handle_agent_error`` is registered as a
    consumer of ``AgentErrorEvent`` on the root context's event dispatcher,
    and the root context's ``db`` is stored on the instance.
    """
    # Guard clause: without a context there is nothing to hook into.
    if context is None:
        log.warning(
            "manager.plugin.error_monitor is initialized without the root context. "
            "The plugin is disabled."
        )
        self.enabled = False
        return

    self.enabled = True
    root: RootContext = context['_root.context']  # type: ignore
    dispatcher = root.event_dispatcher
    self.event_dispatcher = dispatcher
    # Keep the handle returned by consume(); cleanup() passes it to unconsume().
    self._evh = dispatcher.consume(
        AgentErrorEvent,
        None,
        self.handle_agent_error,
    )
    self.db = root.db

async def cleanup(self) -> None:
    """Unregister the agent-error event consumer registered by ``init``.

    Does nothing when the plugin is disabled: in that case ``init`` returns
    before ``event_dispatcher`` and ``_evh`` are assigned, so touching them
    here would raise ``AttributeError``.
    """
    if not self.enabled:
        return
    # Unconsume exactly once, using the handle saved by init().
    self.event_dispatcher.unconsume(self._evh)

async def update_plugin_config(self, plugin_config: Mapping[str, Any]) -> None:
    """Accept an updated plugin configuration; currently a no-op that ignores it."""
    return None
Expand All @@ -43,6 +53,8 @@ async def capture_exception(
exc_instance: Exception = None,
context: Mapping[str, Any] = None,
) -> None:
if not self.enabled:
return
if exc_instance:
tb = exc_instance.__traceback__
else:
Expand All @@ -65,26 +77,32 @@ async def capture_exception(
user = None
else:
user = context['user']
message = ''.join(traceback.format_exception_only(exc_type, exc_instance)).strip()

async with self.db.begin() as conn:
query = error_logs.insert().values({
'severity': severity,
'source': 'manager',
'user': user,
'message': ''.join(traceback.format_exception_only(exc_type, exc_instance)).strip(),
'message': message,
'context_lang': 'python',
'context_env': context,
'traceback': ''.join(traceback.format_tb(tb)).strip()
})
await conn.execute(query)
log.debug('Manager log collected: {}', str(exc_instance))
log.debug(
"collected an error log [{}] \"{}\" from manager",
severity.name, message,
)

async def handle_agent_error(
self,
context: None,
source: AgentId,
event: AgentErrorEvent,
) -> None:
if not self.enabled:
return
async with self.db.begin() as conn:
query = error_logs.insert().values({
'severity': event.severity,
Expand All @@ -97,6 +115,6 @@ async def handle_agent_error(
})
await conn.execute(query)
log.debug(
'collected AgentErrorEvent: [{}:{}] {}',
source, event.severity, event.message,
"collected an error log [{}] \"{}\" from agent:{}",
event.severity.name, event.message, source,
)

0 comments on commit 31cee9c

Please sign in to comment.