Skip to content
This repository has been archived by the owner on Aug 2, 2023. It is now read-only.

Commit

Permalink
fix: Store last_stat when kernels are terminated as a backup
Browse files (browse the repository at this point in the history)
  • Loading branch information
achimnol committed Mar 29, 2022
1 parent 2cb6035 commit 3c86a82
Showing 1 changed file with 32 additions and 17 deletions.
49 changes: 32 additions & 17 deletions src/ai/backend/manager/registry.py
Original file line number Diff line number Diff line change
Expand Up @@ -1807,15 +1807,22 @@ async def _update() -> None:
destroyed_kernels.append(kernel)

async def _update() -> None:
kern_stat = await redis.execute(
self.redis_stat,
lambda r: r.get(str(kernel['id'])),
)
async with self.db.begin() as conn:
values = {
'status': KernelStatus.TERMINATED,
'status_info': reason,
'status_changed': now,
'terminated_at': now,
}
if kern_stat:
values['last_stat'] = msgpack.unpackb(kern_stat)
await conn.execute(
sa.update(kernels)
.values({
'status': KernelStatus.TERMINATED,
'status_info': reason,
'status_changed': now,
'terminated_at': now,
})
.values(values)
.where(kernels.c.id == kernel['id']),
)

Expand Down Expand Up @@ -2612,6 +2619,11 @@ async def mark_kernel_terminated(
except asyncio.CancelledError:
pass

kern_stat = await redis.execute(
self.redis_stat,
lambda r: r.get(str(kernel_id)),
)

async def _update_kernel_status() -> Row | None:
async with self.db.begin() as conn:
# Check the current status.
Expand Down Expand Up @@ -2643,19 +2655,22 @@ async def _update_kernel_status() -> Row | None:
# Change the status to TERMINATED.
# (we don't delete the row for later logging and billing)
now = datetime.now(tzutc())
values = {
'status': KernelStatus.TERMINATED,
'status_info': reason,
'status_changed': now,
'status_data': sql_json_merge(
kernels.c.status_data,
("kernel",),
{"exit_code": exit_code},
),
'terminated_at': now,
}
if kern_stat:
values['last_stat'] = msgpack.unpackb(kern_stat)
update_query = (
sa.update(kernels)
.values({
'status': KernelStatus.TERMINATED,
'status_info': reason,
'status_changed': now,
'status_data': sql_json_merge(
kernels.c.status_data,
("kernel",),
{"exit_code": exit_code},
),
'terminated_at': now,
})
.values(values)
.where(kernels.c.id == kernel_id)
)
await conn.execute(update_query)
Expand Down

0 comments on commit 3c86a82

Please sign in to comment.