From 5b6f202c836ba3b4c8e5e35b00ec12eb1329e712 Mon Sep 17 00:00:00 2001
From: Jennings Zhang
Date: Tue, 7 May 2024 15:19:20 -0400
Subject: [PATCH] Add support for GPUs in CONTAINER_ENV=docker mode

---
 pman/dockermgr.py     | 8 ++++++--
 requirements/base.txt | 2 +-
 2 files changed, 7 insertions(+), 3 deletions(-)

diff --git a/pman/dockermgr.py b/pman/dockermgr.py
index d1d7594..a6ff4db 100755
--- a/pman/dockermgr.py
+++ b/pman/dockermgr.py
@@ -2,6 +2,7 @@
 from typing import List, Optional, AnyStr
 
 from docker import DockerClient
+from docker.types import DeviceRequest
 from docker.models.containers import Container
 
 from pman.abstractmgr import (AbstractManager, Image, JobName, ResourcesDict,
@@ -36,8 +37,6 @@ def schedule_job(self, image: Image, command: List[str], name: JobName,
                 'got number_of_workers=' + str(resources_dict['number_of_workers']),
                 status_code=400
             )
-        if resources_dict['gpu_limit'] != 0:
-            raise ManagerException('Compute environment does not support GPUs yet.')
 
         volumes = {
             'volumes': {
@@ -53,6 +52,11 @@ def schedule_job(self, image: Image, command: List[str], name: JobName,
             limits['nano_cpus'] = int(resources_dict['cpu_limit'] * 1e6)
             limits['mem_reservation'] = resources_dict['memory_limit'] * 1024 * 1024
 
+        if resources_dict['gpu_limit'] > 0:
+            limits['device_requests'] = [
+                DeviceRequest(count=resources_dict['gpu_limit'], capabilities=[['gpu']])
+            ]
+
         user_spec = {}
         if uid is not None:
             user_spec['user'] = uid
diff --git a/requirements/base.txt b/requirements/base.txt
index 8a62261..e38e695 100755
--- a/requirements/base.txt
+++ b/requirements/base.txt
@@ -1,6 +1,6 @@
 Flask==2.3.3
 Flask-RESTful==0.3.10
-docker==6.1.3
+docker==7.0.0
 openshift==0.13.2
 kubernetes==27.2.0
 python-keystoneclient==4.2.0