From 056b1cfd5aa1cf2c4beea192868c5c02463ce6d3 Mon Sep 17 00:00:00 2001 From: lartpang Date: Sun, 3 Oct 2021 19:14:50 +0800 Subject: [PATCH] Add the config about the max used ratio of the gpu. --- run_it.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/run_it.py b/run_it.py index e5ebac8..ae3d7c7 100644 --- a/run_it.py +++ b/run_it.py @@ -81,6 +81,11 @@ def _create_sub_proc(self, cmd=""): def create_and_start_proc(self, cmd=None): if (used_mem := self.get_used_mem(return_ratio=True)) > self.max_used_ratio: + # TODO: + # The current check is not very accurate. Ideally the program would provide an option to set the number of epochs (`--num-epochs`): + # first do a preliminary run with num_epoch=1 and record the GPU memory used by each command, + # then schedule the subsequent work based on the GPU memory these programs actually use. + # This may require the program to support overwriting its output (`--overwrite`). self.status = STATUS.GPU_BUSY print( f"[ID {self.slot_idx} WARN] the memory usage of the GPU {self.gpu_id} is currently {used_mem}, " @@ -127,6 +132,7 @@ def get_args(): required=True, help="The text file containing all your commands. It will be combined with `interpreter`.", ) + parser.add_argument("--max-used-ratio", type=float, default=0.5, help="The max used ratio of the gpu.") args = parser.parse_args() if args.max_workers is None: args.max_workers = len(args.gpu_pool) @@ -152,7 +158,7 @@ def main(): stdout=subprocess.PIPE, stderr=subprocess.STDOUT, num_cmds=len(cmd_pool), - max_used_ratio=0.5, + max_used_ratio=args.max_used_ratio, ) print(proc) proc_slots.append(proc)