From a64e9f5e571cb12583f4bbd3b1b60af477b1b57f Mon Sep 17 00:00:00 2001 From: cavdard <44590949+cavdard@users.noreply.github.com> Date: Fri, 22 Apr 2022 12:24:38 -0700 Subject: [PATCH] Changes in create_optimizer to support tensor parallelism with SMP (#16880) * changes in create optimizer to support tensor parallelism with SMP * Update src/transformers/trainer.py Convert if check to one line. Co-authored-by: Sylvain Gugger <35901082+sgugger@users.noreply.github.com> Co-authored-by: Cavdar Co-authored-by: Sylvain Gugger <35901082+sgugger@users.noreply.github.com> --- src/transformers/trainer.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/src/transformers/trainer.py b/src/transformers/trainer.py index 9e61a36ecf4d81..eb0a3ce2ee548b 100755 --- a/src/transformers/trainer.py +++ b/src/transformers/trainer.py @@ -843,16 +843,18 @@ def create_optimizer(self): We provide a reasonable default that works well. If you want to use something else, you can pass a tuple in the Trainer's init through `optimizers`, or subclass and override this method in a subclass. """ + opt_model = self.model_wrapped if is_sagemaker_mp_enabled() else self.model + if self.optimizer is None: - decay_parameters = get_parameter_names(self.model, [nn.LayerNorm]) + decay_parameters = get_parameter_names(opt_model, [nn.LayerNorm]) decay_parameters = [name for name in decay_parameters if "bias" not in name] optimizer_grouped_parameters = [ { - "params": [p for n, p in self.model.named_parameters() if n in decay_parameters], + "params": [p for n, p in opt_model.named_parameters() if n in decay_parameters], "weight_decay": self.args.weight_decay, }, { - "params": [p for n, p in self.model.named_parameters() if n not in decay_parameters], + "params": [p for n, p in opt_model.named_parameters() if n not in decay_parameters], "weight_decay": 0.0, }, ] @@ -872,7 +874,7 @@ def create_optimizer(self): manager = bitsandbytes.optim.GlobalOptimManager.get_instance() - for module in self.model.modules(): + for module in opt_model.modules(): if isinstance(module, nn.Embedding): manager.register_module_override(module, "weight", {"optim_bits": 32}) logger.debug(f"bitsandbytes: will optimize {module} in fp32")