@@ -85,6 +85,7 @@
     _update_init_kwargs_with_connected_pipeline,
     load_sub_model,
     maybe_raise_or_warn,
+    model_has_device_map,
     variant_compatible_siblings,
     warn_deprecated_model_variant,
 )
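For context on the new import: `model_has_device_map` itself is defined outside this hunk. A minimal sketch of what it plausibly checks, assuming accelerate's dispatch records a non-empty `hf_device_map` attribute on dispatched models (the actual implementation in the PR may differ):

```python
# Hedged sketch, not the PR's implementation: assumes a device-mapped model
# carries the non-empty `hf_device_map` attribute that accelerate attaches
# when a model is dispatched across devices.
def model_has_device_map(model) -> bool:
    device_map = getattr(model, "hf_device_map", None)
    return device_map is not None and len(device_map) > 0
```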
@@ -406,6 +407,16 @@ def module_is_offloaded(module):
 
             return hasattr(module, "_hf_hook") and isinstance(module._hf_hook, accelerate.hooks.CpuOffload)
 
+        # device-mapped modules should not go through any device placements.
+        device_mapped_components = [
+            key for key, component in self.components.items() if model_has_device_map(component)
+        ]
+        if device_mapped_components:
+            raise ValueError(
+                "The following pipeline components have been found to use a device map: "
+                f"{device_mapped_components}. This is incompatible with explicitly setting the device using `to()`."
+            )
+
         # .to("cuda") would raise an error if the pipeline is sequentially offloaded, so we raise our own to make it clearer
         pipeline_is_sequentially_offloaded = any(
             module_is_sequentially_offloaded(module) for _, module in self.components.items()
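To see the failure mode the new `to()` guard surfaces, here is a hypothetical repro. The checkpoint name is a placeholder, and whether a given diffusers version accepts `device_map` on a single model this way varies by release, so treat it as a sketch:

```python
import torch
from diffusers import DiffusionPipeline, UNet2DConditionModel

# Load one component with a device map (placeholder checkpoint and kwargs).
unet = UNet2DConditionModel.from_pretrained(
    "org/model", subfolder="unet", device_map="auto", torch_dtype=torch.float16
)
pipe = DiffusionPipeline.from_pretrained("org/model", unet=unet, torch_dtype=torch.float16)

# The sharded component is already pinned by accelerate's hooks, so moving
# the whole pipeline now fails fast instead of silently conflicting:
pipe.to("cuda")
# ValueError: The following pipeline components have been found to use a
# device map: ['unet']. This is incompatible with explicitly setting the
# device using `to()`.
```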
@@ -1002,6 +1013,16 @@ def enable_model_cpu_offload(self, gpu_id: Optional[int] = None, device: Union[t
                 The PyTorch device type of the accelerator that shall be used in inference. If not specified, it will
                 default to "cuda".
         """
+        # device-mapped modules should not go through any device placements.
+        device_mapped_components = [
+            key for key, component in self.components.items() if model_has_device_map(component)
+        ]
+        if device_mapped_components:
+            raise ValueError(
+                "The following pipeline components have been found to use a device map: "
+                f"{device_mapped_components}. This is incompatible with `enable_model_cpu_offload()`."
+            )
+
         is_pipeline_device_mapped = self.hf_device_map is not None and len(self.hf_device_map) > 1
         if is_pipeline_device_mapped:
             raise ValueError(
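The same check guards `enable_model_cpu_offload()`, and for the same reason: model offloading attaches the `accelerate.hooks.CpuOffload` hooks seen in the `module_is_offloaded` check above, which shuttle whole modules between the CPU and a single accelerator and cannot coexist with a module already sharded by a device map. A caller that wants offloading opportunistically could hedge like this (sketch; `pipe` is assumed to be an already-loaded pipeline):

```python
try:
    pipe.enable_model_cpu_offload()
except ValueError:
    # At least one component is device-mapped; leave placement to
    # accelerate's dispatch rather than offloading.
    pass
```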
@@ -1104,6 +1125,16 @@ def enable_sequential_cpu_offload(self, gpu_id: Optional[int] = None, device: Un
                 The PyTorch device type of the accelerator that shall be used in inference. If not specified, it will
                 default to "cuda".
         """
+        # device-mapped modules should not go through any device placements.
+        device_mapped_components = [
+            key for key, component in self.components.items() if model_has_device_map(component)
+        ]
+        if device_mapped_components:
+            raise ValueError(
+                "The following pipeline components have been found to use a device map: "
+                f"{device_mapped_components}. This is incompatible with `enable_sequential_cpu_offload()`."
+            )
+
         if is_accelerate_available() and is_accelerate_version(">=", "0.14.0"):
             from accelerate import cpu_offload
         else:
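Finally, a regression-style test one might write for these guards; the test name and the `device_mapped_pipe` fixture are hypothetical, not taken from this PR:

```python
import pytest

def test_offload_rejects_device_mapped_components(device_mapped_pipe):
    # `device_mapped_pipe` is a hypothetical fixture: any pipeline in which
    # at least one component was loaded with a device map.
    with pytest.raises(ValueError, match="use a device map"):
        device_mapped_pipe.enable_model_cpu_offload()
    with pytest.raises(ValueError, match="use a device map"):
        device_mapped_pipe.enable_sequential_cpu_offload()
```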