update

huggingface · a-r-r-o-w · Dec 25, 2024 · Dec 24, 2024 · Dec 24, 2024 · Dec 24, 2024
commit e5dcdec03fe53d200cf996531e104e92877ab858
diff --git a/docs/source/en/quantization/torchao.md b/docs/source/en/quantization/torchao.md
@@ -113,13 +113,37 @@ import torch
 from diffusers import FluxPipeline, FluxTransformer2DModel
 
 transformer = FluxTransformer2DModel.from_pretrained("/path/to/flux_int8wo", torch_dtype=torch.bfloat16, use_safetensors=False)
-pipe = FluxPipeline.from_pretrained(model_id, transformer=transformer, torch_dtype=torch.bfloat16)
+pipe = FluxPipeline.from_pretrained("black-forest-labs/Flux.1-Dev", transformer=transformer, torch_dtype=torch.bfloat16)
 pipe.to("cuda")
 
 prompt = "A cat holding a sign that says hello world"
 image = pipe(prompt, num_inference_steps=30, guidance_scale=7.0).images[0]
 image.save("output.png")
-``` 
+```
+
+Some quantization methods, such as `uint4wo`, cannot be loaded directly and may result in an `UnpicklingError` when trying to load the models, but work as expected when saving them. In order to work around this, one can load the state dict manually into the model. Note, however, that this requires using `weights_only=False` in `torch.load`, so it should be run only if the weights were obtained from a trustable source.
+
+```python
+import torch
+from accelerate import init_empty_weights
+from diffusers import FluxPipeline, FluxTransformer2DModel, TorchAoConfig
+
+# Serialize the model
+transformer = FluxTransformer2DModel.from_pretrained(
+    "black-forest-labs/Flux.1-Dev",
+    subfolder="transformer",
+    quantization_config=TorchAoConfig("uint4wo"),
+    torch_dtype=torch.bfloat16,
+)
+transformer.save_pretrained("/path/to/flux_uint4wo", safe_serialization=False, max_shard_size="50GB")
+# ...
+
+# Load the model
+state_dict = torch.load("/path/to/flux_uint4wo/diffusion_pytorch_model.bin", weights_only=False, map_location="cpu")
+with init_empty_weights():
+    transformer = FluxTransformer2DModel.from_config("/path/to/flux_uint4wo/config.json")
+transformer.load_state_dict(state_dict, strict=True, assign=True)
+```
 
 ## Resources