From add3cca09e64774338134a053455ef41253694d1 Mon Sep 17 00:00:00 2001
From: simpletrontdip
Date: Fri, 28 Feb 2025 14:47:53 +0700
Subject: [PATCH 01/14] fix: correct guidance_in layer keys in lora conversion

---
 .../lora_conversions/flux_diffusers_lora_conversion_utils.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/invokeai/backend/patches/lora_conversions/flux_diffusers_lora_conversion_utils.py b/invokeai/backend/patches/lora_conversions/flux_diffusers_lora_conversion_utils.py
index 6a36db7b592..d6c8b4be683 100644
--- a/invokeai/backend/patches/lora_conversions/flux_diffusers_lora_conversion_utils.py
+++ b/invokeai/backend/patches/lora_conversions/flux_diffusers_lora_conversion_utils.py
@@ -124,8 +124,8 @@ def add_qkv_lora_layer_if_present(
     add_lora_layer_if_present("time_text_embed.text_embedder.linear_2", "vector_in.out_layer")
 
     # time_text_embed.guidance_embedder -> guidance_in.
-    add_lora_layer_if_present("time_text_embed.guidance_embedder.linear_1", "guidance_in")
-    add_lora_layer_if_present("time_text_embed.guidance_embedder.linear_2", "guidance_in")
+    add_lora_layer_if_present("time_text_embed.guidance_embedder.linear_1", "guidance_in.in_layer")
+    add_lora_layer_if_present("time_text_embed.guidance_embedder.linear_2", "guidance_in.out_layer")
 
     # context_embedder -> txt_in.
     add_lora_layer_if_present("context_embedder", "txt_in")

From 6efad432af6df2076616ca8ab4df839494daa4f2 Mon Sep 17 00:00:00 2001
From: simpletrontdip
Date: Fri, 28 Feb 2025 14:49:05 +0700
Subject: [PATCH 02/14] feat: add missing adaLN layer in lora conversion

---
 .../flux_diffusers_lora_conversion_utils.py | 17 +++++++++++++++++
 1 file changed, 17 insertions(+)

diff --git a/invokeai/backend/patches/lora_conversions/flux_diffusers_lora_conversion_utils.py b/invokeai/backend/patches/lora_conversions/flux_diffusers_lora_conversion_utils.py
index d6c8b4be683..e5d31d26b9d 100644
--- a/invokeai/backend/patches/lora_conversions/flux_diffusers_lora_conversion_utils.py
+++ b/invokeai/backend/patches/lora_conversions/flux_diffusers_lora_conversion_utils.py
@@ -82,6 +82,19 @@ def add_lora_layer_if_present(src_key: str, dst_key: str) -> None:
             values = get_lora_layer_values(src_layer_dict)
             layers[dst_key] = any_lora_layer_from_state_dict(values)
 
+    def add_lora_adaLN_layer_if_present(src_key: str, dst_key: str) -> None:
+        if src_key in grouped_state_dict:
+            src_layer_dict = grouped_state_dict.pop(src_key)
+            values = get_lora_layer_values(src_layer_dict)
+
+            for _key in values.keys():
+                # in SD3 original implementation of AdaLayerNormContinuous, it split linear projection output into shift, scale;
+                # while in diffusers it split into scale, shift. Here we swap the linear projection weights in order to be able to use diffusers implementation
+                scale, shift = values[_key].chunk(2, dim=0)
+                values[_key] = torch.cat([shift, scale], dim=0)
+
+            layers[dst_key] = any_lora_layer_from_state_dict(values)
+
     def add_qkv_lora_layer_if_present(
         src_keys: list[str],
         src_weight_shapes: list[tuple[int, int]],
@@ -223,6 +236,10 @@ def add_qkv_lora_layer_if_present(
 
     # Final layer.
     add_lora_layer_if_present("proj_out", "final_layer.linear")
+    add_lora_adaLN_layer_if_present(
+        'norm_out.linear',
+        'final_layer.adaLN_modulation.1',
+    )
 
     # Assert that all keys were processed.
     assert len(grouped_state_dict) == 0

From 0c0637f90e41fd4cd41b05a0c8036f346d79d20b Mon Sep 17 00:00:00 2001
From: simpletrontdip
Date: Tue, 4 Mar 2025 09:23:02 +0700
Subject: [PATCH 03/14] chore: update util function name as convention

---
 .../lora_conversions/flux_diffusers_lora_conversion_utils.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/invokeai/backend/patches/lora_conversions/flux_diffusers_lora_conversion_utils.py b/invokeai/backend/patches/lora_conversions/flux_diffusers_lora_conversion_utils.py
index e5d31d26b9d..1417489e8a4 100644
--- a/invokeai/backend/patches/lora_conversions/flux_diffusers_lora_conversion_utils.py
+++ b/invokeai/backend/patches/lora_conversions/flux_diffusers_lora_conversion_utils.py
@@ -82,7 +82,7 @@ def add_lora_layer_if_present(src_key: str, dst_key: str) -> None:
             values = get_lora_layer_values(src_layer_dict)
             layers[dst_key] = any_lora_layer_from_state_dict(values)
 
-    def add_lora_adaLN_layer_if_present(src_key: str, dst_key: str) -> None:
+    def add_adaLN_lora_layer_if_present(src_key: str, dst_key: str) -> None:
         if src_key in grouped_state_dict:
             src_layer_dict = grouped_state_dict.pop(src_key)
             values = get_lora_layer_values(src_layer_dict)
@@ -236,7 +236,7 @@ def add_qkv_lora_layer_if_present(
 
     # Final layer.
     add_lora_layer_if_present("proj_out", "final_layer.linear")
-    add_lora_adaLN_layer_if_present(
+    add_adaLN_lora_layer_if_present(
         'norm_out.linear',
         'final_layer.adaLN_modulation.1',
     )

From c12005e41256902733c96fbba0cad886f639615c Mon Sep 17 00:00:00 2001
From: simpletrontdip
Date: Fri, 7 Mar 2025 18:20:03 +0700
Subject: [PATCH 04/14] feat: add sample lora diffuser keys with norm_out.linear layer to test

---
 ...lux_lora_diffusers_with_norm_out_format.py | 1012 +++++++++++++++++
 ...st_flux_diffusers_lora_conversion_utils.py | 7 +-
 2 files changed, 1017 insertions(+), 2 deletions(-)
 create mode 100644 tests/backend/patches/lora_conversions/lora_state_dicts/flux_lora_diffusers_with_norm_out_format.py

diff --git a/tests/backend/patches/lora_conversions/lora_state_dicts/flux_lora_diffusers_with_norm_out_format.py b/tests/backend/patches/lora_conversions/lora_state_dicts/flux_lora_diffusers_with_norm_out_format.py
new file mode 100644
index 00000000000..7a9d15083d3
--- /dev/null
+++ b/tests/backend/patches/lora_conversions/lora_state_dicts/flux_lora_diffusers_with_norm_out_format.py
@@ -0,0 +1,1012 @@
+# Sample state dict in the Diffusers FLUX LoRA format.
+# This from Hyper-SD, having extra `norm_out` layer +# From https://huggingface.co/ByteDance/Hyper-SD/tree/main?show_file_info=Hyper-FLUX.1-dev-16steps-lora.safetensors +state_dict_keys = { + "transformer.context_embedder.lora_A.weight": [64, 4096], + "transformer.context_embedder.lora_B.weight": [3072, 64], + "transformer.norm_out.linear.lora_A.weight": [64, 3072], + "transformer.norm_out.linear.lora_B.weight": [6144, 64], + "transformer.proj_out.lora_A.weight": [64, 3072], + "transformer.proj_out.lora_B.weight": [64, 64], + "transformer.single_transformer_blocks.0.attn.to_k.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.0.attn.to_k.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.0.attn.to_q.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.0.attn.to_q.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.0.attn.to_v.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.0.attn.to_v.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.0.norm.linear.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.0.norm.linear.lora_B.weight": [9216, 64], + "transformer.single_transformer_blocks.0.proj_mlp.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.0.proj_mlp.lora_B.weight": [12288, 64], + "transformer.single_transformer_blocks.0.proj_out.lora_A.weight": [64, 15360], + "transformer.single_transformer_blocks.0.proj_out.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.1.attn.to_k.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.1.attn.to_k.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.1.attn.to_q.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.1.attn.to_q.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.1.attn.to_v.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.1.attn.to_v.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.1.norm.linear.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.1.norm.linear.lora_B.weight": [9216, 64], + "transformer.single_transformer_blocks.1.proj_mlp.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.1.proj_mlp.lora_B.weight": [12288, 64], + "transformer.single_transformer_blocks.1.proj_out.lora_A.weight": [64, 15360], + "transformer.single_transformer_blocks.1.proj_out.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.10.attn.to_k.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.10.attn.to_k.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.10.attn.to_q.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.10.attn.to_q.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.10.attn.to_v.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.10.attn.to_v.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.10.norm.linear.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.10.norm.linear.lora_B.weight": [9216, 64], + "transformer.single_transformer_blocks.10.proj_mlp.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.10.proj_mlp.lora_B.weight": [12288, 64], + "transformer.single_transformer_blocks.10.proj_out.lora_A.weight": [64, 15360], + "transformer.single_transformer_blocks.10.proj_out.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.11.attn.to_k.lora_A.weight": [64, 3072], + 
"transformer.single_transformer_blocks.11.attn.to_k.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.11.attn.to_q.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.11.attn.to_q.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.11.attn.to_v.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.11.attn.to_v.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.11.norm.linear.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.11.norm.linear.lora_B.weight": [9216, 64], + "transformer.single_transformer_blocks.11.proj_mlp.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.11.proj_mlp.lora_B.weight": [12288, 64], + "transformer.single_transformer_blocks.11.proj_out.lora_A.weight": [64, 15360], + "transformer.single_transformer_blocks.11.proj_out.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.12.attn.to_k.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.12.attn.to_k.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.12.attn.to_q.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.12.attn.to_q.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.12.attn.to_v.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.12.attn.to_v.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.12.norm.linear.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.12.norm.linear.lora_B.weight": [9216, 64], + "transformer.single_transformer_blocks.12.proj_mlp.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.12.proj_mlp.lora_B.weight": [12288, 64], + "transformer.single_transformer_blocks.12.proj_out.lora_A.weight": [64, 15360], + "transformer.single_transformer_blocks.12.proj_out.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.13.attn.to_k.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.13.attn.to_k.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.13.attn.to_q.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.13.attn.to_q.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.13.attn.to_v.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.13.attn.to_v.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.13.norm.linear.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.13.norm.linear.lora_B.weight": [9216, 64], + "transformer.single_transformer_blocks.13.proj_mlp.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.13.proj_mlp.lora_B.weight": [12288, 64], + "transformer.single_transformer_blocks.13.proj_out.lora_A.weight": [64, 15360], + "transformer.single_transformer_blocks.13.proj_out.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.14.attn.to_k.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.14.attn.to_k.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.14.attn.to_q.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.14.attn.to_q.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.14.attn.to_v.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.14.attn.to_v.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.14.norm.linear.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.14.norm.linear.lora_B.weight": [9216, 64], + 
"transformer.single_transformer_blocks.14.proj_mlp.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.14.proj_mlp.lora_B.weight": [12288, 64], + "transformer.single_transformer_blocks.14.proj_out.lora_A.weight": [64, 15360], + "transformer.single_transformer_blocks.14.proj_out.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.15.attn.to_k.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.15.attn.to_k.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.15.attn.to_q.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.15.attn.to_q.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.15.attn.to_v.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.15.attn.to_v.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.15.norm.linear.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.15.norm.linear.lora_B.weight": [9216, 64], + "transformer.single_transformer_blocks.15.proj_mlp.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.15.proj_mlp.lora_B.weight": [12288, 64], + "transformer.single_transformer_blocks.15.proj_out.lora_A.weight": [64, 15360], + "transformer.single_transformer_blocks.15.proj_out.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.16.attn.to_k.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.16.attn.to_k.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.16.attn.to_q.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.16.attn.to_q.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.16.attn.to_v.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.16.attn.to_v.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.16.norm.linear.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.16.norm.linear.lora_B.weight": [9216, 64], + "transformer.single_transformer_blocks.16.proj_mlp.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.16.proj_mlp.lora_B.weight": [12288, 64], + "transformer.single_transformer_blocks.16.proj_out.lora_A.weight": [64, 15360], + "transformer.single_transformer_blocks.16.proj_out.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.17.attn.to_k.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.17.attn.to_k.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.17.attn.to_q.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.17.attn.to_q.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.17.attn.to_v.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.17.attn.to_v.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.17.norm.linear.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.17.norm.linear.lora_B.weight": [9216, 64], + "transformer.single_transformer_blocks.17.proj_mlp.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.17.proj_mlp.lora_B.weight": [12288, 64], + "transformer.single_transformer_blocks.17.proj_out.lora_A.weight": [64, 15360], + "transformer.single_transformer_blocks.17.proj_out.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.18.attn.to_k.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.18.attn.to_k.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.18.attn.to_q.lora_A.weight": [64, 3072], + 
"transformer.single_transformer_blocks.18.attn.to_q.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.18.attn.to_v.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.18.attn.to_v.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.18.norm.linear.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.18.norm.linear.lora_B.weight": [9216, 64], + "transformer.single_transformer_blocks.18.proj_mlp.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.18.proj_mlp.lora_B.weight": [12288, 64], + "transformer.single_transformer_blocks.18.proj_out.lora_A.weight": [64, 15360], + "transformer.single_transformer_blocks.18.proj_out.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.19.attn.to_k.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.19.attn.to_k.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.19.attn.to_q.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.19.attn.to_q.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.19.attn.to_v.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.19.attn.to_v.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.19.norm.linear.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.19.norm.linear.lora_B.weight": [9216, 64], + "transformer.single_transformer_blocks.19.proj_mlp.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.19.proj_mlp.lora_B.weight": [12288, 64], + "transformer.single_transformer_blocks.19.proj_out.lora_A.weight": [64, 15360], + "transformer.single_transformer_blocks.19.proj_out.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.2.attn.to_k.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.2.attn.to_k.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.2.attn.to_q.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.2.attn.to_q.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.2.attn.to_v.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.2.attn.to_v.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.2.norm.linear.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.2.norm.linear.lora_B.weight": [9216, 64], + "transformer.single_transformer_blocks.2.proj_mlp.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.2.proj_mlp.lora_B.weight": [12288, 64], + "transformer.single_transformer_blocks.2.proj_out.lora_A.weight": [64, 15360], + "transformer.single_transformer_blocks.2.proj_out.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.20.attn.to_k.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.20.attn.to_k.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.20.attn.to_q.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.20.attn.to_q.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.20.attn.to_v.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.20.attn.to_v.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.20.norm.linear.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.20.norm.linear.lora_B.weight": [9216, 64], + "transformer.single_transformer_blocks.20.proj_mlp.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.20.proj_mlp.lora_B.weight": [12288, 64], + 
"transformer.single_transformer_blocks.20.proj_out.lora_A.weight": [64, 15360], + "transformer.single_transformer_blocks.20.proj_out.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.21.attn.to_k.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.21.attn.to_k.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.21.attn.to_q.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.21.attn.to_q.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.21.attn.to_v.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.21.attn.to_v.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.21.norm.linear.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.21.norm.linear.lora_B.weight": [9216, 64], + "transformer.single_transformer_blocks.21.proj_mlp.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.21.proj_mlp.lora_B.weight": [12288, 64], + "transformer.single_transformer_blocks.21.proj_out.lora_A.weight": [64, 15360], + "transformer.single_transformer_blocks.21.proj_out.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.22.attn.to_k.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.22.attn.to_k.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.22.attn.to_q.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.22.attn.to_q.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.22.attn.to_v.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.22.attn.to_v.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.22.norm.linear.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.22.norm.linear.lora_B.weight": [9216, 64], + "transformer.single_transformer_blocks.22.proj_mlp.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.22.proj_mlp.lora_B.weight": [12288, 64], + "transformer.single_transformer_blocks.22.proj_out.lora_A.weight": [64, 15360], + "transformer.single_transformer_blocks.22.proj_out.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.23.attn.to_k.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.23.attn.to_k.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.23.attn.to_q.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.23.attn.to_q.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.23.attn.to_v.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.23.attn.to_v.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.23.norm.linear.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.23.norm.linear.lora_B.weight": [9216, 64], + "transformer.single_transformer_blocks.23.proj_mlp.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.23.proj_mlp.lora_B.weight": [12288, 64], + "transformer.single_transformer_blocks.23.proj_out.lora_A.weight": [64, 15360], + "transformer.single_transformer_blocks.23.proj_out.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.24.attn.to_k.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.24.attn.to_k.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.24.attn.to_q.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.24.attn.to_q.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.24.attn.to_v.lora_A.weight": [64, 3072], + 
"transformer.single_transformer_blocks.24.attn.to_v.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.24.norm.linear.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.24.norm.linear.lora_B.weight": [9216, 64], + "transformer.single_transformer_blocks.24.proj_mlp.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.24.proj_mlp.lora_B.weight": [12288, 64], + "transformer.single_transformer_blocks.24.proj_out.lora_A.weight": [64, 15360], + "transformer.single_transformer_blocks.24.proj_out.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.25.attn.to_k.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.25.attn.to_k.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.25.attn.to_q.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.25.attn.to_q.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.25.attn.to_v.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.25.attn.to_v.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.25.norm.linear.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.25.norm.linear.lora_B.weight": [9216, 64], + "transformer.single_transformer_blocks.25.proj_mlp.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.25.proj_mlp.lora_B.weight": [12288, 64], + "transformer.single_transformer_blocks.25.proj_out.lora_A.weight": [64, 15360], + "transformer.single_transformer_blocks.25.proj_out.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.26.attn.to_k.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.26.attn.to_k.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.26.attn.to_q.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.26.attn.to_q.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.26.attn.to_v.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.26.attn.to_v.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.26.norm.linear.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.26.norm.linear.lora_B.weight": [9216, 64], + "transformer.single_transformer_blocks.26.proj_mlp.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.26.proj_mlp.lora_B.weight": [12288, 64], + "transformer.single_transformer_blocks.26.proj_out.lora_A.weight": [64, 15360], + "transformer.single_transformer_blocks.26.proj_out.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.27.attn.to_k.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.27.attn.to_k.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.27.attn.to_q.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.27.attn.to_q.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.27.attn.to_v.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.27.attn.to_v.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.27.norm.linear.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.27.norm.linear.lora_B.weight": [9216, 64], + "transformer.single_transformer_blocks.27.proj_mlp.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.27.proj_mlp.lora_B.weight": [12288, 64], + "transformer.single_transformer_blocks.27.proj_out.lora_A.weight": [64, 15360], + "transformer.single_transformer_blocks.27.proj_out.lora_B.weight": [3072, 64], + 
"transformer.single_transformer_blocks.28.attn.to_k.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.28.attn.to_k.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.28.attn.to_q.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.28.attn.to_q.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.28.attn.to_v.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.28.attn.to_v.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.28.norm.linear.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.28.norm.linear.lora_B.weight": [9216, 64], + "transformer.single_transformer_blocks.28.proj_mlp.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.28.proj_mlp.lora_B.weight": [12288, 64], + "transformer.single_transformer_blocks.28.proj_out.lora_A.weight": [64, 15360], + "transformer.single_transformer_blocks.28.proj_out.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.29.attn.to_k.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.29.attn.to_k.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.29.attn.to_q.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.29.attn.to_q.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.29.attn.to_v.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.29.attn.to_v.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.29.norm.linear.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.29.norm.linear.lora_B.weight": [9216, 64], + "transformer.single_transformer_blocks.29.proj_mlp.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.29.proj_mlp.lora_B.weight": [12288, 64], + "transformer.single_transformer_blocks.29.proj_out.lora_A.weight": [64, 15360], + "transformer.single_transformer_blocks.29.proj_out.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.3.attn.to_k.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.3.attn.to_k.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.3.attn.to_q.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.3.attn.to_q.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.3.attn.to_v.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.3.attn.to_v.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.3.norm.linear.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.3.norm.linear.lora_B.weight": [9216, 64], + "transformer.single_transformer_blocks.3.proj_mlp.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.3.proj_mlp.lora_B.weight": [12288, 64], + "transformer.single_transformer_blocks.3.proj_out.lora_A.weight": [64, 15360], + "transformer.single_transformer_blocks.3.proj_out.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.30.attn.to_k.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.30.attn.to_k.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.30.attn.to_q.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.30.attn.to_q.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.30.attn.to_v.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.30.attn.to_v.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.30.norm.linear.lora_A.weight": [64, 3072], + 
"transformer.single_transformer_blocks.30.norm.linear.lora_B.weight": [9216, 64], + "transformer.single_transformer_blocks.30.proj_mlp.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.30.proj_mlp.lora_B.weight": [12288, 64], + "transformer.single_transformer_blocks.30.proj_out.lora_A.weight": [64, 15360], + "transformer.single_transformer_blocks.30.proj_out.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.31.attn.to_k.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.31.attn.to_k.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.31.attn.to_q.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.31.attn.to_q.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.31.attn.to_v.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.31.attn.to_v.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.31.norm.linear.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.31.norm.linear.lora_B.weight": [9216, 64], + "transformer.single_transformer_blocks.31.proj_mlp.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.31.proj_mlp.lora_B.weight": [12288, 64], + "transformer.single_transformer_blocks.31.proj_out.lora_A.weight": [64, 15360], + "transformer.single_transformer_blocks.31.proj_out.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.32.attn.to_k.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.32.attn.to_k.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.32.attn.to_q.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.32.attn.to_q.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.32.attn.to_v.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.32.attn.to_v.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.32.norm.linear.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.32.norm.linear.lora_B.weight": [9216, 64], + "transformer.single_transformer_blocks.32.proj_mlp.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.32.proj_mlp.lora_B.weight": [12288, 64], + "transformer.single_transformer_blocks.32.proj_out.lora_A.weight": [64, 15360], + "transformer.single_transformer_blocks.32.proj_out.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.33.attn.to_k.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.33.attn.to_k.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.33.attn.to_q.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.33.attn.to_q.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.33.attn.to_v.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.33.attn.to_v.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.33.norm.linear.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.33.norm.linear.lora_B.weight": [9216, 64], + "transformer.single_transformer_blocks.33.proj_mlp.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.33.proj_mlp.lora_B.weight": [12288, 64], + "transformer.single_transformer_blocks.33.proj_out.lora_A.weight": [64, 15360], + "transformer.single_transformer_blocks.33.proj_out.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.34.attn.to_k.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.34.attn.to_k.lora_B.weight": [3072, 64], + 
"transformer.single_transformer_blocks.34.attn.to_q.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.34.attn.to_q.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.34.attn.to_v.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.34.attn.to_v.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.34.norm.linear.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.34.norm.linear.lora_B.weight": [9216, 64], + "transformer.single_transformer_blocks.34.proj_mlp.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.34.proj_mlp.lora_B.weight": [12288, 64], + "transformer.single_transformer_blocks.34.proj_out.lora_A.weight": [64, 15360], + "transformer.single_transformer_blocks.34.proj_out.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.35.attn.to_k.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.35.attn.to_k.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.35.attn.to_q.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.35.attn.to_q.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.35.attn.to_v.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.35.attn.to_v.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.35.norm.linear.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.35.norm.linear.lora_B.weight": [9216, 64], + "transformer.single_transformer_blocks.35.proj_mlp.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.35.proj_mlp.lora_B.weight": [12288, 64], + "transformer.single_transformer_blocks.35.proj_out.lora_A.weight": [64, 15360], + "transformer.single_transformer_blocks.35.proj_out.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.36.attn.to_k.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.36.attn.to_k.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.36.attn.to_q.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.36.attn.to_q.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.36.attn.to_v.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.36.attn.to_v.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.36.norm.linear.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.36.norm.linear.lora_B.weight": [9216, 64], + "transformer.single_transformer_blocks.36.proj_mlp.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.36.proj_mlp.lora_B.weight": [12288, 64], + "transformer.single_transformer_blocks.36.proj_out.lora_A.weight": [64, 15360], + "transformer.single_transformer_blocks.36.proj_out.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.37.attn.to_k.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.37.attn.to_k.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.37.attn.to_q.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.37.attn.to_q.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.37.attn.to_v.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.37.attn.to_v.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.37.norm.linear.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.37.norm.linear.lora_B.weight": [9216, 64], + "transformer.single_transformer_blocks.37.proj_mlp.lora_A.weight": [64, 3072], + 
"transformer.single_transformer_blocks.37.proj_mlp.lora_B.weight": [12288, 64], + "transformer.single_transformer_blocks.37.proj_out.lora_A.weight": [64, 15360], + "transformer.single_transformer_blocks.37.proj_out.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.4.attn.to_k.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.4.attn.to_k.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.4.attn.to_q.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.4.attn.to_q.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.4.attn.to_v.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.4.attn.to_v.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.4.norm.linear.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.4.norm.linear.lora_B.weight": [9216, 64], + "transformer.single_transformer_blocks.4.proj_mlp.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.4.proj_mlp.lora_B.weight": [12288, 64], + "transformer.single_transformer_blocks.4.proj_out.lora_A.weight": [64, 15360], + "transformer.single_transformer_blocks.4.proj_out.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.5.attn.to_k.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.5.attn.to_k.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.5.attn.to_q.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.5.attn.to_q.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.5.attn.to_v.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.5.attn.to_v.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.5.norm.linear.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.5.norm.linear.lora_B.weight": [9216, 64], + "transformer.single_transformer_blocks.5.proj_mlp.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.5.proj_mlp.lora_B.weight": [12288, 64], + "transformer.single_transformer_blocks.5.proj_out.lora_A.weight": [64, 15360], + "transformer.single_transformer_blocks.5.proj_out.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.6.attn.to_k.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.6.attn.to_k.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.6.attn.to_q.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.6.attn.to_q.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.6.attn.to_v.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.6.attn.to_v.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.6.norm.linear.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.6.norm.linear.lora_B.weight": [9216, 64], + "transformer.single_transformer_blocks.6.proj_mlp.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.6.proj_mlp.lora_B.weight": [12288, 64], + "transformer.single_transformer_blocks.6.proj_out.lora_A.weight": [64, 15360], + "transformer.single_transformer_blocks.6.proj_out.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.7.attn.to_k.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.7.attn.to_k.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.7.attn.to_q.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.7.attn.to_q.lora_B.weight": [3072, 64], + 
"transformer.single_transformer_blocks.7.attn.to_v.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.7.attn.to_v.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.7.norm.linear.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.7.norm.linear.lora_B.weight": [9216, 64], + "transformer.single_transformer_blocks.7.proj_mlp.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.7.proj_mlp.lora_B.weight": [12288, 64], + "transformer.single_transformer_blocks.7.proj_out.lora_A.weight": [64, 15360], + "transformer.single_transformer_blocks.7.proj_out.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.8.attn.to_k.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.8.attn.to_k.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.8.attn.to_q.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.8.attn.to_q.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.8.attn.to_v.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.8.attn.to_v.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.8.norm.linear.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.8.norm.linear.lora_B.weight": [9216, 64], + "transformer.single_transformer_blocks.8.proj_mlp.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.8.proj_mlp.lora_B.weight": [12288, 64], + "transformer.single_transformer_blocks.8.proj_out.lora_A.weight": [64, 15360], + "transformer.single_transformer_blocks.8.proj_out.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.9.attn.to_k.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.9.attn.to_k.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.9.attn.to_q.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.9.attn.to_q.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.9.attn.to_v.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.9.attn.to_v.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.9.norm.linear.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.9.norm.linear.lora_B.weight": [9216, 64], + "transformer.single_transformer_blocks.9.proj_mlp.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.9.proj_mlp.lora_B.weight": [12288, 64], + "transformer.single_transformer_blocks.9.proj_out.lora_A.weight": [64, 15360], + "transformer.single_transformer_blocks.9.proj_out.lora_B.weight": [3072, 64], + "transformer.time_text_embed.guidance_embedder.linear_1.lora_A.weight": [64, 256], + "transformer.time_text_embed.guidance_embedder.linear_1.lora_B.weight": [3072, 64], + "transformer.time_text_embed.guidance_embedder.linear_2.lora_A.weight": [64, 3072], + "transformer.time_text_embed.guidance_embedder.linear_2.lora_B.weight": [3072, 64], + "transformer.time_text_embed.text_embedder.linear_1.lora_A.weight": [64, 768], + "transformer.time_text_embed.text_embedder.linear_1.lora_B.weight": [3072, 64], + "transformer.time_text_embed.text_embedder.linear_2.lora_A.weight": [64, 3072], + "transformer.time_text_embed.text_embedder.linear_2.lora_B.weight": [3072, 64], + "transformer.time_text_embed.timestep_embedder.linear_1.lora_A.weight": [64, 256], + "transformer.time_text_embed.timestep_embedder.linear_1.lora_B.weight": [3072, 64], + "transformer.time_text_embed.timestep_embedder.linear_2.lora_A.weight": [64, 3072], + 
"transformer.time_text_embed.timestep_embedder.linear_2.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.0.attn.add_k_proj.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.0.attn.add_k_proj.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.0.attn.add_q_proj.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.0.attn.add_q_proj.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.0.attn.add_v_proj.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.0.attn.add_v_proj.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.0.attn.to_add_out.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.0.attn.to_add_out.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.0.attn.to_k.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.0.attn.to_k.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.0.attn.to_out.0.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.0.attn.to_out.0.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.0.attn.to_q.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.0.attn.to_q.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.0.attn.to_v.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.0.attn.to_v.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.0.ff.net.0.proj.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.0.ff.net.0.proj.lora_B.weight": [12288, 64], + "transformer.transformer_blocks.0.ff.net.2.lora_A.weight": [64, 12288], + "transformer.transformer_blocks.0.ff.net.2.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.0.ff_context.net.0.proj.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.0.ff_context.net.0.proj.lora_B.weight": [12288, 64], + "transformer.transformer_blocks.0.ff_context.net.2.lora_A.weight": [64, 12288], + "transformer.transformer_blocks.0.ff_context.net.2.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.0.norm1.linear.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.0.norm1.linear.lora_B.weight": [18432, 64], + "transformer.transformer_blocks.0.norm1_context.linear.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.0.norm1_context.linear.lora_B.weight": [18432, 64], + "transformer.transformer_blocks.1.attn.add_k_proj.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.1.attn.add_k_proj.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.1.attn.add_q_proj.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.1.attn.add_q_proj.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.1.attn.add_v_proj.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.1.attn.add_v_proj.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.1.attn.to_add_out.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.1.attn.to_add_out.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.1.attn.to_k.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.1.attn.to_k.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.1.attn.to_out.0.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.1.attn.to_out.0.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.1.attn.to_q.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.1.attn.to_q.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.1.attn.to_v.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.1.attn.to_v.lora_B.weight": [3072, 64], + 
"transformer.transformer_blocks.1.ff.net.0.proj.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.1.ff.net.0.proj.lora_B.weight": [12288, 64], + "transformer.transformer_blocks.1.ff.net.2.lora_A.weight": [64, 12288], + "transformer.transformer_blocks.1.ff.net.2.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.1.ff_context.net.0.proj.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.1.ff_context.net.0.proj.lora_B.weight": [12288, 64], + "transformer.transformer_blocks.1.ff_context.net.2.lora_A.weight": [64, 12288], + "transformer.transformer_blocks.1.ff_context.net.2.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.1.norm1.linear.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.1.norm1.linear.lora_B.weight": [18432, 64], + "transformer.transformer_blocks.1.norm1_context.linear.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.1.norm1_context.linear.lora_B.weight": [18432, 64], + "transformer.transformer_blocks.10.attn.add_k_proj.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.10.attn.add_k_proj.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.10.attn.add_q_proj.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.10.attn.add_q_proj.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.10.attn.add_v_proj.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.10.attn.add_v_proj.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.10.attn.to_add_out.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.10.attn.to_add_out.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.10.attn.to_k.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.10.attn.to_k.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.10.attn.to_out.0.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.10.attn.to_out.0.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.10.attn.to_q.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.10.attn.to_q.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.10.attn.to_v.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.10.attn.to_v.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.10.ff.net.0.proj.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.10.ff.net.0.proj.lora_B.weight": [12288, 64], + "transformer.transformer_blocks.10.ff.net.2.lora_A.weight": [64, 12288], + "transformer.transformer_blocks.10.ff.net.2.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.10.ff_context.net.0.proj.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.10.ff_context.net.0.proj.lora_B.weight": [12288, 64], + "transformer.transformer_blocks.10.ff_context.net.2.lora_A.weight": [64, 12288], + "transformer.transformer_blocks.10.ff_context.net.2.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.10.norm1.linear.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.10.norm1.linear.lora_B.weight": [18432, 64], + "transformer.transformer_blocks.10.norm1_context.linear.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.10.norm1_context.linear.lora_B.weight": [18432, 64], + "transformer.transformer_blocks.11.attn.add_k_proj.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.11.attn.add_k_proj.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.11.attn.add_q_proj.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.11.attn.add_q_proj.lora_B.weight": [3072, 64], + 
"transformer.transformer_blocks.11.attn.add_v_proj.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.11.attn.add_v_proj.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.11.attn.to_add_out.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.11.attn.to_add_out.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.11.attn.to_k.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.11.attn.to_k.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.11.attn.to_out.0.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.11.attn.to_out.0.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.11.attn.to_q.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.11.attn.to_q.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.11.attn.to_v.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.11.attn.to_v.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.11.ff.net.0.proj.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.11.ff.net.0.proj.lora_B.weight": [12288, 64], + "transformer.transformer_blocks.11.ff.net.2.lora_A.weight": [64, 12288], + "transformer.transformer_blocks.11.ff.net.2.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.11.ff_context.net.0.proj.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.11.ff_context.net.0.proj.lora_B.weight": [12288, 64], + "transformer.transformer_blocks.11.ff_context.net.2.lora_A.weight": [64, 12288], + "transformer.transformer_blocks.11.ff_context.net.2.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.11.norm1.linear.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.11.norm1.linear.lora_B.weight": [18432, 64], + "transformer.transformer_blocks.11.norm1_context.linear.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.11.norm1_context.linear.lora_B.weight": [18432, 64], + "transformer.transformer_blocks.12.attn.add_k_proj.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.12.attn.add_k_proj.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.12.attn.add_q_proj.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.12.attn.add_q_proj.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.12.attn.add_v_proj.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.12.attn.add_v_proj.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.12.attn.to_add_out.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.12.attn.to_add_out.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.12.attn.to_k.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.12.attn.to_k.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.12.attn.to_out.0.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.12.attn.to_out.0.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.12.attn.to_q.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.12.attn.to_q.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.12.attn.to_v.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.12.attn.to_v.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.12.ff.net.0.proj.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.12.ff.net.0.proj.lora_B.weight": [12288, 64], + "transformer.transformer_blocks.12.ff.net.2.lora_A.weight": [64, 12288], + "transformer.transformer_blocks.12.ff.net.2.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.12.ff_context.net.0.proj.lora_A.weight": [64, 
3072], + "transformer.transformer_blocks.12.ff_context.net.0.proj.lora_B.weight": [12288, 64], + "transformer.transformer_blocks.12.ff_context.net.2.lora_A.weight": [64, 12288], + "transformer.transformer_blocks.12.ff_context.net.2.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.12.norm1.linear.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.12.norm1.linear.lora_B.weight": [18432, 64], + "transformer.transformer_blocks.12.norm1_context.linear.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.12.norm1_context.linear.lora_B.weight": [18432, 64], + "transformer.transformer_blocks.13.attn.add_k_proj.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.13.attn.add_k_proj.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.13.attn.add_q_proj.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.13.attn.add_q_proj.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.13.attn.add_v_proj.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.13.attn.add_v_proj.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.13.attn.to_add_out.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.13.attn.to_add_out.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.13.attn.to_k.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.13.attn.to_k.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.13.attn.to_out.0.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.13.attn.to_out.0.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.13.attn.to_q.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.13.attn.to_q.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.13.attn.to_v.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.13.attn.to_v.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.13.ff.net.0.proj.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.13.ff.net.0.proj.lora_B.weight": [12288, 64], + "transformer.transformer_blocks.13.ff.net.2.lora_A.weight": [64, 12288], + "transformer.transformer_blocks.13.ff.net.2.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.13.ff_context.net.0.proj.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.13.ff_context.net.0.proj.lora_B.weight": [12288, 64], + "transformer.transformer_blocks.13.ff_context.net.2.lora_A.weight": [64, 12288], + "transformer.transformer_blocks.13.ff_context.net.2.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.13.norm1.linear.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.13.norm1.linear.lora_B.weight": [18432, 64], + "transformer.transformer_blocks.13.norm1_context.linear.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.13.norm1_context.linear.lora_B.weight": [18432, 64], + "transformer.transformer_blocks.14.attn.add_k_proj.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.14.attn.add_k_proj.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.14.attn.add_q_proj.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.14.attn.add_q_proj.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.14.attn.add_v_proj.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.14.attn.add_v_proj.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.14.attn.to_add_out.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.14.attn.to_add_out.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.14.attn.to_k.lora_A.weight": [64, 3072], + 
"transformer.transformer_blocks.14.attn.to_k.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.14.attn.to_out.0.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.14.attn.to_out.0.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.14.attn.to_q.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.14.attn.to_q.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.14.attn.to_v.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.14.attn.to_v.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.14.ff.net.0.proj.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.14.ff.net.0.proj.lora_B.weight": [12288, 64], + "transformer.transformer_blocks.14.ff.net.2.lora_A.weight": [64, 12288], + "transformer.transformer_blocks.14.ff.net.2.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.14.ff_context.net.0.proj.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.14.ff_context.net.0.proj.lora_B.weight": [12288, 64], + "transformer.transformer_blocks.14.ff_context.net.2.lora_A.weight": [64, 12288], + "transformer.transformer_blocks.14.ff_context.net.2.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.14.norm1.linear.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.14.norm1.linear.lora_B.weight": [18432, 64], + "transformer.transformer_blocks.14.norm1_context.linear.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.14.norm1_context.linear.lora_B.weight": [18432, 64], + "transformer.transformer_blocks.15.attn.add_k_proj.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.15.attn.add_k_proj.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.15.attn.add_q_proj.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.15.attn.add_q_proj.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.15.attn.add_v_proj.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.15.attn.add_v_proj.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.15.attn.to_add_out.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.15.attn.to_add_out.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.15.attn.to_k.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.15.attn.to_k.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.15.attn.to_out.0.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.15.attn.to_out.0.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.15.attn.to_q.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.15.attn.to_q.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.15.attn.to_v.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.15.attn.to_v.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.15.ff.net.0.proj.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.15.ff.net.0.proj.lora_B.weight": [12288, 64], + "transformer.transformer_blocks.15.ff.net.2.lora_A.weight": [64, 12288], + "transformer.transformer_blocks.15.ff.net.2.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.15.ff_context.net.0.proj.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.15.ff_context.net.0.proj.lora_B.weight": [12288, 64], + "transformer.transformer_blocks.15.ff_context.net.2.lora_A.weight": [64, 12288], + "transformer.transformer_blocks.15.ff_context.net.2.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.15.norm1.linear.lora_A.weight": [64, 3072], + 
"transformer.transformer_blocks.15.norm1.linear.lora_B.weight": [18432, 64], + "transformer.transformer_blocks.15.norm1_context.linear.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.15.norm1_context.linear.lora_B.weight": [18432, 64], + "transformer.transformer_blocks.16.attn.add_k_proj.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.16.attn.add_k_proj.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.16.attn.add_q_proj.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.16.attn.add_q_proj.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.16.attn.add_v_proj.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.16.attn.add_v_proj.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.16.attn.to_add_out.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.16.attn.to_add_out.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.16.attn.to_k.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.16.attn.to_k.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.16.attn.to_out.0.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.16.attn.to_out.0.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.16.attn.to_q.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.16.attn.to_q.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.16.attn.to_v.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.16.attn.to_v.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.16.ff.net.0.proj.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.16.ff.net.0.proj.lora_B.weight": [12288, 64], + "transformer.transformer_blocks.16.ff.net.2.lora_A.weight": [64, 12288], + "transformer.transformer_blocks.16.ff.net.2.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.16.ff_context.net.0.proj.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.16.ff_context.net.0.proj.lora_B.weight": [12288, 64], + "transformer.transformer_blocks.16.ff_context.net.2.lora_A.weight": [64, 12288], + "transformer.transformer_blocks.16.ff_context.net.2.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.16.norm1.linear.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.16.norm1.linear.lora_B.weight": [18432, 64], + "transformer.transformer_blocks.16.norm1_context.linear.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.16.norm1_context.linear.lora_B.weight": [18432, 64], + "transformer.transformer_blocks.17.attn.add_k_proj.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.17.attn.add_k_proj.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.17.attn.add_q_proj.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.17.attn.add_q_proj.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.17.attn.add_v_proj.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.17.attn.add_v_proj.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.17.attn.to_add_out.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.17.attn.to_add_out.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.17.attn.to_k.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.17.attn.to_k.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.17.attn.to_out.0.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.17.attn.to_out.0.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.17.attn.to_q.lora_A.weight": [64, 3072], + 
"transformer.transformer_blocks.17.attn.to_q.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.17.attn.to_v.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.17.attn.to_v.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.17.ff.net.0.proj.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.17.ff.net.0.proj.lora_B.weight": [12288, 64], + "transformer.transformer_blocks.17.ff.net.2.lora_A.weight": [64, 12288], + "transformer.transformer_blocks.17.ff.net.2.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.17.ff_context.net.0.proj.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.17.ff_context.net.0.proj.lora_B.weight": [12288, 64], + "transformer.transformer_blocks.17.ff_context.net.2.lora_A.weight": [64, 12288], + "transformer.transformer_blocks.17.ff_context.net.2.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.17.norm1.linear.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.17.norm1.linear.lora_B.weight": [18432, 64], + "transformer.transformer_blocks.17.norm1_context.linear.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.17.norm1_context.linear.lora_B.weight": [18432, 64], + "transformer.transformer_blocks.18.attn.add_k_proj.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.18.attn.add_k_proj.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.18.attn.add_q_proj.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.18.attn.add_q_proj.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.18.attn.add_v_proj.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.18.attn.add_v_proj.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.18.attn.to_add_out.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.18.attn.to_add_out.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.18.attn.to_k.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.18.attn.to_k.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.18.attn.to_out.0.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.18.attn.to_out.0.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.18.attn.to_q.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.18.attn.to_q.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.18.attn.to_v.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.18.attn.to_v.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.18.ff.net.0.proj.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.18.ff.net.0.proj.lora_B.weight": [12288, 64], + "transformer.transformer_blocks.18.ff.net.2.lora_A.weight": [64, 12288], + "transformer.transformer_blocks.18.ff.net.2.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.18.ff_context.net.0.proj.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.18.ff_context.net.0.proj.lora_B.weight": [12288, 64], + "transformer.transformer_blocks.18.ff_context.net.2.lora_A.weight": [64, 12288], + "transformer.transformer_blocks.18.ff_context.net.2.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.18.norm1.linear.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.18.norm1.linear.lora_B.weight": [18432, 64], + "transformer.transformer_blocks.18.norm1_context.linear.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.18.norm1_context.linear.lora_B.weight": [18432, 64], + "transformer.transformer_blocks.2.attn.add_k_proj.lora_A.weight": [64, 3072], + 
"transformer.transformer_blocks.2.attn.add_k_proj.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.2.attn.add_q_proj.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.2.attn.add_q_proj.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.2.attn.add_v_proj.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.2.attn.add_v_proj.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.2.attn.to_add_out.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.2.attn.to_add_out.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.2.attn.to_k.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.2.attn.to_k.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.2.attn.to_out.0.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.2.attn.to_out.0.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.2.attn.to_q.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.2.attn.to_q.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.2.attn.to_v.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.2.attn.to_v.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.2.ff.net.0.proj.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.2.ff.net.0.proj.lora_B.weight": [12288, 64], + "transformer.transformer_blocks.2.ff.net.2.lora_A.weight": [64, 12288], + "transformer.transformer_blocks.2.ff.net.2.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.2.ff_context.net.0.proj.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.2.ff_context.net.0.proj.lora_B.weight": [12288, 64], + "transformer.transformer_blocks.2.ff_context.net.2.lora_A.weight": [64, 12288], + "transformer.transformer_blocks.2.ff_context.net.2.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.2.norm1.linear.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.2.norm1.linear.lora_B.weight": [18432, 64], + "transformer.transformer_blocks.2.norm1_context.linear.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.2.norm1_context.linear.lora_B.weight": [18432, 64], + "transformer.transformer_blocks.3.attn.add_k_proj.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.3.attn.add_k_proj.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.3.attn.add_q_proj.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.3.attn.add_q_proj.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.3.attn.add_v_proj.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.3.attn.add_v_proj.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.3.attn.to_add_out.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.3.attn.to_add_out.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.3.attn.to_k.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.3.attn.to_k.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.3.attn.to_out.0.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.3.attn.to_out.0.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.3.attn.to_q.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.3.attn.to_q.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.3.attn.to_v.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.3.attn.to_v.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.3.ff.net.0.proj.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.3.ff.net.0.proj.lora_B.weight": [12288, 64], + 
"transformer.transformer_blocks.3.ff.net.2.lora_A.weight": [64, 12288], + "transformer.transformer_blocks.3.ff.net.2.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.3.ff_context.net.0.proj.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.3.ff_context.net.0.proj.lora_B.weight": [12288, 64], + "transformer.transformer_blocks.3.ff_context.net.2.lora_A.weight": [64, 12288], + "transformer.transformer_blocks.3.ff_context.net.2.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.3.norm1.linear.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.3.norm1.linear.lora_B.weight": [18432, 64], + "transformer.transformer_blocks.3.norm1_context.linear.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.3.norm1_context.linear.lora_B.weight": [18432, 64], + "transformer.transformer_blocks.4.attn.add_k_proj.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.4.attn.add_k_proj.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.4.attn.add_q_proj.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.4.attn.add_q_proj.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.4.attn.add_v_proj.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.4.attn.add_v_proj.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.4.attn.to_add_out.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.4.attn.to_add_out.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.4.attn.to_k.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.4.attn.to_k.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.4.attn.to_out.0.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.4.attn.to_out.0.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.4.attn.to_q.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.4.attn.to_q.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.4.attn.to_v.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.4.attn.to_v.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.4.ff.net.0.proj.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.4.ff.net.0.proj.lora_B.weight": [12288, 64], + "transformer.transformer_blocks.4.ff.net.2.lora_A.weight": [64, 12288], + "transformer.transformer_blocks.4.ff.net.2.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.4.ff_context.net.0.proj.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.4.ff_context.net.0.proj.lora_B.weight": [12288, 64], + "transformer.transformer_blocks.4.ff_context.net.2.lora_A.weight": [64, 12288], + "transformer.transformer_blocks.4.ff_context.net.2.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.4.norm1.linear.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.4.norm1.linear.lora_B.weight": [18432, 64], + "transformer.transformer_blocks.4.norm1_context.linear.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.4.norm1_context.linear.lora_B.weight": [18432, 64], + "transformer.transformer_blocks.5.attn.add_k_proj.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.5.attn.add_k_proj.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.5.attn.add_q_proj.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.5.attn.add_q_proj.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.5.attn.add_v_proj.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.5.attn.add_v_proj.lora_B.weight": [3072, 64], + 
"transformer.transformer_blocks.5.attn.to_add_out.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.5.attn.to_add_out.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.5.attn.to_k.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.5.attn.to_k.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.5.attn.to_out.0.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.5.attn.to_out.0.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.5.attn.to_q.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.5.attn.to_q.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.5.attn.to_v.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.5.attn.to_v.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.5.ff.net.0.proj.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.5.ff.net.0.proj.lora_B.weight": [12288, 64], + "transformer.transformer_blocks.5.ff.net.2.lora_A.weight": [64, 12288], + "transformer.transformer_blocks.5.ff.net.2.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.5.ff_context.net.0.proj.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.5.ff_context.net.0.proj.lora_B.weight": [12288, 64], + "transformer.transformer_blocks.5.ff_context.net.2.lora_A.weight": [64, 12288], + "transformer.transformer_blocks.5.ff_context.net.2.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.5.norm1.linear.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.5.norm1.linear.lora_B.weight": [18432, 64], + "transformer.transformer_blocks.5.norm1_context.linear.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.5.norm1_context.linear.lora_B.weight": [18432, 64], + "transformer.transformer_blocks.6.attn.add_k_proj.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.6.attn.add_k_proj.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.6.attn.add_q_proj.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.6.attn.add_q_proj.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.6.attn.add_v_proj.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.6.attn.add_v_proj.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.6.attn.to_add_out.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.6.attn.to_add_out.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.6.attn.to_k.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.6.attn.to_k.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.6.attn.to_out.0.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.6.attn.to_out.0.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.6.attn.to_q.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.6.attn.to_q.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.6.attn.to_v.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.6.attn.to_v.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.6.ff.net.0.proj.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.6.ff.net.0.proj.lora_B.weight": [12288, 64], + "transformer.transformer_blocks.6.ff.net.2.lora_A.weight": [64, 12288], + "transformer.transformer_blocks.6.ff.net.2.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.6.ff_context.net.0.proj.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.6.ff_context.net.0.proj.lora_B.weight": [12288, 64], + "transformer.transformer_blocks.6.ff_context.net.2.lora_A.weight": [64, 12288], + 
"transformer.transformer_blocks.6.ff_context.net.2.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.6.norm1.linear.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.6.norm1.linear.lora_B.weight": [18432, 64], + "transformer.transformer_blocks.6.norm1_context.linear.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.6.norm1_context.linear.lora_B.weight": [18432, 64], + "transformer.transformer_blocks.7.attn.add_k_proj.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.7.attn.add_k_proj.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.7.attn.add_q_proj.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.7.attn.add_q_proj.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.7.attn.add_v_proj.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.7.attn.add_v_proj.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.7.attn.to_add_out.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.7.attn.to_add_out.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.7.attn.to_k.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.7.attn.to_k.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.7.attn.to_out.0.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.7.attn.to_out.0.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.7.attn.to_q.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.7.attn.to_q.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.7.attn.to_v.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.7.attn.to_v.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.7.ff.net.0.proj.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.7.ff.net.0.proj.lora_B.weight": [12288, 64], + "transformer.transformer_blocks.7.ff.net.2.lora_A.weight": [64, 12288], + "transformer.transformer_blocks.7.ff.net.2.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.7.ff_context.net.0.proj.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.7.ff_context.net.0.proj.lora_B.weight": [12288, 64], + "transformer.transformer_blocks.7.ff_context.net.2.lora_A.weight": [64, 12288], + "transformer.transformer_blocks.7.ff_context.net.2.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.7.norm1.linear.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.7.norm1.linear.lora_B.weight": [18432, 64], + "transformer.transformer_blocks.7.norm1_context.linear.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.7.norm1_context.linear.lora_B.weight": [18432, 64], + "transformer.transformer_blocks.8.attn.add_k_proj.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.8.attn.add_k_proj.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.8.attn.add_q_proj.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.8.attn.add_q_proj.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.8.attn.add_v_proj.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.8.attn.add_v_proj.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.8.attn.to_add_out.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.8.attn.to_add_out.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.8.attn.to_k.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.8.attn.to_k.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.8.attn.to_out.0.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.8.attn.to_out.0.lora_B.weight": [3072, 64], 
+ "transformer.transformer_blocks.8.attn.to_q.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.8.attn.to_q.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.8.attn.to_v.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.8.attn.to_v.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.8.ff.net.0.proj.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.8.ff.net.0.proj.lora_B.weight": [12288, 64], + "transformer.transformer_blocks.8.ff.net.2.lora_A.weight": [64, 12288], + "transformer.transformer_blocks.8.ff.net.2.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.8.ff_context.net.0.proj.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.8.ff_context.net.0.proj.lora_B.weight": [12288, 64], + "transformer.transformer_blocks.8.ff_context.net.2.lora_A.weight": [64, 12288], + "transformer.transformer_blocks.8.ff_context.net.2.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.8.norm1.linear.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.8.norm1.linear.lora_B.weight": [18432, 64], + "transformer.transformer_blocks.8.norm1_context.linear.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.8.norm1_context.linear.lora_B.weight": [18432, 64], + "transformer.transformer_blocks.9.attn.add_k_proj.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.9.attn.add_k_proj.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.9.attn.add_q_proj.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.9.attn.add_q_proj.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.9.attn.add_v_proj.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.9.attn.add_v_proj.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.9.attn.to_add_out.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.9.attn.to_add_out.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.9.attn.to_k.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.9.attn.to_k.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.9.attn.to_out.0.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.9.attn.to_out.0.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.9.attn.to_q.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.9.attn.to_q.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.9.attn.to_v.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.9.attn.to_v.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.9.ff.net.0.proj.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.9.ff.net.0.proj.lora_B.weight": [12288, 64], + "transformer.transformer_blocks.9.ff.net.2.lora_A.weight": [64, 12288], + "transformer.transformer_blocks.9.ff.net.2.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.9.ff_context.net.0.proj.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.9.ff_context.net.0.proj.lora_B.weight": [12288, 64], + "transformer.transformer_blocks.9.ff_context.net.2.lora_A.weight": [64, 12288], + "transformer.transformer_blocks.9.ff_context.net.2.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.9.norm1.linear.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.9.norm1.linear.lora_B.weight": [18432, 64], + "transformer.transformer_blocks.9.norm1_context.linear.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.9.norm1_context.linear.lora_B.weight": [18432, 64], + "transformer.x_embedder.lora_A.weight": [64, 64], + 
"transformer.x_embedder.lora_B.weight": [3072, 64] } \ No newline at end of file diff --git a/tests/backend/patches/lora_conversions/test_flux_diffusers_lora_conversion_utils.py b/tests/backend/patches/lora_conversions/test_flux_diffusers_lora_conversion_utils.py index 2bdb883faff..3558eb01eee 100644 --- a/tests/backend/patches/lora_conversions/test_flux_diffusers_lora_conversion_utils.py +++ b/tests/backend/patches/lora_conversions/test_flux_diffusers_lora_conversion_utils.py @@ -15,13 +15,16 @@ from tests.backend.patches.lora_conversions.lora_state_dicts.flux_lora_diffusers_no_proj_mlp_format import ( state_dict_keys as flux_diffusers_no_proj_mlp_state_dict_keys, ) +from tests.backend.patches.lora_conversions.lora_state_dicts.flux_lora_diffusers_with_norm_out_format import ( + state_dict_keys as flux_diffusers_with_norm_out_state_dict_keys, +) from tests.backend.patches.lora_conversions.lora_state_dicts.flux_lora_kohya_format import ( state_dict_keys as flux_kohya_state_dict_keys, ) from tests.backend.patches.lora_conversions.lora_state_dicts.utils import keys_to_mock_state_dict -@pytest.mark.parametrize("sd_keys", [flux_diffusers_state_dict_keys, flux_diffusers_no_proj_mlp_state_dict_keys]) +@pytest.mark.parametrize("sd_keys", [flux_diffusers_state_dict_keys, flux_diffusers_no_proj_mlp_state_dict_keys, flux_diffusers_with_norm_out_state_dict_keys]) def test_is_state_dict_likely_in_flux_diffusers_format_true(sd_keys: dict[str, list[int]]): """Test that is_state_dict_likely_in_flux_diffusers_format() can identify a state dict in the Diffusers FLUX LoRA format.""" # Construct a state dict that is in the Diffusers FLUX LoRA format. @@ -41,7 +44,7 @@ def test_is_state_dict_likely_in_flux_diffusers_format_false(sd_keys: dict[str, assert not is_state_dict_likely_in_flux_diffusers_format(state_dict) -@pytest.mark.parametrize("sd_keys", [flux_diffusers_state_dict_keys, flux_diffusers_no_proj_mlp_state_dict_keys]) +@pytest.mark.parametrize("sd_keys", [flux_diffusers_state_dict_keys, flux_diffusers_no_proj_mlp_state_dict_keys, flux_diffusers_with_norm_out_state_dict_keys]) def test_lora_model_from_flux_diffusers_state_dict(sd_keys: dict[str, list[int]]): """Test that lora_model_from_flux_diffusers_state_dict() can load a state dict in the Diffusers FLUX LoRA format.""" # Construct a state dict that is in the Diffusers FLUX LoRA format. 
From 4f1b6ce6fa2f747c892234d1a99f428838e04159 Mon Sep 17 00:00:00 2001 From: simpletrontdip Date: Fri, 7 Mar 2025 19:39:44 +0700 Subject: [PATCH 05/14] feat: add new layer type for diffusers-ada-ln --- .../layers/diffusers_ada_ln_lora_layer.py | 16 ++++++++++++++++ invokeai/backend/patches/layers/utils.py | 8 ++++++++ .../flux_diffusers_lora_conversion_utils.py | 13 +++---------- 3 files changed, 27 insertions(+), 10 deletions(-) create mode 100644 invokeai/backend/patches/layers/diffusers_ada_ln_lora_layer.py diff --git a/invokeai/backend/patches/layers/diffusers_ada_ln_lora_layer.py b/invokeai/backend/patches/layers/diffusers_ada_ln_lora_layer.py new file mode 100644 index 00000000000..50ca1f76914 --- /dev/null +++ b/invokeai/backend/patches/layers/diffusers_ada_ln_lora_layer.py @@ -0,0 +1,16 @@ +import torch + +from invokeai.backend.patches.layers.lora_layer import LoRALayer + +class DiffusersAdaLN_LoRALayer(LoRALayer): + '''LoRA layer converted from Diffusers AdaLN, weight is shift-scale swapped''' + + def get_weight(self, orig_weight: torch.Tensor) -> torch.Tensor: + # In SD3 and Flux implementation of AdaLayerNormContinuous, it split linear projection output into shift, scale; + # while in diffusers it split into scale, shift. + # So we swap the linear projection weights in order to be able to use Flux implementation + + weight = super().get_weight(orig_weight) + scale, shift = weight.chunk(2, dim=0) + + return torch.cat([shift, scale], dim=0) \ No newline at end of file diff --git a/invokeai/backend/patches/layers/utils.py b/invokeai/backend/patches/layers/utils.py index 8141a56644a..86acfe992bb 100644 --- a/invokeai/backend/patches/layers/utils.py +++ b/invokeai/backend/patches/layers/utils.py @@ -10,6 +10,7 @@ from invokeai.backend.patches.layers.lokr_layer import LoKRLayer from invokeai.backend.patches.layers.lora_layer import LoRALayer from invokeai.backend.patches.layers.norm_layer import NormLayer +from invokeai.backend.patches.layers.diffusers_ada_ln_lora_layer import DiffusersAdaLN_LoRALayer def any_lora_layer_from_state_dict(state_dict: Dict[str, torch.Tensor]) -> BaseLayerPatch: @@ -33,3 +34,10 @@ def any_lora_layer_from_state_dict(state_dict: Dict[str, torch.Tensor]) -> BaseL return NormLayer.from_state_dict_values(state_dict) else: raise ValueError(f"Unsupported lora format: {state_dict.keys()}") + + +def diffusers_adaLN_lora_layer_from_state_dict(state_dict: Dict[str, torch.Tensor]) -> DiffusersAdaLN_LoRALayer: + if not "lora_up.weight" in state_dict: + raise ValueError(f"Unsupported lora format: {state_dict.keys()}") + + return DiffusersAdaLN_LoRALayer.from_state_dict_values(state_dict) \ No newline at end of file diff --git a/invokeai/backend/patches/lora_conversions/flux_diffusers_lora_conversion_utils.py b/invokeai/backend/patches/lora_conversions/flux_diffusers_lora_conversion_utils.py index 1417489e8a4..013bd4ba542 100644 --- a/invokeai/backend/patches/lora_conversions/flux_diffusers_lora_conversion_utils.py +++ b/invokeai/backend/patches/lora_conversions/flux_diffusers_lora_conversion_utils.py @@ -4,7 +4,7 @@ from invokeai.backend.patches.layers.base_layer_patch import BaseLayerPatch from invokeai.backend.patches.layers.merged_layer_patch import MergedLayerPatch, Range -from invokeai.backend.patches.layers.utils import any_lora_layer_from_state_dict +from invokeai.backend.patches.layers.utils import any_lora_layer_from_state_dict, diffusers_adaLN_lora_layer_from_state_dict from invokeai.backend.patches.lora_conversions.flux_lora_constants import 
FLUX_LORA_TRANSFORMER_PREFIX from invokeai.backend.patches.model_patch_raw import ModelPatchRaw @@ -86,15 +86,8 @@ def add_adaLN_lora_layer_if_present(src_key: str, dst_key: str) -> None: if src_key in grouped_state_dict: src_layer_dict = grouped_state_dict.pop(src_key) values = get_lora_layer_values(src_layer_dict) - - for _key in values.keys(): - # in SD3 original implementation of AdaLayerNormContinuous, it split linear projection output into shift, scale; - # while in diffusers it split into scale, shift. Here we swap the linear projection weights in order to be able to use diffusers implementation - scale, shift = values[_key].chunk(2, dim=0) - values[_key] = torch.cat([shift, scale], dim=0) - - layers[dst_key] = any_lora_layer_from_state_dict(values) - + layers[dst_key] = diffusers_adaLN_lora_layer_from_state_dict(values) + def add_qkv_lora_layer_if_present( src_keys: list[str], src_weight_shapes: list[tuple[int, int]], From b087694dd7e0a87cc821d6bbe2d1dd036c9ec4b0 Mon Sep 17 00:00:00 2001 From: simpletrontdip Date: Sun, 9 Mar 2025 10:28:40 +0700 Subject: [PATCH 06/14] feat: add tests for DiffuserAdaLN layer logic --- .../layers/diffusers_ada_ln_lora_layer.py | 8 ++- .../layers/test_diffuser_ada_ln_lora_layer.py | 55 +++++++++++++++++++ 2 files changed, 60 insertions(+), 3 deletions(-) create mode 100644 tests/backend/patches/layers/test_diffuser_ada_ln_lora_layer.py diff --git a/invokeai/backend/patches/layers/diffusers_ada_ln_lora_layer.py b/invokeai/backend/patches/layers/diffusers_ada_ln_lora_layer.py index 50ca1f76914..a2d93531293 100644 --- a/invokeai/backend/patches/layers/diffusers_ada_ln_lora_layer.py +++ b/invokeai/backend/patches/layers/diffusers_ada_ln_lora_layer.py @@ -2,6 +2,10 @@ from invokeai.backend.patches.layers.lora_layer import LoRALayer +def swap_shift_scale(tensor: torch.Tensor) -> torch.Tensor: + scale, shift = tensor.chunk(2, dim=0) + return torch.cat([shift, scale], dim=0) + class DiffusersAdaLN_LoRALayer(LoRALayer): '''LoRA layer converted from Diffusers AdaLN, weight is shift-scale swapped''' @@ -11,6 +15,4 @@ def get_weight(self, orig_weight: torch.Tensor) -> torch.Tensor: # So we swap the linear projection weights in order to be able to use Flux implementation weight = super().get_weight(orig_weight) - scale, shift = weight.chunk(2, dim=0) - - return torch.cat([shift, scale], dim=0) \ No newline at end of file + return swap_shift_scale(weight) \ No newline at end of file diff --git a/tests/backend/patches/layers/test_diffuser_ada_ln_lora_layer.py b/tests/backend/patches/layers/test_diffuser_ada_ln_lora_layer.py new file mode 100644 index 00000000000..2c1afa7daa0 --- /dev/null +++ b/tests/backend/patches/layers/test_diffuser_ada_ln_lora_layer.py @@ -0,0 +1,55 @@ +import torch + +from invokeai.backend.patches.layers.lora_layer import LoRALayer +from invokeai.backend.patches.layers.diffusers_ada_ln_lora_layer import DiffusersAdaLN_LoRALayer, swap_shift_scale + +def test_swap_shift_scale_for_tensor(): + """Test swaping function""" + tensor = torch.Tensor([1, 2]) + expected = torch.Tensor([2, 1]) + + swapped = swap_shift_scale(tensor) + assert(torch.allclose(expected, swapped)) + + size= (3, 4) + first = torch.randn(size) + second = torch.randn(size) + + tensor = torch.concat([first, second]) + expected = torch.concat([second, first]) + + swapped = swap_shift_scale(tensor) + assert(torch.allclose(expected, swapped)) + +def test_diffusers_adaLN_lora_layer_get_weight(): + """Test getting weight from DiffusersAdaLN_LoRALayer.""" + small_in_features = 4 + 
big_in_features = 8 + out_features = 16 + rank = 4 + alpha = 16.0 + + lora = LoRALayer( + up=torch.ones(out_features, rank), + mid=None, + down=torch.ones(rank, big_in_features), + alpha=alpha, + bias=None + ) + layer = DiffusersAdaLN_LoRALayer( + up=torch.ones(out_features, rank), + mid=None, + down=torch.ones(rank, big_in_features), + alpha=alpha, + bias=None + ) + + # mock original weight, normally ignored in our loRA + orig_weight = torch.ones(small_in_features) + + diffuser_weight = layer.get_weight(orig_weight) + lora_weight = lora.get_weight(orig_weight) + + # diffusers lora weight should be flipped + assert(torch.allclose(diffuser_weight, swap_shift_scale(lora_weight))) + From 701e9dc6bef18ccdee128e8da9e878a23e0c811b Mon Sep 17 00:00:00 2001 From: simpletrontdip Date: Sun, 9 Mar 2025 10:31:34 +0700 Subject: [PATCH 07/14] feat: add adaLN for custom module test --- .../custom_modules/test_all_custom_modules.py | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/tests/backend/model_manager/load/model_cache/torch_module_autocast/custom_modules/test_all_custom_modules.py b/tests/backend/model_manager/load/model_cache/torch_module_autocast/custom_modules/test_all_custom_modules.py index c1e77c333bb..8dc240eb687 100644 --- a/tests/backend/model_manager/load/model_cache/torch_module_autocast/custom_modules/test_all_custom_modules.py +++ b/tests/backend/model_manager/load/model_cache/torch_module_autocast/custom_modules/test_all_custom_modules.py @@ -14,6 +14,7 @@ from invokeai.backend.patches.layer_patcher import LayerPatcher from invokeai.backend.patches.layers.base_layer_patch import BaseLayerPatch from invokeai.backend.patches.layers.flux_control_lora_layer import FluxControlLoRALayer +from invokeai.backend.patches.layers.diffusers_ada_ln_lora_layer import DiffusersAdaLN_LoRALayer from invokeai.backend.patches.layers.lokr_layer import LoKRLayer from invokeai.backend.patches.layers.lora_layer import LoRALayer from invokeai.backend.patches.layers.merged_layer_patch import MergedLayerPatch, Range @@ -283,6 +284,7 @@ def test_inference_autocast_from_cpu_to_device(device: str, layer_under_test: La "multiple_loras", "concatenated_lora", "flux_control_lora", + "diffusers_adaLN_lora", "single_lokr", ] ) @@ -370,6 +372,16 @@ def patch_under_test(request: pytest.FixtureRequest) -> PatchUnderTest: ) input = torch.randn(1, in_features) return ([(lokr_layer, 0.7)], input) + elif layer_type == "diffusers_adaLN_lora": + lora_layer = DiffusersAdaLN_LoRALayer( + up=torch.randn(out_features, rank), + mid=None, + down=torch.randn(rank, in_features), + alpha=1.0, + bias=torch.randn(out_features), + ) + input = torch.randn(1, in_features) + return ([(lora_layer, 0.7)], input) else: raise ValueError(f"Unsupported layer_type: {layer_type}") From bb351a625276a18116e74630a4537d2daf13d9a5 Mon Sep 17 00:00:00 2001 From: simpletrontdip Date: Mon, 10 Mar 2025 14:41:58 +0700 Subject: [PATCH 08/14] feat: refine swap logic --- .../patches/layers/diffusers_ada_ln_lora_layer.py | 5 +++-- .../layers/test_diffuser_ada_ln_lora_layer.py | 12 +++++++----- 2 files changed, 10 insertions(+), 7 deletions(-) diff --git a/invokeai/backend/patches/layers/diffusers_ada_ln_lora_layer.py b/invokeai/backend/patches/layers/diffusers_ada_ln_lora_layer.py index a2d93531293..46dda7e4dc9 100644 --- a/invokeai/backend/patches/layers/diffusers_ada_ln_lora_layer.py +++ b/invokeai/backend/patches/layers/diffusers_ada_ln_lora_layer.py @@ -14,5 +14,6 @@ def get_weight(self, orig_weight: torch.Tensor) -> torch.Tensor: # while in 
diffusers it split into scale, shift. # So we swap the linear projection weights in order to be able to use Flux implementation - weight = super().get_weight(orig_weight) - return swap_shift_scale(weight) \ No newline at end of file + weight = super().get_weight(orig_weight) + weight = swap_shift_scale(weight) + return weight \ No newline at end of file diff --git a/tests/backend/patches/layers/test_diffuser_ada_ln_lora_layer.py b/tests/backend/patches/layers/test_diffuser_ada_ln_lora_layer.py index 2c1afa7daa0..d0917299282 100644 --- a/tests/backend/patches/layers/test_diffuser_ada_ln_lora_layer.py +++ b/tests/backend/patches/layers/test_diffuser_ada_ln_lora_layer.py @@ -29,14 +29,14 @@ def test_diffusers_adaLN_lora_layer_get_weight(): rank = 4 alpha = 16.0 - lora = LoRALayer( + normal_layer = LoRALayer( up=torch.ones(out_features, rank), mid=None, down=torch.ones(rank, big_in_features), alpha=alpha, bias=None ) - layer = DiffusersAdaLN_LoRALayer( + diffuser_adaLN_layer = DiffusersAdaLN_LoRALayer( up=torch.ones(out_features, rank), mid=None, down=torch.ones(rank, big_in_features), @@ -44,12 +44,14 @@ def test_diffusers_adaLN_lora_layer_get_weight(): bias=None ) + assert(isinstance(diffuser_adaLN_layer, LoRALayer)) + # mock original weight, normally ignored in our loRA orig_weight = torch.ones(small_in_features) - diffuser_weight = layer.get_weight(orig_weight) - lora_weight = lora.get_weight(orig_weight) + diffuser_weight = diffuser_adaLN_layer.get_weight(orig_weight) + normal_weight = normal_layer.get_weight(orig_weight) # diffusers lora weight should be flipped - assert(torch.allclose(diffuser_weight, swap_shift_scale(lora_weight))) + assert(torch.allclose(diffuser_weight, swap_shift_scale(normal_weight))) From 4af72730ee2aea1f3e5dd2d0321cfa941cc0c922 Mon Sep 17 00:00:00 2001 From: simpletrontdip Date: Tue, 11 Mar 2025 11:53:00 +0700 Subject: [PATCH 09/14] feat: approximate adaLN layer for more compatibility --- .../layers/diffusers_ada_ln_lora_layer.py | 19 ----- invokeai/backend/patches/layers/utils.py | 71 +++++++++++++++++-- .../flux_diffusers_lora_conversion_utils.py | 4 +- .../layers/test_diffuser_ada_ln_lora_layer.py | 57 --------------- .../patches/layers/test_layer_utils.py | 46 ++++++++++++ 5 files changed, 114 insertions(+), 83 deletions(-) delete mode 100644 invokeai/backend/patches/layers/diffusers_ada_ln_lora_layer.py delete mode 100644 tests/backend/patches/layers/test_diffuser_ada_ln_lora_layer.py create mode 100644 tests/backend/patches/layers/test_layer_utils.py diff --git a/invokeai/backend/patches/layers/diffusers_ada_ln_lora_layer.py b/invokeai/backend/patches/layers/diffusers_ada_ln_lora_layer.py deleted file mode 100644 index 46dda7e4dc9..00000000000 --- a/invokeai/backend/patches/layers/diffusers_ada_ln_lora_layer.py +++ /dev/null @@ -1,19 +0,0 @@ -import torch - -from invokeai.backend.patches.layers.lora_layer import LoRALayer - -def swap_shift_scale(tensor: torch.Tensor) -> torch.Tensor: - scale, shift = tensor.chunk(2, dim=0) - return torch.cat([shift, scale], dim=0) - -class DiffusersAdaLN_LoRALayer(LoRALayer): - '''LoRA layer converted from Diffusers AdaLN, weight is shift-scale swapped''' - - def get_weight(self, orig_weight: torch.Tensor) -> torch.Tensor: - # In SD3 and Flux implementation of AdaLayerNormContinuous, it split linear projection output into shift, scale; - # while in diffusers it split into scale, shift. 
- # So we swap the linear projection weights in order to be able to use Flux implementation - - weight = super().get_weight(orig_weight) - weight = swap_shift_scale(weight) - return weight \ No newline at end of file diff --git a/invokeai/backend/patches/layers/utils.py b/invokeai/backend/patches/layers/utils.py index 86acfe992bb..1f8217fe2b6 100644 --- a/invokeai/backend/patches/layers/utils.py +++ b/invokeai/backend/patches/layers/utils.py @@ -1,4 +1,4 @@ -from typing import Dict +from typing import Dict, Tuple import torch @@ -10,7 +10,6 @@ from invokeai.backend.patches.layers.lokr_layer import LoKRLayer from invokeai.backend.patches.layers.lora_layer import LoRALayer from invokeai.backend.patches.layers.norm_layer import NormLayer -from invokeai.backend.patches.layers.diffusers_ada_ln_lora_layer import DiffusersAdaLN_LoRALayer def any_lora_layer_from_state_dict(state_dict: Dict[str, torch.Tensor]) -> BaseLayerPatch: @@ -36,8 +35,70 @@ def any_lora_layer_from_state_dict(state_dict: Dict[str, torch.Tensor]) -> BaseL raise ValueError(f"Unsupported lora format: {state_dict.keys()}") -def diffusers_adaLN_lora_layer_from_state_dict(state_dict: Dict[str, torch.Tensor]) -> DiffusersAdaLN_LoRALayer: + +def swap_shift_scale_for_linear_weight(weight: torch.Tensor) -> torch.Tensor: + """Swap shift/scale for given linear layer back and forth""" + # In SD3 and Flux implementation of AdaLayerNormContinuous, it split linear projection output into shift, scale; + # while in diffusers it split into scale, shift. This will flip them around + chunk1, chunk2 = weight.chunk(2, dim=0) + return torch.cat([chunk2, chunk1], dim=0) + +def decomposite_weight_matric_with_rank( + delta: torch.Tensor, + rank: int, +) -> Tuple[torch.Tensor, torch.Tensor]: + """Decompose given matrix with a specified rank.""" + U, S, V = torch.svd(delta) + + # Truncate to rank r: + U_r = U[:, :rank] + S_r = S[:rank] + V_r = V[:, :rank] + + S_sqrt = torch.sqrt(S_r) + + up = torch.matmul(U_r, torch.diag(S_sqrt)) + down = torch.matmul(torch.diag(S_sqrt), V_r.T) + + return up, down + + +def approximate_flux_adaLN_lora_layer_from_diffusers_state_dict(state_dict: Dict[str, torch.Tensor]) -> LoRALayer: + '''Approximate given diffusers AdaLN loRA layer in our Flux model''' + if not "lora_up.weight" in state_dict: - raise ValueError(f"Unsupported lora format: {state_dict.keys()}") + raise ValueError(f"Unsupported lora format: {state_dict.keys()}, missing lora_up") - return DiffusersAdaLN_LoRALayer.from_state_dict_values(state_dict) \ No newline at end of file + if not "lora_down.weight" in state_dict: + raise ValueError(f"Unsupported lora format: {state_dict.keys()}, missing lora_down") + + up = state_dict.pop('lora_up.weight') + down = state_dict.pop('lora_down.weight') + + dtype = up.dtype + device = up.device + up_shape = up.shape + down_shape = down.shape + + # desired low rank + rank = up_shape[1] + + # up scaling for more precise + up.double() + down.double() + weight = up.reshape(up.shape[0], -1) @ down.reshape(down.shape[0], -1) + + # swap to our linear format + swapped = swap_shift_scale_for_linear_weight(weight) + + _up, _down = decomposite_weight_matric_with_rank(swapped, rank) + + assert(_up.shape == up_shape) + assert(_down.shape == down_shape) + + # down scaling to original dtype, device + state_dict['lora_up.weight'] = _up.to(dtype).to(device=device) + state_dict['lora_down.weight'] = _down.to(dtype).to(device=device) + + return LoRALayer.from_state_dict_values(state_dict) + diff --git 
a/invokeai/backend/patches/lora_conversions/flux_diffusers_lora_conversion_utils.py b/invokeai/backend/patches/lora_conversions/flux_diffusers_lora_conversion_utils.py index 013bd4ba542..152129883ae 100644 --- a/invokeai/backend/patches/lora_conversions/flux_diffusers_lora_conversion_utils.py +++ b/invokeai/backend/patches/lora_conversions/flux_diffusers_lora_conversion_utils.py @@ -4,7 +4,7 @@ from invokeai.backend.patches.layers.base_layer_patch import BaseLayerPatch from invokeai.backend.patches.layers.merged_layer_patch import MergedLayerPatch, Range -from invokeai.backend.patches.layers.utils import any_lora_layer_from_state_dict, diffusers_adaLN_lora_layer_from_state_dict +from invokeai.backend.patches.layers.utils import any_lora_layer_from_state_dict, approximate_flux_adaLN_lora_layer_from_diffusers_state_dict from invokeai.backend.patches.lora_conversions.flux_lora_constants import FLUX_LORA_TRANSFORMER_PREFIX from invokeai.backend.patches.model_patch_raw import ModelPatchRaw @@ -86,7 +86,7 @@ def add_adaLN_lora_layer_if_present(src_key: str, dst_key: str) -> None: if src_key in grouped_state_dict: src_layer_dict = grouped_state_dict.pop(src_key) values = get_lora_layer_values(src_layer_dict) - layers[dst_key] = diffusers_adaLN_lora_layer_from_state_dict(values) + layers[dst_key] = approximate_flux_adaLN_lora_layer_from_diffusers_state_dict(values) def add_qkv_lora_layer_if_present( src_keys: list[str], diff --git a/tests/backend/patches/layers/test_diffuser_ada_ln_lora_layer.py b/tests/backend/patches/layers/test_diffuser_ada_ln_lora_layer.py deleted file mode 100644 index d0917299282..00000000000 --- a/tests/backend/patches/layers/test_diffuser_ada_ln_lora_layer.py +++ /dev/null @@ -1,57 +0,0 @@ -import torch - -from invokeai.backend.patches.layers.lora_layer import LoRALayer -from invokeai.backend.patches.layers.diffusers_ada_ln_lora_layer import DiffusersAdaLN_LoRALayer, swap_shift_scale - -def test_swap_shift_scale_for_tensor(): - """Test swaping function""" - tensor = torch.Tensor([1, 2]) - expected = torch.Tensor([2, 1]) - - swapped = swap_shift_scale(tensor) - assert(torch.allclose(expected, swapped)) - - size= (3, 4) - first = torch.randn(size) - second = torch.randn(size) - - tensor = torch.concat([first, second]) - expected = torch.concat([second, first]) - - swapped = swap_shift_scale(tensor) - assert(torch.allclose(expected, swapped)) - -def test_diffusers_adaLN_lora_layer_get_weight(): - """Test getting weight from DiffusersAdaLN_LoRALayer.""" - small_in_features = 4 - big_in_features = 8 - out_features = 16 - rank = 4 - alpha = 16.0 - - normal_layer = LoRALayer( - up=torch.ones(out_features, rank), - mid=None, - down=torch.ones(rank, big_in_features), - alpha=alpha, - bias=None - ) - diffuser_adaLN_layer = DiffusersAdaLN_LoRALayer( - up=torch.ones(out_features, rank), - mid=None, - down=torch.ones(rank, big_in_features), - alpha=alpha, - bias=None - ) - - assert(isinstance(diffuser_adaLN_layer, LoRALayer)) - - # mock original weight, normally ignored in our loRA - orig_weight = torch.ones(small_in_features) - - diffuser_weight = diffuser_adaLN_layer.get_weight(orig_weight) - normal_weight = normal_layer.get_weight(orig_weight) - - # diffusers lora weight should be flipped - assert(torch.allclose(diffuser_weight, swap_shift_scale(normal_weight))) - diff --git a/tests/backend/patches/layers/test_layer_utils.py b/tests/backend/patches/layers/test_layer_utils.py new file mode 100644 index 00000000000..3deca37b38b --- /dev/null +++ 
b/tests/backend/patches/layers/test_layer_utils.py @@ -0,0 +1,46 @@ +import torch + +from invokeai.backend.patches.layers.utils import decomposite_weight_matric_with_rank, swap_shift_scale_for_linear_weight + + +def test_swap_shift_scale_for_linear_weight(): + """Test that swaping should work""" + original = torch.Tensor([1, 2]) + expected = torch.Tensor([2, 1]) + + swapped = swap_shift_scale_for_linear_weight(original) + assert(torch.allclose(expected, swapped)) + + size= (3, 4) + first = torch.randn(size) + second = torch.randn(size) + + original = torch.concat([first, second]) + expected = torch.concat([second, first]) + + swapped = swap_shift_scale_for_linear_weight(original) + assert(torch.allclose(expected, swapped)) + + # call this twice will reconstruct the original + reconstructed = swap_shift_scale_for_linear_weight(swapped) + assert(torch.allclose(reconstructed, original)) + +def test_decomposite_weight_matric_with_rank(): + """Test that decompsition of given matrix into 2 low rank matrices work""" + input_dim = 1024 + output_dim = 1024 + rank = 8 # Low rank + + + A = torch.randn(input_dim, rank).double() + B = torch.randn(rank, output_dim).double() + W0 = A @ B + + C, D = decomposite_weight_matric_with_rank(W0, rank) + R = C @ D + + assert(C.shape == A.shape) + assert(D.shape == B.shape) + + assert torch.allclose(W0, R) + From 0cad89d3c8236ef2213695788fa564528250a1ef Mon Sep 17 00:00:00 2001 From: simpletrontdip Date: Tue, 11 Mar 2025 14:36:08 +0700 Subject: [PATCH 10/14] feat: refactor conversion module, add test for svd correctness --- invokeai/backend/patches/layers/utils.py | 44 +-------------- .../flux_diffusers_lora_conversion_utils.py | 46 +++++++++++++++- ...st_flux_diffusers_lora_conversion_utils.py | 55 +++++++++++++++++++ 3 files changed, 102 insertions(+), 43 deletions(-) diff --git a/invokeai/backend/patches/layers/utils.py b/invokeai/backend/patches/layers/utils.py index 1f8217fe2b6..2d2b99763b7 100644 --- a/invokeai/backend/patches/layers/utils.py +++ b/invokeai/backend/patches/layers/utils.py @@ -46,6 +46,7 @@ def swap_shift_scale_for_linear_weight(weight: torch.Tensor) -> torch.Tensor: def decomposite_weight_matric_with_rank( delta: torch.Tensor, rank: int, + epsilon: float = 1e-8, ) -> Tuple[torch.Tensor, torch.Tensor]: """Decompose given matrix with a specified rank.""" U, S, V = torch.svd(delta) @@ -55,50 +56,9 @@ def decomposite_weight_matric_with_rank( S_r = S[:rank] V_r = V[:, :rank] - S_sqrt = torch.sqrt(S_r) + S_sqrt = torch.sqrt(S_r + epsilon) # regularization up = torch.matmul(U_r, torch.diag(S_sqrt)) down = torch.matmul(torch.diag(S_sqrt), V_r.T) return up, down - - -def approximate_flux_adaLN_lora_layer_from_diffusers_state_dict(state_dict: Dict[str, torch.Tensor]) -> LoRALayer: - '''Approximate given diffusers AdaLN loRA layer in our Flux model''' - - if not "lora_up.weight" in state_dict: - raise ValueError(f"Unsupported lora format: {state_dict.keys()}, missing lora_up") - - if not "lora_down.weight" in state_dict: - raise ValueError(f"Unsupported lora format: {state_dict.keys()}, missing lora_down") - - up = state_dict.pop('lora_up.weight') - down = state_dict.pop('lora_down.weight') - - dtype = up.dtype - device = up.device - up_shape = up.shape - down_shape = down.shape - - # desired low rank - rank = up_shape[1] - - # up scaling for more precise - up.double() - down.double() - weight = up.reshape(up.shape[0], -1) @ down.reshape(down.shape[0], -1) - - # swap to our linear format - swapped = swap_shift_scale_for_linear_weight(weight) - - _up, 
_down = decomposite_weight_matric_with_rank(swapped, rank) - - assert(_up.shape == up_shape) - assert(_down.shape == down_shape) - - # down scaling to original dtype, device - state_dict['lora_up.weight'] = _up.to(dtype).to(device=device) - state_dict['lora_down.weight'] = _down.to(dtype).to(device=device) - - return LoRALayer.from_state_dict_values(state_dict) - diff --git a/invokeai/backend/patches/lora_conversions/flux_diffusers_lora_conversion_utils.py b/invokeai/backend/patches/lora_conversions/flux_diffusers_lora_conversion_utils.py index 152129883ae..530e9954ee9 100644 --- a/invokeai/backend/patches/lora_conversions/flux_diffusers_lora_conversion_utils.py +++ b/invokeai/backend/patches/lora_conversions/flux_diffusers_lora_conversion_utils.py @@ -2,9 +2,10 @@ import torch +from invokeai.backend.patches.layers.lora_layer import LoRALayer from invokeai.backend.patches.layers.base_layer_patch import BaseLayerPatch from invokeai.backend.patches.layers.merged_layer_patch import MergedLayerPatch, Range -from invokeai.backend.patches.layers.utils import any_lora_layer_from_state_dict, approximate_flux_adaLN_lora_layer_from_diffusers_state_dict +from invokeai.backend.patches.layers.utils import any_lora_layer_from_state_dict, swap_shift_scale_for_linear_weight, decomposite_weight_matric_with_rank from invokeai.backend.patches.lora_conversions.flux_lora_constants import FLUX_LORA_TRANSFORMER_PREFIX from invokeai.backend.patches.model_patch_raw import ModelPatchRaw @@ -29,6 +30,49 @@ def is_state_dict_likely_in_flux_diffusers_format(state_dict: Dict[str, torch.Te return all_keys_in_peft_format and all_expected_keys_present +def approximate_flux_adaLN_lora_layer_from_diffusers_state_dict(state_dict: Dict[str, torch.Tensor]) -> LoRALayer: + '''Approximate given diffusers AdaLN loRA layer in our Flux model''' + + if not "lora_up.weight" in state_dict: + raise ValueError(f"Unsupported lora format: {state_dict.keys()}, missing lora_up") + + if not "lora_down.weight" in state_dict: + raise ValueError(f"Unsupported lora format: {state_dict.keys()}, missing lora_down") + + up = state_dict.pop('lora_up.weight') + down = state_dict.pop('lora_down.weight') + + # layer-patcher upcast things to f32, + # we want to maintain a better precison for this one + dtype = torch.float32 + + device = up.device + up_shape = up.shape + down_shape = down.shape + + # desired low rank + rank = up_shape[1] + + # up scaling for more precise + up = up.to(torch.float32) + down = down.to(torch.float32) + + weight = up.reshape(up_shape[0], -1) @ down.reshape(down_shape[0], -1) + + # swap to our linear format + swapped = swap_shift_scale_for_linear_weight(weight) + + _up, _down = decomposite_weight_matric_with_rank(swapped, rank) + + assert(_up.shape == up_shape) + assert(_down.shape == down_shape) + + # down scaling to original dtype, device + state_dict['lora_up.weight'] = _up.to(dtype).to(device=device) + state_dict['lora_down.weight'] = _down.to(dtype).to(device=device) + + return LoRALayer.from_state_dict_values(state_dict) + def lora_model_from_flux_diffusers_state_dict( state_dict: Dict[str, torch.Tensor], alpha: float | None diff --git a/tests/backend/patches/lora_conversions/test_flux_diffusers_lora_conversion_utils.py b/tests/backend/patches/lora_conversions/test_flux_diffusers_lora_conversion_utils.py index 3558eb01eee..77e6c4e9055 100644 --- a/tests/backend/patches/lora_conversions/test_flux_diffusers_lora_conversion_utils.py +++ b/tests/backend/patches/lora_conversions/test_flux_diffusers_lora_conversion_utils.py @@ 
-1,9 +1,12 @@ import pytest import torch + +from invokeai.backend.patches.layers.utils import swap_shift_scale_for_linear_weight from invokeai.backend.patches.lora_conversions.flux_diffusers_lora_conversion_utils import ( is_state_dict_likely_in_flux_diffusers_format, lora_model_from_flux_diffusers_state_dict, + approximate_flux_adaLN_lora_layer_from_diffusers_state_dict, ) from invokeai.backend.patches.lora_conversions.flux_lora_constants import FLUX_LORA_TRANSFORMER_PREFIX from tests.backend.patches.lora_conversions.lora_state_dicts.flux_dora_onetrainer_format import ( @@ -78,3 +81,55 @@ def test_lora_model_from_flux_diffusers_state_dict_extra_keys_error(): # Check that an error is raised. with pytest.raises(AssertionError): lora_model_from_flux_diffusers_state_dict(state_dict, alpha=8.0) + + +@pytest.mark.parametrize("layer_sd_keys",[ + {}, # no keys + {'lora_A.weight': [1024, 8], 'lora_B.weight': [8, 512]}, # wrong keys + {'lora_up.weight': [1024, 8],}, # missing key + {'lora_down.weight': [8, 512],}, # missing key +]) +def test_approximate_adaLN_from_state_dict_should_only_accept_vanilla_LoRA_format(layer_sd_keys: dict[str, list[int]]): + """Should only accept state dicts in the vanilla LoRA format""" + layer_state_dict = keys_to_mock_state_dict(layer_sd_keys) + + with pytest.raises(ValueError): + approximate_flux_adaLN_lora_layer_from_diffusers_state_dict(layer_state_dict) + + +@pytest.mark.parametrize("dtype, rtol", [ + (torch.float32, 1e-4), + (torch.half, 1e-3), +]) +def test_approximate_adaLN_from_state_dict_should_work(dtype: torch.dtype, rtol: float, rate: float = 0.99): + """Test that we can approximate a good enough adaLN layer from a diffusers state dict. + Some approximation error is tolerated, depending on the input dtype""" + input_dim = 1024 + output_dim = 512 + rank = 8 # Low rank + total = input_dim * output_dim + + up = torch.randn(input_dim, rank, dtype=dtype) + down = torch.randn(rank, output_dim, dtype=dtype) + + layer_state_dict = { + 'lora_up.weight': up, + 'lora_down.weight': down + } + + # XXX Layer patcher cast things to f32 + original = up.float() @ down.float() + swapped = swap_shift_scale_for_linear_weight(original) + + layer = approximate_flux_adaLN_lora_layer_from_diffusers_state_dict(layer_state_dict) + weight = layer.get_weight(original).float() + + print(weight.dtype, swapped.dtype, layer.up.dtype) + + close_count = torch.isclose(weight, swapped, rtol=rtol).sum().item() + close_rate = close_count / total + + assert close_rate > rate + + + From 970b2a80140b05735278a18cb78a38d0ce56a0ec Mon Sep 17 00:00:00 2001 From: simpletrontdip Date: Tue, 11 Mar 2025 14:55:04 +0700 Subject: [PATCH 11/14] feat: verify function called while converting model --- ...test_flux_diffusers_lora_conversion_utils.py | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/tests/backend/patches/lora_conversions/test_flux_diffusers_lora_conversion_utils.py b/tests/backend/patches/lora_conversions/test_flux_diffusers_lora_conversion_utils.py index 77e6c4e9055..837c922388b 100644 --- a/tests/backend/patches/lora_conversions/test_flux_diffusers_lora_conversion_utils.py +++ b/tests/backend/patches/lora_conversions/test_flux_diffusers_lora_conversion_utils.py @@ -1,5 +1,7 @@ +import unittest.mock import pytest import torch +import unittest from invokeai.backend.patches.layers.utils import swap_shift_scale_for_linear_weight @@ -131,5 +133,20 @@ def test_approximate_adaLN_from_state_dict_should_work(dtype: torch.dtype, rtol: assert close_rate > rate +def
test_adaLN_should_be_approximated_if_present_while_converting(): + """The adaLN layer should be approximated if it exists in the given model""" + state_dict = keys_to_mock_state_dict(flux_diffusers_with_norm_out_state_dict_keys) + adaLN_layer_key = 'final_layer.adaLN_modulation.1' + prefixed_layer_key = FLUX_LORA_TRANSFORMER_PREFIX + adaLN_layer_key + with unittest.mock.patch( + 'invokeai.backend.patches.lora_conversions.flux_diffusers_lora_conversion_utils.approximate_flux_adaLN_lora_layer_from_diffusers_state_dict' + ) as mock_approximate_func: + model = lora_model_from_flux_diffusers_state_dict(state_dict, alpha=8.0) + + # Check that all converted layer keys have the expected transformer prefix. + assert all(k.startswith(FLUX_LORA_TRANSFORMER_PREFIX) for k in model.layers.keys()) + + assert prefixed_layer_key in model.layers.keys() + assert mock_approximate_func.call_count == 1 From 970b2a80140b05735278a18cb78a38d0ce56a0ec Mon Sep 17 00:00:00 2001 From: simpletrontdip Date: Tue, 11 Mar 2025 14:58:07 +0700 Subject: [PATCH 12/14] chore: fix import path --- .../test_flux_diffusers_lora_conversion_utils.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/tests/backend/patches/lora_conversions/test_flux_diffusers_lora_conversion_utils.py b/tests/backend/patches/lora_conversions/test_flux_diffusers_lora_conversion_utils.py index 837c922388b..c7dfaff8812 100644 --- a/tests/backend/patches/lora_conversions/test_flux_diffusers_lora_conversion_utils.py +++ b/tests/backend/patches/lora_conversions/test_flux_diffusers_lora_conversion_utils.py @@ -1,7 +1,6 @@ -import unittest.mock import pytest import torch -import unittest +from unittest import mock from invokeai.backend.patches.layers.utils import swap_shift_scale_for_linear_weight @@ -140,7 +139,7 @@ def test_adaLN_should_be_approximated_if_present_while_converting(): adaLN_layer_key = 'final_layer.adaLN_modulation.1' prefixed_layer_key = FLUX_LORA_TRANSFORMER_PREFIX + adaLN_layer_key - with unittest.mock.patch( + with mock.patch( 'invokeai.backend.patches.lora_conversions.flux_diffusers_lora_conversion_utils.approximate_flux_adaLN_lora_layer_from_diffusers_state_dict' ) as mock_approximate_func: model = lora_model_from_flux_diffusers_state_dict(state_dict, alpha=8.0) From aa58da9aa50e5c63623b2cce586a174b07de2ed1 Mon Sep 17 00:00:00 2001 From: simpletrontdip Date: Tue, 11 Mar 2025 15:15:17 +0700 Subject: [PATCH 13/14] chore: remove unused test setup --- .../custom_modules/test_all_custom_modules.py | 12 ------------ 1 file changed, 12 deletions(-) diff --git a/tests/backend/model_manager/load/model_cache/torch_module_autocast/custom_modules/test_all_custom_modules.py b/tests/backend/model_manager/load/model_cache/torch_module_autocast/custom_modules/test_all_custom_modules.py index 8dc240eb687..c1e77c333bb 100644 --- a/tests/backend/model_manager/load/model_cache/torch_module_autocast/custom_modules/test_all_custom_modules.py +++ b/tests/backend/model_manager/load/model_cache/torch_module_autocast/custom_modules/test_all_custom_modules.py @@ -14,7 +14,6 @@ from invokeai.backend.patches.layer_patcher import LayerPatcher from invokeai.backend.patches.layers.base_layer_patch import BaseLayerPatch from invokeai.backend.patches.layers.flux_control_lora_layer import FluxControlLoRALayer -from invokeai.backend.patches.layers.diffusers_ada_ln_lora_layer import DiffusersAdaLN_LoRALayer from invokeai.backend.patches.layers.lokr_layer import LoKRLayer from invokeai.backend.patches.layers.lora_layer import LoRALayer from
invokeai.backend.patches.layers.merged_layer_patch import MergedLayerPatch, Range @@ -284,7 +283,6 @@ def test_inference_autocast_from_cpu_to_device(device: str, layer_under_test: La "multiple_loras", "concatenated_lora", "flux_control_lora", - "diffusers_adaLN_lora", "single_lokr", ] ) @@ -372,16 +370,6 @@ def patch_under_test(request: pytest.FixtureRequest) -> PatchUnderTest: ) input = torch.randn(1, in_features) return ([(lokr_layer, 0.7)], input) - elif layer_type == "diffusers_adaLN_lora": - lora_layer = DiffusersAdaLN_LoRALayer( - up=torch.randn(out_features, rank), - mid=None, - down=torch.randn(rank, in_features), - alpha=1.0, - bias=torch.randn(out_features), - ) - input = torch.randn(1, in_features) - return ([(lora_layer, 0.7)], input) else: raise ValueError(f"Unsupported layer_type: {layer_type}") From a5d23ade5b206ce16a2d720c232c6e41b66bfbd5 Mon Sep 17 00:00:00 2001 From: simpletrontdip Date: Fri, 14 Mar 2025 14:27:35 +0700 Subject: [PATCH 14/14] chore: ruff fix --- invokeai/backend/patches/layers/utils.py | 6 +- .../flux_diffusers_lora_conversion_utils.py | 43 +- .../patches/layers/test_layer_utils.py | 20 +- ...lux_lora_diffusers_with_norm_out_format.py | 2019 +++++++++-------- ...st_flux_diffusers_lora_conversion_utils.py | 64 +- 5 files changed, 1091 insertions(+), 1061 deletions(-) diff --git a/invokeai/backend/patches/layers/utils.py b/invokeai/backend/patches/layers/utils.py index 2d2b99763b7..778884cacc1 100644 --- a/invokeai/backend/patches/layers/utils.py +++ b/invokeai/backend/patches/layers/utils.py @@ -35,14 +35,14 @@ def any_lora_layer_from_state_dict(state_dict: Dict[str, torch.Tensor]) -> BaseL raise ValueError(f"Unsupported lora format: {state_dict.keys()}") - def swap_shift_scale_for_linear_weight(weight: torch.Tensor) -> torch.Tensor: """Swap shift/scale for given linear layer back and forth""" # In SD3 and Flux implementation of AdaLayerNormContinuous, it split linear projection output into shift, scale; # while in diffusers it split into scale, shift. 
This will flip them around - chunk1, chunk2 = weight.chunk(2, dim=0) + chunk1, chunk2 = weight.chunk(2, dim=0) return torch.cat([chunk2, chunk1], dim=0) + def decomposite_weight_matric_with_rank( delta: torch.Tensor, rank: int, @@ -56,7 +56,7 @@ def decomposite_weight_matric_with_rank( S_r = S[:rank] V_r = V[:, :rank] - S_sqrt = torch.sqrt(S_r + epsilon) # regularization + S_sqrt = torch.sqrt(S_r + epsilon) # regularization up = torch.matmul(U_r, torch.diag(S_sqrt)) down = torch.matmul(torch.diag(S_sqrt), V_r.T) diff --git a/invokeai/backend/patches/lora_conversions/flux_diffusers_lora_conversion_utils.py b/invokeai/backend/patches/lora_conversions/flux_diffusers_lora_conversion_utils.py index 530e9954ee9..71f4cbecdde 100644 --- a/invokeai/backend/patches/lora_conversions/flux_diffusers_lora_conversion_utils.py +++ b/invokeai/backend/patches/lora_conversions/flux_diffusers_lora_conversion_utils.py @@ -2,10 +2,14 @@ import torch -from invokeai.backend.patches.layers.lora_layer import LoRALayer from invokeai.backend.patches.layers.base_layer_patch import BaseLayerPatch +from invokeai.backend.patches.layers.lora_layer import LoRALayer from invokeai.backend.patches.layers.merged_layer_patch import MergedLayerPatch, Range -from invokeai.backend.patches.layers.utils import any_lora_layer_from_state_dict, swap_shift_scale_for_linear_weight, decomposite_weight_matric_with_rank +from invokeai.backend.patches.layers.utils import ( + any_lora_layer_from_state_dict, + decomposite_weight_matric_with_rank, + swap_shift_scale_for_linear_weight, +) from invokeai.backend.patches.lora_conversions.flux_lora_constants import FLUX_LORA_TRANSFORMER_PREFIX from invokeai.backend.patches.model_patch_raw import ModelPatchRaw @@ -30,26 +34,27 @@ def is_state_dict_likely_in_flux_diffusers_format(state_dict: Dict[str, torch.Te return all_keys_in_peft_format and all_expected_keys_present + def approximate_flux_adaLN_lora_layer_from_diffusers_state_dict(state_dict: Dict[str, torch.Tensor]) -> LoRALayer: - '''Approximate given diffusers AdaLN loRA layer in our Flux model''' + """Approximate given diffusers AdaLN loRA layer in our Flux model""" - if not "lora_up.weight" in state_dict: + if "lora_up.weight" not in state_dict: raise ValueError(f"Unsupported lora format: {state_dict.keys()}, missing lora_up") - - if not "lora_down.weight" in state_dict: + + if "lora_down.weight" not in state_dict: raise ValueError(f"Unsupported lora format: {state_dict.keys()}, missing lora_down") - - up = state_dict.pop('lora_up.weight') - down = state_dict.pop('lora_down.weight') - # layer-patcher upcast things to f32, + up = state_dict.pop("lora_up.weight") + down = state_dict.pop("lora_down.weight") + + # layer-patcher upcast things to f32, # we want to maintain a better precison for this one dtype = torch.float32 device = up.device up_shape = up.shape down_shape = down.shape - + # desired low rank rank = up_shape[1] @@ -57,19 +62,19 @@ def approximate_flux_adaLN_lora_layer_from_diffusers_state_dict(state_dict: Dict up = up.to(torch.float32) down = down.to(torch.float32) - weight = up.reshape(up_shape[0], -1) @ down.reshape(down_shape[0], -1) + weight = up.reshape(up_shape[0], -1) @ down.reshape(down_shape[0], -1) # swap to our linear format swapped = swap_shift_scale_for_linear_weight(weight) _up, _down = decomposite_weight_matric_with_rank(swapped, rank) - assert(_up.shape == up_shape) - assert(_down.shape == down_shape) + assert _up.shape == up_shape + assert _down.shape == down_shape # down scaling to original dtype, device - 
state_dict['lora_up.weight'] = _up.to(dtype).to(device=device) - state_dict['lora_down.weight'] = _down.to(dtype).to(device=device) + state_dict["lora_up.weight"] = _up.to(dtype).to(device=device) + state_dict["lora_down.weight"] = _down.to(dtype).to(device=device) return LoRALayer.from_state_dict_values(state_dict) @@ -131,7 +136,7 @@ def add_adaLN_lora_layer_if_present(src_key: str, dst_key: str) -> None: src_layer_dict = grouped_state_dict.pop(src_key) values = get_lora_layer_values(src_layer_dict) layers[dst_key] = approximate_flux_adaLN_lora_layer_from_diffusers_state_dict(values) - + def add_qkv_lora_layer_if_present( src_keys: list[str], src_weight_shapes: list[tuple[int, int]], @@ -274,8 +279,8 @@ def add_qkv_lora_layer_if_present( # Final layer. add_lora_layer_if_present("proj_out", "final_layer.linear") add_adaLN_lora_layer_if_present( - 'norm_out.linear', - 'final_layer.adaLN_modulation.1', + "norm_out.linear", + "final_layer.adaLN_modulation.1", ) # Assert that all keys were processed. diff --git a/tests/backend/patches/layers/test_layer_utils.py b/tests/backend/patches/layers/test_layer_utils.py index 3deca37b38b..2383ec3bacf 100644 --- a/tests/backend/patches/layers/test_layer_utils.py +++ b/tests/backend/patches/layers/test_layer_utils.py @@ -1,6 +1,9 @@ import torch -from invokeai.backend.patches.layers.utils import decomposite_weight_matric_with_rank, swap_shift_scale_for_linear_weight +from invokeai.backend.patches.layers.utils import ( + decomposite_weight_matric_with_rank, + swap_shift_scale_for_linear_weight, +) def test_swap_shift_scale_for_linear_weight(): @@ -9,9 +12,9 @@ def test_swap_shift_scale_for_linear_weight(): expected = torch.Tensor([2, 1]) swapped = swap_shift_scale_for_linear_weight(original) - assert(torch.allclose(expected, swapped)) + assert torch.allclose(expected, swapped) - size= (3, 4) + size = (3, 4) first = torch.randn(size) second = torch.randn(size) @@ -19,11 +22,12 @@ def test_swap_shift_scale_for_linear_weight(): expected = torch.concat([second, first]) swapped = swap_shift_scale_for_linear_weight(original) - assert(torch.allclose(expected, swapped)) + assert torch.allclose(expected, swapped) # call this twice will reconstruct the original reconstructed = swap_shift_scale_for_linear_weight(swapped) - assert(torch.allclose(reconstructed, original)) + assert torch.allclose(reconstructed, original) + def test_decomposite_weight_matric_with_rank(): """Test that decompsition of given matrix into 2 low rank matrices work""" @@ -31,7 +35,6 @@ def test_decomposite_weight_matric_with_rank(): output_dim = 1024 rank = 8 # Low rank - A = torch.randn(input_dim, rank).double() B = torch.randn(rank, output_dim).double() W0 = A @ B @@ -39,8 +42,7 @@ def test_decomposite_weight_matric_with_rank(): C, D = decomposite_weight_matric_with_rank(W0, rank) R = C @ D - assert(C.shape == A.shape) - assert(D.shape == B.shape) + assert C.shape == A.shape + assert D.shape == B.shape assert torch.allclose(W0, R) - diff --git a/tests/backend/patches/lora_conversions/lora_state_dicts/flux_lora_diffusers_with_norm_out_format.py b/tests/backend/patches/lora_conversions/lora_state_dicts/flux_lora_diffusers_with_norm_out_format.py index 7a9d15083d3..fd08ce8c3bf 100644 --- a/tests/backend/patches/lora_conversions/lora_state_dicts/flux_lora_diffusers_with_norm_out_format.py +++ b/tests/backend/patches/lora_conversions/lora_state_dicts/flux_lora_diffusers_with_norm_out_format.py @@ -1,1012 +1,1013 @@ # Sample state dict in the Diffusers FLUX LoRA format. 
# This from Hyper-SD, having extra `norm_out` layer # From https://huggingface.co/ByteDance/Hyper-SD/tree/main?show_file_info=Hyper-FLUX.1-dev-16steps-lora.safetensors -state_dict_keys = { - "transformer.context_embedder.lora_A.weight": [64, 4096], - "transformer.context_embedder.lora_B.weight": [3072, 64], - "transformer.norm_out.linear.lora_A.weight": [64, 3072], - "transformer.norm_out.linear.lora_B.weight": [6144, 64], - "transformer.proj_out.lora_A.weight": [64, 3072], - "transformer.proj_out.lora_B.weight": [64, 64], - "transformer.single_transformer_blocks.0.attn.to_k.lora_A.weight": [64, 3072], - "transformer.single_transformer_blocks.0.attn.to_k.lora_B.weight": [3072, 64], - "transformer.single_transformer_blocks.0.attn.to_q.lora_A.weight": [64, 3072], - "transformer.single_transformer_blocks.0.attn.to_q.lora_B.weight": [3072, 64], - "transformer.single_transformer_blocks.0.attn.to_v.lora_A.weight": [64, 3072], - "transformer.single_transformer_blocks.0.attn.to_v.lora_B.weight": [3072, 64], - "transformer.single_transformer_blocks.0.norm.linear.lora_A.weight": [64, 3072], - "transformer.single_transformer_blocks.0.norm.linear.lora_B.weight": [9216, 64], - "transformer.single_transformer_blocks.0.proj_mlp.lora_A.weight": [64, 3072], - "transformer.single_transformer_blocks.0.proj_mlp.lora_B.weight": [12288, 64], - "transformer.single_transformer_blocks.0.proj_out.lora_A.weight": [64, 15360], - "transformer.single_transformer_blocks.0.proj_out.lora_B.weight": [3072, 64], - "transformer.single_transformer_blocks.1.attn.to_k.lora_A.weight": [64, 3072], - "transformer.single_transformer_blocks.1.attn.to_k.lora_B.weight": [3072, 64], - "transformer.single_transformer_blocks.1.attn.to_q.lora_A.weight": [64, 3072], - "transformer.single_transformer_blocks.1.attn.to_q.lora_B.weight": [3072, 64], - "transformer.single_transformer_blocks.1.attn.to_v.lora_A.weight": [64, 3072], - "transformer.single_transformer_blocks.1.attn.to_v.lora_B.weight": [3072, 64], - "transformer.single_transformer_blocks.1.norm.linear.lora_A.weight": [64, 3072], - "transformer.single_transformer_blocks.1.norm.linear.lora_B.weight": [9216, 64], - "transformer.single_transformer_blocks.1.proj_mlp.lora_A.weight": [64, 3072], - "transformer.single_transformer_blocks.1.proj_mlp.lora_B.weight": [12288, 64], - "transformer.single_transformer_blocks.1.proj_out.lora_A.weight": [64, 15360], - "transformer.single_transformer_blocks.1.proj_out.lora_B.weight": [3072, 64], - "transformer.single_transformer_blocks.10.attn.to_k.lora_A.weight": [64, 3072], - "transformer.single_transformer_blocks.10.attn.to_k.lora_B.weight": [3072, 64], - "transformer.single_transformer_blocks.10.attn.to_q.lora_A.weight": [64, 3072], - "transformer.single_transformer_blocks.10.attn.to_q.lora_B.weight": [3072, 64], - "transformer.single_transformer_blocks.10.attn.to_v.lora_A.weight": [64, 3072], - "transformer.single_transformer_blocks.10.attn.to_v.lora_B.weight": [3072, 64], - "transformer.single_transformer_blocks.10.norm.linear.lora_A.weight": [64, 3072], - "transformer.single_transformer_blocks.10.norm.linear.lora_B.weight": [9216, 64], - "transformer.single_transformer_blocks.10.proj_mlp.lora_A.weight": [64, 3072], - "transformer.single_transformer_blocks.10.proj_mlp.lora_B.weight": [12288, 64], - "transformer.single_transformer_blocks.10.proj_out.lora_A.weight": [64, 15360], - "transformer.single_transformer_blocks.10.proj_out.lora_B.weight": [3072, 64], - "transformer.single_transformer_blocks.11.attn.to_k.lora_A.weight": [64, 3072], - 
"transformer.single_transformer_blocks.11.attn.to_k.lora_B.weight": [3072, 64], - "transformer.single_transformer_blocks.11.attn.to_q.lora_A.weight": [64, 3072], - "transformer.single_transformer_blocks.11.attn.to_q.lora_B.weight": [3072, 64], - "transformer.single_transformer_blocks.11.attn.to_v.lora_A.weight": [64, 3072], - "transformer.single_transformer_blocks.11.attn.to_v.lora_B.weight": [3072, 64], - "transformer.single_transformer_blocks.11.norm.linear.lora_A.weight": [64, 3072], - "transformer.single_transformer_blocks.11.norm.linear.lora_B.weight": [9216, 64], - "transformer.single_transformer_blocks.11.proj_mlp.lora_A.weight": [64, 3072], - "transformer.single_transformer_blocks.11.proj_mlp.lora_B.weight": [12288, 64], - "transformer.single_transformer_blocks.11.proj_out.lora_A.weight": [64, 15360], - "transformer.single_transformer_blocks.11.proj_out.lora_B.weight": [3072, 64], - "transformer.single_transformer_blocks.12.attn.to_k.lora_A.weight": [64, 3072], - "transformer.single_transformer_blocks.12.attn.to_k.lora_B.weight": [3072, 64], - "transformer.single_transformer_blocks.12.attn.to_q.lora_A.weight": [64, 3072], - "transformer.single_transformer_blocks.12.attn.to_q.lora_B.weight": [3072, 64], - "transformer.single_transformer_blocks.12.attn.to_v.lora_A.weight": [64, 3072], - "transformer.single_transformer_blocks.12.attn.to_v.lora_B.weight": [3072, 64], - "transformer.single_transformer_blocks.12.norm.linear.lora_A.weight": [64, 3072], - "transformer.single_transformer_blocks.12.norm.linear.lora_B.weight": [9216, 64], - "transformer.single_transformer_blocks.12.proj_mlp.lora_A.weight": [64, 3072], - "transformer.single_transformer_blocks.12.proj_mlp.lora_B.weight": [12288, 64], - "transformer.single_transformer_blocks.12.proj_out.lora_A.weight": [64, 15360], - "transformer.single_transformer_blocks.12.proj_out.lora_B.weight": [3072, 64], - "transformer.single_transformer_blocks.13.attn.to_k.lora_A.weight": [64, 3072], - "transformer.single_transformer_blocks.13.attn.to_k.lora_B.weight": [3072, 64], - "transformer.single_transformer_blocks.13.attn.to_q.lora_A.weight": [64, 3072], - "transformer.single_transformer_blocks.13.attn.to_q.lora_B.weight": [3072, 64], - "transformer.single_transformer_blocks.13.attn.to_v.lora_A.weight": [64, 3072], - "transformer.single_transformer_blocks.13.attn.to_v.lora_B.weight": [3072, 64], - "transformer.single_transformer_blocks.13.norm.linear.lora_A.weight": [64, 3072], - "transformer.single_transformer_blocks.13.norm.linear.lora_B.weight": [9216, 64], - "transformer.single_transformer_blocks.13.proj_mlp.lora_A.weight": [64, 3072], - "transformer.single_transformer_blocks.13.proj_mlp.lora_B.weight": [12288, 64], - "transformer.single_transformer_blocks.13.proj_out.lora_A.weight": [64, 15360], - "transformer.single_transformer_blocks.13.proj_out.lora_B.weight": [3072, 64], - "transformer.single_transformer_blocks.14.attn.to_k.lora_A.weight": [64, 3072], - "transformer.single_transformer_blocks.14.attn.to_k.lora_B.weight": [3072, 64], - "transformer.single_transformer_blocks.14.attn.to_q.lora_A.weight": [64, 3072], - "transformer.single_transformer_blocks.14.attn.to_q.lora_B.weight": [3072, 64], - "transformer.single_transformer_blocks.14.attn.to_v.lora_A.weight": [64, 3072], - "transformer.single_transformer_blocks.14.attn.to_v.lora_B.weight": [3072, 64], - "transformer.single_transformer_blocks.14.norm.linear.lora_A.weight": [64, 3072], - "transformer.single_transformer_blocks.14.norm.linear.lora_B.weight": [9216, 64], - 
"transformer.single_transformer_blocks.14.proj_mlp.lora_A.weight": [64, 3072], - "transformer.single_transformer_blocks.14.proj_mlp.lora_B.weight": [12288, 64], - "transformer.single_transformer_blocks.14.proj_out.lora_A.weight": [64, 15360], - "transformer.single_transformer_blocks.14.proj_out.lora_B.weight": [3072, 64], - "transformer.single_transformer_blocks.15.attn.to_k.lora_A.weight": [64, 3072], - "transformer.single_transformer_blocks.15.attn.to_k.lora_B.weight": [3072, 64], - "transformer.single_transformer_blocks.15.attn.to_q.lora_A.weight": [64, 3072], - "transformer.single_transformer_blocks.15.attn.to_q.lora_B.weight": [3072, 64], - "transformer.single_transformer_blocks.15.attn.to_v.lora_A.weight": [64, 3072], - "transformer.single_transformer_blocks.15.attn.to_v.lora_B.weight": [3072, 64], - "transformer.single_transformer_blocks.15.norm.linear.lora_A.weight": [64, 3072], - "transformer.single_transformer_blocks.15.norm.linear.lora_B.weight": [9216, 64], - "transformer.single_transformer_blocks.15.proj_mlp.lora_A.weight": [64, 3072], - "transformer.single_transformer_blocks.15.proj_mlp.lora_B.weight": [12288, 64], - "transformer.single_transformer_blocks.15.proj_out.lora_A.weight": [64, 15360], - "transformer.single_transformer_blocks.15.proj_out.lora_B.weight": [3072, 64], - "transformer.single_transformer_blocks.16.attn.to_k.lora_A.weight": [64, 3072], - "transformer.single_transformer_blocks.16.attn.to_k.lora_B.weight": [3072, 64], - "transformer.single_transformer_blocks.16.attn.to_q.lora_A.weight": [64, 3072], - "transformer.single_transformer_blocks.16.attn.to_q.lora_B.weight": [3072, 64], - "transformer.single_transformer_blocks.16.attn.to_v.lora_A.weight": [64, 3072], - "transformer.single_transformer_blocks.16.attn.to_v.lora_B.weight": [3072, 64], - "transformer.single_transformer_blocks.16.norm.linear.lora_A.weight": [64, 3072], - "transformer.single_transformer_blocks.16.norm.linear.lora_B.weight": [9216, 64], - "transformer.single_transformer_blocks.16.proj_mlp.lora_A.weight": [64, 3072], - "transformer.single_transformer_blocks.16.proj_mlp.lora_B.weight": [12288, 64], - "transformer.single_transformer_blocks.16.proj_out.lora_A.weight": [64, 15360], - "transformer.single_transformer_blocks.16.proj_out.lora_B.weight": [3072, 64], - "transformer.single_transformer_blocks.17.attn.to_k.lora_A.weight": [64, 3072], - "transformer.single_transformer_blocks.17.attn.to_k.lora_B.weight": [3072, 64], - "transformer.single_transformer_blocks.17.attn.to_q.lora_A.weight": [64, 3072], - "transformer.single_transformer_blocks.17.attn.to_q.lora_B.weight": [3072, 64], - "transformer.single_transformer_blocks.17.attn.to_v.lora_A.weight": [64, 3072], - "transformer.single_transformer_blocks.17.attn.to_v.lora_B.weight": [3072, 64], - "transformer.single_transformer_blocks.17.norm.linear.lora_A.weight": [64, 3072], - "transformer.single_transformer_blocks.17.norm.linear.lora_B.weight": [9216, 64], - "transformer.single_transformer_blocks.17.proj_mlp.lora_A.weight": [64, 3072], - "transformer.single_transformer_blocks.17.proj_mlp.lora_B.weight": [12288, 64], - "transformer.single_transformer_blocks.17.proj_out.lora_A.weight": [64, 15360], - "transformer.single_transformer_blocks.17.proj_out.lora_B.weight": [3072, 64], - "transformer.single_transformer_blocks.18.attn.to_k.lora_A.weight": [64, 3072], - "transformer.single_transformer_blocks.18.attn.to_k.lora_B.weight": [3072, 64], - "transformer.single_transformer_blocks.18.attn.to_q.lora_A.weight": [64, 3072], - 
"transformer.single_transformer_blocks.18.attn.to_q.lora_B.weight": [3072, 64], - "transformer.single_transformer_blocks.18.attn.to_v.lora_A.weight": [64, 3072], - "transformer.single_transformer_blocks.18.attn.to_v.lora_B.weight": [3072, 64], - "transformer.single_transformer_blocks.18.norm.linear.lora_A.weight": [64, 3072], - "transformer.single_transformer_blocks.18.norm.linear.lora_B.weight": [9216, 64], - "transformer.single_transformer_blocks.18.proj_mlp.lora_A.weight": [64, 3072], - "transformer.single_transformer_blocks.18.proj_mlp.lora_B.weight": [12288, 64], - "transformer.single_transformer_blocks.18.proj_out.lora_A.weight": [64, 15360], - "transformer.single_transformer_blocks.18.proj_out.lora_B.weight": [3072, 64], - "transformer.single_transformer_blocks.19.attn.to_k.lora_A.weight": [64, 3072], - "transformer.single_transformer_blocks.19.attn.to_k.lora_B.weight": [3072, 64], - "transformer.single_transformer_blocks.19.attn.to_q.lora_A.weight": [64, 3072], - "transformer.single_transformer_blocks.19.attn.to_q.lora_B.weight": [3072, 64], - "transformer.single_transformer_blocks.19.attn.to_v.lora_A.weight": [64, 3072], - "transformer.single_transformer_blocks.19.attn.to_v.lora_B.weight": [3072, 64], - "transformer.single_transformer_blocks.19.norm.linear.lora_A.weight": [64, 3072], - "transformer.single_transformer_blocks.19.norm.linear.lora_B.weight": [9216, 64], - "transformer.single_transformer_blocks.19.proj_mlp.lora_A.weight": [64, 3072], - "transformer.single_transformer_blocks.19.proj_mlp.lora_B.weight": [12288, 64], - "transformer.single_transformer_blocks.19.proj_out.lora_A.weight": [64, 15360], - "transformer.single_transformer_blocks.19.proj_out.lora_B.weight": [3072, 64], - "transformer.single_transformer_blocks.2.attn.to_k.lora_A.weight": [64, 3072], - "transformer.single_transformer_blocks.2.attn.to_k.lora_B.weight": [3072, 64], - "transformer.single_transformer_blocks.2.attn.to_q.lora_A.weight": [64, 3072], - "transformer.single_transformer_blocks.2.attn.to_q.lora_B.weight": [3072, 64], - "transformer.single_transformer_blocks.2.attn.to_v.lora_A.weight": [64, 3072], - "transformer.single_transformer_blocks.2.attn.to_v.lora_B.weight": [3072, 64], - "transformer.single_transformer_blocks.2.norm.linear.lora_A.weight": [64, 3072], - "transformer.single_transformer_blocks.2.norm.linear.lora_B.weight": [9216, 64], - "transformer.single_transformer_blocks.2.proj_mlp.lora_A.weight": [64, 3072], - "transformer.single_transformer_blocks.2.proj_mlp.lora_B.weight": [12288, 64], - "transformer.single_transformer_blocks.2.proj_out.lora_A.weight": [64, 15360], - "transformer.single_transformer_blocks.2.proj_out.lora_B.weight": [3072, 64], - "transformer.single_transformer_blocks.20.attn.to_k.lora_A.weight": [64, 3072], - "transformer.single_transformer_blocks.20.attn.to_k.lora_B.weight": [3072, 64], - "transformer.single_transformer_blocks.20.attn.to_q.lora_A.weight": [64, 3072], - "transformer.single_transformer_blocks.20.attn.to_q.lora_B.weight": [3072, 64], - "transformer.single_transformer_blocks.20.attn.to_v.lora_A.weight": [64, 3072], - "transformer.single_transformer_blocks.20.attn.to_v.lora_B.weight": [3072, 64], - "transformer.single_transformer_blocks.20.norm.linear.lora_A.weight": [64, 3072], - "transformer.single_transformer_blocks.20.norm.linear.lora_B.weight": [9216, 64], - "transformer.single_transformer_blocks.20.proj_mlp.lora_A.weight": [64, 3072], - "transformer.single_transformer_blocks.20.proj_mlp.lora_B.weight": [12288, 64], - 
"transformer.single_transformer_blocks.20.proj_out.lora_A.weight": [64, 15360], - "transformer.single_transformer_blocks.20.proj_out.lora_B.weight": [3072, 64], - "transformer.single_transformer_blocks.21.attn.to_k.lora_A.weight": [64, 3072], - "transformer.single_transformer_blocks.21.attn.to_k.lora_B.weight": [3072, 64], - "transformer.single_transformer_blocks.21.attn.to_q.lora_A.weight": [64, 3072], - "transformer.single_transformer_blocks.21.attn.to_q.lora_B.weight": [3072, 64], - "transformer.single_transformer_blocks.21.attn.to_v.lora_A.weight": [64, 3072], - "transformer.single_transformer_blocks.21.attn.to_v.lora_B.weight": [3072, 64], - "transformer.single_transformer_blocks.21.norm.linear.lora_A.weight": [64, 3072], - "transformer.single_transformer_blocks.21.norm.linear.lora_B.weight": [9216, 64], - "transformer.single_transformer_blocks.21.proj_mlp.lora_A.weight": [64, 3072], - "transformer.single_transformer_blocks.21.proj_mlp.lora_B.weight": [12288, 64], - "transformer.single_transformer_blocks.21.proj_out.lora_A.weight": [64, 15360], - "transformer.single_transformer_blocks.21.proj_out.lora_B.weight": [3072, 64], - "transformer.single_transformer_blocks.22.attn.to_k.lora_A.weight": [64, 3072], - "transformer.single_transformer_blocks.22.attn.to_k.lora_B.weight": [3072, 64], - "transformer.single_transformer_blocks.22.attn.to_q.lora_A.weight": [64, 3072], - "transformer.single_transformer_blocks.22.attn.to_q.lora_B.weight": [3072, 64], - "transformer.single_transformer_blocks.22.attn.to_v.lora_A.weight": [64, 3072], - "transformer.single_transformer_blocks.22.attn.to_v.lora_B.weight": [3072, 64], - "transformer.single_transformer_blocks.22.norm.linear.lora_A.weight": [64, 3072], - "transformer.single_transformer_blocks.22.norm.linear.lora_B.weight": [9216, 64], - "transformer.single_transformer_blocks.22.proj_mlp.lora_A.weight": [64, 3072], - "transformer.single_transformer_blocks.22.proj_mlp.lora_B.weight": [12288, 64], - "transformer.single_transformer_blocks.22.proj_out.lora_A.weight": [64, 15360], - "transformer.single_transformer_blocks.22.proj_out.lora_B.weight": [3072, 64], - "transformer.single_transformer_blocks.23.attn.to_k.lora_A.weight": [64, 3072], - "transformer.single_transformer_blocks.23.attn.to_k.lora_B.weight": [3072, 64], - "transformer.single_transformer_blocks.23.attn.to_q.lora_A.weight": [64, 3072], - "transformer.single_transformer_blocks.23.attn.to_q.lora_B.weight": [3072, 64], - "transformer.single_transformer_blocks.23.attn.to_v.lora_A.weight": [64, 3072], - "transformer.single_transformer_blocks.23.attn.to_v.lora_B.weight": [3072, 64], - "transformer.single_transformer_blocks.23.norm.linear.lora_A.weight": [64, 3072], - "transformer.single_transformer_blocks.23.norm.linear.lora_B.weight": [9216, 64], - "transformer.single_transformer_blocks.23.proj_mlp.lora_A.weight": [64, 3072], - "transformer.single_transformer_blocks.23.proj_mlp.lora_B.weight": [12288, 64], - "transformer.single_transformer_blocks.23.proj_out.lora_A.weight": [64, 15360], - "transformer.single_transformer_blocks.23.proj_out.lora_B.weight": [3072, 64], - "transformer.single_transformer_blocks.24.attn.to_k.lora_A.weight": [64, 3072], - "transformer.single_transformer_blocks.24.attn.to_k.lora_B.weight": [3072, 64], - "transformer.single_transformer_blocks.24.attn.to_q.lora_A.weight": [64, 3072], - "transformer.single_transformer_blocks.24.attn.to_q.lora_B.weight": [3072, 64], - "transformer.single_transformer_blocks.24.attn.to_v.lora_A.weight": [64, 3072], - 
"transformer.single_transformer_blocks.24.attn.to_v.lora_B.weight": [3072, 64], - "transformer.single_transformer_blocks.24.norm.linear.lora_A.weight": [64, 3072], - "transformer.single_transformer_blocks.24.norm.linear.lora_B.weight": [9216, 64], - "transformer.single_transformer_blocks.24.proj_mlp.lora_A.weight": [64, 3072], - "transformer.single_transformer_blocks.24.proj_mlp.lora_B.weight": [12288, 64], - "transformer.single_transformer_blocks.24.proj_out.lora_A.weight": [64, 15360], - "transformer.single_transformer_blocks.24.proj_out.lora_B.weight": [3072, 64], - "transformer.single_transformer_blocks.25.attn.to_k.lora_A.weight": [64, 3072], - "transformer.single_transformer_blocks.25.attn.to_k.lora_B.weight": [3072, 64], - "transformer.single_transformer_blocks.25.attn.to_q.lora_A.weight": [64, 3072], - "transformer.single_transformer_blocks.25.attn.to_q.lora_B.weight": [3072, 64], - "transformer.single_transformer_blocks.25.attn.to_v.lora_A.weight": [64, 3072], - "transformer.single_transformer_blocks.25.attn.to_v.lora_B.weight": [3072, 64], - "transformer.single_transformer_blocks.25.norm.linear.lora_A.weight": [64, 3072], - "transformer.single_transformer_blocks.25.norm.linear.lora_B.weight": [9216, 64], - "transformer.single_transformer_blocks.25.proj_mlp.lora_A.weight": [64, 3072], - "transformer.single_transformer_blocks.25.proj_mlp.lora_B.weight": [12288, 64], - "transformer.single_transformer_blocks.25.proj_out.lora_A.weight": [64, 15360], - "transformer.single_transformer_blocks.25.proj_out.lora_B.weight": [3072, 64], - "transformer.single_transformer_blocks.26.attn.to_k.lora_A.weight": [64, 3072], - "transformer.single_transformer_blocks.26.attn.to_k.lora_B.weight": [3072, 64], - "transformer.single_transformer_blocks.26.attn.to_q.lora_A.weight": [64, 3072], - "transformer.single_transformer_blocks.26.attn.to_q.lora_B.weight": [3072, 64], - "transformer.single_transformer_blocks.26.attn.to_v.lora_A.weight": [64, 3072], - "transformer.single_transformer_blocks.26.attn.to_v.lora_B.weight": [3072, 64], - "transformer.single_transformer_blocks.26.norm.linear.lora_A.weight": [64, 3072], - "transformer.single_transformer_blocks.26.norm.linear.lora_B.weight": [9216, 64], - "transformer.single_transformer_blocks.26.proj_mlp.lora_A.weight": [64, 3072], - "transformer.single_transformer_blocks.26.proj_mlp.lora_B.weight": [12288, 64], - "transformer.single_transformer_blocks.26.proj_out.lora_A.weight": [64, 15360], - "transformer.single_transformer_blocks.26.proj_out.lora_B.weight": [3072, 64], - "transformer.single_transformer_blocks.27.attn.to_k.lora_A.weight": [64, 3072], - "transformer.single_transformer_blocks.27.attn.to_k.lora_B.weight": [3072, 64], - "transformer.single_transformer_blocks.27.attn.to_q.lora_A.weight": [64, 3072], - "transformer.single_transformer_blocks.27.attn.to_q.lora_B.weight": [3072, 64], - "transformer.single_transformer_blocks.27.attn.to_v.lora_A.weight": [64, 3072], - "transformer.single_transformer_blocks.27.attn.to_v.lora_B.weight": [3072, 64], - "transformer.single_transformer_blocks.27.norm.linear.lora_A.weight": [64, 3072], - "transformer.single_transformer_blocks.27.norm.linear.lora_B.weight": [9216, 64], - "transformer.single_transformer_blocks.27.proj_mlp.lora_A.weight": [64, 3072], - "transformer.single_transformer_blocks.27.proj_mlp.lora_B.weight": [12288, 64], - "transformer.single_transformer_blocks.27.proj_out.lora_A.weight": [64, 15360], - "transformer.single_transformer_blocks.27.proj_out.lora_B.weight": [3072, 64], - 
"transformer.single_transformer_blocks.28.attn.to_k.lora_A.weight": [64, 3072], - "transformer.single_transformer_blocks.28.attn.to_k.lora_B.weight": [3072, 64], - "transformer.single_transformer_blocks.28.attn.to_q.lora_A.weight": [64, 3072], - "transformer.single_transformer_blocks.28.attn.to_q.lora_B.weight": [3072, 64], - "transformer.single_transformer_blocks.28.attn.to_v.lora_A.weight": [64, 3072], - "transformer.single_transformer_blocks.28.attn.to_v.lora_B.weight": [3072, 64], - "transformer.single_transformer_blocks.28.norm.linear.lora_A.weight": [64, 3072], - "transformer.single_transformer_blocks.28.norm.linear.lora_B.weight": [9216, 64], - "transformer.single_transformer_blocks.28.proj_mlp.lora_A.weight": [64, 3072], - "transformer.single_transformer_blocks.28.proj_mlp.lora_B.weight": [12288, 64], - "transformer.single_transformer_blocks.28.proj_out.lora_A.weight": [64, 15360], - "transformer.single_transformer_blocks.28.proj_out.lora_B.weight": [3072, 64], - "transformer.single_transformer_blocks.29.attn.to_k.lora_A.weight": [64, 3072], - "transformer.single_transformer_blocks.29.attn.to_k.lora_B.weight": [3072, 64], - "transformer.single_transformer_blocks.29.attn.to_q.lora_A.weight": [64, 3072], - "transformer.single_transformer_blocks.29.attn.to_q.lora_B.weight": [3072, 64], - "transformer.single_transformer_blocks.29.attn.to_v.lora_A.weight": [64, 3072], - "transformer.single_transformer_blocks.29.attn.to_v.lora_B.weight": [3072, 64], - "transformer.single_transformer_blocks.29.norm.linear.lora_A.weight": [64, 3072], - "transformer.single_transformer_blocks.29.norm.linear.lora_B.weight": [9216, 64], - "transformer.single_transformer_blocks.29.proj_mlp.lora_A.weight": [64, 3072], - "transformer.single_transformer_blocks.29.proj_mlp.lora_B.weight": [12288, 64], - "transformer.single_transformer_blocks.29.proj_out.lora_A.weight": [64, 15360], - "transformer.single_transformer_blocks.29.proj_out.lora_B.weight": [3072, 64], - "transformer.single_transformer_blocks.3.attn.to_k.lora_A.weight": [64, 3072], - "transformer.single_transformer_blocks.3.attn.to_k.lora_B.weight": [3072, 64], - "transformer.single_transformer_blocks.3.attn.to_q.lora_A.weight": [64, 3072], - "transformer.single_transformer_blocks.3.attn.to_q.lora_B.weight": [3072, 64], - "transformer.single_transformer_blocks.3.attn.to_v.lora_A.weight": [64, 3072], - "transformer.single_transformer_blocks.3.attn.to_v.lora_B.weight": [3072, 64], - "transformer.single_transformer_blocks.3.norm.linear.lora_A.weight": [64, 3072], - "transformer.single_transformer_blocks.3.norm.linear.lora_B.weight": [9216, 64], - "transformer.single_transformer_blocks.3.proj_mlp.lora_A.weight": [64, 3072], - "transformer.single_transformer_blocks.3.proj_mlp.lora_B.weight": [12288, 64], - "transformer.single_transformer_blocks.3.proj_out.lora_A.weight": [64, 15360], - "transformer.single_transformer_blocks.3.proj_out.lora_B.weight": [3072, 64], - "transformer.single_transformer_blocks.30.attn.to_k.lora_A.weight": [64, 3072], - "transformer.single_transformer_blocks.30.attn.to_k.lora_B.weight": [3072, 64], - "transformer.single_transformer_blocks.30.attn.to_q.lora_A.weight": [64, 3072], - "transformer.single_transformer_blocks.30.attn.to_q.lora_B.weight": [3072, 64], - "transformer.single_transformer_blocks.30.attn.to_v.lora_A.weight": [64, 3072], - "transformer.single_transformer_blocks.30.attn.to_v.lora_B.weight": [3072, 64], - "transformer.single_transformer_blocks.30.norm.linear.lora_A.weight": [64, 3072], - 
"transformer.single_transformer_blocks.30.norm.linear.lora_B.weight": [9216, 64], - "transformer.single_transformer_blocks.30.proj_mlp.lora_A.weight": [64, 3072], - "transformer.single_transformer_blocks.30.proj_mlp.lora_B.weight": [12288, 64], - "transformer.single_transformer_blocks.30.proj_out.lora_A.weight": [64, 15360], - "transformer.single_transformer_blocks.30.proj_out.lora_B.weight": [3072, 64], - "transformer.single_transformer_blocks.31.attn.to_k.lora_A.weight": [64, 3072], - "transformer.single_transformer_blocks.31.attn.to_k.lora_B.weight": [3072, 64], - "transformer.single_transformer_blocks.31.attn.to_q.lora_A.weight": [64, 3072], - "transformer.single_transformer_blocks.31.attn.to_q.lora_B.weight": [3072, 64], - "transformer.single_transformer_blocks.31.attn.to_v.lora_A.weight": [64, 3072], - "transformer.single_transformer_blocks.31.attn.to_v.lora_B.weight": [3072, 64], - "transformer.single_transformer_blocks.31.norm.linear.lora_A.weight": [64, 3072], - "transformer.single_transformer_blocks.31.norm.linear.lora_B.weight": [9216, 64], - "transformer.single_transformer_blocks.31.proj_mlp.lora_A.weight": [64, 3072], - "transformer.single_transformer_blocks.31.proj_mlp.lora_B.weight": [12288, 64], - "transformer.single_transformer_blocks.31.proj_out.lora_A.weight": [64, 15360], - "transformer.single_transformer_blocks.31.proj_out.lora_B.weight": [3072, 64], - "transformer.single_transformer_blocks.32.attn.to_k.lora_A.weight": [64, 3072], - "transformer.single_transformer_blocks.32.attn.to_k.lora_B.weight": [3072, 64], - "transformer.single_transformer_blocks.32.attn.to_q.lora_A.weight": [64, 3072], - "transformer.single_transformer_blocks.32.attn.to_q.lora_B.weight": [3072, 64], - "transformer.single_transformer_blocks.32.attn.to_v.lora_A.weight": [64, 3072], - "transformer.single_transformer_blocks.32.attn.to_v.lora_B.weight": [3072, 64], - "transformer.single_transformer_blocks.32.norm.linear.lora_A.weight": [64, 3072], - "transformer.single_transformer_blocks.32.norm.linear.lora_B.weight": [9216, 64], - "transformer.single_transformer_blocks.32.proj_mlp.lora_A.weight": [64, 3072], - "transformer.single_transformer_blocks.32.proj_mlp.lora_B.weight": [12288, 64], - "transformer.single_transformer_blocks.32.proj_out.lora_A.weight": [64, 15360], - "transformer.single_transformer_blocks.32.proj_out.lora_B.weight": [3072, 64], - "transformer.single_transformer_blocks.33.attn.to_k.lora_A.weight": [64, 3072], - "transformer.single_transformer_blocks.33.attn.to_k.lora_B.weight": [3072, 64], - "transformer.single_transformer_blocks.33.attn.to_q.lora_A.weight": [64, 3072], - "transformer.single_transformer_blocks.33.attn.to_q.lora_B.weight": [3072, 64], - "transformer.single_transformer_blocks.33.attn.to_v.lora_A.weight": [64, 3072], - "transformer.single_transformer_blocks.33.attn.to_v.lora_B.weight": [3072, 64], - "transformer.single_transformer_blocks.33.norm.linear.lora_A.weight": [64, 3072], - "transformer.single_transformer_blocks.33.norm.linear.lora_B.weight": [9216, 64], - "transformer.single_transformer_blocks.33.proj_mlp.lora_A.weight": [64, 3072], - "transformer.single_transformer_blocks.33.proj_mlp.lora_B.weight": [12288, 64], - "transformer.single_transformer_blocks.33.proj_out.lora_A.weight": [64, 15360], - "transformer.single_transformer_blocks.33.proj_out.lora_B.weight": [3072, 64], - "transformer.single_transformer_blocks.34.attn.to_k.lora_A.weight": [64, 3072], - "transformer.single_transformer_blocks.34.attn.to_k.lora_B.weight": [3072, 64], - 
"transformer.single_transformer_blocks.34.attn.to_q.lora_A.weight": [64, 3072], - "transformer.single_transformer_blocks.34.attn.to_q.lora_B.weight": [3072, 64], - "transformer.single_transformer_blocks.34.attn.to_v.lora_A.weight": [64, 3072], - "transformer.single_transformer_blocks.34.attn.to_v.lora_B.weight": [3072, 64], - "transformer.single_transformer_blocks.34.norm.linear.lora_A.weight": [64, 3072], - "transformer.single_transformer_blocks.34.norm.linear.lora_B.weight": [9216, 64], - "transformer.single_transformer_blocks.34.proj_mlp.lora_A.weight": [64, 3072], - "transformer.single_transformer_blocks.34.proj_mlp.lora_B.weight": [12288, 64], - "transformer.single_transformer_blocks.34.proj_out.lora_A.weight": [64, 15360], - "transformer.single_transformer_blocks.34.proj_out.lora_B.weight": [3072, 64], - "transformer.single_transformer_blocks.35.attn.to_k.lora_A.weight": [64, 3072], - "transformer.single_transformer_blocks.35.attn.to_k.lora_B.weight": [3072, 64], - "transformer.single_transformer_blocks.35.attn.to_q.lora_A.weight": [64, 3072], - "transformer.single_transformer_blocks.35.attn.to_q.lora_B.weight": [3072, 64], - "transformer.single_transformer_blocks.35.attn.to_v.lora_A.weight": [64, 3072], - "transformer.single_transformer_blocks.35.attn.to_v.lora_B.weight": [3072, 64], - "transformer.single_transformer_blocks.35.norm.linear.lora_A.weight": [64, 3072], - "transformer.single_transformer_blocks.35.norm.linear.lora_B.weight": [9216, 64], - "transformer.single_transformer_blocks.35.proj_mlp.lora_A.weight": [64, 3072], - "transformer.single_transformer_blocks.35.proj_mlp.lora_B.weight": [12288, 64], - "transformer.single_transformer_blocks.35.proj_out.lora_A.weight": [64, 15360], - "transformer.single_transformer_blocks.35.proj_out.lora_B.weight": [3072, 64], - "transformer.single_transformer_blocks.36.attn.to_k.lora_A.weight": [64, 3072], - "transformer.single_transformer_blocks.36.attn.to_k.lora_B.weight": [3072, 64], - "transformer.single_transformer_blocks.36.attn.to_q.lora_A.weight": [64, 3072], - "transformer.single_transformer_blocks.36.attn.to_q.lora_B.weight": [3072, 64], - "transformer.single_transformer_blocks.36.attn.to_v.lora_A.weight": [64, 3072], - "transformer.single_transformer_blocks.36.attn.to_v.lora_B.weight": [3072, 64], - "transformer.single_transformer_blocks.36.norm.linear.lora_A.weight": [64, 3072], - "transformer.single_transformer_blocks.36.norm.linear.lora_B.weight": [9216, 64], - "transformer.single_transformer_blocks.36.proj_mlp.lora_A.weight": [64, 3072], - "transformer.single_transformer_blocks.36.proj_mlp.lora_B.weight": [12288, 64], - "transformer.single_transformer_blocks.36.proj_out.lora_A.weight": [64, 15360], - "transformer.single_transformer_blocks.36.proj_out.lora_B.weight": [3072, 64], - "transformer.single_transformer_blocks.37.attn.to_k.lora_A.weight": [64, 3072], - "transformer.single_transformer_blocks.37.attn.to_k.lora_B.weight": [3072, 64], - "transformer.single_transformer_blocks.37.attn.to_q.lora_A.weight": [64, 3072], - "transformer.single_transformer_blocks.37.attn.to_q.lora_B.weight": [3072, 64], - "transformer.single_transformer_blocks.37.attn.to_v.lora_A.weight": [64, 3072], - "transformer.single_transformer_blocks.37.attn.to_v.lora_B.weight": [3072, 64], - "transformer.single_transformer_blocks.37.norm.linear.lora_A.weight": [64, 3072], - "transformer.single_transformer_blocks.37.norm.linear.lora_B.weight": [9216, 64], - "transformer.single_transformer_blocks.37.proj_mlp.lora_A.weight": [64, 3072], - 
"transformer.single_transformer_blocks.37.proj_mlp.lora_B.weight": [12288, 64], - "transformer.single_transformer_blocks.37.proj_out.lora_A.weight": [64, 15360], - "transformer.single_transformer_blocks.37.proj_out.lora_B.weight": [3072, 64], - "transformer.single_transformer_blocks.4.attn.to_k.lora_A.weight": [64, 3072], - "transformer.single_transformer_blocks.4.attn.to_k.lora_B.weight": [3072, 64], - "transformer.single_transformer_blocks.4.attn.to_q.lora_A.weight": [64, 3072], - "transformer.single_transformer_blocks.4.attn.to_q.lora_B.weight": [3072, 64], - "transformer.single_transformer_blocks.4.attn.to_v.lora_A.weight": [64, 3072], - "transformer.single_transformer_blocks.4.attn.to_v.lora_B.weight": [3072, 64], - "transformer.single_transformer_blocks.4.norm.linear.lora_A.weight": [64, 3072], - "transformer.single_transformer_blocks.4.norm.linear.lora_B.weight": [9216, 64], - "transformer.single_transformer_blocks.4.proj_mlp.lora_A.weight": [64, 3072], - "transformer.single_transformer_blocks.4.proj_mlp.lora_B.weight": [12288, 64], - "transformer.single_transformer_blocks.4.proj_out.lora_A.weight": [64, 15360], - "transformer.single_transformer_blocks.4.proj_out.lora_B.weight": [3072, 64], - "transformer.single_transformer_blocks.5.attn.to_k.lora_A.weight": [64, 3072], - "transformer.single_transformer_blocks.5.attn.to_k.lora_B.weight": [3072, 64], - "transformer.single_transformer_blocks.5.attn.to_q.lora_A.weight": [64, 3072], - "transformer.single_transformer_blocks.5.attn.to_q.lora_B.weight": [3072, 64], - "transformer.single_transformer_blocks.5.attn.to_v.lora_A.weight": [64, 3072], - "transformer.single_transformer_blocks.5.attn.to_v.lora_B.weight": [3072, 64], - "transformer.single_transformer_blocks.5.norm.linear.lora_A.weight": [64, 3072], - "transformer.single_transformer_blocks.5.norm.linear.lora_B.weight": [9216, 64], - "transformer.single_transformer_blocks.5.proj_mlp.lora_A.weight": [64, 3072], - "transformer.single_transformer_blocks.5.proj_mlp.lora_B.weight": [12288, 64], - "transformer.single_transformer_blocks.5.proj_out.lora_A.weight": [64, 15360], - "transformer.single_transformer_blocks.5.proj_out.lora_B.weight": [3072, 64], - "transformer.single_transformer_blocks.6.attn.to_k.lora_A.weight": [64, 3072], - "transformer.single_transformer_blocks.6.attn.to_k.lora_B.weight": [3072, 64], - "transformer.single_transformer_blocks.6.attn.to_q.lora_A.weight": [64, 3072], - "transformer.single_transformer_blocks.6.attn.to_q.lora_B.weight": [3072, 64], - "transformer.single_transformer_blocks.6.attn.to_v.lora_A.weight": [64, 3072], - "transformer.single_transformer_blocks.6.attn.to_v.lora_B.weight": [3072, 64], - "transformer.single_transformer_blocks.6.norm.linear.lora_A.weight": [64, 3072], - "transformer.single_transformer_blocks.6.norm.linear.lora_B.weight": [9216, 64], - "transformer.single_transformer_blocks.6.proj_mlp.lora_A.weight": [64, 3072], - "transformer.single_transformer_blocks.6.proj_mlp.lora_B.weight": [12288, 64], - "transformer.single_transformer_blocks.6.proj_out.lora_A.weight": [64, 15360], - "transformer.single_transformer_blocks.6.proj_out.lora_B.weight": [3072, 64], - "transformer.single_transformer_blocks.7.attn.to_k.lora_A.weight": [64, 3072], - "transformer.single_transformer_blocks.7.attn.to_k.lora_B.weight": [3072, 64], - "transformer.single_transformer_blocks.7.attn.to_q.lora_A.weight": [64, 3072], - "transformer.single_transformer_blocks.7.attn.to_q.lora_B.weight": [3072, 64], - 
"transformer.single_transformer_blocks.7.attn.to_v.lora_A.weight": [64, 3072], - "transformer.single_transformer_blocks.7.attn.to_v.lora_B.weight": [3072, 64], - "transformer.single_transformer_blocks.7.norm.linear.lora_A.weight": [64, 3072], - "transformer.single_transformer_blocks.7.norm.linear.lora_B.weight": [9216, 64], - "transformer.single_transformer_blocks.7.proj_mlp.lora_A.weight": [64, 3072], - "transformer.single_transformer_blocks.7.proj_mlp.lora_B.weight": [12288, 64], - "transformer.single_transformer_blocks.7.proj_out.lora_A.weight": [64, 15360], - "transformer.single_transformer_blocks.7.proj_out.lora_B.weight": [3072, 64], - "transformer.single_transformer_blocks.8.attn.to_k.lora_A.weight": [64, 3072], - "transformer.single_transformer_blocks.8.attn.to_k.lora_B.weight": [3072, 64], - "transformer.single_transformer_blocks.8.attn.to_q.lora_A.weight": [64, 3072], - "transformer.single_transformer_blocks.8.attn.to_q.lora_B.weight": [3072, 64], - "transformer.single_transformer_blocks.8.attn.to_v.lora_A.weight": [64, 3072], - "transformer.single_transformer_blocks.8.attn.to_v.lora_B.weight": [3072, 64], - "transformer.single_transformer_blocks.8.norm.linear.lora_A.weight": [64, 3072], - "transformer.single_transformer_blocks.8.norm.linear.lora_B.weight": [9216, 64], - "transformer.single_transformer_blocks.8.proj_mlp.lora_A.weight": [64, 3072], - "transformer.single_transformer_blocks.8.proj_mlp.lora_B.weight": [12288, 64], - "transformer.single_transformer_blocks.8.proj_out.lora_A.weight": [64, 15360], - "transformer.single_transformer_blocks.8.proj_out.lora_B.weight": [3072, 64], - "transformer.single_transformer_blocks.9.attn.to_k.lora_A.weight": [64, 3072], - "transformer.single_transformer_blocks.9.attn.to_k.lora_B.weight": [3072, 64], - "transformer.single_transformer_blocks.9.attn.to_q.lora_A.weight": [64, 3072], - "transformer.single_transformer_blocks.9.attn.to_q.lora_B.weight": [3072, 64], - "transformer.single_transformer_blocks.9.attn.to_v.lora_A.weight": [64, 3072], - "transformer.single_transformer_blocks.9.attn.to_v.lora_B.weight": [3072, 64], - "transformer.single_transformer_blocks.9.norm.linear.lora_A.weight": [64, 3072], - "transformer.single_transformer_blocks.9.norm.linear.lora_B.weight": [9216, 64], - "transformer.single_transformer_blocks.9.proj_mlp.lora_A.weight": [64, 3072], - "transformer.single_transformer_blocks.9.proj_mlp.lora_B.weight": [12288, 64], - "transformer.single_transformer_blocks.9.proj_out.lora_A.weight": [64, 15360], - "transformer.single_transformer_blocks.9.proj_out.lora_B.weight": [3072, 64], - "transformer.time_text_embed.guidance_embedder.linear_1.lora_A.weight": [64, 256], - "transformer.time_text_embed.guidance_embedder.linear_1.lora_B.weight": [3072, 64], - "transformer.time_text_embed.guidance_embedder.linear_2.lora_A.weight": [64, 3072], - "transformer.time_text_embed.guidance_embedder.linear_2.lora_B.weight": [3072, 64], - "transformer.time_text_embed.text_embedder.linear_1.lora_A.weight": [64, 768], - "transformer.time_text_embed.text_embedder.linear_1.lora_B.weight": [3072, 64], - "transformer.time_text_embed.text_embedder.linear_2.lora_A.weight": [64, 3072], - "transformer.time_text_embed.text_embedder.linear_2.lora_B.weight": [3072, 64], - "transformer.time_text_embed.timestep_embedder.linear_1.lora_A.weight": [64, 256], - "transformer.time_text_embed.timestep_embedder.linear_1.lora_B.weight": [3072, 64], - "transformer.time_text_embed.timestep_embedder.linear_2.lora_A.weight": [64, 3072], - 
"transformer.time_text_embed.timestep_embedder.linear_2.lora_B.weight": [3072, 64], - "transformer.transformer_blocks.0.attn.add_k_proj.lora_A.weight": [64, 3072], - "transformer.transformer_blocks.0.attn.add_k_proj.lora_B.weight": [3072, 64], - "transformer.transformer_blocks.0.attn.add_q_proj.lora_A.weight": [64, 3072], - "transformer.transformer_blocks.0.attn.add_q_proj.lora_B.weight": [3072, 64], - "transformer.transformer_blocks.0.attn.add_v_proj.lora_A.weight": [64, 3072], - "transformer.transformer_blocks.0.attn.add_v_proj.lora_B.weight": [3072, 64], - "transformer.transformer_blocks.0.attn.to_add_out.lora_A.weight": [64, 3072], - "transformer.transformer_blocks.0.attn.to_add_out.lora_B.weight": [3072, 64], - "transformer.transformer_blocks.0.attn.to_k.lora_A.weight": [64, 3072], - "transformer.transformer_blocks.0.attn.to_k.lora_B.weight": [3072, 64], - "transformer.transformer_blocks.0.attn.to_out.0.lora_A.weight": [64, 3072], - "transformer.transformer_blocks.0.attn.to_out.0.lora_B.weight": [3072, 64], - "transformer.transformer_blocks.0.attn.to_q.lora_A.weight": [64, 3072], - "transformer.transformer_blocks.0.attn.to_q.lora_B.weight": [3072, 64], - "transformer.transformer_blocks.0.attn.to_v.lora_A.weight": [64, 3072], - "transformer.transformer_blocks.0.attn.to_v.lora_B.weight": [3072, 64], - "transformer.transformer_blocks.0.ff.net.0.proj.lora_A.weight": [64, 3072], - "transformer.transformer_blocks.0.ff.net.0.proj.lora_B.weight": [12288, 64], - "transformer.transformer_blocks.0.ff.net.2.lora_A.weight": [64, 12288], - "transformer.transformer_blocks.0.ff.net.2.lora_B.weight": [3072, 64], - "transformer.transformer_blocks.0.ff_context.net.0.proj.lora_A.weight": [64, 3072], - "transformer.transformer_blocks.0.ff_context.net.0.proj.lora_B.weight": [12288, 64], - "transformer.transformer_blocks.0.ff_context.net.2.lora_A.weight": [64, 12288], - "transformer.transformer_blocks.0.ff_context.net.2.lora_B.weight": [3072, 64], - "transformer.transformer_blocks.0.norm1.linear.lora_A.weight": [64, 3072], - "transformer.transformer_blocks.0.norm1.linear.lora_B.weight": [18432, 64], - "transformer.transformer_blocks.0.norm1_context.linear.lora_A.weight": [64, 3072], - "transformer.transformer_blocks.0.norm1_context.linear.lora_B.weight": [18432, 64], - "transformer.transformer_blocks.1.attn.add_k_proj.lora_A.weight": [64, 3072], - "transformer.transformer_blocks.1.attn.add_k_proj.lora_B.weight": [3072, 64], - "transformer.transformer_blocks.1.attn.add_q_proj.lora_A.weight": [64, 3072], - "transformer.transformer_blocks.1.attn.add_q_proj.lora_B.weight": [3072, 64], - "transformer.transformer_blocks.1.attn.add_v_proj.lora_A.weight": [64, 3072], - "transformer.transformer_blocks.1.attn.add_v_proj.lora_B.weight": [3072, 64], - "transformer.transformer_blocks.1.attn.to_add_out.lora_A.weight": [64, 3072], - "transformer.transformer_blocks.1.attn.to_add_out.lora_B.weight": [3072, 64], - "transformer.transformer_blocks.1.attn.to_k.lora_A.weight": [64, 3072], - "transformer.transformer_blocks.1.attn.to_k.lora_B.weight": [3072, 64], - "transformer.transformer_blocks.1.attn.to_out.0.lora_A.weight": [64, 3072], - "transformer.transformer_blocks.1.attn.to_out.0.lora_B.weight": [3072, 64], - "transformer.transformer_blocks.1.attn.to_q.lora_A.weight": [64, 3072], - "transformer.transformer_blocks.1.attn.to_q.lora_B.weight": [3072, 64], - "transformer.transformer_blocks.1.attn.to_v.lora_A.weight": [64, 3072], - "transformer.transformer_blocks.1.attn.to_v.lora_B.weight": [3072, 64], - 
"transformer.transformer_blocks.1.ff.net.0.proj.lora_A.weight": [64, 3072], - "transformer.transformer_blocks.1.ff.net.0.proj.lora_B.weight": [12288, 64], - "transformer.transformer_blocks.1.ff.net.2.lora_A.weight": [64, 12288], - "transformer.transformer_blocks.1.ff.net.2.lora_B.weight": [3072, 64], - "transformer.transformer_blocks.1.ff_context.net.0.proj.lora_A.weight": [64, 3072], - "transformer.transformer_blocks.1.ff_context.net.0.proj.lora_B.weight": [12288, 64], - "transformer.transformer_blocks.1.ff_context.net.2.lora_A.weight": [64, 12288], - "transformer.transformer_blocks.1.ff_context.net.2.lora_B.weight": [3072, 64], - "transformer.transformer_blocks.1.norm1.linear.lora_A.weight": [64, 3072], - "transformer.transformer_blocks.1.norm1.linear.lora_B.weight": [18432, 64], - "transformer.transformer_blocks.1.norm1_context.linear.lora_A.weight": [64, 3072], - "transformer.transformer_blocks.1.norm1_context.linear.lora_B.weight": [18432, 64], - "transformer.transformer_blocks.10.attn.add_k_proj.lora_A.weight": [64, 3072], - "transformer.transformer_blocks.10.attn.add_k_proj.lora_B.weight": [3072, 64], - "transformer.transformer_blocks.10.attn.add_q_proj.lora_A.weight": [64, 3072], - "transformer.transformer_blocks.10.attn.add_q_proj.lora_B.weight": [3072, 64], - "transformer.transformer_blocks.10.attn.add_v_proj.lora_A.weight": [64, 3072], - "transformer.transformer_blocks.10.attn.add_v_proj.lora_B.weight": [3072, 64], - "transformer.transformer_blocks.10.attn.to_add_out.lora_A.weight": [64, 3072], - "transformer.transformer_blocks.10.attn.to_add_out.lora_B.weight": [3072, 64], - "transformer.transformer_blocks.10.attn.to_k.lora_A.weight": [64, 3072], - "transformer.transformer_blocks.10.attn.to_k.lora_B.weight": [3072, 64], - "transformer.transformer_blocks.10.attn.to_out.0.lora_A.weight": [64, 3072], - "transformer.transformer_blocks.10.attn.to_out.0.lora_B.weight": [3072, 64], - "transformer.transformer_blocks.10.attn.to_q.lora_A.weight": [64, 3072], - "transformer.transformer_blocks.10.attn.to_q.lora_B.weight": [3072, 64], - "transformer.transformer_blocks.10.attn.to_v.lora_A.weight": [64, 3072], - "transformer.transformer_blocks.10.attn.to_v.lora_B.weight": [3072, 64], - "transformer.transformer_blocks.10.ff.net.0.proj.lora_A.weight": [64, 3072], - "transformer.transformer_blocks.10.ff.net.0.proj.lora_B.weight": [12288, 64], - "transformer.transformer_blocks.10.ff.net.2.lora_A.weight": [64, 12288], - "transformer.transformer_blocks.10.ff.net.2.lora_B.weight": [3072, 64], - "transformer.transformer_blocks.10.ff_context.net.0.proj.lora_A.weight": [64, 3072], - "transformer.transformer_blocks.10.ff_context.net.0.proj.lora_B.weight": [12288, 64], - "transformer.transformer_blocks.10.ff_context.net.2.lora_A.weight": [64, 12288], - "transformer.transformer_blocks.10.ff_context.net.2.lora_B.weight": [3072, 64], - "transformer.transformer_blocks.10.norm1.linear.lora_A.weight": [64, 3072], - "transformer.transformer_blocks.10.norm1.linear.lora_B.weight": [18432, 64], - "transformer.transformer_blocks.10.norm1_context.linear.lora_A.weight": [64, 3072], - "transformer.transformer_blocks.10.norm1_context.linear.lora_B.weight": [18432, 64], - "transformer.transformer_blocks.11.attn.add_k_proj.lora_A.weight": [64, 3072], - "transformer.transformer_blocks.11.attn.add_k_proj.lora_B.weight": [3072, 64], - "transformer.transformer_blocks.11.attn.add_q_proj.lora_A.weight": [64, 3072], - "transformer.transformer_blocks.11.attn.add_q_proj.lora_B.weight": [3072, 64], - 
"transformer.transformer_blocks.11.attn.add_v_proj.lora_A.weight": [64, 3072], - "transformer.transformer_blocks.11.attn.add_v_proj.lora_B.weight": [3072, 64], - "transformer.transformer_blocks.11.attn.to_add_out.lora_A.weight": [64, 3072], - "transformer.transformer_blocks.11.attn.to_add_out.lora_B.weight": [3072, 64], - "transformer.transformer_blocks.11.attn.to_k.lora_A.weight": [64, 3072], - "transformer.transformer_blocks.11.attn.to_k.lora_B.weight": [3072, 64], - "transformer.transformer_blocks.11.attn.to_out.0.lora_A.weight": [64, 3072], - "transformer.transformer_blocks.11.attn.to_out.0.lora_B.weight": [3072, 64], - "transformer.transformer_blocks.11.attn.to_q.lora_A.weight": [64, 3072], - "transformer.transformer_blocks.11.attn.to_q.lora_B.weight": [3072, 64], - "transformer.transformer_blocks.11.attn.to_v.lora_A.weight": [64, 3072], - "transformer.transformer_blocks.11.attn.to_v.lora_B.weight": [3072, 64], - "transformer.transformer_blocks.11.ff.net.0.proj.lora_A.weight": [64, 3072], - "transformer.transformer_blocks.11.ff.net.0.proj.lora_B.weight": [12288, 64], - "transformer.transformer_blocks.11.ff.net.2.lora_A.weight": [64, 12288], - "transformer.transformer_blocks.11.ff.net.2.lora_B.weight": [3072, 64], - "transformer.transformer_blocks.11.ff_context.net.0.proj.lora_A.weight": [64, 3072], - "transformer.transformer_blocks.11.ff_context.net.0.proj.lora_B.weight": [12288, 64], - "transformer.transformer_blocks.11.ff_context.net.2.lora_A.weight": [64, 12288], - "transformer.transformer_blocks.11.ff_context.net.2.lora_B.weight": [3072, 64], - "transformer.transformer_blocks.11.norm1.linear.lora_A.weight": [64, 3072], - "transformer.transformer_blocks.11.norm1.linear.lora_B.weight": [18432, 64], - "transformer.transformer_blocks.11.norm1_context.linear.lora_A.weight": [64, 3072], - "transformer.transformer_blocks.11.norm1_context.linear.lora_B.weight": [18432, 64], - "transformer.transformer_blocks.12.attn.add_k_proj.lora_A.weight": [64, 3072], - "transformer.transformer_blocks.12.attn.add_k_proj.lora_B.weight": [3072, 64], - "transformer.transformer_blocks.12.attn.add_q_proj.lora_A.weight": [64, 3072], - "transformer.transformer_blocks.12.attn.add_q_proj.lora_B.weight": [3072, 64], - "transformer.transformer_blocks.12.attn.add_v_proj.lora_A.weight": [64, 3072], - "transformer.transformer_blocks.12.attn.add_v_proj.lora_B.weight": [3072, 64], - "transformer.transformer_blocks.12.attn.to_add_out.lora_A.weight": [64, 3072], - "transformer.transformer_blocks.12.attn.to_add_out.lora_B.weight": [3072, 64], - "transformer.transformer_blocks.12.attn.to_k.lora_A.weight": [64, 3072], - "transformer.transformer_blocks.12.attn.to_k.lora_B.weight": [3072, 64], - "transformer.transformer_blocks.12.attn.to_out.0.lora_A.weight": [64, 3072], - "transformer.transformer_blocks.12.attn.to_out.0.lora_B.weight": [3072, 64], - "transformer.transformer_blocks.12.attn.to_q.lora_A.weight": [64, 3072], - "transformer.transformer_blocks.12.attn.to_q.lora_B.weight": [3072, 64], - "transformer.transformer_blocks.12.attn.to_v.lora_A.weight": [64, 3072], - "transformer.transformer_blocks.12.attn.to_v.lora_B.weight": [3072, 64], - "transformer.transformer_blocks.12.ff.net.0.proj.lora_A.weight": [64, 3072], - "transformer.transformer_blocks.12.ff.net.0.proj.lora_B.weight": [12288, 64], - "transformer.transformer_blocks.12.ff.net.2.lora_A.weight": [64, 12288], - "transformer.transformer_blocks.12.ff.net.2.lora_B.weight": [3072, 64], - "transformer.transformer_blocks.12.ff_context.net.0.proj.lora_A.weight": [64, 
3072], - "transformer.transformer_blocks.12.ff_context.net.0.proj.lora_B.weight": [12288, 64], - "transformer.transformer_blocks.12.ff_context.net.2.lora_A.weight": [64, 12288], - "transformer.transformer_blocks.12.ff_context.net.2.lora_B.weight": [3072, 64], - "transformer.transformer_blocks.12.norm1.linear.lora_A.weight": [64, 3072], - "transformer.transformer_blocks.12.norm1.linear.lora_B.weight": [18432, 64], - "transformer.transformer_blocks.12.norm1_context.linear.lora_A.weight": [64, 3072], - "transformer.transformer_blocks.12.norm1_context.linear.lora_B.weight": [18432, 64], - "transformer.transformer_blocks.13.attn.add_k_proj.lora_A.weight": [64, 3072], - "transformer.transformer_blocks.13.attn.add_k_proj.lora_B.weight": [3072, 64], - "transformer.transformer_blocks.13.attn.add_q_proj.lora_A.weight": [64, 3072], - "transformer.transformer_blocks.13.attn.add_q_proj.lora_B.weight": [3072, 64], - "transformer.transformer_blocks.13.attn.add_v_proj.lora_A.weight": [64, 3072], - "transformer.transformer_blocks.13.attn.add_v_proj.lora_B.weight": [3072, 64], - "transformer.transformer_blocks.13.attn.to_add_out.lora_A.weight": [64, 3072], - "transformer.transformer_blocks.13.attn.to_add_out.lora_B.weight": [3072, 64], - "transformer.transformer_blocks.13.attn.to_k.lora_A.weight": [64, 3072], - "transformer.transformer_blocks.13.attn.to_k.lora_B.weight": [3072, 64], - "transformer.transformer_blocks.13.attn.to_out.0.lora_A.weight": [64, 3072], - "transformer.transformer_blocks.13.attn.to_out.0.lora_B.weight": [3072, 64], - "transformer.transformer_blocks.13.attn.to_q.lora_A.weight": [64, 3072], - "transformer.transformer_blocks.13.attn.to_q.lora_B.weight": [3072, 64], - "transformer.transformer_blocks.13.attn.to_v.lora_A.weight": [64, 3072], - "transformer.transformer_blocks.13.attn.to_v.lora_B.weight": [3072, 64], - "transformer.transformer_blocks.13.ff.net.0.proj.lora_A.weight": [64, 3072], - "transformer.transformer_blocks.13.ff.net.0.proj.lora_B.weight": [12288, 64], - "transformer.transformer_blocks.13.ff.net.2.lora_A.weight": [64, 12288], - "transformer.transformer_blocks.13.ff.net.2.lora_B.weight": [3072, 64], - "transformer.transformer_blocks.13.ff_context.net.0.proj.lora_A.weight": [64, 3072], - "transformer.transformer_blocks.13.ff_context.net.0.proj.lora_B.weight": [12288, 64], - "transformer.transformer_blocks.13.ff_context.net.2.lora_A.weight": [64, 12288], - "transformer.transformer_blocks.13.ff_context.net.2.lora_B.weight": [3072, 64], - "transformer.transformer_blocks.13.norm1.linear.lora_A.weight": [64, 3072], - "transformer.transformer_blocks.13.norm1.linear.lora_B.weight": [18432, 64], - "transformer.transformer_blocks.13.norm1_context.linear.lora_A.weight": [64, 3072], - "transformer.transformer_blocks.13.norm1_context.linear.lora_B.weight": [18432, 64], - "transformer.transformer_blocks.14.attn.add_k_proj.lora_A.weight": [64, 3072], - "transformer.transformer_blocks.14.attn.add_k_proj.lora_B.weight": [3072, 64], - "transformer.transformer_blocks.14.attn.add_q_proj.lora_A.weight": [64, 3072], - "transformer.transformer_blocks.14.attn.add_q_proj.lora_B.weight": [3072, 64], - "transformer.transformer_blocks.14.attn.add_v_proj.lora_A.weight": [64, 3072], - "transformer.transformer_blocks.14.attn.add_v_proj.lora_B.weight": [3072, 64], - "transformer.transformer_blocks.14.attn.to_add_out.lora_A.weight": [64, 3072], - "transformer.transformer_blocks.14.attn.to_add_out.lora_B.weight": [3072, 64], - "transformer.transformer_blocks.14.attn.to_k.lora_A.weight": [64, 3072], - 
"transformer.transformer_blocks.14.attn.to_k.lora_B.weight": [3072, 64], - "transformer.transformer_blocks.14.attn.to_out.0.lora_A.weight": [64, 3072], - "transformer.transformer_blocks.14.attn.to_out.0.lora_B.weight": [3072, 64], - "transformer.transformer_blocks.14.attn.to_q.lora_A.weight": [64, 3072], - "transformer.transformer_blocks.14.attn.to_q.lora_B.weight": [3072, 64], - "transformer.transformer_blocks.14.attn.to_v.lora_A.weight": [64, 3072], - "transformer.transformer_blocks.14.attn.to_v.lora_B.weight": [3072, 64], - "transformer.transformer_blocks.14.ff.net.0.proj.lora_A.weight": [64, 3072], - "transformer.transformer_blocks.14.ff.net.0.proj.lora_B.weight": [12288, 64], - "transformer.transformer_blocks.14.ff.net.2.lora_A.weight": [64, 12288], - "transformer.transformer_blocks.14.ff.net.2.lora_B.weight": [3072, 64], - "transformer.transformer_blocks.14.ff_context.net.0.proj.lora_A.weight": [64, 3072], - "transformer.transformer_blocks.14.ff_context.net.0.proj.lora_B.weight": [12288, 64], - "transformer.transformer_blocks.14.ff_context.net.2.lora_A.weight": [64, 12288], - "transformer.transformer_blocks.14.ff_context.net.2.lora_B.weight": [3072, 64], - "transformer.transformer_blocks.14.norm1.linear.lora_A.weight": [64, 3072], - "transformer.transformer_blocks.14.norm1.linear.lora_B.weight": [18432, 64], - "transformer.transformer_blocks.14.norm1_context.linear.lora_A.weight": [64, 3072], - "transformer.transformer_blocks.14.norm1_context.linear.lora_B.weight": [18432, 64], - "transformer.transformer_blocks.15.attn.add_k_proj.lora_A.weight": [64, 3072], - "transformer.transformer_blocks.15.attn.add_k_proj.lora_B.weight": [3072, 64], - "transformer.transformer_blocks.15.attn.add_q_proj.lora_A.weight": [64, 3072], - "transformer.transformer_blocks.15.attn.add_q_proj.lora_B.weight": [3072, 64], - "transformer.transformer_blocks.15.attn.add_v_proj.lora_A.weight": [64, 3072], - "transformer.transformer_blocks.15.attn.add_v_proj.lora_B.weight": [3072, 64], - "transformer.transformer_blocks.15.attn.to_add_out.lora_A.weight": [64, 3072], - "transformer.transformer_blocks.15.attn.to_add_out.lora_B.weight": [3072, 64], - "transformer.transformer_blocks.15.attn.to_k.lora_A.weight": [64, 3072], - "transformer.transformer_blocks.15.attn.to_k.lora_B.weight": [3072, 64], - "transformer.transformer_blocks.15.attn.to_out.0.lora_A.weight": [64, 3072], - "transformer.transformer_blocks.15.attn.to_out.0.lora_B.weight": [3072, 64], - "transformer.transformer_blocks.15.attn.to_q.lora_A.weight": [64, 3072], - "transformer.transformer_blocks.15.attn.to_q.lora_B.weight": [3072, 64], - "transformer.transformer_blocks.15.attn.to_v.lora_A.weight": [64, 3072], - "transformer.transformer_blocks.15.attn.to_v.lora_B.weight": [3072, 64], - "transformer.transformer_blocks.15.ff.net.0.proj.lora_A.weight": [64, 3072], - "transformer.transformer_blocks.15.ff.net.0.proj.lora_B.weight": [12288, 64], - "transformer.transformer_blocks.15.ff.net.2.lora_A.weight": [64, 12288], - "transformer.transformer_blocks.15.ff.net.2.lora_B.weight": [3072, 64], - "transformer.transformer_blocks.15.ff_context.net.0.proj.lora_A.weight": [64, 3072], - "transformer.transformer_blocks.15.ff_context.net.0.proj.lora_B.weight": [12288, 64], - "transformer.transformer_blocks.15.ff_context.net.2.lora_A.weight": [64, 12288], - "transformer.transformer_blocks.15.ff_context.net.2.lora_B.weight": [3072, 64], - "transformer.transformer_blocks.15.norm1.linear.lora_A.weight": [64, 3072], - 
"transformer.transformer_blocks.15.norm1.linear.lora_B.weight": [18432, 64], - "transformer.transformer_blocks.15.norm1_context.linear.lora_A.weight": [64, 3072], - "transformer.transformer_blocks.15.norm1_context.linear.lora_B.weight": [18432, 64], - "transformer.transformer_blocks.16.attn.add_k_proj.lora_A.weight": [64, 3072], - "transformer.transformer_blocks.16.attn.add_k_proj.lora_B.weight": [3072, 64], - "transformer.transformer_blocks.16.attn.add_q_proj.lora_A.weight": [64, 3072], - "transformer.transformer_blocks.16.attn.add_q_proj.lora_B.weight": [3072, 64], - "transformer.transformer_blocks.16.attn.add_v_proj.lora_A.weight": [64, 3072], - "transformer.transformer_blocks.16.attn.add_v_proj.lora_B.weight": [3072, 64], - "transformer.transformer_blocks.16.attn.to_add_out.lora_A.weight": [64, 3072], - "transformer.transformer_blocks.16.attn.to_add_out.lora_B.weight": [3072, 64], - "transformer.transformer_blocks.16.attn.to_k.lora_A.weight": [64, 3072], - "transformer.transformer_blocks.16.attn.to_k.lora_B.weight": [3072, 64], - "transformer.transformer_blocks.16.attn.to_out.0.lora_A.weight": [64, 3072], - "transformer.transformer_blocks.16.attn.to_out.0.lora_B.weight": [3072, 64], - "transformer.transformer_blocks.16.attn.to_q.lora_A.weight": [64, 3072], - "transformer.transformer_blocks.16.attn.to_q.lora_B.weight": [3072, 64], - "transformer.transformer_blocks.16.attn.to_v.lora_A.weight": [64, 3072], - "transformer.transformer_blocks.16.attn.to_v.lora_B.weight": [3072, 64], - "transformer.transformer_blocks.16.ff.net.0.proj.lora_A.weight": [64, 3072], - "transformer.transformer_blocks.16.ff.net.0.proj.lora_B.weight": [12288, 64], - "transformer.transformer_blocks.16.ff.net.2.lora_A.weight": [64, 12288], - "transformer.transformer_blocks.16.ff.net.2.lora_B.weight": [3072, 64], - "transformer.transformer_blocks.16.ff_context.net.0.proj.lora_A.weight": [64, 3072], - "transformer.transformer_blocks.16.ff_context.net.0.proj.lora_B.weight": [12288, 64], - "transformer.transformer_blocks.16.ff_context.net.2.lora_A.weight": [64, 12288], - "transformer.transformer_blocks.16.ff_context.net.2.lora_B.weight": [3072, 64], - "transformer.transformer_blocks.16.norm1.linear.lora_A.weight": [64, 3072], - "transformer.transformer_blocks.16.norm1.linear.lora_B.weight": [18432, 64], - "transformer.transformer_blocks.16.norm1_context.linear.lora_A.weight": [64, 3072], - "transformer.transformer_blocks.16.norm1_context.linear.lora_B.weight": [18432, 64], - "transformer.transformer_blocks.17.attn.add_k_proj.lora_A.weight": [64, 3072], - "transformer.transformer_blocks.17.attn.add_k_proj.lora_B.weight": [3072, 64], - "transformer.transformer_blocks.17.attn.add_q_proj.lora_A.weight": [64, 3072], - "transformer.transformer_blocks.17.attn.add_q_proj.lora_B.weight": [3072, 64], - "transformer.transformer_blocks.17.attn.add_v_proj.lora_A.weight": [64, 3072], - "transformer.transformer_blocks.17.attn.add_v_proj.lora_B.weight": [3072, 64], - "transformer.transformer_blocks.17.attn.to_add_out.lora_A.weight": [64, 3072], - "transformer.transformer_blocks.17.attn.to_add_out.lora_B.weight": [3072, 64], - "transformer.transformer_blocks.17.attn.to_k.lora_A.weight": [64, 3072], - "transformer.transformer_blocks.17.attn.to_k.lora_B.weight": [3072, 64], - "transformer.transformer_blocks.17.attn.to_out.0.lora_A.weight": [64, 3072], - "transformer.transformer_blocks.17.attn.to_out.0.lora_B.weight": [3072, 64], - "transformer.transformer_blocks.17.attn.to_q.lora_A.weight": [64, 3072], - 
"transformer.transformer_blocks.17.attn.to_q.lora_B.weight": [3072, 64], - "transformer.transformer_blocks.17.attn.to_v.lora_A.weight": [64, 3072], - "transformer.transformer_blocks.17.attn.to_v.lora_B.weight": [3072, 64], - "transformer.transformer_blocks.17.ff.net.0.proj.lora_A.weight": [64, 3072], - "transformer.transformer_blocks.17.ff.net.0.proj.lora_B.weight": [12288, 64], - "transformer.transformer_blocks.17.ff.net.2.lora_A.weight": [64, 12288], - "transformer.transformer_blocks.17.ff.net.2.lora_B.weight": [3072, 64], - "transformer.transformer_blocks.17.ff_context.net.0.proj.lora_A.weight": [64, 3072], - "transformer.transformer_blocks.17.ff_context.net.0.proj.lora_B.weight": [12288, 64], - "transformer.transformer_blocks.17.ff_context.net.2.lora_A.weight": [64, 12288], - "transformer.transformer_blocks.17.ff_context.net.2.lora_B.weight": [3072, 64], - "transformer.transformer_blocks.17.norm1.linear.lora_A.weight": [64, 3072], - "transformer.transformer_blocks.17.norm1.linear.lora_B.weight": [18432, 64], - "transformer.transformer_blocks.17.norm1_context.linear.lora_A.weight": [64, 3072], - "transformer.transformer_blocks.17.norm1_context.linear.lora_B.weight": [18432, 64], - "transformer.transformer_blocks.18.attn.add_k_proj.lora_A.weight": [64, 3072], - "transformer.transformer_blocks.18.attn.add_k_proj.lora_B.weight": [3072, 64], - "transformer.transformer_blocks.18.attn.add_q_proj.lora_A.weight": [64, 3072], - "transformer.transformer_blocks.18.attn.add_q_proj.lora_B.weight": [3072, 64], - "transformer.transformer_blocks.18.attn.add_v_proj.lora_A.weight": [64, 3072], - "transformer.transformer_blocks.18.attn.add_v_proj.lora_B.weight": [3072, 64], - "transformer.transformer_blocks.18.attn.to_add_out.lora_A.weight": [64, 3072], - "transformer.transformer_blocks.18.attn.to_add_out.lora_B.weight": [3072, 64], - "transformer.transformer_blocks.18.attn.to_k.lora_A.weight": [64, 3072], - "transformer.transformer_blocks.18.attn.to_k.lora_B.weight": [3072, 64], - "transformer.transformer_blocks.18.attn.to_out.0.lora_A.weight": [64, 3072], - "transformer.transformer_blocks.18.attn.to_out.0.lora_B.weight": [3072, 64], - "transformer.transformer_blocks.18.attn.to_q.lora_A.weight": [64, 3072], - "transformer.transformer_blocks.18.attn.to_q.lora_B.weight": [3072, 64], - "transformer.transformer_blocks.18.attn.to_v.lora_A.weight": [64, 3072], - "transformer.transformer_blocks.18.attn.to_v.lora_B.weight": [3072, 64], - "transformer.transformer_blocks.18.ff.net.0.proj.lora_A.weight": [64, 3072], - "transformer.transformer_blocks.18.ff.net.0.proj.lora_B.weight": [12288, 64], - "transformer.transformer_blocks.18.ff.net.2.lora_A.weight": [64, 12288], - "transformer.transformer_blocks.18.ff.net.2.lora_B.weight": [3072, 64], - "transformer.transformer_blocks.18.ff_context.net.0.proj.lora_A.weight": [64, 3072], - "transformer.transformer_blocks.18.ff_context.net.0.proj.lora_B.weight": [12288, 64], - "transformer.transformer_blocks.18.ff_context.net.2.lora_A.weight": [64, 12288], - "transformer.transformer_blocks.18.ff_context.net.2.lora_B.weight": [3072, 64], - "transformer.transformer_blocks.18.norm1.linear.lora_A.weight": [64, 3072], - "transformer.transformer_blocks.18.norm1.linear.lora_B.weight": [18432, 64], - "transformer.transformer_blocks.18.norm1_context.linear.lora_A.weight": [64, 3072], - "transformer.transformer_blocks.18.norm1_context.linear.lora_B.weight": [18432, 64], - "transformer.transformer_blocks.2.attn.add_k_proj.lora_A.weight": [64, 3072], - 
"transformer.transformer_blocks.2.attn.add_k_proj.lora_B.weight": [3072, 64], - "transformer.transformer_blocks.2.attn.add_q_proj.lora_A.weight": [64, 3072], - "transformer.transformer_blocks.2.attn.add_q_proj.lora_B.weight": [3072, 64], - "transformer.transformer_blocks.2.attn.add_v_proj.lora_A.weight": [64, 3072], - "transformer.transformer_blocks.2.attn.add_v_proj.lora_B.weight": [3072, 64], - "transformer.transformer_blocks.2.attn.to_add_out.lora_A.weight": [64, 3072], - "transformer.transformer_blocks.2.attn.to_add_out.lora_B.weight": [3072, 64], - "transformer.transformer_blocks.2.attn.to_k.lora_A.weight": [64, 3072], - "transformer.transformer_blocks.2.attn.to_k.lora_B.weight": [3072, 64], - "transformer.transformer_blocks.2.attn.to_out.0.lora_A.weight": [64, 3072], - "transformer.transformer_blocks.2.attn.to_out.0.lora_B.weight": [3072, 64], - "transformer.transformer_blocks.2.attn.to_q.lora_A.weight": [64, 3072], - "transformer.transformer_blocks.2.attn.to_q.lora_B.weight": [3072, 64], - "transformer.transformer_blocks.2.attn.to_v.lora_A.weight": [64, 3072], - "transformer.transformer_blocks.2.attn.to_v.lora_B.weight": [3072, 64], - "transformer.transformer_blocks.2.ff.net.0.proj.lora_A.weight": [64, 3072], - "transformer.transformer_blocks.2.ff.net.0.proj.lora_B.weight": [12288, 64], - "transformer.transformer_blocks.2.ff.net.2.lora_A.weight": [64, 12288], - "transformer.transformer_blocks.2.ff.net.2.lora_B.weight": [3072, 64], - "transformer.transformer_blocks.2.ff_context.net.0.proj.lora_A.weight": [64, 3072], - "transformer.transformer_blocks.2.ff_context.net.0.proj.lora_B.weight": [12288, 64], - "transformer.transformer_blocks.2.ff_context.net.2.lora_A.weight": [64, 12288], - "transformer.transformer_blocks.2.ff_context.net.2.lora_B.weight": [3072, 64], - "transformer.transformer_blocks.2.norm1.linear.lora_A.weight": [64, 3072], - "transformer.transformer_blocks.2.norm1.linear.lora_B.weight": [18432, 64], - "transformer.transformer_blocks.2.norm1_context.linear.lora_A.weight": [64, 3072], - "transformer.transformer_blocks.2.norm1_context.linear.lora_B.weight": [18432, 64], - "transformer.transformer_blocks.3.attn.add_k_proj.lora_A.weight": [64, 3072], - "transformer.transformer_blocks.3.attn.add_k_proj.lora_B.weight": [3072, 64], - "transformer.transformer_blocks.3.attn.add_q_proj.lora_A.weight": [64, 3072], - "transformer.transformer_blocks.3.attn.add_q_proj.lora_B.weight": [3072, 64], - "transformer.transformer_blocks.3.attn.add_v_proj.lora_A.weight": [64, 3072], - "transformer.transformer_blocks.3.attn.add_v_proj.lora_B.weight": [3072, 64], - "transformer.transformer_blocks.3.attn.to_add_out.lora_A.weight": [64, 3072], - "transformer.transformer_blocks.3.attn.to_add_out.lora_B.weight": [3072, 64], - "transformer.transformer_blocks.3.attn.to_k.lora_A.weight": [64, 3072], - "transformer.transformer_blocks.3.attn.to_k.lora_B.weight": [3072, 64], - "transformer.transformer_blocks.3.attn.to_out.0.lora_A.weight": [64, 3072], - "transformer.transformer_blocks.3.attn.to_out.0.lora_B.weight": [3072, 64], - "transformer.transformer_blocks.3.attn.to_q.lora_A.weight": [64, 3072], - "transformer.transformer_blocks.3.attn.to_q.lora_B.weight": [3072, 64], - "transformer.transformer_blocks.3.attn.to_v.lora_A.weight": [64, 3072], - "transformer.transformer_blocks.3.attn.to_v.lora_B.weight": [3072, 64], - "transformer.transformer_blocks.3.ff.net.0.proj.lora_A.weight": [64, 3072], - "transformer.transformer_blocks.3.ff.net.0.proj.lora_B.weight": [12288, 64], - 
"transformer.transformer_blocks.3.ff.net.2.lora_A.weight": [64, 12288], - "transformer.transformer_blocks.3.ff.net.2.lora_B.weight": [3072, 64], - "transformer.transformer_blocks.3.ff_context.net.0.proj.lora_A.weight": [64, 3072], - "transformer.transformer_blocks.3.ff_context.net.0.proj.lora_B.weight": [12288, 64], - "transformer.transformer_blocks.3.ff_context.net.2.lora_A.weight": [64, 12288], - "transformer.transformer_blocks.3.ff_context.net.2.lora_B.weight": [3072, 64], - "transformer.transformer_blocks.3.norm1.linear.lora_A.weight": [64, 3072], - "transformer.transformer_blocks.3.norm1.linear.lora_B.weight": [18432, 64], - "transformer.transformer_blocks.3.norm1_context.linear.lora_A.weight": [64, 3072], - "transformer.transformer_blocks.3.norm1_context.linear.lora_B.weight": [18432, 64], - "transformer.transformer_blocks.4.attn.add_k_proj.lora_A.weight": [64, 3072], - "transformer.transformer_blocks.4.attn.add_k_proj.lora_B.weight": [3072, 64], - "transformer.transformer_blocks.4.attn.add_q_proj.lora_A.weight": [64, 3072], - "transformer.transformer_blocks.4.attn.add_q_proj.lora_B.weight": [3072, 64], - "transformer.transformer_blocks.4.attn.add_v_proj.lora_A.weight": [64, 3072], - "transformer.transformer_blocks.4.attn.add_v_proj.lora_B.weight": [3072, 64], - "transformer.transformer_blocks.4.attn.to_add_out.lora_A.weight": [64, 3072], - "transformer.transformer_blocks.4.attn.to_add_out.lora_B.weight": [3072, 64], - "transformer.transformer_blocks.4.attn.to_k.lora_A.weight": [64, 3072], - "transformer.transformer_blocks.4.attn.to_k.lora_B.weight": [3072, 64], - "transformer.transformer_blocks.4.attn.to_out.0.lora_A.weight": [64, 3072], - "transformer.transformer_blocks.4.attn.to_out.0.lora_B.weight": [3072, 64], - "transformer.transformer_blocks.4.attn.to_q.lora_A.weight": [64, 3072], - "transformer.transformer_blocks.4.attn.to_q.lora_B.weight": [3072, 64], - "transformer.transformer_blocks.4.attn.to_v.lora_A.weight": [64, 3072], - "transformer.transformer_blocks.4.attn.to_v.lora_B.weight": [3072, 64], - "transformer.transformer_blocks.4.ff.net.0.proj.lora_A.weight": [64, 3072], - "transformer.transformer_blocks.4.ff.net.0.proj.lora_B.weight": [12288, 64], - "transformer.transformer_blocks.4.ff.net.2.lora_A.weight": [64, 12288], - "transformer.transformer_blocks.4.ff.net.2.lora_B.weight": [3072, 64], - "transformer.transformer_blocks.4.ff_context.net.0.proj.lora_A.weight": [64, 3072], - "transformer.transformer_blocks.4.ff_context.net.0.proj.lora_B.weight": [12288, 64], - "transformer.transformer_blocks.4.ff_context.net.2.lora_A.weight": [64, 12288], - "transformer.transformer_blocks.4.ff_context.net.2.lora_B.weight": [3072, 64], - "transformer.transformer_blocks.4.norm1.linear.lora_A.weight": [64, 3072], - "transformer.transformer_blocks.4.norm1.linear.lora_B.weight": [18432, 64], - "transformer.transformer_blocks.4.norm1_context.linear.lora_A.weight": [64, 3072], - "transformer.transformer_blocks.4.norm1_context.linear.lora_B.weight": [18432, 64], - "transformer.transformer_blocks.5.attn.add_k_proj.lora_A.weight": [64, 3072], - "transformer.transformer_blocks.5.attn.add_k_proj.lora_B.weight": [3072, 64], - "transformer.transformer_blocks.5.attn.add_q_proj.lora_A.weight": [64, 3072], - "transformer.transformer_blocks.5.attn.add_q_proj.lora_B.weight": [3072, 64], - "transformer.transformer_blocks.5.attn.add_v_proj.lora_A.weight": [64, 3072], - "transformer.transformer_blocks.5.attn.add_v_proj.lora_B.weight": [3072, 64], - 
"transformer.transformer_blocks.5.attn.to_add_out.lora_A.weight": [64, 3072], - "transformer.transformer_blocks.5.attn.to_add_out.lora_B.weight": [3072, 64], - "transformer.transformer_blocks.5.attn.to_k.lora_A.weight": [64, 3072], - "transformer.transformer_blocks.5.attn.to_k.lora_B.weight": [3072, 64], - "transformer.transformer_blocks.5.attn.to_out.0.lora_A.weight": [64, 3072], - "transformer.transformer_blocks.5.attn.to_out.0.lora_B.weight": [3072, 64], - "transformer.transformer_blocks.5.attn.to_q.lora_A.weight": [64, 3072], - "transformer.transformer_blocks.5.attn.to_q.lora_B.weight": [3072, 64], - "transformer.transformer_blocks.5.attn.to_v.lora_A.weight": [64, 3072], - "transformer.transformer_blocks.5.attn.to_v.lora_B.weight": [3072, 64], - "transformer.transformer_blocks.5.ff.net.0.proj.lora_A.weight": [64, 3072], - "transformer.transformer_blocks.5.ff.net.0.proj.lora_B.weight": [12288, 64], - "transformer.transformer_blocks.5.ff.net.2.lora_A.weight": [64, 12288], - "transformer.transformer_blocks.5.ff.net.2.lora_B.weight": [3072, 64], - "transformer.transformer_blocks.5.ff_context.net.0.proj.lora_A.weight": [64, 3072], - "transformer.transformer_blocks.5.ff_context.net.0.proj.lora_B.weight": [12288, 64], - "transformer.transformer_blocks.5.ff_context.net.2.lora_A.weight": [64, 12288], - "transformer.transformer_blocks.5.ff_context.net.2.lora_B.weight": [3072, 64], - "transformer.transformer_blocks.5.norm1.linear.lora_A.weight": [64, 3072], - "transformer.transformer_blocks.5.norm1.linear.lora_B.weight": [18432, 64], - "transformer.transformer_blocks.5.norm1_context.linear.lora_A.weight": [64, 3072], - "transformer.transformer_blocks.5.norm1_context.linear.lora_B.weight": [18432, 64], - "transformer.transformer_blocks.6.attn.add_k_proj.lora_A.weight": [64, 3072], - "transformer.transformer_blocks.6.attn.add_k_proj.lora_B.weight": [3072, 64], - "transformer.transformer_blocks.6.attn.add_q_proj.lora_A.weight": [64, 3072], - "transformer.transformer_blocks.6.attn.add_q_proj.lora_B.weight": [3072, 64], - "transformer.transformer_blocks.6.attn.add_v_proj.lora_A.weight": [64, 3072], - "transformer.transformer_blocks.6.attn.add_v_proj.lora_B.weight": [3072, 64], - "transformer.transformer_blocks.6.attn.to_add_out.lora_A.weight": [64, 3072], - "transformer.transformer_blocks.6.attn.to_add_out.lora_B.weight": [3072, 64], - "transformer.transformer_blocks.6.attn.to_k.lora_A.weight": [64, 3072], - "transformer.transformer_blocks.6.attn.to_k.lora_B.weight": [3072, 64], - "transformer.transformer_blocks.6.attn.to_out.0.lora_A.weight": [64, 3072], - "transformer.transformer_blocks.6.attn.to_out.0.lora_B.weight": [3072, 64], - "transformer.transformer_blocks.6.attn.to_q.lora_A.weight": [64, 3072], - "transformer.transformer_blocks.6.attn.to_q.lora_B.weight": [3072, 64], - "transformer.transformer_blocks.6.attn.to_v.lora_A.weight": [64, 3072], - "transformer.transformer_blocks.6.attn.to_v.lora_B.weight": [3072, 64], - "transformer.transformer_blocks.6.ff.net.0.proj.lora_A.weight": [64, 3072], - "transformer.transformer_blocks.6.ff.net.0.proj.lora_B.weight": [12288, 64], - "transformer.transformer_blocks.6.ff.net.2.lora_A.weight": [64, 12288], - "transformer.transformer_blocks.6.ff.net.2.lora_B.weight": [3072, 64], - "transformer.transformer_blocks.6.ff_context.net.0.proj.lora_A.weight": [64, 3072], - "transformer.transformer_blocks.6.ff_context.net.0.proj.lora_B.weight": [12288, 64], - "transformer.transformer_blocks.6.ff_context.net.2.lora_A.weight": [64, 12288], - 
"transformer.transformer_blocks.6.ff_context.net.2.lora_B.weight": [3072, 64], - "transformer.transformer_blocks.6.norm1.linear.lora_A.weight": [64, 3072], - "transformer.transformer_blocks.6.norm1.linear.lora_B.weight": [18432, 64], - "transformer.transformer_blocks.6.norm1_context.linear.lora_A.weight": [64, 3072], - "transformer.transformer_blocks.6.norm1_context.linear.lora_B.weight": [18432, 64], - "transformer.transformer_blocks.7.attn.add_k_proj.lora_A.weight": [64, 3072], - "transformer.transformer_blocks.7.attn.add_k_proj.lora_B.weight": [3072, 64], - "transformer.transformer_blocks.7.attn.add_q_proj.lora_A.weight": [64, 3072], - "transformer.transformer_blocks.7.attn.add_q_proj.lora_B.weight": [3072, 64], - "transformer.transformer_blocks.7.attn.add_v_proj.lora_A.weight": [64, 3072], - "transformer.transformer_blocks.7.attn.add_v_proj.lora_B.weight": [3072, 64], - "transformer.transformer_blocks.7.attn.to_add_out.lora_A.weight": [64, 3072], - "transformer.transformer_blocks.7.attn.to_add_out.lora_B.weight": [3072, 64], - "transformer.transformer_blocks.7.attn.to_k.lora_A.weight": [64, 3072], - "transformer.transformer_blocks.7.attn.to_k.lora_B.weight": [3072, 64], - "transformer.transformer_blocks.7.attn.to_out.0.lora_A.weight": [64, 3072], - "transformer.transformer_blocks.7.attn.to_out.0.lora_B.weight": [3072, 64], - "transformer.transformer_blocks.7.attn.to_q.lora_A.weight": [64, 3072], - "transformer.transformer_blocks.7.attn.to_q.lora_B.weight": [3072, 64], - "transformer.transformer_blocks.7.attn.to_v.lora_A.weight": [64, 3072], - "transformer.transformer_blocks.7.attn.to_v.lora_B.weight": [3072, 64], - "transformer.transformer_blocks.7.ff.net.0.proj.lora_A.weight": [64, 3072], - "transformer.transformer_blocks.7.ff.net.0.proj.lora_B.weight": [12288, 64], - "transformer.transformer_blocks.7.ff.net.2.lora_A.weight": [64, 12288], - "transformer.transformer_blocks.7.ff.net.2.lora_B.weight": [3072, 64], - "transformer.transformer_blocks.7.ff_context.net.0.proj.lora_A.weight": [64, 3072], - "transformer.transformer_blocks.7.ff_context.net.0.proj.lora_B.weight": [12288, 64], - "transformer.transformer_blocks.7.ff_context.net.2.lora_A.weight": [64, 12288], - "transformer.transformer_blocks.7.ff_context.net.2.lora_B.weight": [3072, 64], - "transformer.transformer_blocks.7.norm1.linear.lora_A.weight": [64, 3072], - "transformer.transformer_blocks.7.norm1.linear.lora_B.weight": [18432, 64], - "transformer.transformer_blocks.7.norm1_context.linear.lora_A.weight": [64, 3072], - "transformer.transformer_blocks.7.norm1_context.linear.lora_B.weight": [18432, 64], - "transformer.transformer_blocks.8.attn.add_k_proj.lora_A.weight": [64, 3072], - "transformer.transformer_blocks.8.attn.add_k_proj.lora_B.weight": [3072, 64], - "transformer.transformer_blocks.8.attn.add_q_proj.lora_A.weight": [64, 3072], - "transformer.transformer_blocks.8.attn.add_q_proj.lora_B.weight": [3072, 64], - "transformer.transformer_blocks.8.attn.add_v_proj.lora_A.weight": [64, 3072], - "transformer.transformer_blocks.8.attn.add_v_proj.lora_B.weight": [3072, 64], - "transformer.transformer_blocks.8.attn.to_add_out.lora_A.weight": [64, 3072], - "transformer.transformer_blocks.8.attn.to_add_out.lora_B.weight": [3072, 64], - "transformer.transformer_blocks.8.attn.to_k.lora_A.weight": [64, 3072], - "transformer.transformer_blocks.8.attn.to_k.lora_B.weight": [3072, 64], - "transformer.transformer_blocks.8.attn.to_out.0.lora_A.weight": [64, 3072], - "transformer.transformer_blocks.8.attn.to_out.0.lora_B.weight": [3072, 64], 
- "transformer.transformer_blocks.8.attn.to_q.lora_A.weight": [64, 3072], - "transformer.transformer_blocks.8.attn.to_q.lora_B.weight": [3072, 64], - "transformer.transformer_blocks.8.attn.to_v.lora_A.weight": [64, 3072], - "transformer.transformer_blocks.8.attn.to_v.lora_B.weight": [3072, 64], - "transformer.transformer_blocks.8.ff.net.0.proj.lora_A.weight": [64, 3072], - "transformer.transformer_blocks.8.ff.net.0.proj.lora_B.weight": [12288, 64], - "transformer.transformer_blocks.8.ff.net.2.lora_A.weight": [64, 12288], - "transformer.transformer_blocks.8.ff.net.2.lora_B.weight": [3072, 64], - "transformer.transformer_blocks.8.ff_context.net.0.proj.lora_A.weight": [64, 3072], - "transformer.transformer_blocks.8.ff_context.net.0.proj.lora_B.weight": [12288, 64], - "transformer.transformer_blocks.8.ff_context.net.2.lora_A.weight": [64, 12288], - "transformer.transformer_blocks.8.ff_context.net.2.lora_B.weight": [3072, 64], - "transformer.transformer_blocks.8.norm1.linear.lora_A.weight": [64, 3072], - "transformer.transformer_blocks.8.norm1.linear.lora_B.weight": [18432, 64], - "transformer.transformer_blocks.8.norm1_context.linear.lora_A.weight": [64, 3072], - "transformer.transformer_blocks.8.norm1_context.linear.lora_B.weight": [18432, 64], - "transformer.transformer_blocks.9.attn.add_k_proj.lora_A.weight": [64, 3072], - "transformer.transformer_blocks.9.attn.add_k_proj.lora_B.weight": [3072, 64], - "transformer.transformer_blocks.9.attn.add_q_proj.lora_A.weight": [64, 3072], - "transformer.transformer_blocks.9.attn.add_q_proj.lora_B.weight": [3072, 64], - "transformer.transformer_blocks.9.attn.add_v_proj.lora_A.weight": [64, 3072], - "transformer.transformer_blocks.9.attn.add_v_proj.lora_B.weight": [3072, 64], - "transformer.transformer_blocks.9.attn.to_add_out.lora_A.weight": [64, 3072], - "transformer.transformer_blocks.9.attn.to_add_out.lora_B.weight": [3072, 64], - "transformer.transformer_blocks.9.attn.to_k.lora_A.weight": [64, 3072], - "transformer.transformer_blocks.9.attn.to_k.lora_B.weight": [3072, 64], - "transformer.transformer_blocks.9.attn.to_out.0.lora_A.weight": [64, 3072], - "transformer.transformer_blocks.9.attn.to_out.0.lora_B.weight": [3072, 64], - "transformer.transformer_blocks.9.attn.to_q.lora_A.weight": [64, 3072], - "transformer.transformer_blocks.9.attn.to_q.lora_B.weight": [3072, 64], - "transformer.transformer_blocks.9.attn.to_v.lora_A.weight": [64, 3072], - "transformer.transformer_blocks.9.attn.to_v.lora_B.weight": [3072, 64], - "transformer.transformer_blocks.9.ff.net.0.proj.lora_A.weight": [64, 3072], - "transformer.transformer_blocks.9.ff.net.0.proj.lora_B.weight": [12288, 64], - "transformer.transformer_blocks.9.ff.net.2.lora_A.weight": [64, 12288], - "transformer.transformer_blocks.9.ff.net.2.lora_B.weight": [3072, 64], - "transformer.transformer_blocks.9.ff_context.net.0.proj.lora_A.weight": [64, 3072], - "transformer.transformer_blocks.9.ff_context.net.0.proj.lora_B.weight": [12288, 64], - "transformer.transformer_blocks.9.ff_context.net.2.lora_A.weight": [64, 12288], - "transformer.transformer_blocks.9.ff_context.net.2.lora_B.weight": [3072, 64], - "transformer.transformer_blocks.9.norm1.linear.lora_A.weight": [64, 3072], - "transformer.transformer_blocks.9.norm1.linear.lora_B.weight": [18432, 64], - "transformer.transformer_blocks.9.norm1_context.linear.lora_A.weight": [64, 3072], - "transformer.transformer_blocks.9.norm1_context.linear.lora_B.weight": [18432, 64], - "transformer.x_embedder.lora_A.weight": [64, 64], - 
"transformer.x_embedder.lora_B.weight": [3072, 64] } \ No newline at end of file +state_dict_keys = { + "transformer.context_embedder.lora_A.weight": [64, 4096], + "transformer.context_embedder.lora_B.weight": [3072, 64], + "transformer.norm_out.linear.lora_A.weight": [64, 3072], + "transformer.norm_out.linear.lora_B.weight": [6144, 64], + "transformer.proj_out.lora_A.weight": [64, 3072], + "transformer.proj_out.lora_B.weight": [64, 64], + "transformer.single_transformer_blocks.0.attn.to_k.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.0.attn.to_k.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.0.attn.to_q.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.0.attn.to_q.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.0.attn.to_v.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.0.attn.to_v.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.0.norm.linear.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.0.norm.linear.lora_B.weight": [9216, 64], + "transformer.single_transformer_blocks.0.proj_mlp.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.0.proj_mlp.lora_B.weight": [12288, 64], + "transformer.single_transformer_blocks.0.proj_out.lora_A.weight": [64, 15360], + "transformer.single_transformer_blocks.0.proj_out.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.1.attn.to_k.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.1.attn.to_k.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.1.attn.to_q.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.1.attn.to_q.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.1.attn.to_v.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.1.attn.to_v.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.1.norm.linear.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.1.norm.linear.lora_B.weight": [9216, 64], + "transformer.single_transformer_blocks.1.proj_mlp.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.1.proj_mlp.lora_B.weight": [12288, 64], + "transformer.single_transformer_blocks.1.proj_out.lora_A.weight": [64, 15360], + "transformer.single_transformer_blocks.1.proj_out.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.10.attn.to_k.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.10.attn.to_k.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.10.attn.to_q.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.10.attn.to_q.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.10.attn.to_v.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.10.attn.to_v.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.10.norm.linear.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.10.norm.linear.lora_B.weight": [9216, 64], + "transformer.single_transformer_blocks.10.proj_mlp.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.10.proj_mlp.lora_B.weight": [12288, 64], + "transformer.single_transformer_blocks.10.proj_out.lora_A.weight": [64, 15360], + "transformer.single_transformer_blocks.10.proj_out.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.11.attn.to_k.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.11.attn.to_k.lora_B.weight": [3072, 64], + 
"transformer.single_transformer_blocks.11.attn.to_q.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.11.attn.to_q.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.11.attn.to_v.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.11.attn.to_v.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.11.norm.linear.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.11.norm.linear.lora_B.weight": [9216, 64], + "transformer.single_transformer_blocks.11.proj_mlp.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.11.proj_mlp.lora_B.weight": [12288, 64], + "transformer.single_transformer_blocks.11.proj_out.lora_A.weight": [64, 15360], + "transformer.single_transformer_blocks.11.proj_out.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.12.attn.to_k.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.12.attn.to_k.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.12.attn.to_q.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.12.attn.to_q.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.12.attn.to_v.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.12.attn.to_v.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.12.norm.linear.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.12.norm.linear.lora_B.weight": [9216, 64], + "transformer.single_transformer_blocks.12.proj_mlp.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.12.proj_mlp.lora_B.weight": [12288, 64], + "transformer.single_transformer_blocks.12.proj_out.lora_A.weight": [64, 15360], + "transformer.single_transformer_blocks.12.proj_out.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.13.attn.to_k.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.13.attn.to_k.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.13.attn.to_q.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.13.attn.to_q.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.13.attn.to_v.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.13.attn.to_v.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.13.norm.linear.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.13.norm.linear.lora_B.weight": [9216, 64], + "transformer.single_transformer_blocks.13.proj_mlp.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.13.proj_mlp.lora_B.weight": [12288, 64], + "transformer.single_transformer_blocks.13.proj_out.lora_A.weight": [64, 15360], + "transformer.single_transformer_blocks.13.proj_out.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.14.attn.to_k.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.14.attn.to_k.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.14.attn.to_q.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.14.attn.to_q.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.14.attn.to_v.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.14.attn.to_v.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.14.norm.linear.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.14.norm.linear.lora_B.weight": [9216, 64], + "transformer.single_transformer_blocks.14.proj_mlp.lora_A.weight": [64, 3072], + 
"transformer.single_transformer_blocks.14.proj_mlp.lora_B.weight": [12288, 64], + "transformer.single_transformer_blocks.14.proj_out.lora_A.weight": [64, 15360], + "transformer.single_transformer_blocks.14.proj_out.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.15.attn.to_k.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.15.attn.to_k.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.15.attn.to_q.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.15.attn.to_q.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.15.attn.to_v.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.15.attn.to_v.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.15.norm.linear.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.15.norm.linear.lora_B.weight": [9216, 64], + "transformer.single_transformer_blocks.15.proj_mlp.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.15.proj_mlp.lora_B.weight": [12288, 64], + "transformer.single_transformer_blocks.15.proj_out.lora_A.weight": [64, 15360], + "transformer.single_transformer_blocks.15.proj_out.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.16.attn.to_k.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.16.attn.to_k.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.16.attn.to_q.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.16.attn.to_q.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.16.attn.to_v.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.16.attn.to_v.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.16.norm.linear.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.16.norm.linear.lora_B.weight": [9216, 64], + "transformer.single_transformer_blocks.16.proj_mlp.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.16.proj_mlp.lora_B.weight": [12288, 64], + "transformer.single_transformer_blocks.16.proj_out.lora_A.weight": [64, 15360], + "transformer.single_transformer_blocks.16.proj_out.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.17.attn.to_k.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.17.attn.to_k.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.17.attn.to_q.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.17.attn.to_q.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.17.attn.to_v.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.17.attn.to_v.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.17.norm.linear.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.17.norm.linear.lora_B.weight": [9216, 64], + "transformer.single_transformer_blocks.17.proj_mlp.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.17.proj_mlp.lora_B.weight": [12288, 64], + "transformer.single_transformer_blocks.17.proj_out.lora_A.weight": [64, 15360], + "transformer.single_transformer_blocks.17.proj_out.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.18.attn.to_k.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.18.attn.to_k.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.18.attn.to_q.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.18.attn.to_q.lora_B.weight": [3072, 64], + 
"transformer.single_transformer_blocks.18.attn.to_v.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.18.attn.to_v.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.18.norm.linear.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.18.norm.linear.lora_B.weight": [9216, 64], + "transformer.single_transformer_blocks.18.proj_mlp.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.18.proj_mlp.lora_B.weight": [12288, 64], + "transformer.single_transformer_blocks.18.proj_out.lora_A.weight": [64, 15360], + "transformer.single_transformer_blocks.18.proj_out.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.19.attn.to_k.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.19.attn.to_k.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.19.attn.to_q.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.19.attn.to_q.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.19.attn.to_v.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.19.attn.to_v.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.19.norm.linear.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.19.norm.linear.lora_B.weight": [9216, 64], + "transformer.single_transformer_blocks.19.proj_mlp.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.19.proj_mlp.lora_B.weight": [12288, 64], + "transformer.single_transformer_blocks.19.proj_out.lora_A.weight": [64, 15360], + "transformer.single_transformer_blocks.19.proj_out.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.2.attn.to_k.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.2.attn.to_k.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.2.attn.to_q.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.2.attn.to_q.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.2.attn.to_v.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.2.attn.to_v.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.2.norm.linear.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.2.norm.linear.lora_B.weight": [9216, 64], + "transformer.single_transformer_blocks.2.proj_mlp.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.2.proj_mlp.lora_B.weight": [12288, 64], + "transformer.single_transformer_blocks.2.proj_out.lora_A.weight": [64, 15360], + "transformer.single_transformer_blocks.2.proj_out.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.20.attn.to_k.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.20.attn.to_k.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.20.attn.to_q.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.20.attn.to_q.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.20.attn.to_v.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.20.attn.to_v.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.20.norm.linear.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.20.norm.linear.lora_B.weight": [9216, 64], + "transformer.single_transformer_blocks.20.proj_mlp.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.20.proj_mlp.lora_B.weight": [12288, 64], + "transformer.single_transformer_blocks.20.proj_out.lora_A.weight": [64, 15360], + 
"transformer.single_transformer_blocks.20.proj_out.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.21.attn.to_k.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.21.attn.to_k.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.21.attn.to_q.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.21.attn.to_q.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.21.attn.to_v.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.21.attn.to_v.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.21.norm.linear.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.21.norm.linear.lora_B.weight": [9216, 64], + "transformer.single_transformer_blocks.21.proj_mlp.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.21.proj_mlp.lora_B.weight": [12288, 64], + "transformer.single_transformer_blocks.21.proj_out.lora_A.weight": [64, 15360], + "transformer.single_transformer_blocks.21.proj_out.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.22.attn.to_k.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.22.attn.to_k.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.22.attn.to_q.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.22.attn.to_q.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.22.attn.to_v.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.22.attn.to_v.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.22.norm.linear.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.22.norm.linear.lora_B.weight": [9216, 64], + "transformer.single_transformer_blocks.22.proj_mlp.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.22.proj_mlp.lora_B.weight": [12288, 64], + "transformer.single_transformer_blocks.22.proj_out.lora_A.weight": [64, 15360], + "transformer.single_transformer_blocks.22.proj_out.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.23.attn.to_k.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.23.attn.to_k.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.23.attn.to_q.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.23.attn.to_q.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.23.attn.to_v.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.23.attn.to_v.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.23.norm.linear.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.23.norm.linear.lora_B.weight": [9216, 64], + "transformer.single_transformer_blocks.23.proj_mlp.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.23.proj_mlp.lora_B.weight": [12288, 64], + "transformer.single_transformer_blocks.23.proj_out.lora_A.weight": [64, 15360], + "transformer.single_transformer_blocks.23.proj_out.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.24.attn.to_k.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.24.attn.to_k.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.24.attn.to_q.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.24.attn.to_q.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.24.attn.to_v.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.24.attn.to_v.lora_B.weight": [3072, 64], + 
"transformer.single_transformer_blocks.24.norm.linear.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.24.norm.linear.lora_B.weight": [9216, 64], + "transformer.single_transformer_blocks.24.proj_mlp.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.24.proj_mlp.lora_B.weight": [12288, 64], + "transformer.single_transformer_blocks.24.proj_out.lora_A.weight": [64, 15360], + "transformer.single_transformer_blocks.24.proj_out.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.25.attn.to_k.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.25.attn.to_k.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.25.attn.to_q.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.25.attn.to_q.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.25.attn.to_v.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.25.attn.to_v.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.25.norm.linear.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.25.norm.linear.lora_B.weight": [9216, 64], + "transformer.single_transformer_blocks.25.proj_mlp.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.25.proj_mlp.lora_B.weight": [12288, 64], + "transformer.single_transformer_blocks.25.proj_out.lora_A.weight": [64, 15360], + "transformer.single_transformer_blocks.25.proj_out.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.26.attn.to_k.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.26.attn.to_k.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.26.attn.to_q.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.26.attn.to_q.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.26.attn.to_v.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.26.attn.to_v.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.26.norm.linear.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.26.norm.linear.lora_B.weight": [9216, 64], + "transformer.single_transformer_blocks.26.proj_mlp.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.26.proj_mlp.lora_B.weight": [12288, 64], + "transformer.single_transformer_blocks.26.proj_out.lora_A.weight": [64, 15360], + "transformer.single_transformer_blocks.26.proj_out.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.27.attn.to_k.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.27.attn.to_k.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.27.attn.to_q.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.27.attn.to_q.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.27.attn.to_v.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.27.attn.to_v.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.27.norm.linear.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.27.norm.linear.lora_B.weight": [9216, 64], + "transformer.single_transformer_blocks.27.proj_mlp.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.27.proj_mlp.lora_B.weight": [12288, 64], + "transformer.single_transformer_blocks.27.proj_out.lora_A.weight": [64, 15360], + "transformer.single_transformer_blocks.27.proj_out.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.28.attn.to_k.lora_A.weight": [64, 3072], + 
"transformer.single_transformer_blocks.28.attn.to_k.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.28.attn.to_q.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.28.attn.to_q.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.28.attn.to_v.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.28.attn.to_v.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.28.norm.linear.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.28.norm.linear.lora_B.weight": [9216, 64], + "transformer.single_transformer_blocks.28.proj_mlp.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.28.proj_mlp.lora_B.weight": [12288, 64], + "transformer.single_transformer_blocks.28.proj_out.lora_A.weight": [64, 15360], + "transformer.single_transformer_blocks.28.proj_out.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.29.attn.to_k.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.29.attn.to_k.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.29.attn.to_q.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.29.attn.to_q.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.29.attn.to_v.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.29.attn.to_v.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.29.norm.linear.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.29.norm.linear.lora_B.weight": [9216, 64], + "transformer.single_transformer_blocks.29.proj_mlp.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.29.proj_mlp.lora_B.weight": [12288, 64], + "transformer.single_transformer_blocks.29.proj_out.lora_A.weight": [64, 15360], + "transformer.single_transformer_blocks.29.proj_out.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.3.attn.to_k.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.3.attn.to_k.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.3.attn.to_q.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.3.attn.to_q.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.3.attn.to_v.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.3.attn.to_v.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.3.norm.linear.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.3.norm.linear.lora_B.weight": [9216, 64], + "transformer.single_transformer_blocks.3.proj_mlp.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.3.proj_mlp.lora_B.weight": [12288, 64], + "transformer.single_transformer_blocks.3.proj_out.lora_A.weight": [64, 15360], + "transformer.single_transformer_blocks.3.proj_out.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.30.attn.to_k.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.30.attn.to_k.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.30.attn.to_q.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.30.attn.to_q.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.30.attn.to_v.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.30.attn.to_v.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.30.norm.linear.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.30.norm.linear.lora_B.weight": [9216, 64], + 
"transformer.single_transformer_blocks.30.proj_mlp.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.30.proj_mlp.lora_B.weight": [12288, 64], + "transformer.single_transformer_blocks.30.proj_out.lora_A.weight": [64, 15360], + "transformer.single_transformer_blocks.30.proj_out.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.31.attn.to_k.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.31.attn.to_k.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.31.attn.to_q.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.31.attn.to_q.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.31.attn.to_v.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.31.attn.to_v.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.31.norm.linear.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.31.norm.linear.lora_B.weight": [9216, 64], + "transformer.single_transformer_blocks.31.proj_mlp.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.31.proj_mlp.lora_B.weight": [12288, 64], + "transformer.single_transformer_blocks.31.proj_out.lora_A.weight": [64, 15360], + "transformer.single_transformer_blocks.31.proj_out.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.32.attn.to_k.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.32.attn.to_k.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.32.attn.to_q.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.32.attn.to_q.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.32.attn.to_v.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.32.attn.to_v.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.32.norm.linear.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.32.norm.linear.lora_B.weight": [9216, 64], + "transformer.single_transformer_blocks.32.proj_mlp.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.32.proj_mlp.lora_B.weight": [12288, 64], + "transformer.single_transformer_blocks.32.proj_out.lora_A.weight": [64, 15360], + "transformer.single_transformer_blocks.32.proj_out.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.33.attn.to_k.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.33.attn.to_k.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.33.attn.to_q.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.33.attn.to_q.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.33.attn.to_v.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.33.attn.to_v.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.33.norm.linear.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.33.norm.linear.lora_B.weight": [9216, 64], + "transformer.single_transformer_blocks.33.proj_mlp.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.33.proj_mlp.lora_B.weight": [12288, 64], + "transformer.single_transformer_blocks.33.proj_out.lora_A.weight": [64, 15360], + "transformer.single_transformer_blocks.33.proj_out.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.34.attn.to_k.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.34.attn.to_k.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.34.attn.to_q.lora_A.weight": [64, 3072], + 
"transformer.single_transformer_blocks.34.attn.to_q.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.34.attn.to_v.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.34.attn.to_v.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.34.norm.linear.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.34.norm.linear.lora_B.weight": [9216, 64], + "transformer.single_transformer_blocks.34.proj_mlp.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.34.proj_mlp.lora_B.weight": [12288, 64], + "transformer.single_transformer_blocks.34.proj_out.lora_A.weight": [64, 15360], + "transformer.single_transformer_blocks.34.proj_out.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.35.attn.to_k.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.35.attn.to_k.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.35.attn.to_q.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.35.attn.to_q.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.35.attn.to_v.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.35.attn.to_v.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.35.norm.linear.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.35.norm.linear.lora_B.weight": [9216, 64], + "transformer.single_transformer_blocks.35.proj_mlp.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.35.proj_mlp.lora_B.weight": [12288, 64], + "transformer.single_transformer_blocks.35.proj_out.lora_A.weight": [64, 15360], + "transformer.single_transformer_blocks.35.proj_out.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.36.attn.to_k.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.36.attn.to_k.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.36.attn.to_q.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.36.attn.to_q.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.36.attn.to_v.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.36.attn.to_v.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.36.norm.linear.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.36.norm.linear.lora_B.weight": [9216, 64], + "transformer.single_transformer_blocks.36.proj_mlp.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.36.proj_mlp.lora_B.weight": [12288, 64], + "transformer.single_transformer_blocks.36.proj_out.lora_A.weight": [64, 15360], + "transformer.single_transformer_blocks.36.proj_out.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.37.attn.to_k.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.37.attn.to_k.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.37.attn.to_q.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.37.attn.to_q.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.37.attn.to_v.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.37.attn.to_v.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.37.norm.linear.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.37.norm.linear.lora_B.weight": [9216, 64], + "transformer.single_transformer_blocks.37.proj_mlp.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.37.proj_mlp.lora_B.weight": [12288, 64], + 
"transformer.single_transformer_blocks.37.proj_out.lora_A.weight": [64, 15360], + "transformer.single_transformer_blocks.37.proj_out.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.4.attn.to_k.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.4.attn.to_k.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.4.attn.to_q.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.4.attn.to_q.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.4.attn.to_v.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.4.attn.to_v.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.4.norm.linear.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.4.norm.linear.lora_B.weight": [9216, 64], + "transformer.single_transformer_blocks.4.proj_mlp.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.4.proj_mlp.lora_B.weight": [12288, 64], + "transformer.single_transformer_blocks.4.proj_out.lora_A.weight": [64, 15360], + "transformer.single_transformer_blocks.4.proj_out.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.5.attn.to_k.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.5.attn.to_k.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.5.attn.to_q.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.5.attn.to_q.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.5.attn.to_v.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.5.attn.to_v.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.5.norm.linear.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.5.norm.linear.lora_B.weight": [9216, 64], + "transformer.single_transformer_blocks.5.proj_mlp.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.5.proj_mlp.lora_B.weight": [12288, 64], + "transformer.single_transformer_blocks.5.proj_out.lora_A.weight": [64, 15360], + "transformer.single_transformer_blocks.5.proj_out.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.6.attn.to_k.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.6.attn.to_k.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.6.attn.to_q.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.6.attn.to_q.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.6.attn.to_v.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.6.attn.to_v.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.6.norm.linear.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.6.norm.linear.lora_B.weight": [9216, 64], + "transformer.single_transformer_blocks.6.proj_mlp.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.6.proj_mlp.lora_B.weight": [12288, 64], + "transformer.single_transformer_blocks.6.proj_out.lora_A.weight": [64, 15360], + "transformer.single_transformer_blocks.6.proj_out.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.7.attn.to_k.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.7.attn.to_k.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.7.attn.to_q.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.7.attn.to_q.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.7.attn.to_v.lora_A.weight": [64, 3072], + 
"transformer.single_transformer_blocks.7.attn.to_v.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.7.norm.linear.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.7.norm.linear.lora_B.weight": [9216, 64], + "transformer.single_transformer_blocks.7.proj_mlp.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.7.proj_mlp.lora_B.weight": [12288, 64], + "transformer.single_transformer_blocks.7.proj_out.lora_A.weight": [64, 15360], + "transformer.single_transformer_blocks.7.proj_out.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.8.attn.to_k.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.8.attn.to_k.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.8.attn.to_q.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.8.attn.to_q.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.8.attn.to_v.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.8.attn.to_v.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.8.norm.linear.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.8.norm.linear.lora_B.weight": [9216, 64], + "transformer.single_transformer_blocks.8.proj_mlp.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.8.proj_mlp.lora_B.weight": [12288, 64], + "transformer.single_transformer_blocks.8.proj_out.lora_A.weight": [64, 15360], + "transformer.single_transformer_blocks.8.proj_out.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.9.attn.to_k.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.9.attn.to_k.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.9.attn.to_q.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.9.attn.to_q.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.9.attn.to_v.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.9.attn.to_v.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.9.norm.linear.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.9.norm.linear.lora_B.weight": [9216, 64], + "transformer.single_transformer_blocks.9.proj_mlp.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.9.proj_mlp.lora_B.weight": [12288, 64], + "transformer.single_transformer_blocks.9.proj_out.lora_A.weight": [64, 15360], + "transformer.single_transformer_blocks.9.proj_out.lora_B.weight": [3072, 64], + "transformer.time_text_embed.guidance_embedder.linear_1.lora_A.weight": [64, 256], + "transformer.time_text_embed.guidance_embedder.linear_1.lora_B.weight": [3072, 64], + "transformer.time_text_embed.guidance_embedder.linear_2.lora_A.weight": [64, 3072], + "transformer.time_text_embed.guidance_embedder.linear_2.lora_B.weight": [3072, 64], + "transformer.time_text_embed.text_embedder.linear_1.lora_A.weight": [64, 768], + "transformer.time_text_embed.text_embedder.linear_1.lora_B.weight": [3072, 64], + "transformer.time_text_embed.text_embedder.linear_2.lora_A.weight": [64, 3072], + "transformer.time_text_embed.text_embedder.linear_2.lora_B.weight": [3072, 64], + "transformer.time_text_embed.timestep_embedder.linear_1.lora_A.weight": [64, 256], + "transformer.time_text_embed.timestep_embedder.linear_1.lora_B.weight": [3072, 64], + "transformer.time_text_embed.timestep_embedder.linear_2.lora_A.weight": [64, 3072], + "transformer.time_text_embed.timestep_embedder.linear_2.lora_B.weight": [3072, 64], + 
"transformer.transformer_blocks.0.attn.add_k_proj.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.0.attn.add_k_proj.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.0.attn.add_q_proj.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.0.attn.add_q_proj.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.0.attn.add_v_proj.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.0.attn.add_v_proj.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.0.attn.to_add_out.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.0.attn.to_add_out.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.0.attn.to_k.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.0.attn.to_k.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.0.attn.to_out.0.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.0.attn.to_out.0.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.0.attn.to_q.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.0.attn.to_q.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.0.attn.to_v.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.0.attn.to_v.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.0.ff.net.0.proj.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.0.ff.net.0.proj.lora_B.weight": [12288, 64], + "transformer.transformer_blocks.0.ff.net.2.lora_A.weight": [64, 12288], + "transformer.transformer_blocks.0.ff.net.2.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.0.ff_context.net.0.proj.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.0.ff_context.net.0.proj.lora_B.weight": [12288, 64], + "transformer.transformer_blocks.0.ff_context.net.2.lora_A.weight": [64, 12288], + "transformer.transformer_blocks.0.ff_context.net.2.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.0.norm1.linear.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.0.norm1.linear.lora_B.weight": [18432, 64], + "transformer.transformer_blocks.0.norm1_context.linear.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.0.norm1_context.linear.lora_B.weight": [18432, 64], + "transformer.transformer_blocks.1.attn.add_k_proj.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.1.attn.add_k_proj.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.1.attn.add_q_proj.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.1.attn.add_q_proj.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.1.attn.add_v_proj.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.1.attn.add_v_proj.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.1.attn.to_add_out.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.1.attn.to_add_out.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.1.attn.to_k.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.1.attn.to_k.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.1.attn.to_out.0.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.1.attn.to_out.0.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.1.attn.to_q.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.1.attn.to_q.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.1.attn.to_v.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.1.attn.to_v.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.1.ff.net.0.proj.lora_A.weight": [64, 3072], + 
"transformer.transformer_blocks.1.ff.net.0.proj.lora_B.weight": [12288, 64], + "transformer.transformer_blocks.1.ff.net.2.lora_A.weight": [64, 12288], + "transformer.transformer_blocks.1.ff.net.2.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.1.ff_context.net.0.proj.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.1.ff_context.net.0.proj.lora_B.weight": [12288, 64], + "transformer.transformer_blocks.1.ff_context.net.2.lora_A.weight": [64, 12288], + "transformer.transformer_blocks.1.ff_context.net.2.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.1.norm1.linear.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.1.norm1.linear.lora_B.weight": [18432, 64], + "transformer.transformer_blocks.1.norm1_context.linear.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.1.norm1_context.linear.lora_B.weight": [18432, 64], + "transformer.transformer_blocks.10.attn.add_k_proj.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.10.attn.add_k_proj.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.10.attn.add_q_proj.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.10.attn.add_q_proj.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.10.attn.add_v_proj.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.10.attn.add_v_proj.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.10.attn.to_add_out.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.10.attn.to_add_out.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.10.attn.to_k.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.10.attn.to_k.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.10.attn.to_out.0.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.10.attn.to_out.0.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.10.attn.to_q.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.10.attn.to_q.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.10.attn.to_v.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.10.attn.to_v.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.10.ff.net.0.proj.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.10.ff.net.0.proj.lora_B.weight": [12288, 64], + "transformer.transformer_blocks.10.ff.net.2.lora_A.weight": [64, 12288], + "transformer.transformer_blocks.10.ff.net.2.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.10.ff_context.net.0.proj.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.10.ff_context.net.0.proj.lora_B.weight": [12288, 64], + "transformer.transformer_blocks.10.ff_context.net.2.lora_A.weight": [64, 12288], + "transformer.transformer_blocks.10.ff_context.net.2.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.10.norm1.linear.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.10.norm1.linear.lora_B.weight": [18432, 64], + "transformer.transformer_blocks.10.norm1_context.linear.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.10.norm1_context.linear.lora_B.weight": [18432, 64], + "transformer.transformer_blocks.11.attn.add_k_proj.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.11.attn.add_k_proj.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.11.attn.add_q_proj.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.11.attn.add_q_proj.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.11.attn.add_v_proj.lora_A.weight": [64, 3072], + 
"transformer.transformer_blocks.11.attn.add_v_proj.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.11.attn.to_add_out.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.11.attn.to_add_out.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.11.attn.to_k.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.11.attn.to_k.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.11.attn.to_out.0.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.11.attn.to_out.0.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.11.attn.to_q.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.11.attn.to_q.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.11.attn.to_v.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.11.attn.to_v.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.11.ff.net.0.proj.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.11.ff.net.0.proj.lora_B.weight": [12288, 64], + "transformer.transformer_blocks.11.ff.net.2.lora_A.weight": [64, 12288], + "transformer.transformer_blocks.11.ff.net.2.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.11.ff_context.net.0.proj.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.11.ff_context.net.0.proj.lora_B.weight": [12288, 64], + "transformer.transformer_blocks.11.ff_context.net.2.lora_A.weight": [64, 12288], + "transformer.transformer_blocks.11.ff_context.net.2.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.11.norm1.linear.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.11.norm1.linear.lora_B.weight": [18432, 64], + "transformer.transformer_blocks.11.norm1_context.linear.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.11.norm1_context.linear.lora_B.weight": [18432, 64], + "transformer.transformer_blocks.12.attn.add_k_proj.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.12.attn.add_k_proj.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.12.attn.add_q_proj.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.12.attn.add_q_proj.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.12.attn.add_v_proj.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.12.attn.add_v_proj.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.12.attn.to_add_out.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.12.attn.to_add_out.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.12.attn.to_k.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.12.attn.to_k.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.12.attn.to_out.0.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.12.attn.to_out.0.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.12.attn.to_q.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.12.attn.to_q.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.12.attn.to_v.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.12.attn.to_v.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.12.ff.net.0.proj.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.12.ff.net.0.proj.lora_B.weight": [12288, 64], + "transformer.transformer_blocks.12.ff.net.2.lora_A.weight": [64, 12288], + "transformer.transformer_blocks.12.ff.net.2.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.12.ff_context.net.0.proj.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.12.ff_context.net.0.proj.lora_B.weight": 
[12288, 64], + "transformer.transformer_blocks.12.ff_context.net.2.lora_A.weight": [64, 12288], + "transformer.transformer_blocks.12.ff_context.net.2.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.12.norm1.linear.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.12.norm1.linear.lora_B.weight": [18432, 64], + "transformer.transformer_blocks.12.norm1_context.linear.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.12.norm1_context.linear.lora_B.weight": [18432, 64], + "transformer.transformer_blocks.13.attn.add_k_proj.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.13.attn.add_k_proj.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.13.attn.add_q_proj.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.13.attn.add_q_proj.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.13.attn.add_v_proj.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.13.attn.add_v_proj.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.13.attn.to_add_out.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.13.attn.to_add_out.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.13.attn.to_k.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.13.attn.to_k.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.13.attn.to_out.0.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.13.attn.to_out.0.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.13.attn.to_q.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.13.attn.to_q.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.13.attn.to_v.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.13.attn.to_v.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.13.ff.net.0.proj.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.13.ff.net.0.proj.lora_B.weight": [12288, 64], + "transformer.transformer_blocks.13.ff.net.2.lora_A.weight": [64, 12288], + "transformer.transformer_blocks.13.ff.net.2.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.13.ff_context.net.0.proj.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.13.ff_context.net.0.proj.lora_B.weight": [12288, 64], + "transformer.transformer_blocks.13.ff_context.net.2.lora_A.weight": [64, 12288], + "transformer.transformer_blocks.13.ff_context.net.2.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.13.norm1.linear.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.13.norm1.linear.lora_B.weight": [18432, 64], + "transformer.transformer_blocks.13.norm1_context.linear.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.13.norm1_context.linear.lora_B.weight": [18432, 64], + "transformer.transformer_blocks.14.attn.add_k_proj.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.14.attn.add_k_proj.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.14.attn.add_q_proj.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.14.attn.add_q_proj.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.14.attn.add_v_proj.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.14.attn.add_v_proj.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.14.attn.to_add_out.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.14.attn.to_add_out.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.14.attn.to_k.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.14.attn.to_k.lora_B.weight": [3072, 64], + 
"transformer.transformer_blocks.14.attn.to_out.0.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.14.attn.to_out.0.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.14.attn.to_q.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.14.attn.to_q.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.14.attn.to_v.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.14.attn.to_v.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.14.ff.net.0.proj.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.14.ff.net.0.proj.lora_B.weight": [12288, 64], + "transformer.transformer_blocks.14.ff.net.2.lora_A.weight": [64, 12288], + "transformer.transformer_blocks.14.ff.net.2.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.14.ff_context.net.0.proj.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.14.ff_context.net.0.proj.lora_B.weight": [12288, 64], + "transformer.transformer_blocks.14.ff_context.net.2.lora_A.weight": [64, 12288], + "transformer.transformer_blocks.14.ff_context.net.2.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.14.norm1.linear.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.14.norm1.linear.lora_B.weight": [18432, 64], + "transformer.transformer_blocks.14.norm1_context.linear.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.14.norm1_context.linear.lora_B.weight": [18432, 64], + "transformer.transformer_blocks.15.attn.add_k_proj.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.15.attn.add_k_proj.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.15.attn.add_q_proj.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.15.attn.add_q_proj.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.15.attn.add_v_proj.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.15.attn.add_v_proj.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.15.attn.to_add_out.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.15.attn.to_add_out.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.15.attn.to_k.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.15.attn.to_k.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.15.attn.to_out.0.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.15.attn.to_out.0.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.15.attn.to_q.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.15.attn.to_q.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.15.attn.to_v.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.15.attn.to_v.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.15.ff.net.0.proj.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.15.ff.net.0.proj.lora_B.weight": [12288, 64], + "transformer.transformer_blocks.15.ff.net.2.lora_A.weight": [64, 12288], + "transformer.transformer_blocks.15.ff.net.2.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.15.ff_context.net.0.proj.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.15.ff_context.net.0.proj.lora_B.weight": [12288, 64], + "transformer.transformer_blocks.15.ff_context.net.2.lora_A.weight": [64, 12288], + "transformer.transformer_blocks.15.ff_context.net.2.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.15.norm1.linear.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.15.norm1.linear.lora_B.weight": [18432, 64], + 
"transformer.transformer_blocks.15.norm1_context.linear.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.15.norm1_context.linear.lora_B.weight": [18432, 64], + "transformer.transformer_blocks.16.attn.add_k_proj.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.16.attn.add_k_proj.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.16.attn.add_q_proj.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.16.attn.add_q_proj.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.16.attn.add_v_proj.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.16.attn.add_v_proj.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.16.attn.to_add_out.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.16.attn.to_add_out.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.16.attn.to_k.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.16.attn.to_k.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.16.attn.to_out.0.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.16.attn.to_out.0.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.16.attn.to_q.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.16.attn.to_q.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.16.attn.to_v.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.16.attn.to_v.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.16.ff.net.0.proj.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.16.ff.net.0.proj.lora_B.weight": [12288, 64], + "transformer.transformer_blocks.16.ff.net.2.lora_A.weight": [64, 12288], + "transformer.transformer_blocks.16.ff.net.2.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.16.ff_context.net.0.proj.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.16.ff_context.net.0.proj.lora_B.weight": [12288, 64], + "transformer.transformer_blocks.16.ff_context.net.2.lora_A.weight": [64, 12288], + "transformer.transformer_blocks.16.ff_context.net.2.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.16.norm1.linear.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.16.norm1.linear.lora_B.weight": [18432, 64], + "transformer.transformer_blocks.16.norm1_context.linear.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.16.norm1_context.linear.lora_B.weight": [18432, 64], + "transformer.transformer_blocks.17.attn.add_k_proj.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.17.attn.add_k_proj.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.17.attn.add_q_proj.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.17.attn.add_q_proj.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.17.attn.add_v_proj.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.17.attn.add_v_proj.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.17.attn.to_add_out.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.17.attn.to_add_out.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.17.attn.to_k.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.17.attn.to_k.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.17.attn.to_out.0.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.17.attn.to_out.0.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.17.attn.to_q.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.17.attn.to_q.lora_B.weight": [3072, 64], + 
"transformer.transformer_blocks.17.attn.to_v.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.17.attn.to_v.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.17.ff.net.0.proj.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.17.ff.net.0.proj.lora_B.weight": [12288, 64], + "transformer.transformer_blocks.17.ff.net.2.lora_A.weight": [64, 12288], + "transformer.transformer_blocks.17.ff.net.2.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.17.ff_context.net.0.proj.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.17.ff_context.net.0.proj.lora_B.weight": [12288, 64], + "transformer.transformer_blocks.17.ff_context.net.2.lora_A.weight": [64, 12288], + "transformer.transformer_blocks.17.ff_context.net.2.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.17.norm1.linear.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.17.norm1.linear.lora_B.weight": [18432, 64], + "transformer.transformer_blocks.17.norm1_context.linear.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.17.norm1_context.linear.lora_B.weight": [18432, 64], + "transformer.transformer_blocks.18.attn.add_k_proj.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.18.attn.add_k_proj.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.18.attn.add_q_proj.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.18.attn.add_q_proj.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.18.attn.add_v_proj.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.18.attn.add_v_proj.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.18.attn.to_add_out.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.18.attn.to_add_out.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.18.attn.to_k.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.18.attn.to_k.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.18.attn.to_out.0.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.18.attn.to_out.0.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.18.attn.to_q.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.18.attn.to_q.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.18.attn.to_v.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.18.attn.to_v.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.18.ff.net.0.proj.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.18.ff.net.0.proj.lora_B.weight": [12288, 64], + "transformer.transformer_blocks.18.ff.net.2.lora_A.weight": [64, 12288], + "transformer.transformer_blocks.18.ff.net.2.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.18.ff_context.net.0.proj.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.18.ff_context.net.0.proj.lora_B.weight": [12288, 64], + "transformer.transformer_blocks.18.ff_context.net.2.lora_A.weight": [64, 12288], + "transformer.transformer_blocks.18.ff_context.net.2.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.18.norm1.linear.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.18.norm1.linear.lora_B.weight": [18432, 64], + "transformer.transformer_blocks.18.norm1_context.linear.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.18.norm1_context.linear.lora_B.weight": [18432, 64], + "transformer.transformer_blocks.2.attn.add_k_proj.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.2.attn.add_k_proj.lora_B.weight": [3072, 64], + 
"transformer.transformer_blocks.2.attn.add_q_proj.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.2.attn.add_q_proj.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.2.attn.add_v_proj.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.2.attn.add_v_proj.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.2.attn.to_add_out.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.2.attn.to_add_out.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.2.attn.to_k.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.2.attn.to_k.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.2.attn.to_out.0.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.2.attn.to_out.0.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.2.attn.to_q.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.2.attn.to_q.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.2.attn.to_v.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.2.attn.to_v.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.2.ff.net.0.proj.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.2.ff.net.0.proj.lora_B.weight": [12288, 64], + "transformer.transformer_blocks.2.ff.net.2.lora_A.weight": [64, 12288], + "transformer.transformer_blocks.2.ff.net.2.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.2.ff_context.net.0.proj.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.2.ff_context.net.0.proj.lora_B.weight": [12288, 64], + "transformer.transformer_blocks.2.ff_context.net.2.lora_A.weight": [64, 12288], + "transformer.transformer_blocks.2.ff_context.net.2.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.2.norm1.linear.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.2.norm1.linear.lora_B.weight": [18432, 64], + "transformer.transformer_blocks.2.norm1_context.linear.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.2.norm1_context.linear.lora_B.weight": [18432, 64], + "transformer.transformer_blocks.3.attn.add_k_proj.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.3.attn.add_k_proj.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.3.attn.add_q_proj.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.3.attn.add_q_proj.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.3.attn.add_v_proj.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.3.attn.add_v_proj.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.3.attn.to_add_out.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.3.attn.to_add_out.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.3.attn.to_k.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.3.attn.to_k.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.3.attn.to_out.0.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.3.attn.to_out.0.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.3.attn.to_q.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.3.attn.to_q.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.3.attn.to_v.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.3.attn.to_v.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.3.ff.net.0.proj.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.3.ff.net.0.proj.lora_B.weight": [12288, 64], + "transformer.transformer_blocks.3.ff.net.2.lora_A.weight": [64, 12288], + 
"transformer.transformer_blocks.3.ff.net.2.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.3.ff_context.net.0.proj.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.3.ff_context.net.0.proj.lora_B.weight": [12288, 64], + "transformer.transformer_blocks.3.ff_context.net.2.lora_A.weight": [64, 12288], + "transformer.transformer_blocks.3.ff_context.net.2.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.3.norm1.linear.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.3.norm1.linear.lora_B.weight": [18432, 64], + "transformer.transformer_blocks.3.norm1_context.linear.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.3.norm1_context.linear.lora_B.weight": [18432, 64], + "transformer.transformer_blocks.4.attn.add_k_proj.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.4.attn.add_k_proj.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.4.attn.add_q_proj.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.4.attn.add_q_proj.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.4.attn.add_v_proj.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.4.attn.add_v_proj.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.4.attn.to_add_out.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.4.attn.to_add_out.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.4.attn.to_k.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.4.attn.to_k.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.4.attn.to_out.0.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.4.attn.to_out.0.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.4.attn.to_q.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.4.attn.to_q.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.4.attn.to_v.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.4.attn.to_v.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.4.ff.net.0.proj.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.4.ff.net.0.proj.lora_B.weight": [12288, 64], + "transformer.transformer_blocks.4.ff.net.2.lora_A.weight": [64, 12288], + "transformer.transformer_blocks.4.ff.net.2.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.4.ff_context.net.0.proj.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.4.ff_context.net.0.proj.lora_B.weight": [12288, 64], + "transformer.transformer_blocks.4.ff_context.net.2.lora_A.weight": [64, 12288], + "transformer.transformer_blocks.4.ff_context.net.2.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.4.norm1.linear.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.4.norm1.linear.lora_B.weight": [18432, 64], + "transformer.transformer_blocks.4.norm1_context.linear.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.4.norm1_context.linear.lora_B.weight": [18432, 64], + "transformer.transformer_blocks.5.attn.add_k_proj.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.5.attn.add_k_proj.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.5.attn.add_q_proj.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.5.attn.add_q_proj.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.5.attn.add_v_proj.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.5.attn.add_v_proj.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.5.attn.to_add_out.lora_A.weight": [64, 3072], + 
"transformer.transformer_blocks.5.attn.to_add_out.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.5.attn.to_k.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.5.attn.to_k.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.5.attn.to_out.0.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.5.attn.to_out.0.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.5.attn.to_q.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.5.attn.to_q.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.5.attn.to_v.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.5.attn.to_v.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.5.ff.net.0.proj.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.5.ff.net.0.proj.lora_B.weight": [12288, 64], + "transformer.transformer_blocks.5.ff.net.2.lora_A.weight": [64, 12288], + "transformer.transformer_blocks.5.ff.net.2.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.5.ff_context.net.0.proj.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.5.ff_context.net.0.proj.lora_B.weight": [12288, 64], + "transformer.transformer_blocks.5.ff_context.net.2.lora_A.weight": [64, 12288], + "transformer.transformer_blocks.5.ff_context.net.2.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.5.norm1.linear.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.5.norm1.linear.lora_B.weight": [18432, 64], + "transformer.transformer_blocks.5.norm1_context.linear.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.5.norm1_context.linear.lora_B.weight": [18432, 64], + "transformer.transformer_blocks.6.attn.add_k_proj.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.6.attn.add_k_proj.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.6.attn.add_q_proj.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.6.attn.add_q_proj.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.6.attn.add_v_proj.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.6.attn.add_v_proj.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.6.attn.to_add_out.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.6.attn.to_add_out.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.6.attn.to_k.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.6.attn.to_k.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.6.attn.to_out.0.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.6.attn.to_out.0.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.6.attn.to_q.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.6.attn.to_q.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.6.attn.to_v.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.6.attn.to_v.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.6.ff.net.0.proj.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.6.ff.net.0.proj.lora_B.weight": [12288, 64], + "transformer.transformer_blocks.6.ff.net.2.lora_A.weight": [64, 12288], + "transformer.transformer_blocks.6.ff.net.2.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.6.ff_context.net.0.proj.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.6.ff_context.net.0.proj.lora_B.weight": [12288, 64], + "transformer.transformer_blocks.6.ff_context.net.2.lora_A.weight": [64, 12288], + "transformer.transformer_blocks.6.ff_context.net.2.lora_B.weight": [3072, 64], + 
"transformer.transformer_blocks.6.norm1.linear.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.6.norm1.linear.lora_B.weight": [18432, 64], + "transformer.transformer_blocks.6.norm1_context.linear.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.6.norm1_context.linear.lora_B.weight": [18432, 64], + "transformer.transformer_blocks.7.attn.add_k_proj.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.7.attn.add_k_proj.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.7.attn.add_q_proj.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.7.attn.add_q_proj.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.7.attn.add_v_proj.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.7.attn.add_v_proj.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.7.attn.to_add_out.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.7.attn.to_add_out.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.7.attn.to_k.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.7.attn.to_k.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.7.attn.to_out.0.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.7.attn.to_out.0.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.7.attn.to_q.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.7.attn.to_q.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.7.attn.to_v.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.7.attn.to_v.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.7.ff.net.0.proj.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.7.ff.net.0.proj.lora_B.weight": [12288, 64], + "transformer.transformer_blocks.7.ff.net.2.lora_A.weight": [64, 12288], + "transformer.transformer_blocks.7.ff.net.2.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.7.ff_context.net.0.proj.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.7.ff_context.net.0.proj.lora_B.weight": [12288, 64], + "transformer.transformer_blocks.7.ff_context.net.2.lora_A.weight": [64, 12288], + "transformer.transformer_blocks.7.ff_context.net.2.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.7.norm1.linear.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.7.norm1.linear.lora_B.weight": [18432, 64], + "transformer.transformer_blocks.7.norm1_context.linear.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.7.norm1_context.linear.lora_B.weight": [18432, 64], + "transformer.transformer_blocks.8.attn.add_k_proj.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.8.attn.add_k_proj.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.8.attn.add_q_proj.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.8.attn.add_q_proj.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.8.attn.add_v_proj.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.8.attn.add_v_proj.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.8.attn.to_add_out.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.8.attn.to_add_out.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.8.attn.to_k.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.8.attn.to_k.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.8.attn.to_out.0.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.8.attn.to_out.0.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.8.attn.to_q.lora_A.weight": [64, 3072], + 
"transformer.transformer_blocks.8.attn.to_q.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.8.attn.to_v.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.8.attn.to_v.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.8.ff.net.0.proj.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.8.ff.net.0.proj.lora_B.weight": [12288, 64], + "transformer.transformer_blocks.8.ff.net.2.lora_A.weight": [64, 12288], + "transformer.transformer_blocks.8.ff.net.2.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.8.ff_context.net.0.proj.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.8.ff_context.net.0.proj.lora_B.weight": [12288, 64], + "transformer.transformer_blocks.8.ff_context.net.2.lora_A.weight": [64, 12288], + "transformer.transformer_blocks.8.ff_context.net.2.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.8.norm1.linear.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.8.norm1.linear.lora_B.weight": [18432, 64], + "transformer.transformer_blocks.8.norm1_context.linear.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.8.norm1_context.linear.lora_B.weight": [18432, 64], + "transformer.transformer_blocks.9.attn.add_k_proj.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.9.attn.add_k_proj.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.9.attn.add_q_proj.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.9.attn.add_q_proj.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.9.attn.add_v_proj.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.9.attn.add_v_proj.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.9.attn.to_add_out.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.9.attn.to_add_out.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.9.attn.to_k.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.9.attn.to_k.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.9.attn.to_out.0.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.9.attn.to_out.0.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.9.attn.to_q.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.9.attn.to_q.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.9.attn.to_v.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.9.attn.to_v.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.9.ff.net.0.proj.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.9.ff.net.0.proj.lora_B.weight": [12288, 64], + "transformer.transformer_blocks.9.ff.net.2.lora_A.weight": [64, 12288], + "transformer.transformer_blocks.9.ff.net.2.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.9.ff_context.net.0.proj.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.9.ff_context.net.0.proj.lora_B.weight": [12288, 64], + "transformer.transformer_blocks.9.ff_context.net.2.lora_A.weight": [64, 12288], + "transformer.transformer_blocks.9.ff_context.net.2.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.9.norm1.linear.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.9.norm1.linear.lora_B.weight": [18432, 64], + "transformer.transformer_blocks.9.norm1_context.linear.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.9.norm1_context.linear.lora_B.weight": [18432, 64], + "transformer.x_embedder.lora_A.weight": [64, 64], + "transformer.x_embedder.lora_B.weight": [3072, 64], +} diff --git 
index c7dfaff8812..2d30bd8a678 100644
--- a/tests/backend/patches/lora_conversions/test_flux_diffusers_lora_conversion_utils.py
+++ b/tests/backend/patches/lora_conversions/test_flux_diffusers_lora_conversion_utils.py
@@ -1,13 +1,13 @@
-import pytest
-import torch
 from unittest import mock
 
+import pytest
+import torch
 
 from invokeai.backend.patches.layers.utils import swap_shift_scale_for_linear_weight
 from invokeai.backend.patches.lora_conversions.flux_diffusers_lora_conversion_utils import (
+    approximate_flux_adaLN_lora_layer_from_diffusers_state_dict,
     is_state_dict_likely_in_flux_diffusers_format,
     lora_model_from_flux_diffusers_state_dict,
-    approximate_flux_adaLN_lora_layer_from_diffusers_state_dict,
 )
 from invokeai.backend.patches.lora_conversions.flux_lora_constants import FLUX_LORA_TRANSFORMER_PREFIX
 from tests.backend.patches.lora_conversions.lora_state_dicts.flux_dora_onetrainer_format import (
@@ -28,7 +28,14 @@
 from tests.backend.patches.lora_conversions.lora_state_dicts.utils import keys_to_mock_state_dict
 
 
-@pytest.mark.parametrize("sd_keys", [flux_diffusers_state_dict_keys, flux_diffusers_no_proj_mlp_state_dict_keys, flux_diffusers_with_norm_out_state_dict_keys])
+@pytest.mark.parametrize(
+    "sd_keys",
+    [
+        flux_diffusers_state_dict_keys,
+        flux_diffusers_no_proj_mlp_state_dict_keys,
+        flux_diffusers_with_norm_out_state_dict_keys,
+    ],
+)
 def test_is_state_dict_likely_in_flux_diffusers_format_true(sd_keys: dict[str, list[int]]):
     """Test that is_state_dict_likely_in_flux_diffusers_format() can identify a state dict in the Diffusers FLUX LoRA format."""
     # Construct a state dict that is in the Diffusers FLUX LoRA format.
@@ -48,7 +55,14 @@ def test_is_state_dict_likely_in_flux_diffusers_format_false(sd_keys: dict[str,
     assert not is_state_dict_likely_in_flux_diffusers_format(state_dict)
 
 
-@pytest.mark.parametrize("sd_keys", [flux_diffusers_state_dict_keys, flux_diffusers_no_proj_mlp_state_dict_keys, flux_diffusers_with_norm_out_state_dict_keys])
+@pytest.mark.parametrize(
+    "sd_keys",
+    [
+        flux_diffusers_state_dict_keys,
+        flux_diffusers_no_proj_mlp_state_dict_keys,
+        flux_diffusers_with_norm_out_state_dict_keys,
+    ],
+)
 def test_lora_model_from_flux_diffusers_state_dict(sd_keys: dict[str, list[int]]):
     """Test that lora_model_from_flux_diffusers_state_dict() can load a state dict in the Diffusers FLUX LoRA format."""
     # Construct a state dict that is in the Diffusers FLUX LoRA format.
@@ -84,12 +98,19 @@ def test_lora_model_from_flux_diffusers_state_dict_extra_keys_error():
         lora_model_from_flux_diffusers_state_dict(state_dict, alpha=8.0)
 
 
-@pytest.mark.parametrize("layer_sd_keys",[
-    {}, # no keys
-    {'lora_A.weight': [1024, 8], 'lora_B.weight': [8, 512]}, # wrong keys
-    {'lora_up.weight': [1024, 8],}, # missing key
-    {'lora_down.weight': [8, 512],}, # missing key
-])
+@pytest.mark.parametrize(
+    "layer_sd_keys",
+    [
+        {},  # no keys
+        {"lora_A.weight": [1024, 8], "lora_B.weight": [8, 512]},  # wrong keys
+        {
+            "lora_up.weight": [1024, 8],
+        },  # missing key
+        {
+            "lora_down.weight": [8, 512],
+        },  # missing key
+    ],
+)
 def test_approximate_adaLN_from_state_dict_should_only_accept_vanilla_LoRA_format(layer_sd_keys: dict[str, list[int]]):
     """Should only accept the valid state dict"""
     layer_state_dict = keys_to_mock_state_dict(layer_sd_keys)
@@ -98,10 +119,13 @@ def test_approximate_adaLN_from_state_dict_should_only_accept_vanilla_LoRA_forma
         approximate_flux_adaLN_lora_layer_from_diffusers_state_dict(layer_state_dict)
 
 
-@pytest.mark.parametrize("dtype, rtol", [
-    (torch.float32, 1e-4),
-    (torch.half, 1e-3),
-])
+@pytest.mark.parametrize(
+    "dtype, rtol",
+    [
+        (torch.float32, 1e-4),
+        (torch.half, 1e-3),
+    ],
+)
 def test_approximate_adaLN_from_state_dict_should_work(dtype: torch.dtype, rtol: float, rate: float = 0.99):
     """Test that we should approximate good enough adaLN layer from diffusers state dict.
     This should tolorance some kind of errorness respect to input dtype"""
@@ -113,10 +137,7 @@ def test_approximate_adaLN_from_state_dict_should_work(dtype: torch.dtype, rtol:
     up = torch.randn(input_dim, rank, dtype=dtype)
     down = torch.randn(rank, output_dim, dtype=dtype)
 
-    layer_state_dict = {
-        'lora_up.weight': up,
-        'lora_down.weight': down
-    }
+    layer_state_dict = {"lora_up.weight": up, "lora_down.weight": down}
 
     # XXX Layer patcher cast things to f32
     original = up.float() @ down.float()
@@ -132,15 +153,16 @@ def test_approximate_adaLN_from_state_dict_should_work(dtype: torch.dtype, rtol:
 
     assert close_rate > rate
 
+
 def test_adaLN_should_be_approximated_if_present_while_converting():
     """AdaLN layer should be approximated if existed inside given model"""
     state_dict = keys_to_mock_state_dict(flux_diffusers_with_norm_out_state_dict_keys)
 
-    adaLN_layer_key = 'final_layer.adaLN_modulation.1'
+    adaLN_layer_key = "final_layer.adaLN_modulation.1"
     prefixed_layer_key = FLUX_LORA_TRANSFORMER_PREFIX + adaLN_layer_key
 
     with mock.patch(
-        'invokeai.backend.patches.lora_conversions.flux_diffusers_lora_conversion_utils.approximate_flux_adaLN_lora_layer_from_diffusers_state_dict'
+        "invokeai.backend.patches.lora_conversions.flux_diffusers_lora_conversion_utils.approximate_flux_adaLN_lora_layer_from_diffusers_state_dict"
     ) as mock_approximate_func:
         model = lora_model_from_flux_diffusers_state_dict(state_dict, alpha=8.0)