From add3cca09e64774338134a053455ef41253694d1 Mon Sep 17 00:00:00 2001
From: simpletrontdip
Date: Fri, 28 Feb 2025 14:47:53 +0700
Subject: [PATCH 01/14] fix: correct guidance_in layer keys in lora conversion

---
 .../lora_conversions/flux_diffusers_lora_conversion_utils.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/invokeai/backend/patches/lora_conversions/flux_diffusers_lora_conversion_utils.py b/invokeai/backend/patches/lora_conversions/flux_diffusers_lora_conversion_utils.py
index 6a36db7b592..d6c8b4be683 100644
--- a/invokeai/backend/patches/lora_conversions/flux_diffusers_lora_conversion_utils.py
+++ b/invokeai/backend/patches/lora_conversions/flux_diffusers_lora_conversion_utils.py
@@ -124,8 +124,8 @@ def add_qkv_lora_layer_if_present(
     add_lora_layer_if_present("time_text_embed.text_embedder.linear_2", "vector_in.out_layer")
 
     # time_text_embed.guidance_embedder -> guidance_in.
-    add_lora_layer_if_present("time_text_embed.guidance_embedder.linear_1", "guidance_in")
-    add_lora_layer_if_present("time_text_embed.guidance_embedder.linear_2", "guidance_in")
+    add_lora_layer_if_present("time_text_embed.guidance_embedder.linear_1", "guidance_in.in_layer")
+    add_lora_layer_if_present("time_text_embed.guidance_embedder.linear_2", "guidance_in.out_layer")
 
     # context_embedder -> txt_in.
     add_lora_layer_if_present("context_embedder", "txt_in")

From 6efad432af6df2076616ca8ab4df839494daa4f2 Mon Sep 17 00:00:00 2001
From: simpletrontdip
Date: Fri, 28 Feb 2025 14:49:05 +0700
Subject: [PATCH 02/14] feat: add missing adaLN layer in lora conversion

---
 .../flux_diffusers_lora_conversion_utils.py | 17 +++++++++++++++++
 1 file changed, 17 insertions(+)

diff --git a/invokeai/backend/patches/lora_conversions/flux_diffusers_lora_conversion_utils.py b/invokeai/backend/patches/lora_conversions/flux_diffusers_lora_conversion_utils.py
index d6c8b4be683..e5d31d26b9d 100644
--- a/invokeai/backend/patches/lora_conversions/flux_diffusers_lora_conversion_utils.py
+++ b/invokeai/backend/patches/lora_conversions/flux_diffusers_lora_conversion_utils.py
@@ -82,6 +82,19 @@ def add_lora_layer_if_present(src_key: str, dst_key: str) -> None:
             values = get_lora_layer_values(src_layer_dict)
             layers[dst_key] = any_lora_layer_from_state_dict(values)
 
+    def add_lora_adaLN_layer_if_present(src_key: str, dst_key: str) -> None:
+        if src_key in grouped_state_dict:
+            src_layer_dict = grouped_state_dict.pop(src_key)
+            values = get_lora_layer_values(src_layer_dict)
+
+            for _key in values.keys():
+                # in SD3 original implementation of AdaLayerNormContinuous, it split linear projection output into shift, scale;
+                # while in diffusers it split into scale, shift. Here we swap the linear projection weights in order to be able to use diffusers implementation
+                scale, shift = values[_key].chunk(2, dim=0)
+                values[_key] = torch.cat([shift, scale], dim=0)
+
+            layers[dst_key] = any_lora_layer_from_state_dict(values)
+
     def add_qkv_lora_layer_if_present(
         src_keys: list[str],
         src_weight_shapes: list[tuple[int, int]],
@@ -223,6 +236,10 @@ def add_qkv_lora_layer_if_present(
 
     # Final layer.
     add_lora_layer_if_present("proj_out", "final_layer.linear")
+    add_lora_adaLN_layer_if_present(
+        'norm_out.linear',
+        'final_layer.adaLN_modulation.1',
+    )
 
     # Assert that all keys were processed.
     assert len(grouped_state_dict) == 0

From 0c0637f90e41fd4cd41b05a0c8036f346d79d20b Mon Sep 17 00:00:00 2001
From: simpletrontdip
Date: Tue, 4 Mar 2025 09:23:02 +0700
Subject: [PATCH 03/14] chore: update util function name as convention

---
 .../lora_conversions/flux_diffusers_lora_conversion_utils.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/invokeai/backend/patches/lora_conversions/flux_diffusers_lora_conversion_utils.py b/invokeai/backend/patches/lora_conversions/flux_diffusers_lora_conversion_utils.py
index e5d31d26b9d..1417489e8a4 100644
--- a/invokeai/backend/patches/lora_conversions/flux_diffusers_lora_conversion_utils.py
+++ b/invokeai/backend/patches/lora_conversions/flux_diffusers_lora_conversion_utils.py
@@ -82,7 +82,7 @@ def add_lora_layer_if_present(src_key: str, dst_key: str) -> None:
             values = get_lora_layer_values(src_layer_dict)
             layers[dst_key] = any_lora_layer_from_state_dict(values)
 
-    def add_lora_adaLN_layer_if_present(src_key: str, dst_key: str) -> None:
+    def add_adaLN_lora_layer_if_present(src_key: str, dst_key: str) -> None:
         if src_key in grouped_state_dict:
             src_layer_dict = grouped_state_dict.pop(src_key)
             values = get_lora_layer_values(src_layer_dict)
@@ -236,7 +236,7 @@ def add_qkv_lora_layer_if_present(
 
     # Final layer.
     add_lora_layer_if_present("proj_out", "final_layer.linear")
-    add_lora_adaLN_layer_if_present(
+    add_adaLN_lora_layer_if_present(
         'norm_out.linear',
         'final_layer.adaLN_modulation.1',
     )

From c12005e41256902733c96fbba0cad886f639615c Mon Sep 17 00:00:00 2001
From: simpletrontdip
Date: Fri, 7 Mar 2025 18:20:03 +0700
Subject: [PATCH 04/14] feat: add sample lora diffuser keys with norm_out.linear layer to test

---
 ...lux_lora_diffusers_with_norm_out_format.py | 1012 +++++++++++++++++
 ...st_flux_diffusers_lora_conversion_utils.py | 7 +-
 2 files changed, 1017 insertions(+), 2 deletions(-)
 create mode 100644 tests/backend/patches/lora_conversions/lora_state_dicts/flux_lora_diffusers_with_norm_out_format.py

diff --git a/tests/backend/patches/lora_conversions/lora_state_dicts/flux_lora_diffusers_with_norm_out_format.py b/tests/backend/patches/lora_conversions/lora_state_dicts/flux_lora_diffusers_with_norm_out_format.py
new file mode 100644
index 00000000000..7a9d15083d3
--- /dev/null
+++ b/tests/backend/patches/lora_conversions/lora_state_dicts/flux_lora_diffusers_with_norm_out_format.py
@@ -0,0 +1,1012 @@
+# Sample state dict in the Diffusers FLUX LoRA format.
+# This from Hyper-SD, having extra `norm_out` layer +# From https://huggingface.co/ByteDance/Hyper-SD/tree/main?show_file_info=Hyper-FLUX.1-dev-16steps-lora.safetensors +state_dict_keys = { + "transformer.context_embedder.lora_A.weight": [64, 4096], + "transformer.context_embedder.lora_B.weight": [3072, 64], + "transformer.norm_out.linear.lora_A.weight": [64, 3072], + "transformer.norm_out.linear.lora_B.weight": [6144, 64], + "transformer.proj_out.lora_A.weight": [64, 3072], + "transformer.proj_out.lora_B.weight": [64, 64], + "transformer.single_transformer_blocks.0.attn.to_k.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.0.attn.to_k.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.0.attn.to_q.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.0.attn.to_q.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.0.attn.to_v.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.0.attn.to_v.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.0.norm.linear.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.0.norm.linear.lora_B.weight": [9216, 64], + "transformer.single_transformer_blocks.0.proj_mlp.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.0.proj_mlp.lora_B.weight": [12288, 64], + "transformer.single_transformer_blocks.0.proj_out.lora_A.weight": [64, 15360], + "transformer.single_transformer_blocks.0.proj_out.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.1.attn.to_k.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.1.attn.to_k.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.1.attn.to_q.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.1.attn.to_q.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.1.attn.to_v.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.1.attn.to_v.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.1.norm.linear.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.1.norm.linear.lora_B.weight": [9216, 64], + "transformer.single_transformer_blocks.1.proj_mlp.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.1.proj_mlp.lora_B.weight": [12288, 64], + "transformer.single_transformer_blocks.1.proj_out.lora_A.weight": [64, 15360], + "transformer.single_transformer_blocks.1.proj_out.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.10.attn.to_k.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.10.attn.to_k.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.10.attn.to_q.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.10.attn.to_q.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.10.attn.to_v.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.10.attn.to_v.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.10.norm.linear.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.10.norm.linear.lora_B.weight": [9216, 64], + "transformer.single_transformer_blocks.10.proj_mlp.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.10.proj_mlp.lora_B.weight": [12288, 64], + "transformer.single_transformer_blocks.10.proj_out.lora_A.weight": [64, 15360], + "transformer.single_transformer_blocks.10.proj_out.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.11.attn.to_k.lora_A.weight": [64, 3072], + 
"transformer.single_transformer_blocks.11.attn.to_k.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.11.attn.to_q.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.11.attn.to_q.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.11.attn.to_v.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.11.attn.to_v.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.11.norm.linear.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.11.norm.linear.lora_B.weight": [9216, 64], + "transformer.single_transformer_blocks.11.proj_mlp.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.11.proj_mlp.lora_B.weight": [12288, 64], + "transformer.single_transformer_blocks.11.proj_out.lora_A.weight": [64, 15360], + "transformer.single_transformer_blocks.11.proj_out.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.12.attn.to_k.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.12.attn.to_k.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.12.attn.to_q.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.12.attn.to_q.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.12.attn.to_v.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.12.attn.to_v.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.12.norm.linear.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.12.norm.linear.lora_B.weight": [9216, 64], + "transformer.single_transformer_blocks.12.proj_mlp.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.12.proj_mlp.lora_B.weight": [12288, 64], + "transformer.single_transformer_blocks.12.proj_out.lora_A.weight": [64, 15360], + "transformer.single_transformer_blocks.12.proj_out.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.13.attn.to_k.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.13.attn.to_k.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.13.attn.to_q.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.13.attn.to_q.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.13.attn.to_v.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.13.attn.to_v.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.13.norm.linear.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.13.norm.linear.lora_B.weight": [9216, 64], + "transformer.single_transformer_blocks.13.proj_mlp.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.13.proj_mlp.lora_B.weight": [12288, 64], + "transformer.single_transformer_blocks.13.proj_out.lora_A.weight": [64, 15360], + "transformer.single_transformer_blocks.13.proj_out.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.14.attn.to_k.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.14.attn.to_k.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.14.attn.to_q.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.14.attn.to_q.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.14.attn.to_v.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.14.attn.to_v.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.14.norm.linear.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.14.norm.linear.lora_B.weight": [9216, 64], + 
"transformer.single_transformer_blocks.14.proj_mlp.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.14.proj_mlp.lora_B.weight": [12288, 64], + "transformer.single_transformer_blocks.14.proj_out.lora_A.weight": [64, 15360], + "transformer.single_transformer_blocks.14.proj_out.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.15.attn.to_k.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.15.attn.to_k.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.15.attn.to_q.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.15.attn.to_q.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.15.attn.to_v.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.15.attn.to_v.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.15.norm.linear.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.15.norm.linear.lora_B.weight": [9216, 64], + "transformer.single_transformer_blocks.15.proj_mlp.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.15.proj_mlp.lora_B.weight": [12288, 64], + "transformer.single_transformer_blocks.15.proj_out.lora_A.weight": [64, 15360], + "transformer.single_transformer_blocks.15.proj_out.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.16.attn.to_k.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.16.attn.to_k.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.16.attn.to_q.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.16.attn.to_q.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.16.attn.to_v.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.16.attn.to_v.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.16.norm.linear.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.16.norm.linear.lora_B.weight": [9216, 64], + "transformer.single_transformer_blocks.16.proj_mlp.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.16.proj_mlp.lora_B.weight": [12288, 64], + "transformer.single_transformer_blocks.16.proj_out.lora_A.weight": [64, 15360], + "transformer.single_transformer_blocks.16.proj_out.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.17.attn.to_k.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.17.attn.to_k.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.17.attn.to_q.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.17.attn.to_q.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.17.attn.to_v.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.17.attn.to_v.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.17.norm.linear.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.17.norm.linear.lora_B.weight": [9216, 64], + "transformer.single_transformer_blocks.17.proj_mlp.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.17.proj_mlp.lora_B.weight": [12288, 64], + "transformer.single_transformer_blocks.17.proj_out.lora_A.weight": [64, 15360], + "transformer.single_transformer_blocks.17.proj_out.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.18.attn.to_k.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.18.attn.to_k.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.18.attn.to_q.lora_A.weight": [64, 3072], + 
"transformer.single_transformer_blocks.18.attn.to_q.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.18.attn.to_v.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.18.attn.to_v.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.18.norm.linear.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.18.norm.linear.lora_B.weight": [9216, 64], + "transformer.single_transformer_blocks.18.proj_mlp.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.18.proj_mlp.lora_B.weight": [12288, 64], + "transformer.single_transformer_blocks.18.proj_out.lora_A.weight": [64, 15360], + "transformer.single_transformer_blocks.18.proj_out.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.19.attn.to_k.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.19.attn.to_k.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.19.attn.to_q.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.19.attn.to_q.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.19.attn.to_v.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.19.attn.to_v.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.19.norm.linear.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.19.norm.linear.lora_B.weight": [9216, 64], + "transformer.single_transformer_blocks.19.proj_mlp.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.19.proj_mlp.lora_B.weight": [12288, 64], + "transformer.single_transformer_blocks.19.proj_out.lora_A.weight": [64, 15360], + "transformer.single_transformer_blocks.19.proj_out.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.2.attn.to_k.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.2.attn.to_k.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.2.attn.to_q.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.2.attn.to_q.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.2.attn.to_v.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.2.attn.to_v.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.2.norm.linear.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.2.norm.linear.lora_B.weight": [9216, 64], + "transformer.single_transformer_blocks.2.proj_mlp.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.2.proj_mlp.lora_B.weight": [12288, 64], + "transformer.single_transformer_blocks.2.proj_out.lora_A.weight": [64, 15360], + "transformer.single_transformer_blocks.2.proj_out.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.20.attn.to_k.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.20.attn.to_k.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.20.attn.to_q.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.20.attn.to_q.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.20.attn.to_v.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.20.attn.to_v.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.20.norm.linear.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.20.norm.linear.lora_B.weight": [9216, 64], + "transformer.single_transformer_blocks.20.proj_mlp.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.20.proj_mlp.lora_B.weight": [12288, 64], + 
"transformer.single_transformer_blocks.20.proj_out.lora_A.weight": [64, 15360], + "transformer.single_transformer_blocks.20.proj_out.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.21.attn.to_k.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.21.attn.to_k.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.21.attn.to_q.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.21.attn.to_q.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.21.attn.to_v.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.21.attn.to_v.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.21.norm.linear.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.21.norm.linear.lora_B.weight": [9216, 64], + "transformer.single_transformer_blocks.21.proj_mlp.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.21.proj_mlp.lora_B.weight": [12288, 64], + "transformer.single_transformer_blocks.21.proj_out.lora_A.weight": [64, 15360], + "transformer.single_transformer_blocks.21.proj_out.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.22.attn.to_k.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.22.attn.to_k.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.22.attn.to_q.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.22.attn.to_q.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.22.attn.to_v.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.22.attn.to_v.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.22.norm.linear.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.22.norm.linear.lora_B.weight": [9216, 64], + "transformer.single_transformer_blocks.22.proj_mlp.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.22.proj_mlp.lora_B.weight": [12288, 64], + "transformer.single_transformer_blocks.22.proj_out.lora_A.weight": [64, 15360], + "transformer.single_transformer_blocks.22.proj_out.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.23.attn.to_k.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.23.attn.to_k.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.23.attn.to_q.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.23.attn.to_q.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.23.attn.to_v.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.23.attn.to_v.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.23.norm.linear.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.23.norm.linear.lora_B.weight": [9216, 64], + "transformer.single_transformer_blocks.23.proj_mlp.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.23.proj_mlp.lora_B.weight": [12288, 64], + "transformer.single_transformer_blocks.23.proj_out.lora_A.weight": [64, 15360], + "transformer.single_transformer_blocks.23.proj_out.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.24.attn.to_k.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.24.attn.to_k.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.24.attn.to_q.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.24.attn.to_q.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.24.attn.to_v.lora_A.weight": [64, 3072], + 
"transformer.single_transformer_blocks.24.attn.to_v.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.24.norm.linear.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.24.norm.linear.lora_B.weight": [9216, 64], + "transformer.single_transformer_blocks.24.proj_mlp.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.24.proj_mlp.lora_B.weight": [12288, 64], + "transformer.single_transformer_blocks.24.proj_out.lora_A.weight": [64, 15360], + "transformer.single_transformer_blocks.24.proj_out.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.25.attn.to_k.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.25.attn.to_k.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.25.attn.to_q.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.25.attn.to_q.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.25.attn.to_v.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.25.attn.to_v.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.25.norm.linear.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.25.norm.linear.lora_B.weight": [9216, 64], + "transformer.single_transformer_blocks.25.proj_mlp.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.25.proj_mlp.lora_B.weight": [12288, 64], + "transformer.single_transformer_blocks.25.proj_out.lora_A.weight": [64, 15360], + "transformer.single_transformer_blocks.25.proj_out.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.26.attn.to_k.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.26.attn.to_k.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.26.attn.to_q.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.26.attn.to_q.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.26.attn.to_v.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.26.attn.to_v.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.26.norm.linear.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.26.norm.linear.lora_B.weight": [9216, 64], + "transformer.single_transformer_blocks.26.proj_mlp.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.26.proj_mlp.lora_B.weight": [12288, 64], + "transformer.single_transformer_blocks.26.proj_out.lora_A.weight": [64, 15360], + "transformer.single_transformer_blocks.26.proj_out.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.27.attn.to_k.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.27.attn.to_k.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.27.attn.to_q.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.27.attn.to_q.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.27.attn.to_v.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.27.attn.to_v.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.27.norm.linear.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.27.norm.linear.lora_B.weight": [9216, 64], + "transformer.single_transformer_blocks.27.proj_mlp.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.27.proj_mlp.lora_B.weight": [12288, 64], + "transformer.single_transformer_blocks.27.proj_out.lora_A.weight": [64, 15360], + "transformer.single_transformer_blocks.27.proj_out.lora_B.weight": [3072, 64], + 
"transformer.single_transformer_blocks.28.attn.to_k.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.28.attn.to_k.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.28.attn.to_q.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.28.attn.to_q.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.28.attn.to_v.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.28.attn.to_v.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.28.norm.linear.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.28.norm.linear.lora_B.weight": [9216, 64], + "transformer.single_transformer_blocks.28.proj_mlp.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.28.proj_mlp.lora_B.weight": [12288, 64], + "transformer.single_transformer_blocks.28.proj_out.lora_A.weight": [64, 15360], + "transformer.single_transformer_blocks.28.proj_out.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.29.attn.to_k.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.29.attn.to_k.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.29.attn.to_q.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.29.attn.to_q.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.29.attn.to_v.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.29.attn.to_v.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.29.norm.linear.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.29.norm.linear.lora_B.weight": [9216, 64], + "transformer.single_transformer_blocks.29.proj_mlp.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.29.proj_mlp.lora_B.weight": [12288, 64], + "transformer.single_transformer_blocks.29.proj_out.lora_A.weight": [64, 15360], + "transformer.single_transformer_blocks.29.proj_out.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.3.attn.to_k.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.3.attn.to_k.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.3.attn.to_q.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.3.attn.to_q.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.3.attn.to_v.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.3.attn.to_v.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.3.norm.linear.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.3.norm.linear.lora_B.weight": [9216, 64], + "transformer.single_transformer_blocks.3.proj_mlp.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.3.proj_mlp.lora_B.weight": [12288, 64], + "transformer.single_transformer_blocks.3.proj_out.lora_A.weight": [64, 15360], + "transformer.single_transformer_blocks.3.proj_out.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.30.attn.to_k.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.30.attn.to_k.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.30.attn.to_q.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.30.attn.to_q.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.30.attn.to_v.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.30.attn.to_v.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.30.norm.linear.lora_A.weight": [64, 3072], + 
"transformer.single_transformer_blocks.30.norm.linear.lora_B.weight": [9216, 64], + "transformer.single_transformer_blocks.30.proj_mlp.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.30.proj_mlp.lora_B.weight": [12288, 64], + "transformer.single_transformer_blocks.30.proj_out.lora_A.weight": [64, 15360], + "transformer.single_transformer_blocks.30.proj_out.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.31.attn.to_k.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.31.attn.to_k.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.31.attn.to_q.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.31.attn.to_q.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.31.attn.to_v.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.31.attn.to_v.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.31.norm.linear.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.31.norm.linear.lora_B.weight": [9216, 64], + "transformer.single_transformer_blocks.31.proj_mlp.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.31.proj_mlp.lora_B.weight": [12288, 64], + "transformer.single_transformer_blocks.31.proj_out.lora_A.weight": [64, 15360], + "transformer.single_transformer_blocks.31.proj_out.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.32.attn.to_k.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.32.attn.to_k.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.32.attn.to_q.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.32.attn.to_q.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.32.attn.to_v.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.32.attn.to_v.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.32.norm.linear.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.32.norm.linear.lora_B.weight": [9216, 64], + "transformer.single_transformer_blocks.32.proj_mlp.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.32.proj_mlp.lora_B.weight": [12288, 64], + "transformer.single_transformer_blocks.32.proj_out.lora_A.weight": [64, 15360], + "transformer.single_transformer_blocks.32.proj_out.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.33.attn.to_k.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.33.attn.to_k.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.33.attn.to_q.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.33.attn.to_q.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.33.attn.to_v.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.33.attn.to_v.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.33.norm.linear.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.33.norm.linear.lora_B.weight": [9216, 64], + "transformer.single_transformer_blocks.33.proj_mlp.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.33.proj_mlp.lora_B.weight": [12288, 64], + "transformer.single_transformer_blocks.33.proj_out.lora_A.weight": [64, 15360], + "transformer.single_transformer_blocks.33.proj_out.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.34.attn.to_k.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.34.attn.to_k.lora_B.weight": [3072, 64], + 
"transformer.single_transformer_blocks.34.attn.to_q.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.34.attn.to_q.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.34.attn.to_v.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.34.attn.to_v.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.34.norm.linear.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.34.norm.linear.lora_B.weight": [9216, 64], + "transformer.single_transformer_blocks.34.proj_mlp.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.34.proj_mlp.lora_B.weight": [12288, 64], + "transformer.single_transformer_blocks.34.proj_out.lora_A.weight": [64, 15360], + "transformer.single_transformer_blocks.34.proj_out.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.35.attn.to_k.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.35.attn.to_k.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.35.attn.to_q.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.35.attn.to_q.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.35.attn.to_v.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.35.attn.to_v.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.35.norm.linear.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.35.norm.linear.lora_B.weight": [9216, 64], + "transformer.single_transformer_blocks.35.proj_mlp.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.35.proj_mlp.lora_B.weight": [12288, 64], + "transformer.single_transformer_blocks.35.proj_out.lora_A.weight": [64, 15360], + "transformer.single_transformer_blocks.35.proj_out.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.36.attn.to_k.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.36.attn.to_k.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.36.attn.to_q.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.36.attn.to_q.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.36.attn.to_v.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.36.attn.to_v.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.36.norm.linear.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.36.norm.linear.lora_B.weight": [9216, 64], + "transformer.single_transformer_blocks.36.proj_mlp.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.36.proj_mlp.lora_B.weight": [12288, 64], + "transformer.single_transformer_blocks.36.proj_out.lora_A.weight": [64, 15360], + "transformer.single_transformer_blocks.36.proj_out.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.37.attn.to_k.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.37.attn.to_k.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.37.attn.to_q.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.37.attn.to_q.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.37.attn.to_v.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.37.attn.to_v.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.37.norm.linear.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.37.norm.linear.lora_B.weight": [9216, 64], + "transformer.single_transformer_blocks.37.proj_mlp.lora_A.weight": [64, 3072], + 
"transformer.single_transformer_blocks.37.proj_mlp.lora_B.weight": [12288, 64], + "transformer.single_transformer_blocks.37.proj_out.lora_A.weight": [64, 15360], + "transformer.single_transformer_blocks.37.proj_out.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.4.attn.to_k.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.4.attn.to_k.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.4.attn.to_q.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.4.attn.to_q.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.4.attn.to_v.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.4.attn.to_v.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.4.norm.linear.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.4.norm.linear.lora_B.weight": [9216, 64], + "transformer.single_transformer_blocks.4.proj_mlp.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.4.proj_mlp.lora_B.weight": [12288, 64], + "transformer.single_transformer_blocks.4.proj_out.lora_A.weight": [64, 15360], + "transformer.single_transformer_blocks.4.proj_out.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.5.attn.to_k.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.5.attn.to_k.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.5.attn.to_q.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.5.attn.to_q.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.5.attn.to_v.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.5.attn.to_v.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.5.norm.linear.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.5.norm.linear.lora_B.weight": [9216, 64], + "transformer.single_transformer_blocks.5.proj_mlp.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.5.proj_mlp.lora_B.weight": [12288, 64], + "transformer.single_transformer_blocks.5.proj_out.lora_A.weight": [64, 15360], + "transformer.single_transformer_blocks.5.proj_out.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.6.attn.to_k.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.6.attn.to_k.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.6.attn.to_q.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.6.attn.to_q.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.6.attn.to_v.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.6.attn.to_v.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.6.norm.linear.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.6.norm.linear.lora_B.weight": [9216, 64], + "transformer.single_transformer_blocks.6.proj_mlp.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.6.proj_mlp.lora_B.weight": [12288, 64], + "transformer.single_transformer_blocks.6.proj_out.lora_A.weight": [64, 15360], + "transformer.single_transformer_blocks.6.proj_out.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.7.attn.to_k.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.7.attn.to_k.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.7.attn.to_q.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.7.attn.to_q.lora_B.weight": [3072, 64], + 
"transformer.single_transformer_blocks.7.attn.to_v.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.7.attn.to_v.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.7.norm.linear.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.7.norm.linear.lora_B.weight": [9216, 64], + "transformer.single_transformer_blocks.7.proj_mlp.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.7.proj_mlp.lora_B.weight": [12288, 64], + "transformer.single_transformer_blocks.7.proj_out.lora_A.weight": [64, 15360], + "transformer.single_transformer_blocks.7.proj_out.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.8.attn.to_k.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.8.attn.to_k.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.8.attn.to_q.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.8.attn.to_q.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.8.attn.to_v.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.8.attn.to_v.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.8.norm.linear.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.8.norm.linear.lora_B.weight": [9216, 64], + "transformer.single_transformer_blocks.8.proj_mlp.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.8.proj_mlp.lora_B.weight": [12288, 64], + "transformer.single_transformer_blocks.8.proj_out.lora_A.weight": [64, 15360], + "transformer.single_transformer_blocks.8.proj_out.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.9.attn.to_k.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.9.attn.to_k.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.9.attn.to_q.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.9.attn.to_q.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.9.attn.to_v.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.9.attn.to_v.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.9.norm.linear.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.9.norm.linear.lora_B.weight": [9216, 64], + "transformer.single_transformer_blocks.9.proj_mlp.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.9.proj_mlp.lora_B.weight": [12288, 64], + "transformer.single_transformer_blocks.9.proj_out.lora_A.weight": [64, 15360], + "transformer.single_transformer_blocks.9.proj_out.lora_B.weight": [3072, 64], + "transformer.time_text_embed.guidance_embedder.linear_1.lora_A.weight": [64, 256], + "transformer.time_text_embed.guidance_embedder.linear_1.lora_B.weight": [3072, 64], + "transformer.time_text_embed.guidance_embedder.linear_2.lora_A.weight": [64, 3072], + "transformer.time_text_embed.guidance_embedder.linear_2.lora_B.weight": [3072, 64], + "transformer.time_text_embed.text_embedder.linear_1.lora_A.weight": [64, 768], + "transformer.time_text_embed.text_embedder.linear_1.lora_B.weight": [3072, 64], + "transformer.time_text_embed.text_embedder.linear_2.lora_A.weight": [64, 3072], + "transformer.time_text_embed.text_embedder.linear_2.lora_B.weight": [3072, 64], + "transformer.time_text_embed.timestep_embedder.linear_1.lora_A.weight": [64, 256], + "transformer.time_text_embed.timestep_embedder.linear_1.lora_B.weight": [3072, 64], + "transformer.time_text_embed.timestep_embedder.linear_2.lora_A.weight": [64, 3072], + 
"transformer.time_text_embed.timestep_embedder.linear_2.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.0.attn.add_k_proj.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.0.attn.add_k_proj.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.0.attn.add_q_proj.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.0.attn.add_q_proj.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.0.attn.add_v_proj.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.0.attn.add_v_proj.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.0.attn.to_add_out.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.0.attn.to_add_out.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.0.attn.to_k.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.0.attn.to_k.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.0.attn.to_out.0.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.0.attn.to_out.0.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.0.attn.to_q.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.0.attn.to_q.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.0.attn.to_v.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.0.attn.to_v.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.0.ff.net.0.proj.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.0.ff.net.0.proj.lora_B.weight": [12288, 64], + "transformer.transformer_blocks.0.ff.net.2.lora_A.weight": [64, 12288], + "transformer.transformer_blocks.0.ff.net.2.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.0.ff_context.net.0.proj.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.0.ff_context.net.0.proj.lora_B.weight": [12288, 64], + "transformer.transformer_blocks.0.ff_context.net.2.lora_A.weight": [64, 12288], + "transformer.transformer_blocks.0.ff_context.net.2.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.0.norm1.linear.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.0.norm1.linear.lora_B.weight": [18432, 64], + "transformer.transformer_blocks.0.norm1_context.linear.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.0.norm1_context.linear.lora_B.weight": [18432, 64], + "transformer.transformer_blocks.1.attn.add_k_proj.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.1.attn.add_k_proj.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.1.attn.add_q_proj.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.1.attn.add_q_proj.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.1.attn.add_v_proj.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.1.attn.add_v_proj.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.1.attn.to_add_out.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.1.attn.to_add_out.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.1.attn.to_k.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.1.attn.to_k.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.1.attn.to_out.0.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.1.attn.to_out.0.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.1.attn.to_q.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.1.attn.to_q.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.1.attn.to_v.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.1.attn.to_v.lora_B.weight": [3072, 64], + 
"transformer.transformer_blocks.1.ff.net.0.proj.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.1.ff.net.0.proj.lora_B.weight": [12288, 64], + "transformer.transformer_blocks.1.ff.net.2.lora_A.weight": [64, 12288], + "transformer.transformer_blocks.1.ff.net.2.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.1.ff_context.net.0.proj.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.1.ff_context.net.0.proj.lora_B.weight": [12288, 64], + "transformer.transformer_blocks.1.ff_context.net.2.lora_A.weight": [64, 12288], + "transformer.transformer_blocks.1.ff_context.net.2.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.1.norm1.linear.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.1.norm1.linear.lora_B.weight": [18432, 64], + "transformer.transformer_blocks.1.norm1_context.linear.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.1.norm1_context.linear.lora_B.weight": [18432, 64], + "transformer.transformer_blocks.10.attn.add_k_proj.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.10.attn.add_k_proj.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.10.attn.add_q_proj.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.10.attn.add_q_proj.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.10.attn.add_v_proj.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.10.attn.add_v_proj.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.10.attn.to_add_out.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.10.attn.to_add_out.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.10.attn.to_k.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.10.attn.to_k.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.10.attn.to_out.0.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.10.attn.to_out.0.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.10.attn.to_q.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.10.attn.to_q.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.10.attn.to_v.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.10.attn.to_v.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.10.ff.net.0.proj.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.10.ff.net.0.proj.lora_B.weight": [12288, 64], + "transformer.transformer_blocks.10.ff.net.2.lora_A.weight": [64, 12288], + "transformer.transformer_blocks.10.ff.net.2.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.10.ff_context.net.0.proj.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.10.ff_context.net.0.proj.lora_B.weight": [12288, 64], + "transformer.transformer_blocks.10.ff_context.net.2.lora_A.weight": [64, 12288], + "transformer.transformer_blocks.10.ff_context.net.2.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.10.norm1.linear.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.10.norm1.linear.lora_B.weight": [18432, 64], + "transformer.transformer_blocks.10.norm1_context.linear.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.10.norm1_context.linear.lora_B.weight": [18432, 64], + "transformer.transformer_blocks.11.attn.add_k_proj.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.11.attn.add_k_proj.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.11.attn.add_q_proj.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.11.attn.add_q_proj.lora_B.weight": [3072, 64], + 
"transformer.transformer_blocks.11.attn.add_v_proj.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.11.attn.add_v_proj.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.11.attn.to_add_out.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.11.attn.to_add_out.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.11.attn.to_k.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.11.attn.to_k.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.11.attn.to_out.0.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.11.attn.to_out.0.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.11.attn.to_q.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.11.attn.to_q.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.11.attn.to_v.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.11.attn.to_v.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.11.ff.net.0.proj.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.11.ff.net.0.proj.lora_B.weight": [12288, 64], + "transformer.transformer_blocks.11.ff.net.2.lora_A.weight": [64, 12288], + "transformer.transformer_blocks.11.ff.net.2.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.11.ff_context.net.0.proj.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.11.ff_context.net.0.proj.lora_B.weight": [12288, 64], + "transformer.transformer_blocks.11.ff_context.net.2.lora_A.weight": [64, 12288], + "transformer.transformer_blocks.11.ff_context.net.2.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.11.norm1.linear.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.11.norm1.linear.lora_B.weight": [18432, 64], + "transformer.transformer_blocks.11.norm1_context.linear.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.11.norm1_context.linear.lora_B.weight": [18432, 64], + "transformer.transformer_blocks.12.attn.add_k_proj.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.12.attn.add_k_proj.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.12.attn.add_q_proj.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.12.attn.add_q_proj.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.12.attn.add_v_proj.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.12.attn.add_v_proj.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.12.attn.to_add_out.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.12.attn.to_add_out.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.12.attn.to_k.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.12.attn.to_k.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.12.attn.to_out.0.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.12.attn.to_out.0.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.12.attn.to_q.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.12.attn.to_q.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.12.attn.to_v.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.12.attn.to_v.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.12.ff.net.0.proj.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.12.ff.net.0.proj.lora_B.weight": [12288, 64], + "transformer.transformer_blocks.12.ff.net.2.lora_A.weight": [64, 12288], + "transformer.transformer_blocks.12.ff.net.2.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.12.ff_context.net.0.proj.lora_A.weight": [64, 
3072], + "transformer.transformer_blocks.12.ff_context.net.0.proj.lora_B.weight": [12288, 64], + "transformer.transformer_blocks.12.ff_context.net.2.lora_A.weight": [64, 12288], + "transformer.transformer_blocks.12.ff_context.net.2.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.12.norm1.linear.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.12.norm1.linear.lora_B.weight": [18432, 64], + "transformer.transformer_blocks.12.norm1_context.linear.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.12.norm1_context.linear.lora_B.weight": [18432, 64], + "transformer.transformer_blocks.13.attn.add_k_proj.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.13.attn.add_k_proj.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.13.attn.add_q_proj.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.13.attn.add_q_proj.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.13.attn.add_v_proj.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.13.attn.add_v_proj.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.13.attn.to_add_out.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.13.attn.to_add_out.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.13.attn.to_k.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.13.attn.to_k.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.13.attn.to_out.0.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.13.attn.to_out.0.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.13.attn.to_q.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.13.attn.to_q.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.13.attn.to_v.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.13.attn.to_v.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.13.ff.net.0.proj.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.13.ff.net.0.proj.lora_B.weight": [12288, 64], + "transformer.transformer_blocks.13.ff.net.2.lora_A.weight": [64, 12288], + "transformer.transformer_blocks.13.ff.net.2.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.13.ff_context.net.0.proj.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.13.ff_context.net.0.proj.lora_B.weight": [12288, 64], + "transformer.transformer_blocks.13.ff_context.net.2.lora_A.weight": [64, 12288], + "transformer.transformer_blocks.13.ff_context.net.2.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.13.norm1.linear.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.13.norm1.linear.lora_B.weight": [18432, 64], + "transformer.transformer_blocks.13.norm1_context.linear.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.13.norm1_context.linear.lora_B.weight": [18432, 64], + "transformer.transformer_blocks.14.attn.add_k_proj.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.14.attn.add_k_proj.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.14.attn.add_q_proj.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.14.attn.add_q_proj.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.14.attn.add_v_proj.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.14.attn.add_v_proj.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.14.attn.to_add_out.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.14.attn.to_add_out.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.14.attn.to_k.lora_A.weight": [64, 3072], + 
"transformer.transformer_blocks.14.attn.to_k.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.14.attn.to_out.0.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.14.attn.to_out.0.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.14.attn.to_q.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.14.attn.to_q.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.14.attn.to_v.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.14.attn.to_v.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.14.ff.net.0.proj.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.14.ff.net.0.proj.lora_B.weight": [12288, 64], + "transformer.transformer_blocks.14.ff.net.2.lora_A.weight": [64, 12288], + "transformer.transformer_blocks.14.ff.net.2.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.14.ff_context.net.0.proj.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.14.ff_context.net.0.proj.lora_B.weight": [12288, 64], + "transformer.transformer_blocks.14.ff_context.net.2.lora_A.weight": [64, 12288], + "transformer.transformer_blocks.14.ff_context.net.2.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.14.norm1.linear.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.14.norm1.linear.lora_B.weight": [18432, 64], + "transformer.transformer_blocks.14.norm1_context.linear.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.14.norm1_context.linear.lora_B.weight": [18432, 64], + "transformer.transformer_blocks.15.attn.add_k_proj.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.15.attn.add_k_proj.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.15.attn.add_q_proj.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.15.attn.add_q_proj.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.15.attn.add_v_proj.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.15.attn.add_v_proj.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.15.attn.to_add_out.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.15.attn.to_add_out.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.15.attn.to_k.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.15.attn.to_k.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.15.attn.to_out.0.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.15.attn.to_out.0.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.15.attn.to_q.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.15.attn.to_q.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.15.attn.to_v.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.15.attn.to_v.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.15.ff.net.0.proj.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.15.ff.net.0.proj.lora_B.weight": [12288, 64], + "transformer.transformer_blocks.15.ff.net.2.lora_A.weight": [64, 12288], + "transformer.transformer_blocks.15.ff.net.2.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.15.ff_context.net.0.proj.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.15.ff_context.net.0.proj.lora_B.weight": [12288, 64], + "transformer.transformer_blocks.15.ff_context.net.2.lora_A.weight": [64, 12288], + "transformer.transformer_blocks.15.ff_context.net.2.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.15.norm1.linear.lora_A.weight": [64, 3072], + 
"transformer.transformer_blocks.15.norm1.linear.lora_B.weight": [18432, 64], + "transformer.transformer_blocks.15.norm1_context.linear.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.15.norm1_context.linear.lora_B.weight": [18432, 64], + "transformer.transformer_blocks.16.attn.add_k_proj.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.16.attn.add_k_proj.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.16.attn.add_q_proj.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.16.attn.add_q_proj.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.16.attn.add_v_proj.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.16.attn.add_v_proj.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.16.attn.to_add_out.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.16.attn.to_add_out.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.16.attn.to_k.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.16.attn.to_k.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.16.attn.to_out.0.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.16.attn.to_out.0.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.16.attn.to_q.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.16.attn.to_q.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.16.attn.to_v.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.16.attn.to_v.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.16.ff.net.0.proj.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.16.ff.net.0.proj.lora_B.weight": [12288, 64], + "transformer.transformer_blocks.16.ff.net.2.lora_A.weight": [64, 12288], + "transformer.transformer_blocks.16.ff.net.2.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.16.ff_context.net.0.proj.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.16.ff_context.net.0.proj.lora_B.weight": [12288, 64], + "transformer.transformer_blocks.16.ff_context.net.2.lora_A.weight": [64, 12288], + "transformer.transformer_blocks.16.ff_context.net.2.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.16.norm1.linear.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.16.norm1.linear.lora_B.weight": [18432, 64], + "transformer.transformer_blocks.16.norm1_context.linear.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.16.norm1_context.linear.lora_B.weight": [18432, 64], + "transformer.transformer_blocks.17.attn.add_k_proj.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.17.attn.add_k_proj.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.17.attn.add_q_proj.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.17.attn.add_q_proj.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.17.attn.add_v_proj.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.17.attn.add_v_proj.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.17.attn.to_add_out.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.17.attn.to_add_out.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.17.attn.to_k.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.17.attn.to_k.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.17.attn.to_out.0.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.17.attn.to_out.0.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.17.attn.to_q.lora_A.weight": [64, 3072], + 
"transformer.transformer_blocks.17.attn.to_q.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.17.attn.to_v.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.17.attn.to_v.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.17.ff.net.0.proj.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.17.ff.net.0.proj.lora_B.weight": [12288, 64], + "transformer.transformer_blocks.17.ff.net.2.lora_A.weight": [64, 12288], + "transformer.transformer_blocks.17.ff.net.2.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.17.ff_context.net.0.proj.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.17.ff_context.net.0.proj.lora_B.weight": [12288, 64], + "transformer.transformer_blocks.17.ff_context.net.2.lora_A.weight": [64, 12288], + "transformer.transformer_blocks.17.ff_context.net.2.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.17.norm1.linear.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.17.norm1.linear.lora_B.weight": [18432, 64], + "transformer.transformer_blocks.17.norm1_context.linear.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.17.norm1_context.linear.lora_B.weight": [18432, 64], + "transformer.transformer_blocks.18.attn.add_k_proj.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.18.attn.add_k_proj.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.18.attn.add_q_proj.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.18.attn.add_q_proj.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.18.attn.add_v_proj.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.18.attn.add_v_proj.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.18.attn.to_add_out.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.18.attn.to_add_out.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.18.attn.to_k.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.18.attn.to_k.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.18.attn.to_out.0.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.18.attn.to_out.0.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.18.attn.to_q.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.18.attn.to_q.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.18.attn.to_v.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.18.attn.to_v.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.18.ff.net.0.proj.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.18.ff.net.0.proj.lora_B.weight": [12288, 64], + "transformer.transformer_blocks.18.ff.net.2.lora_A.weight": [64, 12288], + "transformer.transformer_blocks.18.ff.net.2.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.18.ff_context.net.0.proj.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.18.ff_context.net.0.proj.lora_B.weight": [12288, 64], + "transformer.transformer_blocks.18.ff_context.net.2.lora_A.weight": [64, 12288], + "transformer.transformer_blocks.18.ff_context.net.2.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.18.norm1.linear.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.18.norm1.linear.lora_B.weight": [18432, 64], + "transformer.transformer_blocks.18.norm1_context.linear.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.18.norm1_context.linear.lora_B.weight": [18432, 64], + "transformer.transformer_blocks.2.attn.add_k_proj.lora_A.weight": [64, 3072], + 
"transformer.transformer_blocks.2.attn.add_k_proj.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.2.attn.add_q_proj.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.2.attn.add_q_proj.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.2.attn.add_v_proj.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.2.attn.add_v_proj.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.2.attn.to_add_out.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.2.attn.to_add_out.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.2.attn.to_k.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.2.attn.to_k.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.2.attn.to_out.0.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.2.attn.to_out.0.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.2.attn.to_q.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.2.attn.to_q.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.2.attn.to_v.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.2.attn.to_v.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.2.ff.net.0.proj.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.2.ff.net.0.proj.lora_B.weight": [12288, 64], + "transformer.transformer_blocks.2.ff.net.2.lora_A.weight": [64, 12288], + "transformer.transformer_blocks.2.ff.net.2.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.2.ff_context.net.0.proj.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.2.ff_context.net.0.proj.lora_B.weight": [12288, 64], + "transformer.transformer_blocks.2.ff_context.net.2.lora_A.weight": [64, 12288], + "transformer.transformer_blocks.2.ff_context.net.2.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.2.norm1.linear.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.2.norm1.linear.lora_B.weight": [18432, 64], + "transformer.transformer_blocks.2.norm1_context.linear.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.2.norm1_context.linear.lora_B.weight": [18432, 64], + "transformer.transformer_blocks.3.attn.add_k_proj.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.3.attn.add_k_proj.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.3.attn.add_q_proj.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.3.attn.add_q_proj.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.3.attn.add_v_proj.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.3.attn.add_v_proj.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.3.attn.to_add_out.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.3.attn.to_add_out.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.3.attn.to_k.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.3.attn.to_k.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.3.attn.to_out.0.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.3.attn.to_out.0.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.3.attn.to_q.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.3.attn.to_q.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.3.attn.to_v.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.3.attn.to_v.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.3.ff.net.0.proj.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.3.ff.net.0.proj.lora_B.weight": [12288, 64], + 
"transformer.transformer_blocks.3.ff.net.2.lora_A.weight": [64, 12288], + "transformer.transformer_blocks.3.ff.net.2.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.3.ff_context.net.0.proj.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.3.ff_context.net.0.proj.lora_B.weight": [12288, 64], + "transformer.transformer_blocks.3.ff_context.net.2.lora_A.weight": [64, 12288], + "transformer.transformer_blocks.3.ff_context.net.2.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.3.norm1.linear.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.3.norm1.linear.lora_B.weight": [18432, 64], + "transformer.transformer_blocks.3.norm1_context.linear.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.3.norm1_context.linear.lora_B.weight": [18432, 64], + "transformer.transformer_blocks.4.attn.add_k_proj.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.4.attn.add_k_proj.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.4.attn.add_q_proj.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.4.attn.add_q_proj.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.4.attn.add_v_proj.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.4.attn.add_v_proj.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.4.attn.to_add_out.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.4.attn.to_add_out.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.4.attn.to_k.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.4.attn.to_k.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.4.attn.to_out.0.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.4.attn.to_out.0.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.4.attn.to_q.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.4.attn.to_q.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.4.attn.to_v.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.4.attn.to_v.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.4.ff.net.0.proj.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.4.ff.net.0.proj.lora_B.weight": [12288, 64], + "transformer.transformer_blocks.4.ff.net.2.lora_A.weight": [64, 12288], + "transformer.transformer_blocks.4.ff.net.2.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.4.ff_context.net.0.proj.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.4.ff_context.net.0.proj.lora_B.weight": [12288, 64], + "transformer.transformer_blocks.4.ff_context.net.2.lora_A.weight": [64, 12288], + "transformer.transformer_blocks.4.ff_context.net.2.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.4.norm1.linear.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.4.norm1.linear.lora_B.weight": [18432, 64], + "transformer.transformer_blocks.4.norm1_context.linear.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.4.norm1_context.linear.lora_B.weight": [18432, 64], + "transformer.transformer_blocks.5.attn.add_k_proj.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.5.attn.add_k_proj.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.5.attn.add_q_proj.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.5.attn.add_q_proj.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.5.attn.add_v_proj.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.5.attn.add_v_proj.lora_B.weight": [3072, 64], + 
"transformer.transformer_blocks.5.attn.to_add_out.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.5.attn.to_add_out.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.5.attn.to_k.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.5.attn.to_k.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.5.attn.to_out.0.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.5.attn.to_out.0.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.5.attn.to_q.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.5.attn.to_q.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.5.attn.to_v.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.5.attn.to_v.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.5.ff.net.0.proj.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.5.ff.net.0.proj.lora_B.weight": [12288, 64], + "transformer.transformer_blocks.5.ff.net.2.lora_A.weight": [64, 12288], + "transformer.transformer_blocks.5.ff.net.2.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.5.ff_context.net.0.proj.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.5.ff_context.net.0.proj.lora_B.weight": [12288, 64], + "transformer.transformer_blocks.5.ff_context.net.2.lora_A.weight": [64, 12288], + "transformer.transformer_blocks.5.ff_context.net.2.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.5.norm1.linear.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.5.norm1.linear.lora_B.weight": [18432, 64], + "transformer.transformer_blocks.5.norm1_context.linear.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.5.norm1_context.linear.lora_B.weight": [18432, 64], + "transformer.transformer_blocks.6.attn.add_k_proj.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.6.attn.add_k_proj.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.6.attn.add_q_proj.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.6.attn.add_q_proj.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.6.attn.add_v_proj.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.6.attn.add_v_proj.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.6.attn.to_add_out.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.6.attn.to_add_out.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.6.attn.to_k.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.6.attn.to_k.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.6.attn.to_out.0.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.6.attn.to_out.0.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.6.attn.to_q.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.6.attn.to_q.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.6.attn.to_v.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.6.attn.to_v.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.6.ff.net.0.proj.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.6.ff.net.0.proj.lora_B.weight": [12288, 64], + "transformer.transformer_blocks.6.ff.net.2.lora_A.weight": [64, 12288], + "transformer.transformer_blocks.6.ff.net.2.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.6.ff_context.net.0.proj.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.6.ff_context.net.0.proj.lora_B.weight": [12288, 64], + "transformer.transformer_blocks.6.ff_context.net.2.lora_A.weight": [64, 12288], + 
"transformer.transformer_blocks.6.ff_context.net.2.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.6.norm1.linear.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.6.norm1.linear.lora_B.weight": [18432, 64], + "transformer.transformer_blocks.6.norm1_context.linear.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.6.norm1_context.linear.lora_B.weight": [18432, 64], + "transformer.transformer_blocks.7.attn.add_k_proj.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.7.attn.add_k_proj.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.7.attn.add_q_proj.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.7.attn.add_q_proj.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.7.attn.add_v_proj.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.7.attn.add_v_proj.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.7.attn.to_add_out.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.7.attn.to_add_out.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.7.attn.to_k.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.7.attn.to_k.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.7.attn.to_out.0.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.7.attn.to_out.0.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.7.attn.to_q.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.7.attn.to_q.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.7.attn.to_v.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.7.attn.to_v.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.7.ff.net.0.proj.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.7.ff.net.0.proj.lora_B.weight": [12288, 64], + "transformer.transformer_blocks.7.ff.net.2.lora_A.weight": [64, 12288], + "transformer.transformer_blocks.7.ff.net.2.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.7.ff_context.net.0.proj.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.7.ff_context.net.0.proj.lora_B.weight": [12288, 64], + "transformer.transformer_blocks.7.ff_context.net.2.lora_A.weight": [64, 12288], + "transformer.transformer_blocks.7.ff_context.net.2.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.7.norm1.linear.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.7.norm1.linear.lora_B.weight": [18432, 64], + "transformer.transformer_blocks.7.norm1_context.linear.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.7.norm1_context.linear.lora_B.weight": [18432, 64], + "transformer.transformer_blocks.8.attn.add_k_proj.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.8.attn.add_k_proj.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.8.attn.add_q_proj.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.8.attn.add_q_proj.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.8.attn.add_v_proj.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.8.attn.add_v_proj.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.8.attn.to_add_out.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.8.attn.to_add_out.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.8.attn.to_k.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.8.attn.to_k.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.8.attn.to_out.0.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.8.attn.to_out.0.lora_B.weight": [3072, 64], 
+ "transformer.transformer_blocks.8.attn.to_q.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.8.attn.to_q.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.8.attn.to_v.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.8.attn.to_v.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.8.ff.net.0.proj.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.8.ff.net.0.proj.lora_B.weight": [12288, 64], + "transformer.transformer_blocks.8.ff.net.2.lora_A.weight": [64, 12288], + "transformer.transformer_blocks.8.ff.net.2.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.8.ff_context.net.0.proj.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.8.ff_context.net.0.proj.lora_B.weight": [12288, 64], + "transformer.transformer_blocks.8.ff_context.net.2.lora_A.weight": [64, 12288], + "transformer.transformer_blocks.8.ff_context.net.2.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.8.norm1.linear.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.8.norm1.linear.lora_B.weight": [18432, 64], + "transformer.transformer_blocks.8.norm1_context.linear.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.8.norm1_context.linear.lora_B.weight": [18432, 64], + "transformer.transformer_blocks.9.attn.add_k_proj.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.9.attn.add_k_proj.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.9.attn.add_q_proj.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.9.attn.add_q_proj.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.9.attn.add_v_proj.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.9.attn.add_v_proj.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.9.attn.to_add_out.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.9.attn.to_add_out.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.9.attn.to_k.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.9.attn.to_k.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.9.attn.to_out.0.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.9.attn.to_out.0.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.9.attn.to_q.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.9.attn.to_q.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.9.attn.to_v.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.9.attn.to_v.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.9.ff.net.0.proj.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.9.ff.net.0.proj.lora_B.weight": [12288, 64], + "transformer.transformer_blocks.9.ff.net.2.lora_A.weight": [64, 12288], + "transformer.transformer_blocks.9.ff.net.2.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.9.ff_context.net.0.proj.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.9.ff_context.net.0.proj.lora_B.weight": [12288, 64], + "transformer.transformer_blocks.9.ff_context.net.2.lora_A.weight": [64, 12288], + "transformer.transformer_blocks.9.ff_context.net.2.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.9.norm1.linear.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.9.norm1.linear.lora_B.weight": [18432, 64], + "transformer.transformer_blocks.9.norm1_context.linear.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.9.norm1_context.linear.lora_B.weight": [18432, 64], + "transformer.x_embedder.lora_A.weight": [64, 64], + 
"transformer.x_embedder.lora_B.weight": [3072, 64] } \ No newline at end of file diff --git a/tests/backend/patches/lora_conversions/test_flux_diffusers_lora_conversion_utils.py b/tests/backend/patches/lora_conversions/test_flux_diffusers_lora_conversion_utils.py index 2bdb883faff..3558eb01eee 100644 --- a/tests/backend/patches/lora_conversions/test_flux_diffusers_lora_conversion_utils.py +++ b/tests/backend/patches/lora_conversions/test_flux_diffusers_lora_conversion_utils.py @@ -15,13 +15,16 @@ from tests.backend.patches.lora_conversions.lora_state_dicts.flux_lora_diffusers_no_proj_mlp_format import ( state_dict_keys as flux_diffusers_no_proj_mlp_state_dict_keys, ) +from tests.backend.patches.lora_conversions.lora_state_dicts.flux_lora_diffusers_with_norm_out_format import ( + state_dict_keys as flux_diffusers_with_norm_out_state_dict_keys, +) from tests.backend.patches.lora_conversions.lora_state_dicts.flux_lora_kohya_format import ( state_dict_keys as flux_kohya_state_dict_keys, ) from tests.backend.patches.lora_conversions.lora_state_dicts.utils import keys_to_mock_state_dict -@pytest.mark.parametrize("sd_keys", [flux_diffusers_state_dict_keys, flux_diffusers_no_proj_mlp_state_dict_keys]) +@pytest.mark.parametrize("sd_keys", [flux_diffusers_state_dict_keys, flux_diffusers_no_proj_mlp_state_dict_keys, flux_diffusers_with_norm_out_state_dict_keys]) def test_is_state_dict_likely_in_flux_diffusers_format_true(sd_keys: dict[str, list[int]]): """Test that is_state_dict_likely_in_flux_diffusers_format() can identify a state dict in the Diffusers FLUX LoRA format.""" # Construct a state dict that is in the Diffusers FLUX LoRA format. @@ -41,7 +44,7 @@ def test_is_state_dict_likely_in_flux_diffusers_format_false(sd_keys: dict[str, assert not is_state_dict_likely_in_flux_diffusers_format(state_dict) -@pytest.mark.parametrize("sd_keys", [flux_diffusers_state_dict_keys, flux_diffusers_no_proj_mlp_state_dict_keys]) +@pytest.mark.parametrize("sd_keys", [flux_diffusers_state_dict_keys, flux_diffusers_no_proj_mlp_state_dict_keys, flux_diffusers_with_norm_out_state_dict_keys]) def test_lora_model_from_flux_diffusers_state_dict(sd_keys: dict[str, list[int]]): """Test that lora_model_from_flux_diffusers_state_dict() can load a state dict in the Diffusers FLUX LoRA format.""" # Construct a state dict that is in the Diffusers FLUX LoRA format. 
From 4f1b6ce6fa2f747c892234d1a99f428838e04159 Mon Sep 17 00:00:00 2001 From: simpletrontdip Date: Fri, 7 Mar 2025 19:39:44 +0700 Subject: [PATCH 05/14] feat: add new layer type for diffusers-ada-ln --- .../layers/diffusers_ada_ln_lora_layer.py | 16 ++++++++++++++++ invokeai/backend/patches/layers/utils.py | 8 ++++++++ .../flux_diffusers_lora_conversion_utils.py | 13 +++---------- 3 files changed, 27 insertions(+), 10 deletions(-) create mode 100644 invokeai/backend/patches/layers/diffusers_ada_ln_lora_layer.py diff --git a/invokeai/backend/patches/layers/diffusers_ada_ln_lora_layer.py b/invokeai/backend/patches/layers/diffusers_ada_ln_lora_layer.py new file mode 100644 index 00000000000..50ca1f76914 --- /dev/null +++ b/invokeai/backend/patches/layers/diffusers_ada_ln_lora_layer.py @@ -0,0 +1,16 @@ +import torch + +from invokeai.backend.patches.layers.lora_layer import LoRALayer + +class DiffusersAdaLN_LoRALayer(LoRALayer): + '''LoRA layer converted from Diffusers AdaLN, weight is shift-scale swapped''' + + def get_weight(self, orig_weight: torch.Tensor) -> torch.Tensor: + # In SD3 and Flux implementation of AdaLayerNormContinuous, it split linear projection output into shift, scale; + # while in diffusers it split into scale, shift. + # So we swap the linear projection weights in order to be able to use Flux implementation + + weight = super().get_weight(orig_weight) + scale, shift = weight.chunk(2, dim=0) + + return torch.cat([shift, scale], dim=0) \ No newline at end of file diff --git a/invokeai/backend/patches/layers/utils.py b/invokeai/backend/patches/layers/utils.py index 8141a56644a..86acfe992bb 100644 --- a/invokeai/backend/patches/layers/utils.py +++ b/invokeai/backend/patches/layers/utils.py @@ -10,6 +10,7 @@ from invokeai.backend.patches.layers.lokr_layer import LoKRLayer from invokeai.backend.patches.layers.lora_layer import LoRALayer from invokeai.backend.patches.layers.norm_layer import NormLayer +from invokeai.backend.patches.layers.diffusers_ada_ln_lora_layer import DiffusersAdaLN_LoRALayer def any_lora_layer_from_state_dict(state_dict: Dict[str, torch.Tensor]) -> BaseLayerPatch: @@ -33,3 +34,10 @@ def any_lora_layer_from_state_dict(state_dict: Dict[str, torch.Tensor]) -> BaseL return NormLayer.from_state_dict_values(state_dict) else: raise ValueError(f"Unsupported lora format: {state_dict.keys()}") + + +def diffusers_adaLN_lora_layer_from_state_dict(state_dict: Dict[str, torch.Tensor]) -> DiffusersAdaLN_LoRALayer: + if not "lora_up.weight" in state_dict: + raise ValueError(f"Unsupported lora format: {state_dict.keys()}") + + return DiffusersAdaLN_LoRALayer.from_state_dict_values(state_dict) \ No newline at end of file diff --git a/invokeai/backend/patches/lora_conversions/flux_diffusers_lora_conversion_utils.py b/invokeai/backend/patches/lora_conversions/flux_diffusers_lora_conversion_utils.py index 1417489e8a4..013bd4ba542 100644 --- a/invokeai/backend/patches/lora_conversions/flux_diffusers_lora_conversion_utils.py +++ b/invokeai/backend/patches/lora_conversions/flux_diffusers_lora_conversion_utils.py @@ -4,7 +4,7 @@ from invokeai.backend.patches.layers.base_layer_patch import BaseLayerPatch from invokeai.backend.patches.layers.merged_layer_patch import MergedLayerPatch, Range -from invokeai.backend.patches.layers.utils import any_lora_layer_from_state_dict +from invokeai.backend.patches.layers.utils import any_lora_layer_from_state_dict, diffusers_adaLN_lora_layer_from_state_dict from invokeai.backend.patches.lora_conversions.flux_lora_constants import 
FLUX_LORA_TRANSFORMER_PREFIX from invokeai.backend.patches.model_patch_raw import ModelPatchRaw @@ -86,15 +86,8 @@ def add_adaLN_lora_layer_if_present(src_key: str, dst_key: str) -> None: if src_key in grouped_state_dict: src_layer_dict = grouped_state_dict.pop(src_key) values = get_lora_layer_values(src_layer_dict) - - for _key in values.keys(): - # in SD3 original implementation of AdaLayerNormContinuous, it split linear projection output into shift, scale; - # while in diffusers it split into scale, shift. Here we swap the linear projection weights in order to be able to use diffusers implementation - scale, shift = values[_key].chunk(2, dim=0) - values[_key] = torch.cat([shift, scale], dim=0) - - layers[dst_key] = any_lora_layer_from_state_dict(values) - + layers[dst_key] = diffusers_adaLN_lora_layer_from_state_dict(values) + def add_qkv_lora_layer_if_present( src_keys: list[str], src_weight_shapes: list[tuple[int, int]], From b087694dd7e0a87cc821d6bbe2d1dd036c9ec4b0 Mon Sep 17 00:00:00 2001 From: simpletrontdip Date: Sun, 9 Mar 2025 10:28:40 +0700 Subject: [PATCH 06/14] feat: add tests for DiffuserAdaLN layer logic --- .../layers/diffusers_ada_ln_lora_layer.py | 8 ++- .../layers/test_diffuser_ada_ln_lora_layer.py | 55 +++++++++++++++++++ 2 files changed, 60 insertions(+), 3 deletions(-) create mode 100644 tests/backend/patches/layers/test_diffuser_ada_ln_lora_layer.py diff --git a/invokeai/backend/patches/layers/diffusers_ada_ln_lora_layer.py b/invokeai/backend/patches/layers/diffusers_ada_ln_lora_layer.py index 50ca1f76914..a2d93531293 100644 --- a/invokeai/backend/patches/layers/diffusers_ada_ln_lora_layer.py +++ b/invokeai/backend/patches/layers/diffusers_ada_ln_lora_layer.py @@ -2,6 +2,10 @@ from invokeai.backend.patches.layers.lora_layer import LoRALayer +def swap_shift_scale(tensor: torch.Tensor) -> torch.Tensor: + scale, shift = tensor.chunk(2, dim=0) + return torch.cat([shift, scale], dim=0) + class DiffusersAdaLN_LoRALayer(LoRALayer): '''LoRA layer converted from Diffusers AdaLN, weight is shift-scale swapped''' @@ -11,6 +15,4 @@ def get_weight(self, orig_weight: torch.Tensor) -> torch.Tensor: # So we swap the linear projection weights in order to be able to use Flux implementation weight = super().get_weight(orig_weight) - scale, shift = weight.chunk(2, dim=0) - - return torch.cat([shift, scale], dim=0) \ No newline at end of file + return swap_shift_scale(weight) \ No newline at end of file diff --git a/tests/backend/patches/layers/test_diffuser_ada_ln_lora_layer.py b/tests/backend/patches/layers/test_diffuser_ada_ln_lora_layer.py new file mode 100644 index 00000000000..2c1afa7daa0 --- /dev/null +++ b/tests/backend/patches/layers/test_diffuser_ada_ln_lora_layer.py @@ -0,0 +1,55 @@ +import torch + +from invokeai.backend.patches.layers.lora_layer import LoRALayer +from invokeai.backend.patches.layers.diffusers_ada_ln_lora_layer import DiffusersAdaLN_LoRALayer, swap_shift_scale + +def test_swap_shift_scale_for_tensor(): + """Test swaping function""" + tensor = torch.Tensor([1, 2]) + expected = torch.Tensor([2, 1]) + + swapped = swap_shift_scale(tensor) + assert(torch.allclose(expected, swapped)) + + size= (3, 4) + first = torch.randn(size) + second = torch.randn(size) + + tensor = torch.concat([first, second]) + expected = torch.concat([second, first]) + + swapped = swap_shift_scale(tensor) + assert(torch.allclose(expected, swapped)) + +def test_diffusers_adaLN_lora_layer_get_weight(): + """Test getting weight from DiffusersAdaLN_LoRALayer.""" + small_in_features = 4 + 
big_in_features = 8 + out_features = 16 + rank = 4 + alpha = 16.0 + + lora = LoRALayer( + up=torch.ones(out_features, rank), + mid=None, + down=torch.ones(rank, big_in_features), + alpha=alpha, + bias=None + ) + layer = DiffusersAdaLN_LoRALayer( + up=torch.ones(out_features, rank), + mid=None, + down=torch.ones(rank, big_in_features), + alpha=alpha, + bias=None + ) + + # mock original weight, normally ignored in our loRA + orig_weight = torch.ones(small_in_features) + + diffuser_weight = layer.get_weight(orig_weight) + lora_weight = lora.get_weight(orig_weight) + + # diffusers lora weight should be flipped + assert(torch.allclose(diffuser_weight, swap_shift_scale(lora_weight))) + From 701e9dc6bef18ccdee128e8da9e878a23e0c811b Mon Sep 17 00:00:00 2001 From: simpletrontdip Date: Sun, 9 Mar 2025 10:31:34 +0700 Subject: [PATCH 07/14] feat: add adaLN for custom module test --- .../custom_modules/test_all_custom_modules.py | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/tests/backend/model_manager/load/model_cache/torch_module_autocast/custom_modules/test_all_custom_modules.py b/tests/backend/model_manager/load/model_cache/torch_module_autocast/custom_modules/test_all_custom_modules.py index c1e77c333bb..8dc240eb687 100644 --- a/tests/backend/model_manager/load/model_cache/torch_module_autocast/custom_modules/test_all_custom_modules.py +++ b/tests/backend/model_manager/load/model_cache/torch_module_autocast/custom_modules/test_all_custom_modules.py @@ -14,6 +14,7 @@ from invokeai.backend.patches.layer_patcher import LayerPatcher from invokeai.backend.patches.layers.base_layer_patch import BaseLayerPatch from invokeai.backend.patches.layers.flux_control_lora_layer import FluxControlLoRALayer +from invokeai.backend.patches.layers.diffusers_ada_ln_lora_layer import DiffusersAdaLN_LoRALayer from invokeai.backend.patches.layers.lokr_layer import LoKRLayer from invokeai.backend.patches.layers.lora_layer import LoRALayer from invokeai.backend.patches.layers.merged_layer_patch import MergedLayerPatch, Range @@ -283,6 +284,7 @@ def test_inference_autocast_from_cpu_to_device(device: str, layer_under_test: La "multiple_loras", "concatenated_lora", "flux_control_lora", + "diffusers_adaLN_lora", "single_lokr", ] ) @@ -370,6 +372,16 @@ def patch_under_test(request: pytest.FixtureRequest) -> PatchUnderTest: ) input = torch.randn(1, in_features) return ([(lokr_layer, 0.7)], input) + elif layer_type == "diffusers_adaLN_lora": + lora_layer = DiffusersAdaLN_LoRALayer( + up=torch.randn(out_features, rank), + mid=None, + down=torch.randn(rank, in_features), + alpha=1.0, + bias=torch.randn(out_features), + ) + input = torch.randn(1, in_features) + return ([(lora_layer, 0.7)], input) else: raise ValueError(f"Unsupported layer_type: {layer_type}") From bb351a625276a18116e74630a4537d2daf13d9a5 Mon Sep 17 00:00:00 2001 From: simpletrontdip Date: Mon, 10 Mar 2025 14:41:58 +0700 Subject: [PATCH 08/14] feat: refine swap logic --- .../patches/layers/diffusers_ada_ln_lora_layer.py | 5 +++-- .../layers/test_diffuser_ada_ln_lora_layer.py | 12 +++++++----- 2 files changed, 10 insertions(+), 7 deletions(-) diff --git a/invokeai/backend/patches/layers/diffusers_ada_ln_lora_layer.py b/invokeai/backend/patches/layers/diffusers_ada_ln_lora_layer.py index a2d93531293..46dda7e4dc9 100644 --- a/invokeai/backend/patches/layers/diffusers_ada_ln_lora_layer.py +++ b/invokeai/backend/patches/layers/diffusers_ada_ln_lora_layer.py @@ -14,5 +14,6 @@ def get_weight(self, orig_weight: torch.Tensor) -> torch.Tensor: # while in 
diffusers it split into scale, shift. # So we swap the linear projection weights in order to be able to use Flux implementation - weight = super().get_weight(orig_weight) - return swap_shift_scale(weight) \ No newline at end of file + weight = super().get_weight(orig_weight) + weight = swap_shift_scale(weight) + return weight \ No newline at end of file diff --git a/tests/backend/patches/layers/test_diffuser_ada_ln_lora_layer.py b/tests/backend/patches/layers/test_diffuser_ada_ln_lora_layer.py index 2c1afa7daa0..d0917299282 100644 --- a/tests/backend/patches/layers/test_diffuser_ada_ln_lora_layer.py +++ b/tests/backend/patches/layers/test_diffuser_ada_ln_lora_layer.py @@ -29,14 +29,14 @@ def test_diffusers_adaLN_lora_layer_get_weight(): rank = 4 alpha = 16.0 - lora = LoRALayer( + normal_layer = LoRALayer( up=torch.ones(out_features, rank), mid=None, down=torch.ones(rank, big_in_features), alpha=alpha, bias=None ) - layer = DiffusersAdaLN_LoRALayer( + diffuser_adaLN_layer = DiffusersAdaLN_LoRALayer( up=torch.ones(out_features, rank), mid=None, down=torch.ones(rank, big_in_features), @@ -44,12 +44,14 @@ def test_diffusers_adaLN_lora_layer_get_weight(): bias=None ) + assert(isinstance(diffuser_adaLN_layer, LoRALayer)) + # mock original weight, normally ignored in our loRA orig_weight = torch.ones(small_in_features) - diffuser_weight = layer.get_weight(orig_weight) - lora_weight = lora.get_weight(orig_weight) + diffuser_weight = diffuser_adaLN_layer.get_weight(orig_weight) + normal_weight = normal_layer.get_weight(orig_weight) # diffusers lora weight should be flipped - assert(torch.allclose(diffuser_weight, swap_shift_scale(lora_weight))) + assert(torch.allclose(diffuser_weight, swap_shift_scale(normal_weight))) From 4af72730ee2aea1f3e5dd2d0321cfa941cc0c922 Mon Sep 17 00:00:00 2001 From: simpletrontdip Date: Tue, 11 Mar 2025 11:53:00 +0700 Subject: [PATCH 09/14] feat: approximate adaLN layer for more compatibility --- .../layers/diffusers_ada_ln_lora_layer.py | 19 ----- invokeai/backend/patches/layers/utils.py | 71 +++++++++++++++++-- .../flux_diffusers_lora_conversion_utils.py | 4 +- .../layers/test_diffuser_ada_ln_lora_layer.py | 57 --------------- .../patches/layers/test_layer_utils.py | 46 ++++++++++++ 5 files changed, 114 insertions(+), 83 deletions(-) delete mode 100644 invokeai/backend/patches/layers/diffusers_ada_ln_lora_layer.py delete mode 100644 tests/backend/patches/layers/test_diffuser_ada_ln_lora_layer.py create mode 100644 tests/backend/patches/layers/test_layer_utils.py diff --git a/invokeai/backend/patches/layers/diffusers_ada_ln_lora_layer.py b/invokeai/backend/patches/layers/diffusers_ada_ln_lora_layer.py deleted file mode 100644 index 46dda7e4dc9..00000000000 --- a/invokeai/backend/patches/layers/diffusers_ada_ln_lora_layer.py +++ /dev/null @@ -1,19 +0,0 @@ -import torch - -from invokeai.backend.patches.layers.lora_layer import LoRALayer - -def swap_shift_scale(tensor: torch.Tensor) -> torch.Tensor: - scale, shift = tensor.chunk(2, dim=0) - return torch.cat([shift, scale], dim=0) - -class DiffusersAdaLN_LoRALayer(LoRALayer): - '''LoRA layer converted from Diffusers AdaLN, weight is shift-scale swapped''' - - def get_weight(self, orig_weight: torch.Tensor) -> torch.Tensor: - # In SD3 and Flux implementation of AdaLayerNormContinuous, it split linear projection output into shift, scale; - # while in diffusers it split into scale, shift. 
- # So we swap the linear projection weights in order to be able to use Flux implementation - - weight = super().get_weight(orig_weight) - weight = swap_shift_scale(weight) - return weight \ No newline at end of file diff --git a/invokeai/backend/patches/layers/utils.py b/invokeai/backend/patches/layers/utils.py index 86acfe992bb..1f8217fe2b6 100644 --- a/invokeai/backend/patches/layers/utils.py +++ b/invokeai/backend/patches/layers/utils.py @@ -1,4 +1,4 @@ -from typing import Dict +from typing import Dict, Tuple import torch @@ -10,7 +10,6 @@ from invokeai.backend.patches.layers.lokr_layer import LoKRLayer from invokeai.backend.patches.layers.lora_layer import LoRALayer from invokeai.backend.patches.layers.norm_layer import NormLayer -from invokeai.backend.patches.layers.diffusers_ada_ln_lora_layer import DiffusersAdaLN_LoRALayer def any_lora_layer_from_state_dict(state_dict: Dict[str, torch.Tensor]) -> BaseLayerPatch: @@ -36,8 +35,70 @@ def any_lora_layer_from_state_dict(state_dict: Dict[str, torch.Tensor]) -> BaseL raise ValueError(f"Unsupported lora format: {state_dict.keys()}") -def diffusers_adaLN_lora_layer_from_state_dict(state_dict: Dict[str, torch.Tensor]) -> DiffusersAdaLN_LoRALayer: + +def swap_shift_scale_for_linear_weight(weight: torch.Tensor) -> torch.Tensor: + """Swap shift/scale for given linear layer back and forth""" + # In SD3 and Flux implementation of AdaLayerNormContinuous, it split linear projection output into shift, scale; + # while in diffusers it split into scale, shift. This will flip them around + chunk1, chunk2 = weight.chunk(2, dim=0) + return torch.cat([chunk2, chunk1], dim=0) + +def decomposite_weight_matric_with_rank( + delta: torch.Tensor, + rank: int, +) -> Tuple[torch.Tensor, torch.Tensor]: + """Decompose given matrix with a specified rank.""" + U, S, V = torch.svd(delta) + + # Truncate to rank r: + U_r = U[:, :rank] + S_r = S[:rank] + V_r = V[:, :rank] + + S_sqrt = torch.sqrt(S_r) + + up = torch.matmul(U_r, torch.diag(S_sqrt)) + down = torch.matmul(torch.diag(S_sqrt), V_r.T) + + return up, down + + +def approximate_flux_adaLN_lora_layer_from_diffusers_state_dict(state_dict: Dict[str, torch.Tensor]) -> LoRALayer: + '''Approximate given diffusers AdaLN loRA layer in our Flux model''' + if not "lora_up.weight" in state_dict: - raise ValueError(f"Unsupported lora format: {state_dict.keys()}") + raise ValueError(f"Unsupported lora format: {state_dict.keys()}, missing lora_up") - return DiffusersAdaLN_LoRALayer.from_state_dict_values(state_dict) \ No newline at end of file + if not "lora_down.weight" in state_dict: + raise ValueError(f"Unsupported lora format: {state_dict.keys()}, missing lora_down") + + up = state_dict.pop('lora_up.weight') + down = state_dict.pop('lora_down.weight') + + dtype = up.dtype + device = up.device + up_shape = up.shape + down_shape = down.shape + + # desired low rank + rank = up_shape[1] + + # up scaling for more precise + up.double() + down.double() + weight = up.reshape(up.shape[0], -1) @ down.reshape(down.shape[0], -1) + + # swap to our linear format + swapped = swap_shift_scale_for_linear_weight(weight) + + _up, _down = decomposite_weight_matric_with_rank(swapped, rank) + + assert(_up.shape == up_shape) + assert(_down.shape == down_shape) + + # down scaling to original dtype, device + state_dict['lora_up.weight'] = _up.to(dtype).to(device=device) + state_dict['lora_down.weight'] = _down.to(dtype).to(device=device) + + return LoRALayer.from_state_dict_values(state_dict) + diff --git 
a/invokeai/backend/patches/lora_conversions/flux_diffusers_lora_conversion_utils.py b/invokeai/backend/patches/lora_conversions/flux_diffusers_lora_conversion_utils.py index 013bd4ba542..152129883ae 100644 --- a/invokeai/backend/patches/lora_conversions/flux_diffusers_lora_conversion_utils.py +++ b/invokeai/backend/patches/lora_conversions/flux_diffusers_lora_conversion_utils.py @@ -4,7 +4,7 @@ from invokeai.backend.patches.layers.base_layer_patch import BaseLayerPatch from invokeai.backend.patches.layers.merged_layer_patch import MergedLayerPatch, Range -from invokeai.backend.patches.layers.utils import any_lora_layer_from_state_dict, diffusers_adaLN_lora_layer_from_state_dict +from invokeai.backend.patches.layers.utils import any_lora_layer_from_state_dict, approximate_flux_adaLN_lora_layer_from_diffusers_state_dict from invokeai.backend.patches.lora_conversions.flux_lora_constants import FLUX_LORA_TRANSFORMER_PREFIX from invokeai.backend.patches.model_patch_raw import ModelPatchRaw @@ -86,7 +86,7 @@ def add_adaLN_lora_layer_if_present(src_key: str, dst_key: str) -> None: if src_key in grouped_state_dict: src_layer_dict = grouped_state_dict.pop(src_key) values = get_lora_layer_values(src_layer_dict) - layers[dst_key] = diffusers_adaLN_lora_layer_from_state_dict(values) + layers[dst_key] = approximate_flux_adaLN_lora_layer_from_diffusers_state_dict(values) def add_qkv_lora_layer_if_present( src_keys: list[str], diff --git a/tests/backend/patches/layers/test_diffuser_ada_ln_lora_layer.py b/tests/backend/patches/layers/test_diffuser_ada_ln_lora_layer.py deleted file mode 100644 index d0917299282..00000000000 --- a/tests/backend/patches/layers/test_diffuser_ada_ln_lora_layer.py +++ /dev/null @@ -1,57 +0,0 @@ -import torch - -from invokeai.backend.patches.layers.lora_layer import LoRALayer -from invokeai.backend.patches.layers.diffusers_ada_ln_lora_layer import DiffusersAdaLN_LoRALayer, swap_shift_scale - -def test_swap_shift_scale_for_tensor(): - """Test swaping function""" - tensor = torch.Tensor([1, 2]) - expected = torch.Tensor([2, 1]) - - swapped = swap_shift_scale(tensor) - assert(torch.allclose(expected, swapped)) - - size= (3, 4) - first = torch.randn(size) - second = torch.randn(size) - - tensor = torch.concat([first, second]) - expected = torch.concat([second, first]) - - swapped = swap_shift_scale(tensor) - assert(torch.allclose(expected, swapped)) - -def test_diffusers_adaLN_lora_layer_get_weight(): - """Test getting weight from DiffusersAdaLN_LoRALayer.""" - small_in_features = 4 - big_in_features = 8 - out_features = 16 - rank = 4 - alpha = 16.0 - - normal_layer = LoRALayer( - up=torch.ones(out_features, rank), - mid=None, - down=torch.ones(rank, big_in_features), - alpha=alpha, - bias=None - ) - diffuser_adaLN_layer = DiffusersAdaLN_LoRALayer( - up=torch.ones(out_features, rank), - mid=None, - down=torch.ones(rank, big_in_features), - alpha=alpha, - bias=None - ) - - assert(isinstance(diffuser_adaLN_layer, LoRALayer)) - - # mock original weight, normally ignored in our loRA - orig_weight = torch.ones(small_in_features) - - diffuser_weight = diffuser_adaLN_layer.get_weight(orig_weight) - normal_weight = normal_layer.get_weight(orig_weight) - - # diffusers lora weight should be flipped - assert(torch.allclose(diffuser_weight, swap_shift_scale(normal_weight))) - diff --git a/tests/backend/patches/layers/test_layer_utils.py b/tests/backend/patches/layers/test_layer_utils.py new file mode 100644 index 00000000000..3deca37b38b --- /dev/null +++ 
b/tests/backend/patches/layers/test_layer_utils.py @@ -0,0 +1,46 @@ +import torch + +from invokeai.backend.patches.layers.utils import decomposite_weight_matric_with_rank, swap_shift_scale_for_linear_weight + + +def test_swap_shift_scale_for_linear_weight(): + """Test that swaping should work""" + original = torch.Tensor([1, 2]) + expected = torch.Tensor([2, 1]) + + swapped = swap_shift_scale_for_linear_weight(original) + assert(torch.allclose(expected, swapped)) + + size= (3, 4) + first = torch.randn(size) + second = torch.randn(size) + + original = torch.concat([first, second]) + expected = torch.concat([second, first]) + + swapped = swap_shift_scale_for_linear_weight(original) + assert(torch.allclose(expected, swapped)) + + # call this twice will reconstruct the original + reconstructed = swap_shift_scale_for_linear_weight(swapped) + assert(torch.allclose(reconstructed, original)) + +def test_decomposite_weight_matric_with_rank(): + """Test that decompsition of given matrix into 2 low rank matrices work""" + input_dim = 1024 + output_dim = 1024 + rank = 8 # Low rank + + + A = torch.randn(input_dim, rank).double() + B = torch.randn(rank, output_dim).double() + W0 = A @ B + + C, D = decomposite_weight_matric_with_rank(W0, rank) + R = C @ D + + assert(C.shape == A.shape) + assert(D.shape == B.shape) + + assert torch.allclose(W0, R) + From 0cad89d3c8236ef2213695788fa564528250a1ef Mon Sep 17 00:00:00 2001 From: simpletrontdip Date: Tue, 11 Mar 2025 14:36:08 +0700 Subject: [PATCH 10/14] feat: refactor conversion module, add test for svd correctness --- invokeai/backend/patches/layers/utils.py | 44 +-------------- .../flux_diffusers_lora_conversion_utils.py | 46 +++++++++++++++- ...st_flux_diffusers_lora_conversion_utils.py | 55 +++++++++++++++++++ 3 files changed, 102 insertions(+), 43 deletions(-) diff --git a/invokeai/backend/patches/layers/utils.py b/invokeai/backend/patches/layers/utils.py index 1f8217fe2b6..2d2b99763b7 100644 --- a/invokeai/backend/patches/layers/utils.py +++ b/invokeai/backend/patches/layers/utils.py @@ -46,6 +46,7 @@ def swap_shift_scale_for_linear_weight(weight: torch.Tensor) -> torch.Tensor: def decomposite_weight_matric_with_rank( delta: torch.Tensor, rank: int, + epsilon: float = 1e-8, ) -> Tuple[torch.Tensor, torch.Tensor]: """Decompose given matrix with a specified rank.""" U, S, V = torch.svd(delta) @@ -55,50 +56,9 @@ def decomposite_weight_matric_with_rank( S_r = S[:rank] V_r = V[:, :rank] - S_sqrt = torch.sqrt(S_r) + S_sqrt = torch.sqrt(S_r + epsilon) # regularization up = torch.matmul(U_r, torch.diag(S_sqrt)) down = torch.matmul(torch.diag(S_sqrt), V_r.T) return up, down - - -def approximate_flux_adaLN_lora_layer_from_diffusers_state_dict(state_dict: Dict[str, torch.Tensor]) -> LoRALayer: - '''Approximate given diffusers AdaLN loRA layer in our Flux model''' - - if not "lora_up.weight" in state_dict: - raise ValueError(f"Unsupported lora format: {state_dict.keys()}, missing lora_up") - - if not "lora_down.weight" in state_dict: - raise ValueError(f"Unsupported lora format: {state_dict.keys()}, missing lora_down") - - up = state_dict.pop('lora_up.weight') - down = state_dict.pop('lora_down.weight') - - dtype = up.dtype - device = up.device - up_shape = up.shape - down_shape = down.shape - - # desired low rank - rank = up_shape[1] - - # up scaling for more precise - up.double() - down.double() - weight = up.reshape(up.shape[0], -1) @ down.reshape(down.shape[0], -1) - - # swap to our linear format - swapped = swap_shift_scale_for_linear_weight(weight) - - _up, 
_down = decomposite_weight_matric_with_rank(swapped, rank) - - assert(_up.shape == up_shape) - assert(_down.shape == down_shape) - - # down scaling to original dtype, device - state_dict['lora_up.weight'] = _up.to(dtype).to(device=device) - state_dict['lora_down.weight'] = _down.to(dtype).to(device=device) - - return LoRALayer.from_state_dict_values(state_dict) - diff --git a/invokeai/backend/patches/lora_conversions/flux_diffusers_lora_conversion_utils.py b/invokeai/backend/patches/lora_conversions/flux_diffusers_lora_conversion_utils.py index 152129883ae..530e9954ee9 100644 --- a/invokeai/backend/patches/lora_conversions/flux_diffusers_lora_conversion_utils.py +++ b/invokeai/backend/patches/lora_conversions/flux_diffusers_lora_conversion_utils.py @@ -2,9 +2,10 @@ import torch +from invokeai.backend.patches.layers.lora_layer import LoRALayer from invokeai.backend.patches.layers.base_layer_patch import BaseLayerPatch from invokeai.backend.patches.layers.merged_layer_patch import MergedLayerPatch, Range -from invokeai.backend.patches.layers.utils import any_lora_layer_from_state_dict, approximate_flux_adaLN_lora_layer_from_diffusers_state_dict +from invokeai.backend.patches.layers.utils import any_lora_layer_from_state_dict, swap_shift_scale_for_linear_weight, decomposite_weight_matric_with_rank from invokeai.backend.patches.lora_conversions.flux_lora_constants import FLUX_LORA_TRANSFORMER_PREFIX from invokeai.backend.patches.model_patch_raw import ModelPatchRaw @@ -29,6 +30,49 @@ def is_state_dict_likely_in_flux_diffusers_format(state_dict: Dict[str, torch.Te return all_keys_in_peft_format and all_expected_keys_present +def approximate_flux_adaLN_lora_layer_from_diffusers_state_dict(state_dict: Dict[str, torch.Tensor]) -> LoRALayer: + '''Approximate given diffusers AdaLN loRA layer in our Flux model''' + + if not "lora_up.weight" in state_dict: + raise ValueError(f"Unsupported lora format: {state_dict.keys()}, missing lora_up") + + if not "lora_down.weight" in state_dict: + raise ValueError(f"Unsupported lora format: {state_dict.keys()}, missing lora_down") + + up = state_dict.pop('lora_up.weight') + down = state_dict.pop('lora_down.weight') + + # layer-patcher upcast things to f32, + # we want to maintain a better precison for this one + dtype = torch.float32 + + device = up.device + up_shape = up.shape + down_shape = down.shape + + # desired low rank + rank = up_shape[1] + + # up scaling for more precise + up = up.to(torch.float32) + down = down.to(torch.float32) + + weight = up.reshape(up_shape[0], -1) @ down.reshape(down_shape[0], -1) + + # swap to our linear format + swapped = swap_shift_scale_for_linear_weight(weight) + + _up, _down = decomposite_weight_matric_with_rank(swapped, rank) + + assert(_up.shape == up_shape) + assert(_down.shape == down_shape) + + # down scaling to original dtype, device + state_dict['lora_up.weight'] = _up.to(dtype).to(device=device) + state_dict['lora_down.weight'] = _down.to(dtype).to(device=device) + + return LoRALayer.from_state_dict_values(state_dict) + def lora_model_from_flux_diffusers_state_dict( state_dict: Dict[str, torch.Tensor], alpha: float | None diff --git a/tests/backend/patches/lora_conversions/test_flux_diffusers_lora_conversion_utils.py b/tests/backend/patches/lora_conversions/test_flux_diffusers_lora_conversion_utils.py index 3558eb01eee..77e6c4e9055 100644 --- a/tests/backend/patches/lora_conversions/test_flux_diffusers_lora_conversion_utils.py +++ b/tests/backend/patches/lora_conversions/test_flux_diffusers_lora_conversion_utils.py @@ 
-1,9 +1,12 @@ import pytest import torch + +from invokeai.backend.patches.layers.utils import swap_shift_scale_for_linear_weight from invokeai.backend.patches.lora_conversions.flux_diffusers_lora_conversion_utils import ( is_state_dict_likely_in_flux_diffusers_format, lora_model_from_flux_diffusers_state_dict, + approximate_flux_adaLN_lora_layer_from_diffusers_state_dict, ) from invokeai.backend.patches.lora_conversions.flux_lora_constants import FLUX_LORA_TRANSFORMER_PREFIX from tests.backend.patches.lora_conversions.lora_state_dicts.flux_dora_onetrainer_format import ( @@ -78,3 +81,55 @@ def test_lora_model_from_flux_diffusers_state_dict_extra_keys_error(): # Check that an error is raised. with pytest.raises(AssertionError): lora_model_from_flux_diffusers_state_dict(state_dict, alpha=8.0) + + +@pytest.mark.parametrize("layer_sd_keys",[ + {}, # no keys + {'lora_A.weight': [1024, 8], 'lora_B.weight': [8, 512]}, # wrong keys + {'lora_up.weight': [1024, 8],}, # missing key + {'lora_down.weight': [8, 512],}, # missing key +]) +def test_approximate_adaLN_from_state_dict_should_only_accept_vanilla_LoRA_format(layer_sd_keys: dict[str, list[int]]): + """Should only accept state dicts in the vanilla LoRA format""" + layer_state_dict = keys_to_mock_state_dict(layer_sd_keys) + + with pytest.raises(ValueError): + approximate_flux_adaLN_lora_layer_from_diffusers_state_dict(layer_state_dict) + + +@pytest.mark.parametrize("dtype, rtol", [ + (torch.float32, 1e-4), + (torch.half, 1e-3), +]) +def test_approximate_adaLN_from_state_dict_should_work(dtype: torch.dtype, rtol: float, rate: float = 0.99): + """Test that we can approximate a good enough adaLN layer from a diffusers state dict. + Some approximation error is tolerated, depending on the input dtype""" + input_dim = 1024 + output_dim = 512 + rank = 8 # Low rank + total = input_dim * output_dim + + up = torch.randn(input_dim, rank, dtype=dtype) + down = torch.randn(rank, output_dim, dtype=dtype) + + layer_state_dict = { + 'lora_up.weight': up, + 'lora_down.weight': down + } + + # XXX Layer patcher cast things to f32 + original = up.float() @ down.float() + swapped = swap_shift_scale_for_linear_weight(original) + + layer = approximate_flux_adaLN_lora_layer_from_diffusers_state_dict(layer_state_dict) + weight = layer.get_weight(original).float() + + print(weight.dtype, swapped.dtype, layer.up.dtype) + + close_count = torch.isclose(weight, swapped, rtol=rtol).sum().item() + close_rate = close_count / total + + assert close_rate > rate + + + From 970b2a80140b05735278a18cb78a38d0ce56a0ec Mon Sep 17 00:00:00 2001 From: simpletrontdip Date: Tue, 11 Mar 2025 14:55:04 +0700 Subject: [PATCH 11/14] feat: verify function called while converting model --- ...test_flux_diffusers_lora_conversion_utils.py | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/tests/backend/patches/lora_conversions/test_flux_diffusers_lora_conversion_utils.py b/tests/backend/patches/lora_conversions/test_flux_diffusers_lora_conversion_utils.py index 77e6c4e9055..837c922388b 100644 --- a/tests/backend/patches/lora_conversions/test_flux_diffusers_lora_conversion_utils.py +++ b/tests/backend/patches/lora_conversions/test_flux_diffusers_lora_conversion_utils.py @@ -1,5 +1,7 @@ +import unittest.mock import pytest import torch +import unittest from invokeai.backend.patches.layers.utils import swap_shift_scale_for_linear_weight @@ -131,5 +133,20 @@ def test_approximate_adaLN_from_state_dict_should_work(dtype: torch.dtype, rtol: assert close_rate > rate +def
test_adaLN_should_be_approximated_if_present_while_converting(): + """The adaLN layer should be approximated if it exists in the given model""" + state_dict = keys_to_mock_state_dict(flux_diffusers_with_norm_out_state_dict_keys) + adaLN_layer_key = 'final_layer.adaLN_modulation.1' + prefixed_layer_key = FLUX_LORA_TRANSFORMER_PREFIX + adaLN_layer_key + with unittest.mock.patch( + 'invokeai.backend.patches.lora_conversions.flux_diffusers_lora_conversion_utils.approximate_flux_adaLN_lora_layer_from_diffusers_state_dict' + ) as mock_approximate_func: + model = lora_model_from_flux_diffusers_state_dict(state_dict, alpha=8.0) + + # Check that all converted layer keys have the expected transformer prefix. + assert all(k.startswith(FLUX_LORA_TRANSFORMER_PREFIX) for k in model.layers.keys()) + + assert prefixed_layer_key in model.layers.keys() + assert mock_approximate_func.call_count == 1 From 970b2a80140b05735278a18cb78a38d0ce56a0ec Mon Sep 17 00:00:00 2001 From: simpletrontdip Date: Tue, 11 Mar 2025 14:58:07 +0700 Subject: [PATCH 12/14] chore: fix import path --- .../test_flux_diffusers_lora_conversion_utils.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/tests/backend/patches/lora_conversions/test_flux_diffusers_lora_conversion_utils.py b/tests/backend/patches/lora_conversions/test_flux_diffusers_lora_conversion_utils.py index 837c922388b..c7dfaff8812 100644 --- a/tests/backend/patches/lora_conversions/test_flux_diffusers_lora_conversion_utils.py +++ b/tests/backend/patches/lora_conversions/test_flux_diffusers_lora_conversion_utils.py @@ -1,7 +1,6 @@ -import unittest.mock import pytest import torch -import unittest +from unittest import mock from invokeai.backend.patches.layers.utils import swap_shift_scale_for_linear_weight @@ -140,7 +139,7 @@ def test_adaLN_should_be_approximated_if_present_while_converting(): adaLN_layer_key = 'final_layer.adaLN_modulation.1' prefixed_layer_key = FLUX_LORA_TRANSFORMER_PREFIX + adaLN_layer_key - with unittest.mock.patch( + with mock.patch( 'invokeai.backend.patches.lora_conversions.flux_diffusers_lora_conversion_utils.approximate_flux_adaLN_lora_layer_from_diffusers_state_dict' ) as mock_approximate_func: model = lora_model_from_flux_diffusers_state_dict(state_dict, alpha=8.0) From aa58da9aa50e5c63623b2cce586a174b07de2ed1 Mon Sep 17 00:00:00 2001 From: simpletrontdip Date: Tue, 11 Mar 2025 15:15:17 +0700 Subject: [PATCH 13/14] chore: remove unused test setup --- .../custom_modules/test_all_custom_modules.py | 12 ------------ 1 file changed, 12 deletions(-) diff --git a/tests/backend/model_manager/load/model_cache/torch_module_autocast/custom_modules/test_all_custom_modules.py b/tests/backend/model_manager/load/model_cache/torch_module_autocast/custom_modules/test_all_custom_modules.py index 8dc240eb687..c1e77c333bb 100644 --- a/tests/backend/model_manager/load/model_cache/torch_module_autocast/custom_modules/test_all_custom_modules.py +++ b/tests/backend/model_manager/load/model_cache/torch_module_autocast/custom_modules/test_all_custom_modules.py @@ -14,7 +14,6 @@ from invokeai.backend.patches.layer_patcher import LayerPatcher from invokeai.backend.patches.layers.base_layer_patch import BaseLayerPatch from invokeai.backend.patches.layers.flux_control_lora_layer import FluxControlLoRALayer -from invokeai.backend.patches.layers.diffusers_ada_ln_lora_layer import DiffusersAdaLN_LoRALayer from invokeai.backend.patches.layers.lokr_layer import LoKRLayer from invokeai.backend.patches.layers.lora_layer import LoRALayer from
invokeai.backend.patches.layers.merged_layer_patch import MergedLayerPatch, Range @@ -284,7 +283,6 @@ def test_inference_autocast_from_cpu_to_device(device: str, layer_under_test: La "multiple_loras", "concatenated_lora", "flux_control_lora", - "diffusers_adaLN_lora", "single_lokr", ] ) @@ -372,16 +370,6 @@ def patch_under_test(request: pytest.FixtureRequest) -> PatchUnderTest: ) input = torch.randn(1, in_features) return ([(lokr_layer, 0.7)], input) - elif layer_type == "diffusers_adaLN_lora": - lora_layer = DiffusersAdaLN_LoRALayer( - up=torch.randn(out_features, rank), - mid=None, - down=torch.randn(rank, in_features), - alpha=1.0, - bias=torch.randn(out_features), - ) - input = torch.randn(1, in_features) - return ([(lora_layer, 0.7)], input) else: raise ValueError(f"Unsupported layer_type: {layer_type}") From a5d23ade5b206ce16a2d720c232c6e41b66bfbd5 Mon Sep 17 00:00:00 2001 From: simpletrontdip Date: Fri, 14 Mar 2025 14:27:35 +0700 Subject: [PATCH 14/14] chore: ruff fix --- invokeai/backend/patches/layers/utils.py | 6 +- .../flux_diffusers_lora_conversion_utils.py | 43 +- .../patches/layers/test_layer_utils.py | 20 +- ...lux_lora_diffusers_with_norm_out_format.py | 2019 +++++++++-------- ...st_flux_diffusers_lora_conversion_utils.py | 64 +- 5 files changed, 1091 insertions(+), 1061 deletions(-) diff --git a/invokeai/backend/patches/layers/utils.py b/invokeai/backend/patches/layers/utils.py index 2d2b99763b7..778884cacc1 100644 --- a/invokeai/backend/patches/layers/utils.py +++ b/invokeai/backend/patches/layers/utils.py @@ -35,14 +35,14 @@ def any_lora_layer_from_state_dict(state_dict: Dict[str, torch.Tensor]) -> BaseL raise ValueError(f"Unsupported lora format: {state_dict.keys()}") - def swap_shift_scale_for_linear_weight(weight: torch.Tensor) -> torch.Tensor: """Swap shift/scale for given linear layer back and forth""" # In SD3 and Flux implementation of AdaLayerNormContinuous, it split linear projection output into shift, scale; # while in diffusers it split into scale, shift. 
This will flip them around - chunk1, chunk2 = weight.chunk(2, dim=0) + chunk1, chunk2 = weight.chunk(2, dim=0) return torch.cat([chunk2, chunk1], dim=0) + def decomposite_weight_matric_with_rank( delta: torch.Tensor, rank: int, @@ -56,7 +56,7 @@ def decomposite_weight_matric_with_rank( S_r = S[:rank] V_r = V[:, :rank] - S_sqrt = torch.sqrt(S_r + epsilon) # regularization + S_sqrt = torch.sqrt(S_r + epsilon) # regularization up = torch.matmul(U_r, torch.diag(S_sqrt)) down = torch.matmul(torch.diag(S_sqrt), V_r.T) diff --git a/invokeai/backend/patches/lora_conversions/flux_diffusers_lora_conversion_utils.py b/invokeai/backend/patches/lora_conversions/flux_diffusers_lora_conversion_utils.py index 530e9954ee9..71f4cbecdde 100644 --- a/invokeai/backend/patches/lora_conversions/flux_diffusers_lora_conversion_utils.py +++ b/invokeai/backend/patches/lora_conversions/flux_diffusers_lora_conversion_utils.py @@ -2,10 +2,14 @@ import torch -from invokeai.backend.patches.layers.lora_layer import LoRALayer from invokeai.backend.patches.layers.base_layer_patch import BaseLayerPatch +from invokeai.backend.patches.layers.lora_layer import LoRALayer from invokeai.backend.patches.layers.merged_layer_patch import MergedLayerPatch, Range -from invokeai.backend.patches.layers.utils import any_lora_layer_from_state_dict, swap_shift_scale_for_linear_weight, decomposite_weight_matric_with_rank +from invokeai.backend.patches.layers.utils import ( + any_lora_layer_from_state_dict, + decomposite_weight_matric_with_rank, + swap_shift_scale_for_linear_weight, +) from invokeai.backend.patches.lora_conversions.flux_lora_constants import FLUX_LORA_TRANSFORMER_PREFIX from invokeai.backend.patches.model_patch_raw import ModelPatchRaw @@ -30,26 +34,27 @@ def is_state_dict_likely_in_flux_diffusers_format(state_dict: Dict[str, torch.Te return all_keys_in_peft_format and all_expected_keys_present + def approximate_flux_adaLN_lora_layer_from_diffusers_state_dict(state_dict: Dict[str, torch.Tensor]) -> LoRALayer: - '''Approximate given diffusers AdaLN loRA layer in our Flux model''' + """Approximate given diffusers AdaLN loRA layer in our Flux model""" - if not "lora_up.weight" in state_dict: + if "lora_up.weight" not in state_dict: raise ValueError(f"Unsupported lora format: {state_dict.keys()}, missing lora_up") - - if not "lora_down.weight" in state_dict: + + if "lora_down.weight" not in state_dict: raise ValueError(f"Unsupported lora format: {state_dict.keys()}, missing lora_down") - - up = state_dict.pop('lora_up.weight') - down = state_dict.pop('lora_down.weight') - # layer-patcher upcast things to f32, + up = state_dict.pop("lora_up.weight") + down = state_dict.pop("lora_down.weight") + + # layer-patcher upcast things to f32, # we want to maintain a better precison for this one dtype = torch.float32 device = up.device up_shape = up.shape down_shape = down.shape - + # desired low rank rank = up_shape[1] @@ -57,19 +62,19 @@ def approximate_flux_adaLN_lora_layer_from_diffusers_state_dict(state_dict: Dict up = up.to(torch.float32) down = down.to(torch.float32) - weight = up.reshape(up_shape[0], -1) @ down.reshape(down_shape[0], -1) + weight = up.reshape(up_shape[0], -1) @ down.reshape(down_shape[0], -1) # swap to our linear format swapped = swap_shift_scale_for_linear_weight(weight) _up, _down = decomposite_weight_matric_with_rank(swapped, rank) - assert(_up.shape == up_shape) - assert(_down.shape == down_shape) + assert _up.shape == up_shape + assert _down.shape == down_shape # down scaling to original dtype, device - 
state_dict['lora_up.weight'] = _up.to(dtype).to(device=device) - state_dict['lora_down.weight'] = _down.to(dtype).to(device=device) + state_dict["lora_up.weight"] = _up.to(dtype).to(device=device) + state_dict["lora_down.weight"] = _down.to(dtype).to(device=device) return LoRALayer.from_state_dict_values(state_dict) @@ -131,7 +136,7 @@ def add_adaLN_lora_layer_if_present(src_key: str, dst_key: str) -> None: src_layer_dict = grouped_state_dict.pop(src_key) values = get_lora_layer_values(src_layer_dict) layers[dst_key] = approximate_flux_adaLN_lora_layer_from_diffusers_state_dict(values) - + def add_qkv_lora_layer_if_present( src_keys: list[str], src_weight_shapes: list[tuple[int, int]], @@ -274,8 +279,8 @@ def add_qkv_lora_layer_if_present( # Final layer. add_lora_layer_if_present("proj_out", "final_layer.linear") add_adaLN_lora_layer_if_present( - 'norm_out.linear', - 'final_layer.adaLN_modulation.1', + "norm_out.linear", + "final_layer.adaLN_modulation.1", ) # Assert that all keys were processed. diff --git a/tests/backend/patches/layers/test_layer_utils.py b/tests/backend/patches/layers/test_layer_utils.py index 3deca37b38b..2383ec3bacf 100644 --- a/tests/backend/patches/layers/test_layer_utils.py +++ b/tests/backend/patches/layers/test_layer_utils.py @@ -1,6 +1,9 @@ import torch -from invokeai.backend.patches.layers.utils import decomposite_weight_matric_with_rank, swap_shift_scale_for_linear_weight +from invokeai.backend.patches.layers.utils import ( + decomposite_weight_matric_with_rank, + swap_shift_scale_for_linear_weight, +) def test_swap_shift_scale_for_linear_weight(): @@ -9,9 +12,9 @@ def test_swap_shift_scale_for_linear_weight(): expected = torch.Tensor([2, 1]) swapped = swap_shift_scale_for_linear_weight(original) - assert(torch.allclose(expected, swapped)) + assert torch.allclose(expected, swapped) - size= (3, 4) + size = (3, 4) first = torch.randn(size) second = torch.randn(size) @@ -19,11 +22,12 @@ def test_swap_shift_scale_for_linear_weight(): expected = torch.concat([second, first]) swapped = swap_shift_scale_for_linear_weight(original) - assert(torch.allclose(expected, swapped)) + assert torch.allclose(expected, swapped) # call this twice will reconstruct the original reconstructed = swap_shift_scale_for_linear_weight(swapped) - assert(torch.allclose(reconstructed, original)) + assert torch.allclose(reconstructed, original) + def test_decomposite_weight_matric_with_rank(): """Test that decompsition of given matrix into 2 low rank matrices work""" @@ -31,7 +35,6 @@ def test_decomposite_weight_matric_with_rank(): output_dim = 1024 rank = 8 # Low rank - A = torch.randn(input_dim, rank).double() B = torch.randn(rank, output_dim).double() W0 = A @ B @@ -39,8 +42,7 @@ def test_decomposite_weight_matric_with_rank(): C, D = decomposite_weight_matric_with_rank(W0, rank) R = C @ D - assert(C.shape == A.shape) - assert(D.shape == B.shape) + assert C.shape == A.shape + assert D.shape == B.shape assert torch.allclose(W0, R) - diff --git a/tests/backend/patches/lora_conversions/lora_state_dicts/flux_lora_diffusers_with_norm_out_format.py b/tests/backend/patches/lora_conversions/lora_state_dicts/flux_lora_diffusers_with_norm_out_format.py index 7a9d15083d3..fd08ce8c3bf 100644 --- a/tests/backend/patches/lora_conversions/lora_state_dicts/flux_lora_diffusers_with_norm_out_format.py +++ b/tests/backend/patches/lora_conversions/lora_state_dicts/flux_lora_diffusers_with_norm_out_format.py @@ -1,1012 +1,1013 @@ # Sample state dict in the Diffusers FLUX LoRA format. 
# This from Hyper-SD, having extra `norm_out` layer # From https://huggingface.co/ByteDance/Hyper-SD/tree/main?show_file_info=Hyper-FLUX.1-dev-16steps-lora.safetensors -state_dict_keys = { - "transformer.context_embedder.lora_A.weight": [64, 4096], - "transformer.context_embedder.lora_B.weight": [3072, 64], - "transformer.norm_out.linear.lora_A.weight": [64, 3072], - "transformer.norm_out.linear.lora_B.weight": [6144, 64], - "transformer.proj_out.lora_A.weight": [64, 3072], - "transformer.proj_out.lora_B.weight": [64, 64], - "transformer.single_transformer_blocks.0.attn.to_k.lora_A.weight": [64, 3072], - "transformer.single_transformer_blocks.0.attn.to_k.lora_B.weight": [3072, 64], - "transformer.single_transformer_blocks.0.attn.to_q.lora_A.weight": [64, 3072], - "transformer.single_transformer_blocks.0.attn.to_q.lora_B.weight": [3072, 64], - "transformer.single_transformer_blocks.0.attn.to_v.lora_A.weight": [64, 3072], - "transformer.single_transformer_blocks.0.attn.to_v.lora_B.weight": [3072, 64], - "transformer.single_transformer_blocks.0.norm.linear.lora_A.weight": [64, 3072], - "transformer.single_transformer_blocks.0.norm.linear.lora_B.weight": [9216, 64], - "transformer.single_transformer_blocks.0.proj_mlp.lora_A.weight": [64, 3072], - "transformer.single_transformer_blocks.0.proj_mlp.lora_B.weight": [12288, 64], - "transformer.single_transformer_blocks.0.proj_out.lora_A.weight": [64, 15360], - "transformer.single_transformer_blocks.0.proj_out.lora_B.weight": [3072, 64], - "transformer.single_transformer_blocks.1.attn.to_k.lora_A.weight": [64, 3072], - "transformer.single_transformer_blocks.1.attn.to_k.lora_B.weight": [3072, 64], - "transformer.single_transformer_blocks.1.attn.to_q.lora_A.weight": [64, 3072], - "transformer.single_transformer_blocks.1.attn.to_q.lora_B.weight": [3072, 64], - "transformer.single_transformer_blocks.1.attn.to_v.lora_A.weight": [64, 3072], - "transformer.single_transformer_blocks.1.attn.to_v.lora_B.weight": [3072, 64], - "transformer.single_transformer_blocks.1.norm.linear.lora_A.weight": [64, 3072], - "transformer.single_transformer_blocks.1.norm.linear.lora_B.weight": [9216, 64], - "transformer.single_transformer_blocks.1.proj_mlp.lora_A.weight": [64, 3072], - "transformer.single_transformer_blocks.1.proj_mlp.lora_B.weight": [12288, 64], - "transformer.single_transformer_blocks.1.proj_out.lora_A.weight": [64, 15360], - "transformer.single_transformer_blocks.1.proj_out.lora_B.weight": [3072, 64], - "transformer.single_transformer_blocks.10.attn.to_k.lora_A.weight": [64, 3072], - "transformer.single_transformer_blocks.10.attn.to_k.lora_B.weight": [3072, 64], - "transformer.single_transformer_blocks.10.attn.to_q.lora_A.weight": [64, 3072], - "transformer.single_transformer_blocks.10.attn.to_q.lora_B.weight": [3072, 64], - "transformer.single_transformer_blocks.10.attn.to_v.lora_A.weight": [64, 3072], - "transformer.single_transformer_blocks.10.attn.to_v.lora_B.weight": [3072, 64], - "transformer.single_transformer_blocks.10.norm.linear.lora_A.weight": [64, 3072], - "transformer.single_transformer_blocks.10.norm.linear.lora_B.weight": [9216, 64], - "transformer.single_transformer_blocks.10.proj_mlp.lora_A.weight": [64, 3072], - "transformer.single_transformer_blocks.10.proj_mlp.lora_B.weight": [12288, 64], - "transformer.single_transformer_blocks.10.proj_out.lora_A.weight": [64, 15360], - "transformer.single_transformer_blocks.10.proj_out.lora_B.weight": [3072, 64], - "transformer.single_transformer_blocks.11.attn.to_k.lora_A.weight": [64, 3072], - 
"transformer.single_transformer_blocks.11.attn.to_k.lora_B.weight": [3072, 64], - "transformer.single_transformer_blocks.11.attn.to_q.lora_A.weight": [64, 3072], - "transformer.single_transformer_blocks.11.attn.to_q.lora_B.weight": [3072, 64], - "transformer.single_transformer_blocks.11.attn.to_v.lora_A.weight": [64, 3072], - "transformer.single_transformer_blocks.11.attn.to_v.lora_B.weight": [3072, 64], - "transformer.single_transformer_blocks.11.norm.linear.lora_A.weight": [64, 3072], - "transformer.single_transformer_blocks.11.norm.linear.lora_B.weight": [9216, 64], - "transformer.single_transformer_blocks.11.proj_mlp.lora_A.weight": [64, 3072], - "transformer.single_transformer_blocks.11.proj_mlp.lora_B.weight": [12288, 64], - "transformer.single_transformer_blocks.11.proj_out.lora_A.weight": [64, 15360], - "transformer.single_transformer_blocks.11.proj_out.lora_B.weight": [3072, 64], - "transformer.single_transformer_blocks.12.attn.to_k.lora_A.weight": [64, 3072], - "transformer.single_transformer_blocks.12.attn.to_k.lora_B.weight": [3072, 64], - "transformer.single_transformer_blocks.12.attn.to_q.lora_A.weight": [64, 3072], - "transformer.single_transformer_blocks.12.attn.to_q.lora_B.weight": [3072, 64], - "transformer.single_transformer_blocks.12.attn.to_v.lora_A.weight": [64, 3072], - "transformer.single_transformer_blocks.12.attn.to_v.lora_B.weight": [3072, 64], - "transformer.single_transformer_blocks.12.norm.linear.lora_A.weight": [64, 3072], - "transformer.single_transformer_blocks.12.norm.linear.lora_B.weight": [9216, 64], - "transformer.single_transformer_blocks.12.proj_mlp.lora_A.weight": [64, 3072], - "transformer.single_transformer_blocks.12.proj_mlp.lora_B.weight": [12288, 64], - "transformer.single_transformer_blocks.12.proj_out.lora_A.weight": [64, 15360], - "transformer.single_transformer_blocks.12.proj_out.lora_B.weight": [3072, 64], - "transformer.single_transformer_blocks.13.attn.to_k.lora_A.weight": [64, 3072], - "transformer.single_transformer_blocks.13.attn.to_k.lora_B.weight": [3072, 64], - "transformer.single_transformer_blocks.13.attn.to_q.lora_A.weight": [64, 3072], - "transformer.single_transformer_blocks.13.attn.to_q.lora_B.weight": [3072, 64], - "transformer.single_transformer_blocks.13.attn.to_v.lora_A.weight": [64, 3072], - "transformer.single_transformer_blocks.13.attn.to_v.lora_B.weight": [3072, 64], - "transformer.single_transformer_blocks.13.norm.linear.lora_A.weight": [64, 3072], - "transformer.single_transformer_blocks.13.norm.linear.lora_B.weight": [9216, 64], - "transformer.single_transformer_blocks.13.proj_mlp.lora_A.weight": [64, 3072], - "transformer.single_transformer_blocks.13.proj_mlp.lora_B.weight": [12288, 64], - "transformer.single_transformer_blocks.13.proj_out.lora_A.weight": [64, 15360], - "transformer.single_transformer_blocks.13.proj_out.lora_B.weight": [3072, 64], - "transformer.single_transformer_blocks.14.attn.to_k.lora_A.weight": [64, 3072], - "transformer.single_transformer_blocks.14.attn.to_k.lora_B.weight": [3072, 64], - "transformer.single_transformer_blocks.14.attn.to_q.lora_A.weight": [64, 3072], - "transformer.single_transformer_blocks.14.attn.to_q.lora_B.weight": [3072, 64], - "transformer.single_transformer_blocks.14.attn.to_v.lora_A.weight": [64, 3072], - "transformer.single_transformer_blocks.14.attn.to_v.lora_B.weight": [3072, 64], - "transformer.single_transformer_blocks.14.norm.linear.lora_A.weight": [64, 3072], - "transformer.single_transformer_blocks.14.norm.linear.lora_B.weight": [9216, 64], - 
"transformer.single_transformer_blocks.14.proj_mlp.lora_A.weight": [64, 3072], - "transformer.single_transformer_blocks.14.proj_mlp.lora_B.weight": [12288, 64], - "transformer.single_transformer_blocks.14.proj_out.lora_A.weight": [64, 15360], - "transformer.single_transformer_blocks.14.proj_out.lora_B.weight": [3072, 64], - "transformer.single_transformer_blocks.15.attn.to_k.lora_A.weight": [64, 3072], - "transformer.single_transformer_blocks.15.attn.to_k.lora_B.weight": [3072, 64], - "transformer.single_transformer_blocks.15.attn.to_q.lora_A.weight": [64, 3072], - "transformer.single_transformer_blocks.15.attn.to_q.lora_B.weight": [3072, 64], - "transformer.single_transformer_blocks.15.attn.to_v.lora_A.weight": [64, 3072], - "transformer.single_transformer_blocks.15.attn.to_v.lora_B.weight": [3072, 64], - "transformer.single_transformer_blocks.15.norm.linear.lora_A.weight": [64, 3072], - "transformer.single_transformer_blocks.15.norm.linear.lora_B.weight": [9216, 64], - "transformer.single_transformer_blocks.15.proj_mlp.lora_A.weight": [64, 3072], - "transformer.single_transformer_blocks.15.proj_mlp.lora_B.weight": [12288, 64], - "transformer.single_transformer_blocks.15.proj_out.lora_A.weight": [64, 15360], - "transformer.single_transformer_blocks.15.proj_out.lora_B.weight": [3072, 64], - "transformer.single_transformer_blocks.16.attn.to_k.lora_A.weight": [64, 3072], - "transformer.single_transformer_blocks.16.attn.to_k.lora_B.weight": [3072, 64], - "transformer.single_transformer_blocks.16.attn.to_q.lora_A.weight": [64, 3072], - "transformer.single_transformer_blocks.16.attn.to_q.lora_B.weight": [3072, 64], - "transformer.single_transformer_blocks.16.attn.to_v.lora_A.weight": [64, 3072], - "transformer.single_transformer_blocks.16.attn.to_v.lora_B.weight": [3072, 64], - "transformer.single_transformer_blocks.16.norm.linear.lora_A.weight": [64, 3072], - "transformer.single_transformer_blocks.16.norm.linear.lora_B.weight": [9216, 64], - "transformer.single_transformer_blocks.16.proj_mlp.lora_A.weight": [64, 3072], - "transformer.single_transformer_blocks.16.proj_mlp.lora_B.weight": [12288, 64], - "transformer.single_transformer_blocks.16.proj_out.lora_A.weight": [64, 15360], - "transformer.single_transformer_blocks.16.proj_out.lora_B.weight": [3072, 64], - "transformer.single_transformer_blocks.17.attn.to_k.lora_A.weight": [64, 3072], - "transformer.single_transformer_blocks.17.attn.to_k.lora_B.weight": [3072, 64], - "transformer.single_transformer_blocks.17.attn.to_q.lora_A.weight": [64, 3072], - "transformer.single_transformer_blocks.17.attn.to_q.lora_B.weight": [3072, 64], - "transformer.single_transformer_blocks.17.attn.to_v.lora_A.weight": [64, 3072], - "transformer.single_transformer_blocks.17.attn.to_v.lora_B.weight": [3072, 64], - "transformer.single_transformer_blocks.17.norm.linear.lora_A.weight": [64, 3072], - "transformer.single_transformer_blocks.17.norm.linear.lora_B.weight": [9216, 64], - "transformer.single_transformer_blocks.17.proj_mlp.lora_A.weight": [64, 3072], - "transformer.single_transformer_blocks.17.proj_mlp.lora_B.weight": [12288, 64], - "transformer.single_transformer_blocks.17.proj_out.lora_A.weight": [64, 15360], - "transformer.single_transformer_blocks.17.proj_out.lora_B.weight": [3072, 64], - "transformer.single_transformer_blocks.18.attn.to_k.lora_A.weight": [64, 3072], - "transformer.single_transformer_blocks.18.attn.to_k.lora_B.weight": [3072, 64], - "transformer.single_transformer_blocks.18.attn.to_q.lora_A.weight": [64, 3072], - 
"transformer.single_transformer_blocks.18.attn.to_q.lora_B.weight": [3072, 64], - "transformer.single_transformer_blocks.18.attn.to_v.lora_A.weight": [64, 3072], - "transformer.single_transformer_blocks.18.attn.to_v.lora_B.weight": [3072, 64], - "transformer.single_transformer_blocks.18.norm.linear.lora_A.weight": [64, 3072], - "transformer.single_transformer_blocks.18.norm.linear.lora_B.weight": [9216, 64], - "transformer.single_transformer_blocks.18.proj_mlp.lora_A.weight": [64, 3072], - "transformer.single_transformer_blocks.18.proj_mlp.lora_B.weight": [12288, 64], - "transformer.single_transformer_blocks.18.proj_out.lora_A.weight": [64, 15360], - "transformer.single_transformer_blocks.18.proj_out.lora_B.weight": [3072, 64], - "transformer.single_transformer_blocks.19.attn.to_k.lora_A.weight": [64, 3072], - "transformer.single_transformer_blocks.19.attn.to_k.lora_B.weight": [3072, 64], - "transformer.single_transformer_blocks.19.attn.to_q.lora_A.weight": [64, 3072], - "transformer.single_transformer_blocks.19.attn.to_q.lora_B.weight": [3072, 64], - "transformer.single_transformer_blocks.19.attn.to_v.lora_A.weight": [64, 3072], - "transformer.single_transformer_blocks.19.attn.to_v.lora_B.weight": [3072, 64], - "transformer.single_transformer_blocks.19.norm.linear.lora_A.weight": [64, 3072], - "transformer.single_transformer_blocks.19.norm.linear.lora_B.weight": [9216, 64], - "transformer.single_transformer_blocks.19.proj_mlp.lora_A.weight": [64, 3072], - "transformer.single_transformer_blocks.19.proj_mlp.lora_B.weight": [12288, 64], - "transformer.single_transformer_blocks.19.proj_out.lora_A.weight": [64, 15360], - "transformer.single_transformer_blocks.19.proj_out.lora_B.weight": [3072, 64], - "transformer.single_transformer_blocks.2.attn.to_k.lora_A.weight": [64, 3072], - "transformer.single_transformer_blocks.2.attn.to_k.lora_B.weight": [3072, 64], - "transformer.single_transformer_blocks.2.attn.to_q.lora_A.weight": [64, 3072], - "transformer.single_transformer_blocks.2.attn.to_q.lora_B.weight": [3072, 64], - "transformer.single_transformer_blocks.2.attn.to_v.lora_A.weight": [64, 3072], - "transformer.single_transformer_blocks.2.attn.to_v.lora_B.weight": [3072, 64], - "transformer.single_transformer_blocks.2.norm.linear.lora_A.weight": [64, 3072], - "transformer.single_transformer_blocks.2.norm.linear.lora_B.weight": [9216, 64], - "transformer.single_transformer_blocks.2.proj_mlp.lora_A.weight": [64, 3072], - "transformer.single_transformer_blocks.2.proj_mlp.lora_B.weight": [12288, 64], - "transformer.single_transformer_blocks.2.proj_out.lora_A.weight": [64, 15360], - "transformer.single_transformer_blocks.2.proj_out.lora_B.weight": [3072, 64], - "transformer.single_transformer_blocks.20.attn.to_k.lora_A.weight": [64, 3072], - "transformer.single_transformer_blocks.20.attn.to_k.lora_B.weight": [3072, 64], - "transformer.single_transformer_blocks.20.attn.to_q.lora_A.weight": [64, 3072], - "transformer.single_transformer_blocks.20.attn.to_q.lora_B.weight": [3072, 64], - "transformer.single_transformer_blocks.20.attn.to_v.lora_A.weight": [64, 3072], - "transformer.single_transformer_blocks.20.attn.to_v.lora_B.weight": [3072, 64], - "transformer.single_transformer_blocks.20.norm.linear.lora_A.weight": [64, 3072], - "transformer.single_transformer_blocks.20.norm.linear.lora_B.weight": [9216, 64], - "transformer.single_transformer_blocks.20.proj_mlp.lora_A.weight": [64, 3072], - "transformer.single_transformer_blocks.20.proj_mlp.lora_B.weight": [12288, 64], - 
"transformer.single_transformer_blocks.20.proj_out.lora_A.weight": [64, 15360], - "transformer.single_transformer_blocks.20.proj_out.lora_B.weight": [3072, 64], - "transformer.single_transformer_blocks.21.attn.to_k.lora_A.weight": [64, 3072], - "transformer.single_transformer_blocks.21.attn.to_k.lora_B.weight": [3072, 64], - "transformer.single_transformer_blocks.21.attn.to_q.lora_A.weight": [64, 3072], - "transformer.single_transformer_blocks.21.attn.to_q.lora_B.weight": [3072, 64], - "transformer.single_transformer_blocks.21.attn.to_v.lora_A.weight": [64, 3072], - "transformer.single_transformer_blocks.21.attn.to_v.lora_B.weight": [3072, 64], - "transformer.single_transformer_blocks.21.norm.linear.lora_A.weight": [64, 3072], - "transformer.single_transformer_blocks.21.norm.linear.lora_B.weight": [9216, 64], - "transformer.single_transformer_blocks.21.proj_mlp.lora_A.weight": [64, 3072], - "transformer.single_transformer_blocks.21.proj_mlp.lora_B.weight": [12288, 64], - "transformer.single_transformer_blocks.21.proj_out.lora_A.weight": [64, 15360], - "transformer.single_transformer_blocks.21.proj_out.lora_B.weight": [3072, 64], - "transformer.single_transformer_blocks.22.attn.to_k.lora_A.weight": [64, 3072], - "transformer.single_transformer_blocks.22.attn.to_k.lora_B.weight": [3072, 64], - "transformer.single_transformer_blocks.22.attn.to_q.lora_A.weight": [64, 3072], - "transformer.single_transformer_blocks.22.attn.to_q.lora_B.weight": [3072, 64], - "transformer.single_transformer_blocks.22.attn.to_v.lora_A.weight": [64, 3072], - "transformer.single_transformer_blocks.22.attn.to_v.lora_B.weight": [3072, 64], - "transformer.single_transformer_blocks.22.norm.linear.lora_A.weight": [64, 3072], - "transformer.single_transformer_blocks.22.norm.linear.lora_B.weight": [9216, 64], - "transformer.single_transformer_blocks.22.proj_mlp.lora_A.weight": [64, 3072], - "transformer.single_transformer_blocks.22.proj_mlp.lora_B.weight": [12288, 64], - "transformer.single_transformer_blocks.22.proj_out.lora_A.weight": [64, 15360], - "transformer.single_transformer_blocks.22.proj_out.lora_B.weight": [3072, 64], - "transformer.single_transformer_blocks.23.attn.to_k.lora_A.weight": [64, 3072], - "transformer.single_transformer_blocks.23.attn.to_k.lora_B.weight": [3072, 64], - "transformer.single_transformer_blocks.23.attn.to_q.lora_A.weight": [64, 3072], - "transformer.single_transformer_blocks.23.attn.to_q.lora_B.weight": [3072, 64], - "transformer.single_transformer_blocks.23.attn.to_v.lora_A.weight": [64, 3072], - "transformer.single_transformer_blocks.23.attn.to_v.lora_B.weight": [3072, 64], - "transformer.single_transformer_blocks.23.norm.linear.lora_A.weight": [64, 3072], - "transformer.single_transformer_blocks.23.norm.linear.lora_B.weight": [9216, 64], - "transformer.single_transformer_blocks.23.proj_mlp.lora_A.weight": [64, 3072], - "transformer.single_transformer_blocks.23.proj_mlp.lora_B.weight": [12288, 64], - "transformer.single_transformer_blocks.23.proj_out.lora_A.weight": [64, 15360], - "transformer.single_transformer_blocks.23.proj_out.lora_B.weight": [3072, 64], - "transformer.single_transformer_blocks.24.attn.to_k.lora_A.weight": [64, 3072], - "transformer.single_transformer_blocks.24.attn.to_k.lora_B.weight": [3072, 64], - "transformer.single_transformer_blocks.24.attn.to_q.lora_A.weight": [64, 3072], - "transformer.single_transformer_blocks.24.attn.to_q.lora_B.weight": [3072, 64], - "transformer.single_transformer_blocks.24.attn.to_v.lora_A.weight": [64, 3072], - 
"transformer.single_transformer_blocks.24.attn.to_v.lora_B.weight": [3072, 64], - "transformer.single_transformer_blocks.24.norm.linear.lora_A.weight": [64, 3072], - "transformer.single_transformer_blocks.24.norm.linear.lora_B.weight": [9216, 64], - "transformer.single_transformer_blocks.24.proj_mlp.lora_A.weight": [64, 3072], - "transformer.single_transformer_blocks.24.proj_mlp.lora_B.weight": [12288, 64], - "transformer.single_transformer_blocks.24.proj_out.lora_A.weight": [64, 15360], - "transformer.single_transformer_blocks.24.proj_out.lora_B.weight": [3072, 64], - "transformer.single_transformer_blocks.25.attn.to_k.lora_A.weight": [64, 3072], - "transformer.single_transformer_blocks.25.attn.to_k.lora_B.weight": [3072, 64], - "transformer.single_transformer_blocks.25.attn.to_q.lora_A.weight": [64, 3072], - "transformer.single_transformer_blocks.25.attn.to_q.lora_B.weight": [3072, 64], - "transformer.single_transformer_blocks.25.attn.to_v.lora_A.weight": [64, 3072], - "transformer.single_transformer_blocks.25.attn.to_v.lora_B.weight": [3072, 64], - "transformer.single_transformer_blocks.25.norm.linear.lora_A.weight": [64, 3072], - "transformer.single_transformer_blocks.25.norm.linear.lora_B.weight": [9216, 64], - "transformer.single_transformer_blocks.25.proj_mlp.lora_A.weight": [64, 3072], - "transformer.single_transformer_blocks.25.proj_mlp.lora_B.weight": [12288, 64], - "transformer.single_transformer_blocks.25.proj_out.lora_A.weight": [64, 15360], - "transformer.single_transformer_blocks.25.proj_out.lora_B.weight": [3072, 64], - "transformer.single_transformer_blocks.26.attn.to_k.lora_A.weight": [64, 3072], - "transformer.single_transformer_blocks.26.attn.to_k.lora_B.weight": [3072, 64], - "transformer.single_transformer_blocks.26.attn.to_q.lora_A.weight": [64, 3072], - "transformer.single_transformer_blocks.26.attn.to_q.lora_B.weight": [3072, 64], - "transformer.single_transformer_blocks.26.attn.to_v.lora_A.weight": [64, 3072], - "transformer.single_transformer_blocks.26.attn.to_v.lora_B.weight": [3072, 64], - "transformer.single_transformer_blocks.26.norm.linear.lora_A.weight": [64, 3072], - "transformer.single_transformer_blocks.26.norm.linear.lora_B.weight": [9216, 64], - "transformer.single_transformer_blocks.26.proj_mlp.lora_A.weight": [64, 3072], - "transformer.single_transformer_blocks.26.proj_mlp.lora_B.weight": [12288, 64], - "transformer.single_transformer_blocks.26.proj_out.lora_A.weight": [64, 15360], - "transformer.single_transformer_blocks.26.proj_out.lora_B.weight": [3072, 64], - "transformer.single_transformer_blocks.27.attn.to_k.lora_A.weight": [64, 3072], - "transformer.single_transformer_blocks.27.attn.to_k.lora_B.weight": [3072, 64], - "transformer.single_transformer_blocks.27.attn.to_q.lora_A.weight": [64, 3072], - "transformer.single_transformer_blocks.27.attn.to_q.lora_B.weight": [3072, 64], - "transformer.single_transformer_blocks.27.attn.to_v.lora_A.weight": [64, 3072], - "transformer.single_transformer_blocks.27.attn.to_v.lora_B.weight": [3072, 64], - "transformer.single_transformer_blocks.27.norm.linear.lora_A.weight": [64, 3072], - "transformer.single_transformer_blocks.27.norm.linear.lora_B.weight": [9216, 64], - "transformer.single_transformer_blocks.27.proj_mlp.lora_A.weight": [64, 3072], - "transformer.single_transformer_blocks.27.proj_mlp.lora_B.weight": [12288, 64], - "transformer.single_transformer_blocks.27.proj_out.lora_A.weight": [64, 15360], - "transformer.single_transformer_blocks.27.proj_out.lora_B.weight": [3072, 64], - 
"transformer.single_transformer_blocks.28.attn.to_k.lora_A.weight": [64, 3072], - "transformer.single_transformer_blocks.28.attn.to_k.lora_B.weight": [3072, 64], - "transformer.single_transformer_blocks.28.attn.to_q.lora_A.weight": [64, 3072], - "transformer.single_transformer_blocks.28.attn.to_q.lora_B.weight": [3072, 64], - "transformer.single_transformer_blocks.28.attn.to_v.lora_A.weight": [64, 3072], - "transformer.single_transformer_blocks.28.attn.to_v.lora_B.weight": [3072, 64], - "transformer.single_transformer_blocks.28.norm.linear.lora_A.weight": [64, 3072], - "transformer.single_transformer_blocks.28.norm.linear.lora_B.weight": [9216, 64], - "transformer.single_transformer_blocks.28.proj_mlp.lora_A.weight": [64, 3072], - "transformer.single_transformer_blocks.28.proj_mlp.lora_B.weight": [12288, 64], - "transformer.single_transformer_blocks.28.proj_out.lora_A.weight": [64, 15360], - "transformer.single_transformer_blocks.28.proj_out.lora_B.weight": [3072, 64], - "transformer.single_transformer_blocks.29.attn.to_k.lora_A.weight": [64, 3072], - "transformer.single_transformer_blocks.29.attn.to_k.lora_B.weight": [3072, 64], - "transformer.single_transformer_blocks.29.attn.to_q.lora_A.weight": [64, 3072], - "transformer.single_transformer_blocks.29.attn.to_q.lora_B.weight": [3072, 64], - "transformer.single_transformer_blocks.29.attn.to_v.lora_A.weight": [64, 3072], - "transformer.single_transformer_blocks.29.attn.to_v.lora_B.weight": [3072, 64], - "transformer.single_transformer_blocks.29.norm.linear.lora_A.weight": [64, 3072], - "transformer.single_transformer_blocks.29.norm.linear.lora_B.weight": [9216, 64], - "transformer.single_transformer_blocks.29.proj_mlp.lora_A.weight": [64, 3072], - "transformer.single_transformer_blocks.29.proj_mlp.lora_B.weight": [12288, 64], - "transformer.single_transformer_blocks.29.proj_out.lora_A.weight": [64, 15360], - "transformer.single_transformer_blocks.29.proj_out.lora_B.weight": [3072, 64], - "transformer.single_transformer_blocks.3.attn.to_k.lora_A.weight": [64, 3072], - "transformer.single_transformer_blocks.3.attn.to_k.lora_B.weight": [3072, 64], - "transformer.single_transformer_blocks.3.attn.to_q.lora_A.weight": [64, 3072], - "transformer.single_transformer_blocks.3.attn.to_q.lora_B.weight": [3072, 64], - "transformer.single_transformer_blocks.3.attn.to_v.lora_A.weight": [64, 3072], - "transformer.single_transformer_blocks.3.attn.to_v.lora_B.weight": [3072, 64], - "transformer.single_transformer_blocks.3.norm.linear.lora_A.weight": [64, 3072], - "transformer.single_transformer_blocks.3.norm.linear.lora_B.weight": [9216, 64], - "transformer.single_transformer_blocks.3.proj_mlp.lora_A.weight": [64, 3072], - "transformer.single_transformer_blocks.3.proj_mlp.lora_B.weight": [12288, 64], - "transformer.single_transformer_blocks.3.proj_out.lora_A.weight": [64, 15360], - "transformer.single_transformer_blocks.3.proj_out.lora_B.weight": [3072, 64], - "transformer.single_transformer_blocks.30.attn.to_k.lora_A.weight": [64, 3072], - "transformer.single_transformer_blocks.30.attn.to_k.lora_B.weight": [3072, 64], - "transformer.single_transformer_blocks.30.attn.to_q.lora_A.weight": [64, 3072], - "transformer.single_transformer_blocks.30.attn.to_q.lora_B.weight": [3072, 64], - "transformer.single_transformer_blocks.30.attn.to_v.lora_A.weight": [64, 3072], - "transformer.single_transformer_blocks.30.attn.to_v.lora_B.weight": [3072, 64], - "transformer.single_transformer_blocks.30.norm.linear.lora_A.weight": [64, 3072], - 
"transformer.single_transformer_blocks.30.norm.linear.lora_B.weight": [9216, 64], - "transformer.single_transformer_blocks.30.proj_mlp.lora_A.weight": [64, 3072], - "transformer.single_transformer_blocks.30.proj_mlp.lora_B.weight": [12288, 64], - "transformer.single_transformer_blocks.30.proj_out.lora_A.weight": [64, 15360], - "transformer.single_transformer_blocks.30.proj_out.lora_B.weight": [3072, 64], - "transformer.single_transformer_blocks.31.attn.to_k.lora_A.weight": [64, 3072], - "transformer.single_transformer_blocks.31.attn.to_k.lora_B.weight": [3072, 64], - "transformer.single_transformer_blocks.31.attn.to_q.lora_A.weight": [64, 3072], - "transformer.single_transformer_blocks.31.attn.to_q.lora_B.weight": [3072, 64], - "transformer.single_transformer_blocks.31.attn.to_v.lora_A.weight": [64, 3072], - "transformer.single_transformer_blocks.31.attn.to_v.lora_B.weight": [3072, 64], - "transformer.single_transformer_blocks.31.norm.linear.lora_A.weight": [64, 3072], - "transformer.single_transformer_blocks.31.norm.linear.lora_B.weight": [9216, 64], - "transformer.single_transformer_blocks.31.proj_mlp.lora_A.weight": [64, 3072], - "transformer.single_transformer_blocks.31.proj_mlp.lora_B.weight": [12288, 64], - "transformer.single_transformer_blocks.31.proj_out.lora_A.weight": [64, 15360], - "transformer.single_transformer_blocks.31.proj_out.lora_B.weight": [3072, 64], - "transformer.single_transformer_blocks.32.attn.to_k.lora_A.weight": [64, 3072], - "transformer.single_transformer_blocks.32.attn.to_k.lora_B.weight": [3072, 64], - "transformer.single_transformer_blocks.32.attn.to_q.lora_A.weight": [64, 3072], - "transformer.single_transformer_blocks.32.attn.to_q.lora_B.weight": [3072, 64], - "transformer.single_transformer_blocks.32.attn.to_v.lora_A.weight": [64, 3072], - "transformer.single_transformer_blocks.32.attn.to_v.lora_B.weight": [3072, 64], - "transformer.single_transformer_blocks.32.norm.linear.lora_A.weight": [64, 3072], - "transformer.single_transformer_blocks.32.norm.linear.lora_B.weight": [9216, 64], - "transformer.single_transformer_blocks.32.proj_mlp.lora_A.weight": [64, 3072], - "transformer.single_transformer_blocks.32.proj_mlp.lora_B.weight": [12288, 64], - "transformer.single_transformer_blocks.32.proj_out.lora_A.weight": [64, 15360], - "transformer.single_transformer_blocks.32.proj_out.lora_B.weight": [3072, 64], - "transformer.single_transformer_blocks.33.attn.to_k.lora_A.weight": [64, 3072], - "transformer.single_transformer_blocks.33.attn.to_k.lora_B.weight": [3072, 64], - "transformer.single_transformer_blocks.33.attn.to_q.lora_A.weight": [64, 3072], - "transformer.single_transformer_blocks.33.attn.to_q.lora_B.weight": [3072, 64], - "transformer.single_transformer_blocks.33.attn.to_v.lora_A.weight": [64, 3072], - "transformer.single_transformer_blocks.33.attn.to_v.lora_B.weight": [3072, 64], - "transformer.single_transformer_blocks.33.norm.linear.lora_A.weight": [64, 3072], - "transformer.single_transformer_blocks.33.norm.linear.lora_B.weight": [9216, 64], - "transformer.single_transformer_blocks.33.proj_mlp.lora_A.weight": [64, 3072], - "transformer.single_transformer_blocks.33.proj_mlp.lora_B.weight": [12288, 64], - "transformer.single_transformer_blocks.33.proj_out.lora_A.weight": [64, 15360], - "transformer.single_transformer_blocks.33.proj_out.lora_B.weight": [3072, 64], - "transformer.single_transformer_blocks.34.attn.to_k.lora_A.weight": [64, 3072], - "transformer.single_transformer_blocks.34.attn.to_k.lora_B.weight": [3072, 64], - 
"transformer.single_transformer_blocks.34.attn.to_q.lora_A.weight": [64, 3072], - "transformer.single_transformer_blocks.34.attn.to_q.lora_B.weight": [3072, 64], - "transformer.single_transformer_blocks.34.attn.to_v.lora_A.weight": [64, 3072], - "transformer.single_transformer_blocks.34.attn.to_v.lora_B.weight": [3072, 64], - "transformer.single_transformer_blocks.34.norm.linear.lora_A.weight": [64, 3072], - "transformer.single_transformer_blocks.34.norm.linear.lora_B.weight": [9216, 64], - "transformer.single_transformer_blocks.34.proj_mlp.lora_A.weight": [64, 3072], - "transformer.single_transformer_blocks.34.proj_mlp.lora_B.weight": [12288, 64], - "transformer.single_transformer_blocks.34.proj_out.lora_A.weight": [64, 15360], - "transformer.single_transformer_blocks.34.proj_out.lora_B.weight": [3072, 64], - "transformer.single_transformer_blocks.35.attn.to_k.lora_A.weight": [64, 3072], - "transformer.single_transformer_blocks.35.attn.to_k.lora_B.weight": [3072, 64], - "transformer.single_transformer_blocks.35.attn.to_q.lora_A.weight": [64, 3072], - "transformer.single_transformer_blocks.35.attn.to_q.lora_B.weight": [3072, 64], - "transformer.single_transformer_blocks.35.attn.to_v.lora_A.weight": [64, 3072], - "transformer.single_transformer_blocks.35.attn.to_v.lora_B.weight": [3072, 64], - "transformer.single_transformer_blocks.35.norm.linear.lora_A.weight": [64, 3072], - "transformer.single_transformer_blocks.35.norm.linear.lora_B.weight": [9216, 64], - "transformer.single_transformer_blocks.35.proj_mlp.lora_A.weight": [64, 3072], - "transformer.single_transformer_blocks.35.proj_mlp.lora_B.weight": [12288, 64], - "transformer.single_transformer_blocks.35.proj_out.lora_A.weight": [64, 15360], - "transformer.single_transformer_blocks.35.proj_out.lora_B.weight": [3072, 64], - "transformer.single_transformer_blocks.36.attn.to_k.lora_A.weight": [64, 3072], - "transformer.single_transformer_blocks.36.attn.to_k.lora_B.weight": [3072, 64], - "transformer.single_transformer_blocks.36.attn.to_q.lora_A.weight": [64, 3072], - "transformer.single_transformer_blocks.36.attn.to_q.lora_B.weight": [3072, 64], - "transformer.single_transformer_blocks.36.attn.to_v.lora_A.weight": [64, 3072], - "transformer.single_transformer_blocks.36.attn.to_v.lora_B.weight": [3072, 64], - "transformer.single_transformer_blocks.36.norm.linear.lora_A.weight": [64, 3072], - "transformer.single_transformer_blocks.36.norm.linear.lora_B.weight": [9216, 64], - "transformer.single_transformer_blocks.36.proj_mlp.lora_A.weight": [64, 3072], - "transformer.single_transformer_blocks.36.proj_mlp.lora_B.weight": [12288, 64], - "transformer.single_transformer_blocks.36.proj_out.lora_A.weight": [64, 15360], - "transformer.single_transformer_blocks.36.proj_out.lora_B.weight": [3072, 64], - "transformer.single_transformer_blocks.37.attn.to_k.lora_A.weight": [64, 3072], - "transformer.single_transformer_blocks.37.attn.to_k.lora_B.weight": [3072, 64], - "transformer.single_transformer_blocks.37.attn.to_q.lora_A.weight": [64, 3072], - "transformer.single_transformer_blocks.37.attn.to_q.lora_B.weight": [3072, 64], - "transformer.single_transformer_blocks.37.attn.to_v.lora_A.weight": [64, 3072], - "transformer.single_transformer_blocks.37.attn.to_v.lora_B.weight": [3072, 64], - "transformer.single_transformer_blocks.37.norm.linear.lora_A.weight": [64, 3072], - "transformer.single_transformer_blocks.37.norm.linear.lora_B.weight": [9216, 64], - "transformer.single_transformer_blocks.37.proj_mlp.lora_A.weight": [64, 3072], - 
"transformer.single_transformer_blocks.37.proj_mlp.lora_B.weight": [12288, 64], - "transformer.single_transformer_blocks.37.proj_out.lora_A.weight": [64, 15360], - "transformer.single_transformer_blocks.37.proj_out.lora_B.weight": [3072, 64], - "transformer.single_transformer_blocks.4.attn.to_k.lora_A.weight": [64, 3072], - "transformer.single_transformer_blocks.4.attn.to_k.lora_B.weight": [3072, 64], - "transformer.single_transformer_blocks.4.attn.to_q.lora_A.weight": [64, 3072], - "transformer.single_transformer_blocks.4.attn.to_q.lora_B.weight": [3072, 64], - "transformer.single_transformer_blocks.4.attn.to_v.lora_A.weight": [64, 3072], - "transformer.single_transformer_blocks.4.attn.to_v.lora_B.weight": [3072, 64], - "transformer.single_transformer_blocks.4.norm.linear.lora_A.weight": [64, 3072], - "transformer.single_transformer_blocks.4.norm.linear.lora_B.weight": [9216, 64], - "transformer.single_transformer_blocks.4.proj_mlp.lora_A.weight": [64, 3072], - "transformer.single_transformer_blocks.4.proj_mlp.lora_B.weight": [12288, 64], - "transformer.single_transformer_blocks.4.proj_out.lora_A.weight": [64, 15360], - "transformer.single_transformer_blocks.4.proj_out.lora_B.weight": [3072, 64], - "transformer.single_transformer_blocks.5.attn.to_k.lora_A.weight": [64, 3072], - "transformer.single_transformer_blocks.5.attn.to_k.lora_B.weight": [3072, 64], - "transformer.single_transformer_blocks.5.attn.to_q.lora_A.weight": [64, 3072], - "transformer.single_transformer_blocks.5.attn.to_q.lora_B.weight": [3072, 64], - "transformer.single_transformer_blocks.5.attn.to_v.lora_A.weight": [64, 3072], - "transformer.single_transformer_blocks.5.attn.to_v.lora_B.weight": [3072, 64], - "transformer.single_transformer_blocks.5.norm.linear.lora_A.weight": [64, 3072], - "transformer.single_transformer_blocks.5.norm.linear.lora_B.weight": [9216, 64], - "transformer.single_transformer_blocks.5.proj_mlp.lora_A.weight": [64, 3072], - "transformer.single_transformer_blocks.5.proj_mlp.lora_B.weight": [12288, 64], - "transformer.single_transformer_blocks.5.proj_out.lora_A.weight": [64, 15360], - "transformer.single_transformer_blocks.5.proj_out.lora_B.weight": [3072, 64], - "transformer.single_transformer_blocks.6.attn.to_k.lora_A.weight": [64, 3072], - "transformer.single_transformer_blocks.6.attn.to_k.lora_B.weight": [3072, 64], - "transformer.single_transformer_blocks.6.attn.to_q.lora_A.weight": [64, 3072], - "transformer.single_transformer_blocks.6.attn.to_q.lora_B.weight": [3072, 64], - "transformer.single_transformer_blocks.6.attn.to_v.lora_A.weight": [64, 3072], - "transformer.single_transformer_blocks.6.attn.to_v.lora_B.weight": [3072, 64], - "transformer.single_transformer_blocks.6.norm.linear.lora_A.weight": [64, 3072], - "transformer.single_transformer_blocks.6.norm.linear.lora_B.weight": [9216, 64], - "transformer.single_transformer_blocks.6.proj_mlp.lora_A.weight": [64, 3072], - "transformer.single_transformer_blocks.6.proj_mlp.lora_B.weight": [12288, 64], - "transformer.single_transformer_blocks.6.proj_out.lora_A.weight": [64, 15360], - "transformer.single_transformer_blocks.6.proj_out.lora_B.weight": [3072, 64], - "transformer.single_transformer_blocks.7.attn.to_k.lora_A.weight": [64, 3072], - "transformer.single_transformer_blocks.7.attn.to_k.lora_B.weight": [3072, 64], - "transformer.single_transformer_blocks.7.attn.to_q.lora_A.weight": [64, 3072], - "transformer.single_transformer_blocks.7.attn.to_q.lora_B.weight": [3072, 64], - 
"transformer.single_transformer_blocks.7.attn.to_v.lora_A.weight": [64, 3072], - "transformer.single_transformer_blocks.7.attn.to_v.lora_B.weight": [3072, 64], - "transformer.single_transformer_blocks.7.norm.linear.lora_A.weight": [64, 3072], - "transformer.single_transformer_blocks.7.norm.linear.lora_B.weight": [9216, 64], - "transformer.single_transformer_blocks.7.proj_mlp.lora_A.weight": [64, 3072], - "transformer.single_transformer_blocks.7.proj_mlp.lora_B.weight": [12288, 64], - "transformer.single_transformer_blocks.7.proj_out.lora_A.weight": [64, 15360], - "transformer.single_transformer_blocks.7.proj_out.lora_B.weight": [3072, 64], - "transformer.single_transformer_blocks.8.attn.to_k.lora_A.weight": [64, 3072], - "transformer.single_transformer_blocks.8.attn.to_k.lora_B.weight": [3072, 64], - "transformer.single_transformer_blocks.8.attn.to_q.lora_A.weight": [64, 3072], - "transformer.single_transformer_blocks.8.attn.to_q.lora_B.weight": [3072, 64], - "transformer.single_transformer_blocks.8.attn.to_v.lora_A.weight": [64, 3072], - "transformer.single_transformer_blocks.8.attn.to_v.lora_B.weight": [3072, 64], - "transformer.single_transformer_blocks.8.norm.linear.lora_A.weight": [64, 3072], - "transformer.single_transformer_blocks.8.norm.linear.lora_B.weight": [9216, 64], - "transformer.single_transformer_blocks.8.proj_mlp.lora_A.weight": [64, 3072], - "transformer.single_transformer_blocks.8.proj_mlp.lora_B.weight": [12288, 64], - "transformer.single_transformer_blocks.8.proj_out.lora_A.weight": [64, 15360], - "transformer.single_transformer_blocks.8.proj_out.lora_B.weight": [3072, 64], - "transformer.single_transformer_blocks.9.attn.to_k.lora_A.weight": [64, 3072], - "transformer.single_transformer_blocks.9.attn.to_k.lora_B.weight": [3072, 64], - "transformer.single_transformer_blocks.9.attn.to_q.lora_A.weight": [64, 3072], - "transformer.single_transformer_blocks.9.attn.to_q.lora_B.weight": [3072, 64], - "transformer.single_transformer_blocks.9.attn.to_v.lora_A.weight": [64, 3072], - "transformer.single_transformer_blocks.9.attn.to_v.lora_B.weight": [3072, 64], - "transformer.single_transformer_blocks.9.norm.linear.lora_A.weight": [64, 3072], - "transformer.single_transformer_blocks.9.norm.linear.lora_B.weight": [9216, 64], - "transformer.single_transformer_blocks.9.proj_mlp.lora_A.weight": [64, 3072], - "transformer.single_transformer_blocks.9.proj_mlp.lora_B.weight": [12288, 64], - "transformer.single_transformer_blocks.9.proj_out.lora_A.weight": [64, 15360], - "transformer.single_transformer_blocks.9.proj_out.lora_B.weight": [3072, 64], - "transformer.time_text_embed.guidance_embedder.linear_1.lora_A.weight": [64, 256], - "transformer.time_text_embed.guidance_embedder.linear_1.lora_B.weight": [3072, 64], - "transformer.time_text_embed.guidance_embedder.linear_2.lora_A.weight": [64, 3072], - "transformer.time_text_embed.guidance_embedder.linear_2.lora_B.weight": [3072, 64], - "transformer.time_text_embed.text_embedder.linear_1.lora_A.weight": [64, 768], - "transformer.time_text_embed.text_embedder.linear_1.lora_B.weight": [3072, 64], - "transformer.time_text_embed.text_embedder.linear_2.lora_A.weight": [64, 3072], - "transformer.time_text_embed.text_embedder.linear_2.lora_B.weight": [3072, 64], - "transformer.time_text_embed.timestep_embedder.linear_1.lora_A.weight": [64, 256], - "transformer.time_text_embed.timestep_embedder.linear_1.lora_B.weight": [3072, 64], - "transformer.time_text_embed.timestep_embedder.linear_2.lora_A.weight": [64, 3072], - 
"transformer.time_text_embed.timestep_embedder.linear_2.lora_B.weight": [3072, 64], - "transformer.transformer_blocks.0.attn.add_k_proj.lora_A.weight": [64, 3072], - "transformer.transformer_blocks.0.attn.add_k_proj.lora_B.weight": [3072, 64], - "transformer.transformer_blocks.0.attn.add_q_proj.lora_A.weight": [64, 3072], - "transformer.transformer_blocks.0.attn.add_q_proj.lora_B.weight": [3072, 64], - "transformer.transformer_blocks.0.attn.add_v_proj.lora_A.weight": [64, 3072], - "transformer.transformer_blocks.0.attn.add_v_proj.lora_B.weight": [3072, 64], - "transformer.transformer_blocks.0.attn.to_add_out.lora_A.weight": [64, 3072], - "transformer.transformer_blocks.0.attn.to_add_out.lora_B.weight": [3072, 64], - "transformer.transformer_blocks.0.attn.to_k.lora_A.weight": [64, 3072], - "transformer.transformer_blocks.0.attn.to_k.lora_B.weight": [3072, 64], - "transformer.transformer_blocks.0.attn.to_out.0.lora_A.weight": [64, 3072], - "transformer.transformer_blocks.0.attn.to_out.0.lora_B.weight": [3072, 64], - "transformer.transformer_blocks.0.attn.to_q.lora_A.weight": [64, 3072], - "transformer.transformer_blocks.0.attn.to_q.lora_B.weight": [3072, 64], - "transformer.transformer_blocks.0.attn.to_v.lora_A.weight": [64, 3072], - "transformer.transformer_blocks.0.attn.to_v.lora_B.weight": [3072, 64], - "transformer.transformer_blocks.0.ff.net.0.proj.lora_A.weight": [64, 3072], - "transformer.transformer_blocks.0.ff.net.0.proj.lora_B.weight": [12288, 64], - "transformer.transformer_blocks.0.ff.net.2.lora_A.weight": [64, 12288], - "transformer.transformer_blocks.0.ff.net.2.lora_B.weight": [3072, 64], - "transformer.transformer_blocks.0.ff_context.net.0.proj.lora_A.weight": [64, 3072], - "transformer.transformer_blocks.0.ff_context.net.0.proj.lora_B.weight": [12288, 64], - "transformer.transformer_blocks.0.ff_context.net.2.lora_A.weight": [64, 12288], - "transformer.transformer_blocks.0.ff_context.net.2.lora_B.weight": [3072, 64], - "transformer.transformer_blocks.0.norm1.linear.lora_A.weight": [64, 3072], - "transformer.transformer_blocks.0.norm1.linear.lora_B.weight": [18432, 64], - "transformer.transformer_blocks.0.norm1_context.linear.lora_A.weight": [64, 3072], - "transformer.transformer_blocks.0.norm1_context.linear.lora_B.weight": [18432, 64], - "transformer.transformer_blocks.1.attn.add_k_proj.lora_A.weight": [64, 3072], - "transformer.transformer_blocks.1.attn.add_k_proj.lora_B.weight": [3072, 64], - "transformer.transformer_blocks.1.attn.add_q_proj.lora_A.weight": [64, 3072], - "transformer.transformer_blocks.1.attn.add_q_proj.lora_B.weight": [3072, 64], - "transformer.transformer_blocks.1.attn.add_v_proj.lora_A.weight": [64, 3072], - "transformer.transformer_blocks.1.attn.add_v_proj.lora_B.weight": [3072, 64], - "transformer.transformer_blocks.1.attn.to_add_out.lora_A.weight": [64, 3072], - "transformer.transformer_blocks.1.attn.to_add_out.lora_B.weight": [3072, 64], - "transformer.transformer_blocks.1.attn.to_k.lora_A.weight": [64, 3072], - "transformer.transformer_blocks.1.attn.to_k.lora_B.weight": [3072, 64], - "transformer.transformer_blocks.1.attn.to_out.0.lora_A.weight": [64, 3072], - "transformer.transformer_blocks.1.attn.to_out.0.lora_B.weight": [3072, 64], - "transformer.transformer_blocks.1.attn.to_q.lora_A.weight": [64, 3072], - "transformer.transformer_blocks.1.attn.to_q.lora_B.weight": [3072, 64], - "transformer.transformer_blocks.1.attn.to_v.lora_A.weight": [64, 3072], - "transformer.transformer_blocks.1.attn.to_v.lora_B.weight": [3072, 64], - 
"transformer.transformer_blocks.1.ff.net.0.proj.lora_A.weight": [64, 3072], - "transformer.transformer_blocks.1.ff.net.0.proj.lora_B.weight": [12288, 64], - "transformer.transformer_blocks.1.ff.net.2.lora_A.weight": [64, 12288], - "transformer.transformer_blocks.1.ff.net.2.lora_B.weight": [3072, 64], - "transformer.transformer_blocks.1.ff_context.net.0.proj.lora_A.weight": [64, 3072], - "transformer.transformer_blocks.1.ff_context.net.0.proj.lora_B.weight": [12288, 64], - "transformer.transformer_blocks.1.ff_context.net.2.lora_A.weight": [64, 12288], - "transformer.transformer_blocks.1.ff_context.net.2.lora_B.weight": [3072, 64], - "transformer.transformer_blocks.1.norm1.linear.lora_A.weight": [64, 3072], - "transformer.transformer_blocks.1.norm1.linear.lora_B.weight": [18432, 64], - "transformer.transformer_blocks.1.norm1_context.linear.lora_A.weight": [64, 3072], - "transformer.transformer_blocks.1.norm1_context.linear.lora_B.weight": [18432, 64], - "transformer.transformer_blocks.10.attn.add_k_proj.lora_A.weight": [64, 3072], - "transformer.transformer_blocks.10.attn.add_k_proj.lora_B.weight": [3072, 64], - "transformer.transformer_blocks.10.attn.add_q_proj.lora_A.weight": [64, 3072], - "transformer.transformer_blocks.10.attn.add_q_proj.lora_B.weight": [3072, 64], - "transformer.transformer_blocks.10.attn.add_v_proj.lora_A.weight": [64, 3072], - "transformer.transformer_blocks.10.attn.add_v_proj.lora_B.weight": [3072, 64], - "transformer.transformer_blocks.10.attn.to_add_out.lora_A.weight": [64, 3072], - "transformer.transformer_blocks.10.attn.to_add_out.lora_B.weight": [3072, 64], - "transformer.transformer_blocks.10.attn.to_k.lora_A.weight": [64, 3072], - "transformer.transformer_blocks.10.attn.to_k.lora_B.weight": [3072, 64], - "transformer.transformer_blocks.10.attn.to_out.0.lora_A.weight": [64, 3072], - "transformer.transformer_blocks.10.attn.to_out.0.lora_B.weight": [3072, 64], - "transformer.transformer_blocks.10.attn.to_q.lora_A.weight": [64, 3072], - "transformer.transformer_blocks.10.attn.to_q.lora_B.weight": [3072, 64], - "transformer.transformer_blocks.10.attn.to_v.lora_A.weight": [64, 3072], - "transformer.transformer_blocks.10.attn.to_v.lora_B.weight": [3072, 64], - "transformer.transformer_blocks.10.ff.net.0.proj.lora_A.weight": [64, 3072], - "transformer.transformer_blocks.10.ff.net.0.proj.lora_B.weight": [12288, 64], - "transformer.transformer_blocks.10.ff.net.2.lora_A.weight": [64, 12288], - "transformer.transformer_blocks.10.ff.net.2.lora_B.weight": [3072, 64], - "transformer.transformer_blocks.10.ff_context.net.0.proj.lora_A.weight": [64, 3072], - "transformer.transformer_blocks.10.ff_context.net.0.proj.lora_B.weight": [12288, 64], - "transformer.transformer_blocks.10.ff_context.net.2.lora_A.weight": [64, 12288], - "transformer.transformer_blocks.10.ff_context.net.2.lora_B.weight": [3072, 64], - "transformer.transformer_blocks.10.norm1.linear.lora_A.weight": [64, 3072], - "transformer.transformer_blocks.10.norm1.linear.lora_B.weight": [18432, 64], - "transformer.transformer_blocks.10.norm1_context.linear.lora_A.weight": [64, 3072], - "transformer.transformer_blocks.10.norm1_context.linear.lora_B.weight": [18432, 64], - "transformer.transformer_blocks.11.attn.add_k_proj.lora_A.weight": [64, 3072], - "transformer.transformer_blocks.11.attn.add_k_proj.lora_B.weight": [3072, 64], - "transformer.transformer_blocks.11.attn.add_q_proj.lora_A.weight": [64, 3072], - "transformer.transformer_blocks.11.attn.add_q_proj.lora_B.weight": [3072, 64], - 
"transformer.transformer_blocks.11.attn.add_v_proj.lora_A.weight": [64, 3072], - "transformer.transformer_blocks.11.attn.add_v_proj.lora_B.weight": [3072, 64], - "transformer.transformer_blocks.11.attn.to_add_out.lora_A.weight": [64, 3072], - "transformer.transformer_blocks.11.attn.to_add_out.lora_B.weight": [3072, 64], - "transformer.transformer_blocks.11.attn.to_k.lora_A.weight": [64, 3072], - "transformer.transformer_blocks.11.attn.to_k.lora_B.weight": [3072, 64], - "transformer.transformer_blocks.11.attn.to_out.0.lora_A.weight": [64, 3072], - "transformer.transformer_blocks.11.attn.to_out.0.lora_B.weight": [3072, 64], - "transformer.transformer_blocks.11.attn.to_q.lora_A.weight": [64, 3072], - "transformer.transformer_blocks.11.attn.to_q.lora_B.weight": [3072, 64], - "transformer.transformer_blocks.11.attn.to_v.lora_A.weight": [64, 3072], - "transformer.transformer_blocks.11.attn.to_v.lora_B.weight": [3072, 64], - "transformer.transformer_blocks.11.ff.net.0.proj.lora_A.weight": [64, 3072], - "transformer.transformer_blocks.11.ff.net.0.proj.lora_B.weight": [12288, 64], - "transformer.transformer_blocks.11.ff.net.2.lora_A.weight": [64, 12288], - "transformer.transformer_blocks.11.ff.net.2.lora_B.weight": [3072, 64], - "transformer.transformer_blocks.11.ff_context.net.0.proj.lora_A.weight": [64, 3072], - "transformer.transformer_blocks.11.ff_context.net.0.proj.lora_B.weight": [12288, 64], - "transformer.transformer_blocks.11.ff_context.net.2.lora_A.weight": [64, 12288], - "transformer.transformer_blocks.11.ff_context.net.2.lora_B.weight": [3072, 64], - "transformer.transformer_blocks.11.norm1.linear.lora_A.weight": [64, 3072], - "transformer.transformer_blocks.11.norm1.linear.lora_B.weight": [18432, 64], - "transformer.transformer_blocks.11.norm1_context.linear.lora_A.weight": [64, 3072], - "transformer.transformer_blocks.11.norm1_context.linear.lora_B.weight": [18432, 64], - "transformer.transformer_blocks.12.attn.add_k_proj.lora_A.weight": [64, 3072], - "transformer.transformer_blocks.12.attn.add_k_proj.lora_B.weight": [3072, 64], - "transformer.transformer_blocks.12.attn.add_q_proj.lora_A.weight": [64, 3072], - "transformer.transformer_blocks.12.attn.add_q_proj.lora_B.weight": [3072, 64], - "transformer.transformer_blocks.12.attn.add_v_proj.lora_A.weight": [64, 3072], - "transformer.transformer_blocks.12.attn.add_v_proj.lora_B.weight": [3072, 64], - "transformer.transformer_blocks.12.attn.to_add_out.lora_A.weight": [64, 3072], - "transformer.transformer_blocks.12.attn.to_add_out.lora_B.weight": [3072, 64], - "transformer.transformer_blocks.12.attn.to_k.lora_A.weight": [64, 3072], - "transformer.transformer_blocks.12.attn.to_k.lora_B.weight": [3072, 64], - "transformer.transformer_blocks.12.attn.to_out.0.lora_A.weight": [64, 3072], - "transformer.transformer_blocks.12.attn.to_out.0.lora_B.weight": [3072, 64], - "transformer.transformer_blocks.12.attn.to_q.lora_A.weight": [64, 3072], - "transformer.transformer_blocks.12.attn.to_q.lora_B.weight": [3072, 64], - "transformer.transformer_blocks.12.attn.to_v.lora_A.weight": [64, 3072], - "transformer.transformer_blocks.12.attn.to_v.lora_B.weight": [3072, 64], - "transformer.transformer_blocks.12.ff.net.0.proj.lora_A.weight": [64, 3072], - "transformer.transformer_blocks.12.ff.net.0.proj.lora_B.weight": [12288, 64], - "transformer.transformer_blocks.12.ff.net.2.lora_A.weight": [64, 12288], - "transformer.transformer_blocks.12.ff.net.2.lora_B.weight": [3072, 64], - "transformer.transformer_blocks.12.ff_context.net.0.proj.lora_A.weight": [64, 
3072], - "transformer.transformer_blocks.12.ff_context.net.0.proj.lora_B.weight": [12288, 64], - "transformer.transformer_blocks.12.ff_context.net.2.lora_A.weight": [64, 12288], - "transformer.transformer_blocks.12.ff_context.net.2.lora_B.weight": [3072, 64], - "transformer.transformer_blocks.12.norm1.linear.lora_A.weight": [64, 3072], - "transformer.transformer_blocks.12.norm1.linear.lora_B.weight": [18432, 64], - "transformer.transformer_blocks.12.norm1_context.linear.lora_A.weight": [64, 3072], - "transformer.transformer_blocks.12.norm1_context.linear.lora_B.weight": [18432, 64], - "transformer.transformer_blocks.13.attn.add_k_proj.lora_A.weight": [64, 3072], - "transformer.transformer_blocks.13.attn.add_k_proj.lora_B.weight": [3072, 64], - "transformer.transformer_blocks.13.attn.add_q_proj.lora_A.weight": [64, 3072], - "transformer.transformer_blocks.13.attn.add_q_proj.lora_B.weight": [3072, 64], - "transformer.transformer_blocks.13.attn.add_v_proj.lora_A.weight": [64, 3072], - "transformer.transformer_blocks.13.attn.add_v_proj.lora_B.weight": [3072, 64], - "transformer.transformer_blocks.13.attn.to_add_out.lora_A.weight": [64, 3072], - "transformer.transformer_blocks.13.attn.to_add_out.lora_B.weight": [3072, 64], - "transformer.transformer_blocks.13.attn.to_k.lora_A.weight": [64, 3072], - "transformer.transformer_blocks.13.attn.to_k.lora_B.weight": [3072, 64], - "transformer.transformer_blocks.13.attn.to_out.0.lora_A.weight": [64, 3072], - "transformer.transformer_blocks.13.attn.to_out.0.lora_B.weight": [3072, 64], - "transformer.transformer_blocks.13.attn.to_q.lora_A.weight": [64, 3072], - "transformer.transformer_blocks.13.attn.to_q.lora_B.weight": [3072, 64], - "transformer.transformer_blocks.13.attn.to_v.lora_A.weight": [64, 3072], - "transformer.transformer_blocks.13.attn.to_v.lora_B.weight": [3072, 64], - "transformer.transformer_blocks.13.ff.net.0.proj.lora_A.weight": [64, 3072], - "transformer.transformer_blocks.13.ff.net.0.proj.lora_B.weight": [12288, 64], - "transformer.transformer_blocks.13.ff.net.2.lora_A.weight": [64, 12288], - "transformer.transformer_blocks.13.ff.net.2.lora_B.weight": [3072, 64], - "transformer.transformer_blocks.13.ff_context.net.0.proj.lora_A.weight": [64, 3072], - "transformer.transformer_blocks.13.ff_context.net.0.proj.lora_B.weight": [12288, 64], - "transformer.transformer_blocks.13.ff_context.net.2.lora_A.weight": [64, 12288], - "transformer.transformer_blocks.13.ff_context.net.2.lora_B.weight": [3072, 64], - "transformer.transformer_blocks.13.norm1.linear.lora_A.weight": [64, 3072], - "transformer.transformer_blocks.13.norm1.linear.lora_B.weight": [18432, 64], - "transformer.transformer_blocks.13.norm1_context.linear.lora_A.weight": [64, 3072], - "transformer.transformer_blocks.13.norm1_context.linear.lora_B.weight": [18432, 64], - "transformer.transformer_blocks.14.attn.add_k_proj.lora_A.weight": [64, 3072], - "transformer.transformer_blocks.14.attn.add_k_proj.lora_B.weight": [3072, 64], - "transformer.transformer_blocks.14.attn.add_q_proj.lora_A.weight": [64, 3072], - "transformer.transformer_blocks.14.attn.add_q_proj.lora_B.weight": [3072, 64], - "transformer.transformer_blocks.14.attn.add_v_proj.lora_A.weight": [64, 3072], - "transformer.transformer_blocks.14.attn.add_v_proj.lora_B.weight": [3072, 64], - "transformer.transformer_blocks.14.attn.to_add_out.lora_A.weight": [64, 3072], - "transformer.transformer_blocks.14.attn.to_add_out.lora_B.weight": [3072, 64], - "transformer.transformer_blocks.14.attn.to_k.lora_A.weight": [64, 3072], - 
"transformer.transformer_blocks.14.attn.to_k.lora_B.weight": [3072, 64], - "transformer.transformer_blocks.14.attn.to_out.0.lora_A.weight": [64, 3072], - "transformer.transformer_blocks.14.attn.to_out.0.lora_B.weight": [3072, 64], - "transformer.transformer_blocks.14.attn.to_q.lora_A.weight": [64, 3072], - "transformer.transformer_blocks.14.attn.to_q.lora_B.weight": [3072, 64], - "transformer.transformer_blocks.14.attn.to_v.lora_A.weight": [64, 3072], - "transformer.transformer_blocks.14.attn.to_v.lora_B.weight": [3072, 64], - "transformer.transformer_blocks.14.ff.net.0.proj.lora_A.weight": [64, 3072], - "transformer.transformer_blocks.14.ff.net.0.proj.lora_B.weight": [12288, 64], - "transformer.transformer_blocks.14.ff.net.2.lora_A.weight": [64, 12288], - "transformer.transformer_blocks.14.ff.net.2.lora_B.weight": [3072, 64], - "transformer.transformer_blocks.14.ff_context.net.0.proj.lora_A.weight": [64, 3072], - "transformer.transformer_blocks.14.ff_context.net.0.proj.lora_B.weight": [12288, 64], - "transformer.transformer_blocks.14.ff_context.net.2.lora_A.weight": [64, 12288], - "transformer.transformer_blocks.14.ff_context.net.2.lora_B.weight": [3072, 64], - "transformer.transformer_blocks.14.norm1.linear.lora_A.weight": [64, 3072], - "transformer.transformer_blocks.14.norm1.linear.lora_B.weight": [18432, 64], - "transformer.transformer_blocks.14.norm1_context.linear.lora_A.weight": [64, 3072], - "transformer.transformer_blocks.14.norm1_context.linear.lora_B.weight": [18432, 64], - "transformer.transformer_blocks.15.attn.add_k_proj.lora_A.weight": [64, 3072], - "transformer.transformer_blocks.15.attn.add_k_proj.lora_B.weight": [3072, 64], - "transformer.transformer_blocks.15.attn.add_q_proj.lora_A.weight": [64, 3072], - "transformer.transformer_blocks.15.attn.add_q_proj.lora_B.weight": [3072, 64], - "transformer.transformer_blocks.15.attn.add_v_proj.lora_A.weight": [64, 3072], - "transformer.transformer_blocks.15.attn.add_v_proj.lora_B.weight": [3072, 64], - "transformer.transformer_blocks.15.attn.to_add_out.lora_A.weight": [64, 3072], - "transformer.transformer_blocks.15.attn.to_add_out.lora_B.weight": [3072, 64], - "transformer.transformer_blocks.15.attn.to_k.lora_A.weight": [64, 3072], - "transformer.transformer_blocks.15.attn.to_k.lora_B.weight": [3072, 64], - "transformer.transformer_blocks.15.attn.to_out.0.lora_A.weight": [64, 3072], - "transformer.transformer_blocks.15.attn.to_out.0.lora_B.weight": [3072, 64], - "transformer.transformer_blocks.15.attn.to_q.lora_A.weight": [64, 3072], - "transformer.transformer_blocks.15.attn.to_q.lora_B.weight": [3072, 64], - "transformer.transformer_blocks.15.attn.to_v.lora_A.weight": [64, 3072], - "transformer.transformer_blocks.15.attn.to_v.lora_B.weight": [3072, 64], - "transformer.transformer_blocks.15.ff.net.0.proj.lora_A.weight": [64, 3072], - "transformer.transformer_blocks.15.ff.net.0.proj.lora_B.weight": [12288, 64], - "transformer.transformer_blocks.15.ff.net.2.lora_A.weight": [64, 12288], - "transformer.transformer_blocks.15.ff.net.2.lora_B.weight": [3072, 64], - "transformer.transformer_blocks.15.ff_context.net.0.proj.lora_A.weight": [64, 3072], - "transformer.transformer_blocks.15.ff_context.net.0.proj.lora_B.weight": [12288, 64], - "transformer.transformer_blocks.15.ff_context.net.2.lora_A.weight": [64, 12288], - "transformer.transformer_blocks.15.ff_context.net.2.lora_B.weight": [3072, 64], - "transformer.transformer_blocks.15.norm1.linear.lora_A.weight": [64, 3072], - 
"transformer.transformer_blocks.15.norm1.linear.lora_B.weight": [18432, 64], - "transformer.transformer_blocks.15.norm1_context.linear.lora_A.weight": [64, 3072], - "transformer.transformer_blocks.15.norm1_context.linear.lora_B.weight": [18432, 64], - "transformer.transformer_blocks.16.attn.add_k_proj.lora_A.weight": [64, 3072], - "transformer.transformer_blocks.16.attn.add_k_proj.lora_B.weight": [3072, 64], - "transformer.transformer_blocks.16.attn.add_q_proj.lora_A.weight": [64, 3072], - "transformer.transformer_blocks.16.attn.add_q_proj.lora_B.weight": [3072, 64], - "transformer.transformer_blocks.16.attn.add_v_proj.lora_A.weight": [64, 3072], - "transformer.transformer_blocks.16.attn.add_v_proj.lora_B.weight": [3072, 64], - "transformer.transformer_blocks.16.attn.to_add_out.lora_A.weight": [64, 3072], - "transformer.transformer_blocks.16.attn.to_add_out.lora_B.weight": [3072, 64], - "transformer.transformer_blocks.16.attn.to_k.lora_A.weight": [64, 3072], - "transformer.transformer_blocks.16.attn.to_k.lora_B.weight": [3072, 64], - "transformer.transformer_blocks.16.attn.to_out.0.lora_A.weight": [64, 3072], - "transformer.transformer_blocks.16.attn.to_out.0.lora_B.weight": [3072, 64], - "transformer.transformer_blocks.16.attn.to_q.lora_A.weight": [64, 3072], - "transformer.transformer_blocks.16.attn.to_q.lora_B.weight": [3072, 64], - "transformer.transformer_blocks.16.attn.to_v.lora_A.weight": [64, 3072], - "transformer.transformer_blocks.16.attn.to_v.lora_B.weight": [3072, 64], - "transformer.transformer_blocks.16.ff.net.0.proj.lora_A.weight": [64, 3072], - "transformer.transformer_blocks.16.ff.net.0.proj.lora_B.weight": [12288, 64], - "transformer.transformer_blocks.16.ff.net.2.lora_A.weight": [64, 12288], - "transformer.transformer_blocks.16.ff.net.2.lora_B.weight": [3072, 64], - "transformer.transformer_blocks.16.ff_context.net.0.proj.lora_A.weight": [64, 3072], - "transformer.transformer_blocks.16.ff_context.net.0.proj.lora_B.weight": [12288, 64], - "transformer.transformer_blocks.16.ff_context.net.2.lora_A.weight": [64, 12288], - "transformer.transformer_blocks.16.ff_context.net.2.lora_B.weight": [3072, 64], - "transformer.transformer_blocks.16.norm1.linear.lora_A.weight": [64, 3072], - "transformer.transformer_blocks.16.norm1.linear.lora_B.weight": [18432, 64], - "transformer.transformer_blocks.16.norm1_context.linear.lora_A.weight": [64, 3072], - "transformer.transformer_blocks.16.norm1_context.linear.lora_B.weight": [18432, 64], - "transformer.transformer_blocks.17.attn.add_k_proj.lora_A.weight": [64, 3072], - "transformer.transformer_blocks.17.attn.add_k_proj.lora_B.weight": [3072, 64], - "transformer.transformer_blocks.17.attn.add_q_proj.lora_A.weight": [64, 3072], - "transformer.transformer_blocks.17.attn.add_q_proj.lora_B.weight": [3072, 64], - "transformer.transformer_blocks.17.attn.add_v_proj.lora_A.weight": [64, 3072], - "transformer.transformer_blocks.17.attn.add_v_proj.lora_B.weight": [3072, 64], - "transformer.transformer_blocks.17.attn.to_add_out.lora_A.weight": [64, 3072], - "transformer.transformer_blocks.17.attn.to_add_out.lora_B.weight": [3072, 64], - "transformer.transformer_blocks.17.attn.to_k.lora_A.weight": [64, 3072], - "transformer.transformer_blocks.17.attn.to_k.lora_B.weight": [3072, 64], - "transformer.transformer_blocks.17.attn.to_out.0.lora_A.weight": [64, 3072], - "transformer.transformer_blocks.17.attn.to_out.0.lora_B.weight": [3072, 64], - "transformer.transformer_blocks.17.attn.to_q.lora_A.weight": [64, 3072], - 
"transformer.transformer_blocks.17.attn.to_q.lora_B.weight": [3072, 64], - "transformer.transformer_blocks.17.attn.to_v.lora_A.weight": [64, 3072], - "transformer.transformer_blocks.17.attn.to_v.lora_B.weight": [3072, 64], - "transformer.transformer_blocks.17.ff.net.0.proj.lora_A.weight": [64, 3072], - "transformer.transformer_blocks.17.ff.net.0.proj.lora_B.weight": [12288, 64], - "transformer.transformer_blocks.17.ff.net.2.lora_A.weight": [64, 12288], - "transformer.transformer_blocks.17.ff.net.2.lora_B.weight": [3072, 64], - "transformer.transformer_blocks.17.ff_context.net.0.proj.lora_A.weight": [64, 3072], - "transformer.transformer_blocks.17.ff_context.net.0.proj.lora_B.weight": [12288, 64], - "transformer.transformer_blocks.17.ff_context.net.2.lora_A.weight": [64, 12288], - "transformer.transformer_blocks.17.ff_context.net.2.lora_B.weight": [3072, 64], - "transformer.transformer_blocks.17.norm1.linear.lora_A.weight": [64, 3072], - "transformer.transformer_blocks.17.norm1.linear.lora_B.weight": [18432, 64], - "transformer.transformer_blocks.17.norm1_context.linear.lora_A.weight": [64, 3072], - "transformer.transformer_blocks.17.norm1_context.linear.lora_B.weight": [18432, 64], - "transformer.transformer_blocks.18.attn.add_k_proj.lora_A.weight": [64, 3072], - "transformer.transformer_blocks.18.attn.add_k_proj.lora_B.weight": [3072, 64], - "transformer.transformer_blocks.18.attn.add_q_proj.lora_A.weight": [64, 3072], - "transformer.transformer_blocks.18.attn.add_q_proj.lora_B.weight": [3072, 64], - "transformer.transformer_blocks.18.attn.add_v_proj.lora_A.weight": [64, 3072], - "transformer.transformer_blocks.18.attn.add_v_proj.lora_B.weight": [3072, 64], - "transformer.transformer_blocks.18.attn.to_add_out.lora_A.weight": [64, 3072], - "transformer.transformer_blocks.18.attn.to_add_out.lora_B.weight": [3072, 64], - "transformer.transformer_blocks.18.attn.to_k.lora_A.weight": [64, 3072], - "transformer.transformer_blocks.18.attn.to_k.lora_B.weight": [3072, 64], - "transformer.transformer_blocks.18.attn.to_out.0.lora_A.weight": [64, 3072], - "transformer.transformer_blocks.18.attn.to_out.0.lora_B.weight": [3072, 64], - "transformer.transformer_blocks.18.attn.to_q.lora_A.weight": [64, 3072], - "transformer.transformer_blocks.18.attn.to_q.lora_B.weight": [3072, 64], - "transformer.transformer_blocks.18.attn.to_v.lora_A.weight": [64, 3072], - "transformer.transformer_blocks.18.attn.to_v.lora_B.weight": [3072, 64], - "transformer.transformer_blocks.18.ff.net.0.proj.lora_A.weight": [64, 3072], - "transformer.transformer_blocks.18.ff.net.0.proj.lora_B.weight": [12288, 64], - "transformer.transformer_blocks.18.ff.net.2.lora_A.weight": [64, 12288], - "transformer.transformer_blocks.18.ff.net.2.lora_B.weight": [3072, 64], - "transformer.transformer_blocks.18.ff_context.net.0.proj.lora_A.weight": [64, 3072], - "transformer.transformer_blocks.18.ff_context.net.0.proj.lora_B.weight": [12288, 64], - "transformer.transformer_blocks.18.ff_context.net.2.lora_A.weight": [64, 12288], - "transformer.transformer_blocks.18.ff_context.net.2.lora_B.weight": [3072, 64], - "transformer.transformer_blocks.18.norm1.linear.lora_A.weight": [64, 3072], - "transformer.transformer_blocks.18.norm1.linear.lora_B.weight": [18432, 64], - "transformer.transformer_blocks.18.norm1_context.linear.lora_A.weight": [64, 3072], - "transformer.transformer_blocks.18.norm1_context.linear.lora_B.weight": [18432, 64], - "transformer.transformer_blocks.2.attn.add_k_proj.lora_A.weight": [64, 3072], - 
"transformer.transformer_blocks.2.attn.add_k_proj.lora_B.weight": [3072, 64], - "transformer.transformer_blocks.2.attn.add_q_proj.lora_A.weight": [64, 3072], - "transformer.transformer_blocks.2.attn.add_q_proj.lora_B.weight": [3072, 64], - "transformer.transformer_blocks.2.attn.add_v_proj.lora_A.weight": [64, 3072], - "transformer.transformer_blocks.2.attn.add_v_proj.lora_B.weight": [3072, 64], - "transformer.transformer_blocks.2.attn.to_add_out.lora_A.weight": [64, 3072], - "transformer.transformer_blocks.2.attn.to_add_out.lora_B.weight": [3072, 64], - "transformer.transformer_blocks.2.attn.to_k.lora_A.weight": [64, 3072], - "transformer.transformer_blocks.2.attn.to_k.lora_B.weight": [3072, 64], - "transformer.transformer_blocks.2.attn.to_out.0.lora_A.weight": [64, 3072], - "transformer.transformer_blocks.2.attn.to_out.0.lora_B.weight": [3072, 64], - "transformer.transformer_blocks.2.attn.to_q.lora_A.weight": [64, 3072], - "transformer.transformer_blocks.2.attn.to_q.lora_B.weight": [3072, 64], - "transformer.transformer_blocks.2.attn.to_v.lora_A.weight": [64, 3072], - "transformer.transformer_blocks.2.attn.to_v.lora_B.weight": [3072, 64], - "transformer.transformer_blocks.2.ff.net.0.proj.lora_A.weight": [64, 3072], - "transformer.transformer_blocks.2.ff.net.0.proj.lora_B.weight": [12288, 64], - "transformer.transformer_blocks.2.ff.net.2.lora_A.weight": [64, 12288], - "transformer.transformer_blocks.2.ff.net.2.lora_B.weight": [3072, 64], - "transformer.transformer_blocks.2.ff_context.net.0.proj.lora_A.weight": [64, 3072], - "transformer.transformer_blocks.2.ff_context.net.0.proj.lora_B.weight": [12288, 64], - "transformer.transformer_blocks.2.ff_context.net.2.lora_A.weight": [64, 12288], - "transformer.transformer_blocks.2.ff_context.net.2.lora_B.weight": [3072, 64], - "transformer.transformer_blocks.2.norm1.linear.lora_A.weight": [64, 3072], - "transformer.transformer_blocks.2.norm1.linear.lora_B.weight": [18432, 64], - "transformer.transformer_blocks.2.norm1_context.linear.lora_A.weight": [64, 3072], - "transformer.transformer_blocks.2.norm1_context.linear.lora_B.weight": [18432, 64], - "transformer.transformer_blocks.3.attn.add_k_proj.lora_A.weight": [64, 3072], - "transformer.transformer_blocks.3.attn.add_k_proj.lora_B.weight": [3072, 64], - "transformer.transformer_blocks.3.attn.add_q_proj.lora_A.weight": [64, 3072], - "transformer.transformer_blocks.3.attn.add_q_proj.lora_B.weight": [3072, 64], - "transformer.transformer_blocks.3.attn.add_v_proj.lora_A.weight": [64, 3072], - "transformer.transformer_blocks.3.attn.add_v_proj.lora_B.weight": [3072, 64], - "transformer.transformer_blocks.3.attn.to_add_out.lora_A.weight": [64, 3072], - "transformer.transformer_blocks.3.attn.to_add_out.lora_B.weight": [3072, 64], - "transformer.transformer_blocks.3.attn.to_k.lora_A.weight": [64, 3072], - "transformer.transformer_blocks.3.attn.to_k.lora_B.weight": [3072, 64], - "transformer.transformer_blocks.3.attn.to_out.0.lora_A.weight": [64, 3072], - "transformer.transformer_blocks.3.attn.to_out.0.lora_B.weight": [3072, 64], - "transformer.transformer_blocks.3.attn.to_q.lora_A.weight": [64, 3072], - "transformer.transformer_blocks.3.attn.to_q.lora_B.weight": [3072, 64], - "transformer.transformer_blocks.3.attn.to_v.lora_A.weight": [64, 3072], - "transformer.transformer_blocks.3.attn.to_v.lora_B.weight": [3072, 64], - "transformer.transformer_blocks.3.ff.net.0.proj.lora_A.weight": [64, 3072], - "transformer.transformer_blocks.3.ff.net.0.proj.lora_B.weight": [12288, 64], - 
"transformer.transformer_blocks.3.ff.net.2.lora_A.weight": [64, 12288], - "transformer.transformer_blocks.3.ff.net.2.lora_B.weight": [3072, 64], - "transformer.transformer_blocks.3.ff_context.net.0.proj.lora_A.weight": [64, 3072], - "transformer.transformer_blocks.3.ff_context.net.0.proj.lora_B.weight": [12288, 64], - "transformer.transformer_blocks.3.ff_context.net.2.lora_A.weight": [64, 12288], - "transformer.transformer_blocks.3.ff_context.net.2.lora_B.weight": [3072, 64], - "transformer.transformer_blocks.3.norm1.linear.lora_A.weight": [64, 3072], - "transformer.transformer_blocks.3.norm1.linear.lora_B.weight": [18432, 64], - "transformer.transformer_blocks.3.norm1_context.linear.lora_A.weight": [64, 3072], - "transformer.transformer_blocks.3.norm1_context.linear.lora_B.weight": [18432, 64], - "transformer.transformer_blocks.4.attn.add_k_proj.lora_A.weight": [64, 3072], - "transformer.transformer_blocks.4.attn.add_k_proj.lora_B.weight": [3072, 64], - "transformer.transformer_blocks.4.attn.add_q_proj.lora_A.weight": [64, 3072], - "transformer.transformer_blocks.4.attn.add_q_proj.lora_B.weight": [3072, 64], - "transformer.transformer_blocks.4.attn.add_v_proj.lora_A.weight": [64, 3072], - "transformer.transformer_blocks.4.attn.add_v_proj.lora_B.weight": [3072, 64], - "transformer.transformer_blocks.4.attn.to_add_out.lora_A.weight": [64, 3072], - "transformer.transformer_blocks.4.attn.to_add_out.lora_B.weight": [3072, 64], - "transformer.transformer_blocks.4.attn.to_k.lora_A.weight": [64, 3072], - "transformer.transformer_blocks.4.attn.to_k.lora_B.weight": [3072, 64], - "transformer.transformer_blocks.4.attn.to_out.0.lora_A.weight": [64, 3072], - "transformer.transformer_blocks.4.attn.to_out.0.lora_B.weight": [3072, 64], - "transformer.transformer_blocks.4.attn.to_q.lora_A.weight": [64, 3072], - "transformer.transformer_blocks.4.attn.to_q.lora_B.weight": [3072, 64], - "transformer.transformer_blocks.4.attn.to_v.lora_A.weight": [64, 3072], - "transformer.transformer_blocks.4.attn.to_v.lora_B.weight": [3072, 64], - "transformer.transformer_blocks.4.ff.net.0.proj.lora_A.weight": [64, 3072], - "transformer.transformer_blocks.4.ff.net.0.proj.lora_B.weight": [12288, 64], - "transformer.transformer_blocks.4.ff.net.2.lora_A.weight": [64, 12288], - "transformer.transformer_blocks.4.ff.net.2.lora_B.weight": [3072, 64], - "transformer.transformer_blocks.4.ff_context.net.0.proj.lora_A.weight": [64, 3072], - "transformer.transformer_blocks.4.ff_context.net.0.proj.lora_B.weight": [12288, 64], - "transformer.transformer_blocks.4.ff_context.net.2.lora_A.weight": [64, 12288], - "transformer.transformer_blocks.4.ff_context.net.2.lora_B.weight": [3072, 64], - "transformer.transformer_blocks.4.norm1.linear.lora_A.weight": [64, 3072], - "transformer.transformer_blocks.4.norm1.linear.lora_B.weight": [18432, 64], - "transformer.transformer_blocks.4.norm1_context.linear.lora_A.weight": [64, 3072], - "transformer.transformer_blocks.4.norm1_context.linear.lora_B.weight": [18432, 64], - "transformer.transformer_blocks.5.attn.add_k_proj.lora_A.weight": [64, 3072], - "transformer.transformer_blocks.5.attn.add_k_proj.lora_B.weight": [3072, 64], - "transformer.transformer_blocks.5.attn.add_q_proj.lora_A.weight": [64, 3072], - "transformer.transformer_blocks.5.attn.add_q_proj.lora_B.weight": [3072, 64], - "transformer.transformer_blocks.5.attn.add_v_proj.lora_A.weight": [64, 3072], - "transformer.transformer_blocks.5.attn.add_v_proj.lora_B.weight": [3072, 64], - 
"transformer.transformer_blocks.5.attn.to_add_out.lora_A.weight": [64, 3072], - "transformer.transformer_blocks.5.attn.to_add_out.lora_B.weight": [3072, 64], - "transformer.transformer_blocks.5.attn.to_k.lora_A.weight": [64, 3072], - "transformer.transformer_blocks.5.attn.to_k.lora_B.weight": [3072, 64], - "transformer.transformer_blocks.5.attn.to_out.0.lora_A.weight": [64, 3072], - "transformer.transformer_blocks.5.attn.to_out.0.lora_B.weight": [3072, 64], - "transformer.transformer_blocks.5.attn.to_q.lora_A.weight": [64, 3072], - "transformer.transformer_blocks.5.attn.to_q.lora_B.weight": [3072, 64], - "transformer.transformer_blocks.5.attn.to_v.lora_A.weight": [64, 3072], - "transformer.transformer_blocks.5.attn.to_v.lora_B.weight": [3072, 64], - "transformer.transformer_blocks.5.ff.net.0.proj.lora_A.weight": [64, 3072], - "transformer.transformer_blocks.5.ff.net.0.proj.lora_B.weight": [12288, 64], - "transformer.transformer_blocks.5.ff.net.2.lora_A.weight": [64, 12288], - "transformer.transformer_blocks.5.ff.net.2.lora_B.weight": [3072, 64], - "transformer.transformer_blocks.5.ff_context.net.0.proj.lora_A.weight": [64, 3072], - "transformer.transformer_blocks.5.ff_context.net.0.proj.lora_B.weight": [12288, 64], - "transformer.transformer_blocks.5.ff_context.net.2.lora_A.weight": [64, 12288], - "transformer.transformer_blocks.5.ff_context.net.2.lora_B.weight": [3072, 64], - "transformer.transformer_blocks.5.norm1.linear.lora_A.weight": [64, 3072], - "transformer.transformer_blocks.5.norm1.linear.lora_B.weight": [18432, 64], - "transformer.transformer_blocks.5.norm1_context.linear.lora_A.weight": [64, 3072], - "transformer.transformer_blocks.5.norm1_context.linear.lora_B.weight": [18432, 64], - "transformer.transformer_blocks.6.attn.add_k_proj.lora_A.weight": [64, 3072], - "transformer.transformer_blocks.6.attn.add_k_proj.lora_B.weight": [3072, 64], - "transformer.transformer_blocks.6.attn.add_q_proj.lora_A.weight": [64, 3072], - "transformer.transformer_blocks.6.attn.add_q_proj.lora_B.weight": [3072, 64], - "transformer.transformer_blocks.6.attn.add_v_proj.lora_A.weight": [64, 3072], - "transformer.transformer_blocks.6.attn.add_v_proj.lora_B.weight": [3072, 64], - "transformer.transformer_blocks.6.attn.to_add_out.lora_A.weight": [64, 3072], - "transformer.transformer_blocks.6.attn.to_add_out.lora_B.weight": [3072, 64], - "transformer.transformer_blocks.6.attn.to_k.lora_A.weight": [64, 3072], - "transformer.transformer_blocks.6.attn.to_k.lora_B.weight": [3072, 64], - "transformer.transformer_blocks.6.attn.to_out.0.lora_A.weight": [64, 3072], - "transformer.transformer_blocks.6.attn.to_out.0.lora_B.weight": [3072, 64], - "transformer.transformer_blocks.6.attn.to_q.lora_A.weight": [64, 3072], - "transformer.transformer_blocks.6.attn.to_q.lora_B.weight": [3072, 64], - "transformer.transformer_blocks.6.attn.to_v.lora_A.weight": [64, 3072], - "transformer.transformer_blocks.6.attn.to_v.lora_B.weight": [3072, 64], - "transformer.transformer_blocks.6.ff.net.0.proj.lora_A.weight": [64, 3072], - "transformer.transformer_blocks.6.ff.net.0.proj.lora_B.weight": [12288, 64], - "transformer.transformer_blocks.6.ff.net.2.lora_A.weight": [64, 12288], - "transformer.transformer_blocks.6.ff.net.2.lora_B.weight": [3072, 64], - "transformer.transformer_blocks.6.ff_context.net.0.proj.lora_A.weight": [64, 3072], - "transformer.transformer_blocks.6.ff_context.net.0.proj.lora_B.weight": [12288, 64], - "transformer.transformer_blocks.6.ff_context.net.2.lora_A.weight": [64, 12288], - 
"transformer.transformer_blocks.6.ff_context.net.2.lora_B.weight": [3072, 64], - "transformer.transformer_blocks.6.norm1.linear.lora_A.weight": [64, 3072], - "transformer.transformer_blocks.6.norm1.linear.lora_B.weight": [18432, 64], - "transformer.transformer_blocks.6.norm1_context.linear.lora_A.weight": [64, 3072], - "transformer.transformer_blocks.6.norm1_context.linear.lora_B.weight": [18432, 64], - "transformer.transformer_blocks.7.attn.add_k_proj.lora_A.weight": [64, 3072], - "transformer.transformer_blocks.7.attn.add_k_proj.lora_B.weight": [3072, 64], - "transformer.transformer_blocks.7.attn.add_q_proj.lora_A.weight": [64, 3072], - "transformer.transformer_blocks.7.attn.add_q_proj.lora_B.weight": [3072, 64], - "transformer.transformer_blocks.7.attn.add_v_proj.lora_A.weight": [64, 3072], - "transformer.transformer_blocks.7.attn.add_v_proj.lora_B.weight": [3072, 64], - "transformer.transformer_blocks.7.attn.to_add_out.lora_A.weight": [64, 3072], - "transformer.transformer_blocks.7.attn.to_add_out.lora_B.weight": [3072, 64], - "transformer.transformer_blocks.7.attn.to_k.lora_A.weight": [64, 3072], - "transformer.transformer_blocks.7.attn.to_k.lora_B.weight": [3072, 64], - "transformer.transformer_blocks.7.attn.to_out.0.lora_A.weight": [64, 3072], - "transformer.transformer_blocks.7.attn.to_out.0.lora_B.weight": [3072, 64], - "transformer.transformer_blocks.7.attn.to_q.lora_A.weight": [64, 3072], - "transformer.transformer_blocks.7.attn.to_q.lora_B.weight": [3072, 64], - "transformer.transformer_blocks.7.attn.to_v.lora_A.weight": [64, 3072], - "transformer.transformer_blocks.7.attn.to_v.lora_B.weight": [3072, 64], - "transformer.transformer_blocks.7.ff.net.0.proj.lora_A.weight": [64, 3072], - "transformer.transformer_blocks.7.ff.net.0.proj.lora_B.weight": [12288, 64], - "transformer.transformer_blocks.7.ff.net.2.lora_A.weight": [64, 12288], - "transformer.transformer_blocks.7.ff.net.2.lora_B.weight": [3072, 64], - "transformer.transformer_blocks.7.ff_context.net.0.proj.lora_A.weight": [64, 3072], - "transformer.transformer_blocks.7.ff_context.net.0.proj.lora_B.weight": [12288, 64], - "transformer.transformer_blocks.7.ff_context.net.2.lora_A.weight": [64, 12288], - "transformer.transformer_blocks.7.ff_context.net.2.lora_B.weight": [3072, 64], - "transformer.transformer_blocks.7.norm1.linear.lora_A.weight": [64, 3072], - "transformer.transformer_blocks.7.norm1.linear.lora_B.weight": [18432, 64], - "transformer.transformer_blocks.7.norm1_context.linear.lora_A.weight": [64, 3072], - "transformer.transformer_blocks.7.norm1_context.linear.lora_B.weight": [18432, 64], - "transformer.transformer_blocks.8.attn.add_k_proj.lora_A.weight": [64, 3072], - "transformer.transformer_blocks.8.attn.add_k_proj.lora_B.weight": [3072, 64], - "transformer.transformer_blocks.8.attn.add_q_proj.lora_A.weight": [64, 3072], - "transformer.transformer_blocks.8.attn.add_q_proj.lora_B.weight": [3072, 64], - "transformer.transformer_blocks.8.attn.add_v_proj.lora_A.weight": [64, 3072], - "transformer.transformer_blocks.8.attn.add_v_proj.lora_B.weight": [3072, 64], - "transformer.transformer_blocks.8.attn.to_add_out.lora_A.weight": [64, 3072], - "transformer.transformer_blocks.8.attn.to_add_out.lora_B.weight": [3072, 64], - "transformer.transformer_blocks.8.attn.to_k.lora_A.weight": [64, 3072], - "transformer.transformer_blocks.8.attn.to_k.lora_B.weight": [3072, 64], - "transformer.transformer_blocks.8.attn.to_out.0.lora_A.weight": [64, 3072], - "transformer.transformer_blocks.8.attn.to_out.0.lora_B.weight": [3072, 64], 
- "transformer.transformer_blocks.8.attn.to_q.lora_A.weight": [64, 3072], - "transformer.transformer_blocks.8.attn.to_q.lora_B.weight": [3072, 64], - "transformer.transformer_blocks.8.attn.to_v.lora_A.weight": [64, 3072], - "transformer.transformer_blocks.8.attn.to_v.lora_B.weight": [3072, 64], - "transformer.transformer_blocks.8.ff.net.0.proj.lora_A.weight": [64, 3072], - "transformer.transformer_blocks.8.ff.net.0.proj.lora_B.weight": [12288, 64], - "transformer.transformer_blocks.8.ff.net.2.lora_A.weight": [64, 12288], - "transformer.transformer_blocks.8.ff.net.2.lora_B.weight": [3072, 64], - "transformer.transformer_blocks.8.ff_context.net.0.proj.lora_A.weight": [64, 3072], - "transformer.transformer_blocks.8.ff_context.net.0.proj.lora_B.weight": [12288, 64], - "transformer.transformer_blocks.8.ff_context.net.2.lora_A.weight": [64, 12288], - "transformer.transformer_blocks.8.ff_context.net.2.lora_B.weight": [3072, 64], - "transformer.transformer_blocks.8.norm1.linear.lora_A.weight": [64, 3072], - "transformer.transformer_blocks.8.norm1.linear.lora_B.weight": [18432, 64], - "transformer.transformer_blocks.8.norm1_context.linear.lora_A.weight": [64, 3072], - "transformer.transformer_blocks.8.norm1_context.linear.lora_B.weight": [18432, 64], - "transformer.transformer_blocks.9.attn.add_k_proj.lora_A.weight": [64, 3072], - "transformer.transformer_blocks.9.attn.add_k_proj.lora_B.weight": [3072, 64], - "transformer.transformer_blocks.9.attn.add_q_proj.lora_A.weight": [64, 3072], - "transformer.transformer_blocks.9.attn.add_q_proj.lora_B.weight": [3072, 64], - "transformer.transformer_blocks.9.attn.add_v_proj.lora_A.weight": [64, 3072], - "transformer.transformer_blocks.9.attn.add_v_proj.lora_B.weight": [3072, 64], - "transformer.transformer_blocks.9.attn.to_add_out.lora_A.weight": [64, 3072], - "transformer.transformer_blocks.9.attn.to_add_out.lora_B.weight": [3072, 64], - "transformer.transformer_blocks.9.attn.to_k.lora_A.weight": [64, 3072], - "transformer.transformer_blocks.9.attn.to_k.lora_B.weight": [3072, 64], - "transformer.transformer_blocks.9.attn.to_out.0.lora_A.weight": [64, 3072], - "transformer.transformer_blocks.9.attn.to_out.0.lora_B.weight": [3072, 64], - "transformer.transformer_blocks.9.attn.to_q.lora_A.weight": [64, 3072], - "transformer.transformer_blocks.9.attn.to_q.lora_B.weight": [3072, 64], - "transformer.transformer_blocks.9.attn.to_v.lora_A.weight": [64, 3072], - "transformer.transformer_blocks.9.attn.to_v.lora_B.weight": [3072, 64], - "transformer.transformer_blocks.9.ff.net.0.proj.lora_A.weight": [64, 3072], - "transformer.transformer_blocks.9.ff.net.0.proj.lora_B.weight": [12288, 64], - "transformer.transformer_blocks.9.ff.net.2.lora_A.weight": [64, 12288], - "transformer.transformer_blocks.9.ff.net.2.lora_B.weight": [3072, 64], - "transformer.transformer_blocks.9.ff_context.net.0.proj.lora_A.weight": [64, 3072], - "transformer.transformer_blocks.9.ff_context.net.0.proj.lora_B.weight": [12288, 64], - "transformer.transformer_blocks.9.ff_context.net.2.lora_A.weight": [64, 12288], - "transformer.transformer_blocks.9.ff_context.net.2.lora_B.weight": [3072, 64], - "transformer.transformer_blocks.9.norm1.linear.lora_A.weight": [64, 3072], - "transformer.transformer_blocks.9.norm1.linear.lora_B.weight": [18432, 64], - "transformer.transformer_blocks.9.norm1_context.linear.lora_A.weight": [64, 3072], - "transformer.transformer_blocks.9.norm1_context.linear.lora_B.weight": [18432, 64], - "transformer.x_embedder.lora_A.weight": [64, 64], - 
"transformer.x_embedder.lora_B.weight": [3072, 64] } \ No newline at end of file +state_dict_keys = { + "transformer.context_embedder.lora_A.weight": [64, 4096], + "transformer.context_embedder.lora_B.weight": [3072, 64], + "transformer.norm_out.linear.lora_A.weight": [64, 3072], + "transformer.norm_out.linear.lora_B.weight": [6144, 64], + "transformer.proj_out.lora_A.weight": [64, 3072], + "transformer.proj_out.lora_B.weight": [64, 64], + "transformer.single_transformer_blocks.0.attn.to_k.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.0.attn.to_k.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.0.attn.to_q.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.0.attn.to_q.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.0.attn.to_v.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.0.attn.to_v.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.0.norm.linear.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.0.norm.linear.lora_B.weight": [9216, 64], + "transformer.single_transformer_blocks.0.proj_mlp.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.0.proj_mlp.lora_B.weight": [12288, 64], + "transformer.single_transformer_blocks.0.proj_out.lora_A.weight": [64, 15360], + "transformer.single_transformer_blocks.0.proj_out.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.1.attn.to_k.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.1.attn.to_k.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.1.attn.to_q.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.1.attn.to_q.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.1.attn.to_v.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.1.attn.to_v.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.1.norm.linear.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.1.norm.linear.lora_B.weight": [9216, 64], + "transformer.single_transformer_blocks.1.proj_mlp.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.1.proj_mlp.lora_B.weight": [12288, 64], + "transformer.single_transformer_blocks.1.proj_out.lora_A.weight": [64, 15360], + "transformer.single_transformer_blocks.1.proj_out.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.10.attn.to_k.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.10.attn.to_k.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.10.attn.to_q.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.10.attn.to_q.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.10.attn.to_v.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.10.attn.to_v.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.10.norm.linear.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.10.norm.linear.lora_B.weight": [9216, 64], + "transformer.single_transformer_blocks.10.proj_mlp.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.10.proj_mlp.lora_B.weight": [12288, 64], + "transformer.single_transformer_blocks.10.proj_out.lora_A.weight": [64, 15360], + "transformer.single_transformer_blocks.10.proj_out.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.11.attn.to_k.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.11.attn.to_k.lora_B.weight": [3072, 64], + 
"transformer.single_transformer_blocks.11.attn.to_q.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.11.attn.to_q.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.11.attn.to_v.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.11.attn.to_v.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.11.norm.linear.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.11.norm.linear.lora_B.weight": [9216, 64], + "transformer.single_transformer_blocks.11.proj_mlp.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.11.proj_mlp.lora_B.weight": [12288, 64], + "transformer.single_transformer_blocks.11.proj_out.lora_A.weight": [64, 15360], + "transformer.single_transformer_blocks.11.proj_out.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.12.attn.to_k.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.12.attn.to_k.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.12.attn.to_q.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.12.attn.to_q.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.12.attn.to_v.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.12.attn.to_v.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.12.norm.linear.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.12.norm.linear.lora_B.weight": [9216, 64], + "transformer.single_transformer_blocks.12.proj_mlp.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.12.proj_mlp.lora_B.weight": [12288, 64], + "transformer.single_transformer_blocks.12.proj_out.lora_A.weight": [64, 15360], + "transformer.single_transformer_blocks.12.proj_out.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.13.attn.to_k.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.13.attn.to_k.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.13.attn.to_q.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.13.attn.to_q.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.13.attn.to_v.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.13.attn.to_v.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.13.norm.linear.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.13.norm.linear.lora_B.weight": [9216, 64], + "transformer.single_transformer_blocks.13.proj_mlp.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.13.proj_mlp.lora_B.weight": [12288, 64], + "transformer.single_transformer_blocks.13.proj_out.lora_A.weight": [64, 15360], + "transformer.single_transformer_blocks.13.proj_out.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.14.attn.to_k.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.14.attn.to_k.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.14.attn.to_q.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.14.attn.to_q.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.14.attn.to_v.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.14.attn.to_v.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.14.norm.linear.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.14.norm.linear.lora_B.weight": [9216, 64], + "transformer.single_transformer_blocks.14.proj_mlp.lora_A.weight": [64, 3072], + 
"transformer.single_transformer_blocks.14.proj_mlp.lora_B.weight": [12288, 64], + "transformer.single_transformer_blocks.14.proj_out.lora_A.weight": [64, 15360], + "transformer.single_transformer_blocks.14.proj_out.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.15.attn.to_k.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.15.attn.to_k.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.15.attn.to_q.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.15.attn.to_q.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.15.attn.to_v.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.15.attn.to_v.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.15.norm.linear.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.15.norm.linear.lora_B.weight": [9216, 64], + "transformer.single_transformer_blocks.15.proj_mlp.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.15.proj_mlp.lora_B.weight": [12288, 64], + "transformer.single_transformer_blocks.15.proj_out.lora_A.weight": [64, 15360], + "transformer.single_transformer_blocks.15.proj_out.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.16.attn.to_k.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.16.attn.to_k.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.16.attn.to_q.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.16.attn.to_q.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.16.attn.to_v.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.16.attn.to_v.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.16.norm.linear.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.16.norm.linear.lora_B.weight": [9216, 64], + "transformer.single_transformer_blocks.16.proj_mlp.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.16.proj_mlp.lora_B.weight": [12288, 64], + "transformer.single_transformer_blocks.16.proj_out.lora_A.weight": [64, 15360], + "transformer.single_transformer_blocks.16.proj_out.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.17.attn.to_k.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.17.attn.to_k.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.17.attn.to_q.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.17.attn.to_q.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.17.attn.to_v.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.17.attn.to_v.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.17.norm.linear.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.17.norm.linear.lora_B.weight": [9216, 64], + "transformer.single_transformer_blocks.17.proj_mlp.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.17.proj_mlp.lora_B.weight": [12288, 64], + "transformer.single_transformer_blocks.17.proj_out.lora_A.weight": [64, 15360], + "transformer.single_transformer_blocks.17.proj_out.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.18.attn.to_k.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.18.attn.to_k.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.18.attn.to_q.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.18.attn.to_q.lora_B.weight": [3072, 64], + 
"transformer.single_transformer_blocks.18.attn.to_v.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.18.attn.to_v.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.18.norm.linear.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.18.norm.linear.lora_B.weight": [9216, 64], + "transformer.single_transformer_blocks.18.proj_mlp.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.18.proj_mlp.lora_B.weight": [12288, 64], + "transformer.single_transformer_blocks.18.proj_out.lora_A.weight": [64, 15360], + "transformer.single_transformer_blocks.18.proj_out.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.19.attn.to_k.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.19.attn.to_k.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.19.attn.to_q.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.19.attn.to_q.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.19.attn.to_v.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.19.attn.to_v.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.19.norm.linear.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.19.norm.linear.lora_B.weight": [9216, 64], + "transformer.single_transformer_blocks.19.proj_mlp.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.19.proj_mlp.lora_B.weight": [12288, 64], + "transformer.single_transformer_blocks.19.proj_out.lora_A.weight": [64, 15360], + "transformer.single_transformer_blocks.19.proj_out.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.2.attn.to_k.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.2.attn.to_k.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.2.attn.to_q.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.2.attn.to_q.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.2.attn.to_v.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.2.attn.to_v.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.2.norm.linear.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.2.norm.linear.lora_B.weight": [9216, 64], + "transformer.single_transformer_blocks.2.proj_mlp.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.2.proj_mlp.lora_B.weight": [12288, 64], + "transformer.single_transformer_blocks.2.proj_out.lora_A.weight": [64, 15360], + "transformer.single_transformer_blocks.2.proj_out.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.20.attn.to_k.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.20.attn.to_k.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.20.attn.to_q.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.20.attn.to_q.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.20.attn.to_v.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.20.attn.to_v.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.20.norm.linear.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.20.norm.linear.lora_B.weight": [9216, 64], + "transformer.single_transformer_blocks.20.proj_mlp.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.20.proj_mlp.lora_B.weight": [12288, 64], + "transformer.single_transformer_blocks.20.proj_out.lora_A.weight": [64, 15360], + 
"transformer.single_transformer_blocks.20.proj_out.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.21.attn.to_k.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.21.attn.to_k.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.21.attn.to_q.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.21.attn.to_q.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.21.attn.to_v.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.21.attn.to_v.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.21.norm.linear.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.21.norm.linear.lora_B.weight": [9216, 64], + "transformer.single_transformer_blocks.21.proj_mlp.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.21.proj_mlp.lora_B.weight": [12288, 64], + "transformer.single_transformer_blocks.21.proj_out.lora_A.weight": [64, 15360], + "transformer.single_transformer_blocks.21.proj_out.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.22.attn.to_k.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.22.attn.to_k.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.22.attn.to_q.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.22.attn.to_q.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.22.attn.to_v.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.22.attn.to_v.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.22.norm.linear.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.22.norm.linear.lora_B.weight": [9216, 64], + "transformer.single_transformer_blocks.22.proj_mlp.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.22.proj_mlp.lora_B.weight": [12288, 64], + "transformer.single_transformer_blocks.22.proj_out.lora_A.weight": [64, 15360], + "transformer.single_transformer_blocks.22.proj_out.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.23.attn.to_k.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.23.attn.to_k.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.23.attn.to_q.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.23.attn.to_q.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.23.attn.to_v.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.23.attn.to_v.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.23.norm.linear.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.23.norm.linear.lora_B.weight": [9216, 64], + "transformer.single_transformer_blocks.23.proj_mlp.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.23.proj_mlp.lora_B.weight": [12288, 64], + "transformer.single_transformer_blocks.23.proj_out.lora_A.weight": [64, 15360], + "transformer.single_transformer_blocks.23.proj_out.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.24.attn.to_k.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.24.attn.to_k.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.24.attn.to_q.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.24.attn.to_q.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.24.attn.to_v.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.24.attn.to_v.lora_B.weight": [3072, 64], + 
"transformer.single_transformer_blocks.24.norm.linear.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.24.norm.linear.lora_B.weight": [9216, 64], + "transformer.single_transformer_blocks.24.proj_mlp.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.24.proj_mlp.lora_B.weight": [12288, 64], + "transformer.single_transformer_blocks.24.proj_out.lora_A.weight": [64, 15360], + "transformer.single_transformer_blocks.24.proj_out.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.25.attn.to_k.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.25.attn.to_k.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.25.attn.to_q.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.25.attn.to_q.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.25.attn.to_v.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.25.attn.to_v.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.25.norm.linear.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.25.norm.linear.lora_B.weight": [9216, 64], + "transformer.single_transformer_blocks.25.proj_mlp.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.25.proj_mlp.lora_B.weight": [12288, 64], + "transformer.single_transformer_blocks.25.proj_out.lora_A.weight": [64, 15360], + "transformer.single_transformer_blocks.25.proj_out.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.26.attn.to_k.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.26.attn.to_k.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.26.attn.to_q.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.26.attn.to_q.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.26.attn.to_v.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.26.attn.to_v.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.26.norm.linear.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.26.norm.linear.lora_B.weight": [9216, 64], + "transformer.single_transformer_blocks.26.proj_mlp.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.26.proj_mlp.lora_B.weight": [12288, 64], + "transformer.single_transformer_blocks.26.proj_out.lora_A.weight": [64, 15360], + "transformer.single_transformer_blocks.26.proj_out.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.27.attn.to_k.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.27.attn.to_k.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.27.attn.to_q.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.27.attn.to_q.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.27.attn.to_v.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.27.attn.to_v.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.27.norm.linear.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.27.norm.linear.lora_B.weight": [9216, 64], + "transformer.single_transformer_blocks.27.proj_mlp.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.27.proj_mlp.lora_B.weight": [12288, 64], + "transformer.single_transformer_blocks.27.proj_out.lora_A.weight": [64, 15360], + "transformer.single_transformer_blocks.27.proj_out.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.28.attn.to_k.lora_A.weight": [64, 3072], + 
"transformer.single_transformer_blocks.28.attn.to_k.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.28.attn.to_q.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.28.attn.to_q.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.28.attn.to_v.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.28.attn.to_v.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.28.norm.linear.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.28.norm.linear.lora_B.weight": [9216, 64], + "transformer.single_transformer_blocks.28.proj_mlp.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.28.proj_mlp.lora_B.weight": [12288, 64], + "transformer.single_transformer_blocks.28.proj_out.lora_A.weight": [64, 15360], + "transformer.single_transformer_blocks.28.proj_out.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.29.attn.to_k.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.29.attn.to_k.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.29.attn.to_q.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.29.attn.to_q.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.29.attn.to_v.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.29.attn.to_v.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.29.norm.linear.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.29.norm.linear.lora_B.weight": [9216, 64], + "transformer.single_transformer_blocks.29.proj_mlp.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.29.proj_mlp.lora_B.weight": [12288, 64], + "transformer.single_transformer_blocks.29.proj_out.lora_A.weight": [64, 15360], + "transformer.single_transformer_blocks.29.proj_out.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.3.attn.to_k.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.3.attn.to_k.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.3.attn.to_q.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.3.attn.to_q.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.3.attn.to_v.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.3.attn.to_v.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.3.norm.linear.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.3.norm.linear.lora_B.weight": [9216, 64], + "transformer.single_transformer_blocks.3.proj_mlp.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.3.proj_mlp.lora_B.weight": [12288, 64], + "transformer.single_transformer_blocks.3.proj_out.lora_A.weight": [64, 15360], + "transformer.single_transformer_blocks.3.proj_out.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.30.attn.to_k.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.30.attn.to_k.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.30.attn.to_q.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.30.attn.to_q.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.30.attn.to_v.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.30.attn.to_v.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.30.norm.linear.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.30.norm.linear.lora_B.weight": [9216, 64], + 
"transformer.single_transformer_blocks.30.proj_mlp.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.30.proj_mlp.lora_B.weight": [12288, 64], + "transformer.single_transformer_blocks.30.proj_out.lora_A.weight": [64, 15360], + "transformer.single_transformer_blocks.30.proj_out.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.31.attn.to_k.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.31.attn.to_k.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.31.attn.to_q.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.31.attn.to_q.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.31.attn.to_v.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.31.attn.to_v.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.31.norm.linear.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.31.norm.linear.lora_B.weight": [9216, 64], + "transformer.single_transformer_blocks.31.proj_mlp.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.31.proj_mlp.lora_B.weight": [12288, 64], + "transformer.single_transformer_blocks.31.proj_out.lora_A.weight": [64, 15360], + "transformer.single_transformer_blocks.31.proj_out.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.32.attn.to_k.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.32.attn.to_k.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.32.attn.to_q.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.32.attn.to_q.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.32.attn.to_v.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.32.attn.to_v.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.32.norm.linear.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.32.norm.linear.lora_B.weight": [9216, 64], + "transformer.single_transformer_blocks.32.proj_mlp.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.32.proj_mlp.lora_B.weight": [12288, 64], + "transformer.single_transformer_blocks.32.proj_out.lora_A.weight": [64, 15360], + "transformer.single_transformer_blocks.32.proj_out.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.33.attn.to_k.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.33.attn.to_k.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.33.attn.to_q.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.33.attn.to_q.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.33.attn.to_v.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.33.attn.to_v.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.33.norm.linear.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.33.norm.linear.lora_B.weight": [9216, 64], + "transformer.single_transformer_blocks.33.proj_mlp.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.33.proj_mlp.lora_B.weight": [12288, 64], + "transformer.single_transformer_blocks.33.proj_out.lora_A.weight": [64, 15360], + "transformer.single_transformer_blocks.33.proj_out.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.34.attn.to_k.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.34.attn.to_k.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.34.attn.to_q.lora_A.weight": [64, 3072], + 
"transformer.single_transformer_blocks.34.attn.to_q.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.34.attn.to_v.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.34.attn.to_v.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.34.norm.linear.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.34.norm.linear.lora_B.weight": [9216, 64], + "transformer.single_transformer_blocks.34.proj_mlp.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.34.proj_mlp.lora_B.weight": [12288, 64], + "transformer.single_transformer_blocks.34.proj_out.lora_A.weight": [64, 15360], + "transformer.single_transformer_blocks.34.proj_out.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.35.attn.to_k.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.35.attn.to_k.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.35.attn.to_q.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.35.attn.to_q.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.35.attn.to_v.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.35.attn.to_v.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.35.norm.linear.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.35.norm.linear.lora_B.weight": [9216, 64], + "transformer.single_transformer_blocks.35.proj_mlp.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.35.proj_mlp.lora_B.weight": [12288, 64], + "transformer.single_transformer_blocks.35.proj_out.lora_A.weight": [64, 15360], + "transformer.single_transformer_blocks.35.proj_out.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.36.attn.to_k.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.36.attn.to_k.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.36.attn.to_q.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.36.attn.to_q.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.36.attn.to_v.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.36.attn.to_v.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.36.norm.linear.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.36.norm.linear.lora_B.weight": [9216, 64], + "transformer.single_transformer_blocks.36.proj_mlp.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.36.proj_mlp.lora_B.weight": [12288, 64], + "transformer.single_transformer_blocks.36.proj_out.lora_A.weight": [64, 15360], + "transformer.single_transformer_blocks.36.proj_out.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.37.attn.to_k.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.37.attn.to_k.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.37.attn.to_q.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.37.attn.to_q.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.37.attn.to_v.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.37.attn.to_v.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.37.norm.linear.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.37.norm.linear.lora_B.weight": [9216, 64], + "transformer.single_transformer_blocks.37.proj_mlp.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.37.proj_mlp.lora_B.weight": [12288, 64], + 
"transformer.single_transformer_blocks.37.proj_out.lora_A.weight": [64, 15360], + "transformer.single_transformer_blocks.37.proj_out.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.4.attn.to_k.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.4.attn.to_k.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.4.attn.to_q.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.4.attn.to_q.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.4.attn.to_v.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.4.attn.to_v.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.4.norm.linear.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.4.norm.linear.lora_B.weight": [9216, 64], + "transformer.single_transformer_blocks.4.proj_mlp.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.4.proj_mlp.lora_B.weight": [12288, 64], + "transformer.single_transformer_blocks.4.proj_out.lora_A.weight": [64, 15360], + "transformer.single_transformer_blocks.4.proj_out.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.5.attn.to_k.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.5.attn.to_k.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.5.attn.to_q.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.5.attn.to_q.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.5.attn.to_v.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.5.attn.to_v.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.5.norm.linear.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.5.norm.linear.lora_B.weight": [9216, 64], + "transformer.single_transformer_blocks.5.proj_mlp.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.5.proj_mlp.lora_B.weight": [12288, 64], + "transformer.single_transformer_blocks.5.proj_out.lora_A.weight": [64, 15360], + "transformer.single_transformer_blocks.5.proj_out.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.6.attn.to_k.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.6.attn.to_k.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.6.attn.to_q.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.6.attn.to_q.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.6.attn.to_v.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.6.attn.to_v.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.6.norm.linear.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.6.norm.linear.lora_B.weight": [9216, 64], + "transformer.single_transformer_blocks.6.proj_mlp.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.6.proj_mlp.lora_B.weight": [12288, 64], + "transformer.single_transformer_blocks.6.proj_out.lora_A.weight": [64, 15360], + "transformer.single_transformer_blocks.6.proj_out.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.7.attn.to_k.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.7.attn.to_k.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.7.attn.to_q.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.7.attn.to_q.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.7.attn.to_v.lora_A.weight": [64, 3072], + 
"transformer.single_transformer_blocks.7.attn.to_v.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.7.norm.linear.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.7.norm.linear.lora_B.weight": [9216, 64], + "transformer.single_transformer_blocks.7.proj_mlp.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.7.proj_mlp.lora_B.weight": [12288, 64], + "transformer.single_transformer_blocks.7.proj_out.lora_A.weight": [64, 15360], + "transformer.single_transformer_blocks.7.proj_out.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.8.attn.to_k.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.8.attn.to_k.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.8.attn.to_q.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.8.attn.to_q.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.8.attn.to_v.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.8.attn.to_v.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.8.norm.linear.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.8.norm.linear.lora_B.weight": [9216, 64], + "transformer.single_transformer_blocks.8.proj_mlp.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.8.proj_mlp.lora_B.weight": [12288, 64], + "transformer.single_transformer_blocks.8.proj_out.lora_A.weight": [64, 15360], + "transformer.single_transformer_blocks.8.proj_out.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.9.attn.to_k.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.9.attn.to_k.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.9.attn.to_q.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.9.attn.to_q.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.9.attn.to_v.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.9.attn.to_v.lora_B.weight": [3072, 64], + "transformer.single_transformer_blocks.9.norm.linear.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.9.norm.linear.lora_B.weight": [9216, 64], + "transformer.single_transformer_blocks.9.proj_mlp.lora_A.weight": [64, 3072], + "transformer.single_transformer_blocks.9.proj_mlp.lora_B.weight": [12288, 64], + "transformer.single_transformer_blocks.9.proj_out.lora_A.weight": [64, 15360], + "transformer.single_transformer_blocks.9.proj_out.lora_B.weight": [3072, 64], + "transformer.time_text_embed.guidance_embedder.linear_1.lora_A.weight": [64, 256], + "transformer.time_text_embed.guidance_embedder.linear_1.lora_B.weight": [3072, 64], + "transformer.time_text_embed.guidance_embedder.linear_2.lora_A.weight": [64, 3072], + "transformer.time_text_embed.guidance_embedder.linear_2.lora_B.weight": [3072, 64], + "transformer.time_text_embed.text_embedder.linear_1.lora_A.weight": [64, 768], + "transformer.time_text_embed.text_embedder.linear_1.lora_B.weight": [3072, 64], + "transformer.time_text_embed.text_embedder.linear_2.lora_A.weight": [64, 3072], + "transformer.time_text_embed.text_embedder.linear_2.lora_B.weight": [3072, 64], + "transformer.time_text_embed.timestep_embedder.linear_1.lora_A.weight": [64, 256], + "transformer.time_text_embed.timestep_embedder.linear_1.lora_B.weight": [3072, 64], + "transformer.time_text_embed.timestep_embedder.linear_2.lora_A.weight": [64, 3072], + "transformer.time_text_embed.timestep_embedder.linear_2.lora_B.weight": [3072, 64], + 
"transformer.transformer_blocks.0.attn.add_k_proj.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.0.attn.add_k_proj.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.0.attn.add_q_proj.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.0.attn.add_q_proj.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.0.attn.add_v_proj.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.0.attn.add_v_proj.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.0.attn.to_add_out.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.0.attn.to_add_out.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.0.attn.to_k.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.0.attn.to_k.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.0.attn.to_out.0.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.0.attn.to_out.0.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.0.attn.to_q.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.0.attn.to_q.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.0.attn.to_v.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.0.attn.to_v.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.0.ff.net.0.proj.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.0.ff.net.0.proj.lora_B.weight": [12288, 64], + "transformer.transformer_blocks.0.ff.net.2.lora_A.weight": [64, 12288], + "transformer.transformer_blocks.0.ff.net.2.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.0.ff_context.net.0.proj.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.0.ff_context.net.0.proj.lora_B.weight": [12288, 64], + "transformer.transformer_blocks.0.ff_context.net.2.lora_A.weight": [64, 12288], + "transformer.transformer_blocks.0.ff_context.net.2.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.0.norm1.linear.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.0.norm1.linear.lora_B.weight": [18432, 64], + "transformer.transformer_blocks.0.norm1_context.linear.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.0.norm1_context.linear.lora_B.weight": [18432, 64], + "transformer.transformer_blocks.1.attn.add_k_proj.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.1.attn.add_k_proj.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.1.attn.add_q_proj.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.1.attn.add_q_proj.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.1.attn.add_v_proj.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.1.attn.add_v_proj.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.1.attn.to_add_out.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.1.attn.to_add_out.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.1.attn.to_k.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.1.attn.to_k.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.1.attn.to_out.0.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.1.attn.to_out.0.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.1.attn.to_q.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.1.attn.to_q.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.1.attn.to_v.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.1.attn.to_v.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.1.ff.net.0.proj.lora_A.weight": [64, 3072], + 
"transformer.transformer_blocks.1.ff.net.0.proj.lora_B.weight": [12288, 64], + "transformer.transformer_blocks.1.ff.net.2.lora_A.weight": [64, 12288], + "transformer.transformer_blocks.1.ff.net.2.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.1.ff_context.net.0.proj.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.1.ff_context.net.0.proj.lora_B.weight": [12288, 64], + "transformer.transformer_blocks.1.ff_context.net.2.lora_A.weight": [64, 12288], + "transformer.transformer_blocks.1.ff_context.net.2.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.1.norm1.linear.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.1.norm1.linear.lora_B.weight": [18432, 64], + "transformer.transformer_blocks.1.norm1_context.linear.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.1.norm1_context.linear.lora_B.weight": [18432, 64], + "transformer.transformer_blocks.10.attn.add_k_proj.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.10.attn.add_k_proj.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.10.attn.add_q_proj.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.10.attn.add_q_proj.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.10.attn.add_v_proj.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.10.attn.add_v_proj.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.10.attn.to_add_out.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.10.attn.to_add_out.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.10.attn.to_k.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.10.attn.to_k.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.10.attn.to_out.0.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.10.attn.to_out.0.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.10.attn.to_q.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.10.attn.to_q.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.10.attn.to_v.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.10.attn.to_v.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.10.ff.net.0.proj.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.10.ff.net.0.proj.lora_B.weight": [12288, 64], + "transformer.transformer_blocks.10.ff.net.2.lora_A.weight": [64, 12288], + "transformer.transformer_blocks.10.ff.net.2.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.10.ff_context.net.0.proj.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.10.ff_context.net.0.proj.lora_B.weight": [12288, 64], + "transformer.transformer_blocks.10.ff_context.net.2.lora_A.weight": [64, 12288], + "transformer.transformer_blocks.10.ff_context.net.2.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.10.norm1.linear.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.10.norm1.linear.lora_B.weight": [18432, 64], + "transformer.transformer_blocks.10.norm1_context.linear.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.10.norm1_context.linear.lora_B.weight": [18432, 64], + "transformer.transformer_blocks.11.attn.add_k_proj.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.11.attn.add_k_proj.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.11.attn.add_q_proj.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.11.attn.add_q_proj.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.11.attn.add_v_proj.lora_A.weight": [64, 3072], + 
"transformer.transformer_blocks.11.attn.add_v_proj.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.11.attn.to_add_out.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.11.attn.to_add_out.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.11.attn.to_k.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.11.attn.to_k.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.11.attn.to_out.0.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.11.attn.to_out.0.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.11.attn.to_q.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.11.attn.to_q.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.11.attn.to_v.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.11.attn.to_v.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.11.ff.net.0.proj.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.11.ff.net.0.proj.lora_B.weight": [12288, 64], + "transformer.transformer_blocks.11.ff.net.2.lora_A.weight": [64, 12288], + "transformer.transformer_blocks.11.ff.net.2.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.11.ff_context.net.0.proj.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.11.ff_context.net.0.proj.lora_B.weight": [12288, 64], + "transformer.transformer_blocks.11.ff_context.net.2.lora_A.weight": [64, 12288], + "transformer.transformer_blocks.11.ff_context.net.2.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.11.norm1.linear.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.11.norm1.linear.lora_B.weight": [18432, 64], + "transformer.transformer_blocks.11.norm1_context.linear.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.11.norm1_context.linear.lora_B.weight": [18432, 64], + "transformer.transformer_blocks.12.attn.add_k_proj.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.12.attn.add_k_proj.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.12.attn.add_q_proj.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.12.attn.add_q_proj.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.12.attn.add_v_proj.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.12.attn.add_v_proj.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.12.attn.to_add_out.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.12.attn.to_add_out.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.12.attn.to_k.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.12.attn.to_k.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.12.attn.to_out.0.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.12.attn.to_out.0.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.12.attn.to_q.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.12.attn.to_q.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.12.attn.to_v.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.12.attn.to_v.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.12.ff.net.0.proj.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.12.ff.net.0.proj.lora_B.weight": [12288, 64], + "transformer.transformer_blocks.12.ff.net.2.lora_A.weight": [64, 12288], + "transformer.transformer_blocks.12.ff.net.2.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.12.ff_context.net.0.proj.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.12.ff_context.net.0.proj.lora_B.weight": 
[12288, 64], + "transformer.transformer_blocks.12.ff_context.net.2.lora_A.weight": [64, 12288], + "transformer.transformer_blocks.12.ff_context.net.2.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.12.norm1.linear.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.12.norm1.linear.lora_B.weight": [18432, 64], + "transformer.transformer_blocks.12.norm1_context.linear.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.12.norm1_context.linear.lora_B.weight": [18432, 64], + "transformer.transformer_blocks.13.attn.add_k_proj.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.13.attn.add_k_proj.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.13.attn.add_q_proj.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.13.attn.add_q_proj.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.13.attn.add_v_proj.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.13.attn.add_v_proj.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.13.attn.to_add_out.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.13.attn.to_add_out.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.13.attn.to_k.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.13.attn.to_k.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.13.attn.to_out.0.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.13.attn.to_out.0.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.13.attn.to_q.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.13.attn.to_q.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.13.attn.to_v.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.13.attn.to_v.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.13.ff.net.0.proj.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.13.ff.net.0.proj.lora_B.weight": [12288, 64], + "transformer.transformer_blocks.13.ff.net.2.lora_A.weight": [64, 12288], + "transformer.transformer_blocks.13.ff.net.2.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.13.ff_context.net.0.proj.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.13.ff_context.net.0.proj.lora_B.weight": [12288, 64], + "transformer.transformer_blocks.13.ff_context.net.2.lora_A.weight": [64, 12288], + "transformer.transformer_blocks.13.ff_context.net.2.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.13.norm1.linear.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.13.norm1.linear.lora_B.weight": [18432, 64], + "transformer.transformer_blocks.13.norm1_context.linear.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.13.norm1_context.linear.lora_B.weight": [18432, 64], + "transformer.transformer_blocks.14.attn.add_k_proj.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.14.attn.add_k_proj.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.14.attn.add_q_proj.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.14.attn.add_q_proj.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.14.attn.add_v_proj.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.14.attn.add_v_proj.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.14.attn.to_add_out.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.14.attn.to_add_out.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.14.attn.to_k.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.14.attn.to_k.lora_B.weight": [3072, 64], + 
"transformer.transformer_blocks.14.attn.to_out.0.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.14.attn.to_out.0.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.14.attn.to_q.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.14.attn.to_q.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.14.attn.to_v.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.14.attn.to_v.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.14.ff.net.0.proj.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.14.ff.net.0.proj.lora_B.weight": [12288, 64], + "transformer.transformer_blocks.14.ff.net.2.lora_A.weight": [64, 12288], + "transformer.transformer_blocks.14.ff.net.2.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.14.ff_context.net.0.proj.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.14.ff_context.net.0.proj.lora_B.weight": [12288, 64], + "transformer.transformer_blocks.14.ff_context.net.2.lora_A.weight": [64, 12288], + "transformer.transformer_blocks.14.ff_context.net.2.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.14.norm1.linear.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.14.norm1.linear.lora_B.weight": [18432, 64], + "transformer.transformer_blocks.14.norm1_context.linear.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.14.norm1_context.linear.lora_B.weight": [18432, 64], + "transformer.transformer_blocks.15.attn.add_k_proj.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.15.attn.add_k_proj.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.15.attn.add_q_proj.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.15.attn.add_q_proj.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.15.attn.add_v_proj.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.15.attn.add_v_proj.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.15.attn.to_add_out.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.15.attn.to_add_out.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.15.attn.to_k.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.15.attn.to_k.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.15.attn.to_out.0.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.15.attn.to_out.0.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.15.attn.to_q.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.15.attn.to_q.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.15.attn.to_v.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.15.attn.to_v.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.15.ff.net.0.proj.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.15.ff.net.0.proj.lora_B.weight": [12288, 64], + "transformer.transformer_blocks.15.ff.net.2.lora_A.weight": [64, 12288], + "transformer.transformer_blocks.15.ff.net.2.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.15.ff_context.net.0.proj.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.15.ff_context.net.0.proj.lora_B.weight": [12288, 64], + "transformer.transformer_blocks.15.ff_context.net.2.lora_A.weight": [64, 12288], + "transformer.transformer_blocks.15.ff_context.net.2.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.15.norm1.linear.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.15.norm1.linear.lora_B.weight": [18432, 64], + 
"transformer.transformer_blocks.15.norm1_context.linear.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.15.norm1_context.linear.lora_B.weight": [18432, 64], + "transformer.transformer_blocks.16.attn.add_k_proj.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.16.attn.add_k_proj.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.16.attn.add_q_proj.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.16.attn.add_q_proj.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.16.attn.add_v_proj.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.16.attn.add_v_proj.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.16.attn.to_add_out.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.16.attn.to_add_out.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.16.attn.to_k.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.16.attn.to_k.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.16.attn.to_out.0.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.16.attn.to_out.0.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.16.attn.to_q.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.16.attn.to_q.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.16.attn.to_v.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.16.attn.to_v.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.16.ff.net.0.proj.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.16.ff.net.0.proj.lora_B.weight": [12288, 64], + "transformer.transformer_blocks.16.ff.net.2.lora_A.weight": [64, 12288], + "transformer.transformer_blocks.16.ff.net.2.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.16.ff_context.net.0.proj.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.16.ff_context.net.0.proj.lora_B.weight": [12288, 64], + "transformer.transformer_blocks.16.ff_context.net.2.lora_A.weight": [64, 12288], + "transformer.transformer_blocks.16.ff_context.net.2.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.16.norm1.linear.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.16.norm1.linear.lora_B.weight": [18432, 64], + "transformer.transformer_blocks.16.norm1_context.linear.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.16.norm1_context.linear.lora_B.weight": [18432, 64], + "transformer.transformer_blocks.17.attn.add_k_proj.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.17.attn.add_k_proj.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.17.attn.add_q_proj.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.17.attn.add_q_proj.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.17.attn.add_v_proj.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.17.attn.add_v_proj.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.17.attn.to_add_out.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.17.attn.to_add_out.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.17.attn.to_k.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.17.attn.to_k.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.17.attn.to_out.0.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.17.attn.to_out.0.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.17.attn.to_q.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.17.attn.to_q.lora_B.weight": [3072, 64], + 
"transformer.transformer_blocks.17.attn.to_v.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.17.attn.to_v.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.17.ff.net.0.proj.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.17.ff.net.0.proj.lora_B.weight": [12288, 64], + "transformer.transformer_blocks.17.ff.net.2.lora_A.weight": [64, 12288], + "transformer.transformer_blocks.17.ff.net.2.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.17.ff_context.net.0.proj.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.17.ff_context.net.0.proj.lora_B.weight": [12288, 64], + "transformer.transformer_blocks.17.ff_context.net.2.lora_A.weight": [64, 12288], + "transformer.transformer_blocks.17.ff_context.net.2.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.17.norm1.linear.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.17.norm1.linear.lora_B.weight": [18432, 64], + "transformer.transformer_blocks.17.norm1_context.linear.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.17.norm1_context.linear.lora_B.weight": [18432, 64], + "transformer.transformer_blocks.18.attn.add_k_proj.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.18.attn.add_k_proj.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.18.attn.add_q_proj.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.18.attn.add_q_proj.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.18.attn.add_v_proj.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.18.attn.add_v_proj.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.18.attn.to_add_out.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.18.attn.to_add_out.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.18.attn.to_k.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.18.attn.to_k.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.18.attn.to_out.0.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.18.attn.to_out.0.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.18.attn.to_q.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.18.attn.to_q.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.18.attn.to_v.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.18.attn.to_v.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.18.ff.net.0.proj.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.18.ff.net.0.proj.lora_B.weight": [12288, 64], + "transformer.transformer_blocks.18.ff.net.2.lora_A.weight": [64, 12288], + "transformer.transformer_blocks.18.ff.net.2.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.18.ff_context.net.0.proj.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.18.ff_context.net.0.proj.lora_B.weight": [12288, 64], + "transformer.transformer_blocks.18.ff_context.net.2.lora_A.weight": [64, 12288], + "transformer.transformer_blocks.18.ff_context.net.2.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.18.norm1.linear.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.18.norm1.linear.lora_B.weight": [18432, 64], + "transformer.transformer_blocks.18.norm1_context.linear.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.18.norm1_context.linear.lora_B.weight": [18432, 64], + "transformer.transformer_blocks.2.attn.add_k_proj.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.2.attn.add_k_proj.lora_B.weight": [3072, 64], + 
"transformer.transformer_blocks.2.attn.add_q_proj.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.2.attn.add_q_proj.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.2.attn.add_v_proj.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.2.attn.add_v_proj.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.2.attn.to_add_out.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.2.attn.to_add_out.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.2.attn.to_k.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.2.attn.to_k.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.2.attn.to_out.0.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.2.attn.to_out.0.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.2.attn.to_q.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.2.attn.to_q.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.2.attn.to_v.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.2.attn.to_v.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.2.ff.net.0.proj.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.2.ff.net.0.proj.lora_B.weight": [12288, 64], + "transformer.transformer_blocks.2.ff.net.2.lora_A.weight": [64, 12288], + "transformer.transformer_blocks.2.ff.net.2.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.2.ff_context.net.0.proj.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.2.ff_context.net.0.proj.lora_B.weight": [12288, 64], + "transformer.transformer_blocks.2.ff_context.net.2.lora_A.weight": [64, 12288], + "transformer.transformer_blocks.2.ff_context.net.2.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.2.norm1.linear.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.2.norm1.linear.lora_B.weight": [18432, 64], + "transformer.transformer_blocks.2.norm1_context.linear.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.2.norm1_context.linear.lora_B.weight": [18432, 64], + "transformer.transformer_blocks.3.attn.add_k_proj.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.3.attn.add_k_proj.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.3.attn.add_q_proj.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.3.attn.add_q_proj.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.3.attn.add_v_proj.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.3.attn.add_v_proj.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.3.attn.to_add_out.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.3.attn.to_add_out.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.3.attn.to_k.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.3.attn.to_k.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.3.attn.to_out.0.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.3.attn.to_out.0.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.3.attn.to_q.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.3.attn.to_q.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.3.attn.to_v.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.3.attn.to_v.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.3.ff.net.0.proj.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.3.ff.net.0.proj.lora_B.weight": [12288, 64], + "transformer.transformer_blocks.3.ff.net.2.lora_A.weight": [64, 12288], + 
"transformer.transformer_blocks.3.ff.net.2.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.3.ff_context.net.0.proj.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.3.ff_context.net.0.proj.lora_B.weight": [12288, 64], + "transformer.transformer_blocks.3.ff_context.net.2.lora_A.weight": [64, 12288], + "transformer.transformer_blocks.3.ff_context.net.2.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.3.norm1.linear.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.3.norm1.linear.lora_B.weight": [18432, 64], + "transformer.transformer_blocks.3.norm1_context.linear.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.3.norm1_context.linear.lora_B.weight": [18432, 64], + "transformer.transformer_blocks.4.attn.add_k_proj.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.4.attn.add_k_proj.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.4.attn.add_q_proj.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.4.attn.add_q_proj.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.4.attn.add_v_proj.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.4.attn.add_v_proj.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.4.attn.to_add_out.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.4.attn.to_add_out.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.4.attn.to_k.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.4.attn.to_k.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.4.attn.to_out.0.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.4.attn.to_out.0.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.4.attn.to_q.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.4.attn.to_q.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.4.attn.to_v.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.4.attn.to_v.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.4.ff.net.0.proj.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.4.ff.net.0.proj.lora_B.weight": [12288, 64], + "transformer.transformer_blocks.4.ff.net.2.lora_A.weight": [64, 12288], + "transformer.transformer_blocks.4.ff.net.2.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.4.ff_context.net.0.proj.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.4.ff_context.net.0.proj.lora_B.weight": [12288, 64], + "transformer.transformer_blocks.4.ff_context.net.2.lora_A.weight": [64, 12288], + "transformer.transformer_blocks.4.ff_context.net.2.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.4.norm1.linear.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.4.norm1.linear.lora_B.weight": [18432, 64], + "transformer.transformer_blocks.4.norm1_context.linear.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.4.norm1_context.linear.lora_B.weight": [18432, 64], + "transformer.transformer_blocks.5.attn.add_k_proj.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.5.attn.add_k_proj.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.5.attn.add_q_proj.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.5.attn.add_q_proj.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.5.attn.add_v_proj.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.5.attn.add_v_proj.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.5.attn.to_add_out.lora_A.weight": [64, 3072], + 
"transformer.transformer_blocks.5.attn.to_add_out.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.5.attn.to_k.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.5.attn.to_k.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.5.attn.to_out.0.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.5.attn.to_out.0.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.5.attn.to_q.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.5.attn.to_q.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.5.attn.to_v.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.5.attn.to_v.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.5.ff.net.0.proj.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.5.ff.net.0.proj.lora_B.weight": [12288, 64], + "transformer.transformer_blocks.5.ff.net.2.lora_A.weight": [64, 12288], + "transformer.transformer_blocks.5.ff.net.2.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.5.ff_context.net.0.proj.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.5.ff_context.net.0.proj.lora_B.weight": [12288, 64], + "transformer.transformer_blocks.5.ff_context.net.2.lora_A.weight": [64, 12288], + "transformer.transformer_blocks.5.ff_context.net.2.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.5.norm1.linear.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.5.norm1.linear.lora_B.weight": [18432, 64], + "transformer.transformer_blocks.5.norm1_context.linear.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.5.norm1_context.linear.lora_B.weight": [18432, 64], + "transformer.transformer_blocks.6.attn.add_k_proj.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.6.attn.add_k_proj.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.6.attn.add_q_proj.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.6.attn.add_q_proj.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.6.attn.add_v_proj.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.6.attn.add_v_proj.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.6.attn.to_add_out.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.6.attn.to_add_out.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.6.attn.to_k.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.6.attn.to_k.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.6.attn.to_out.0.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.6.attn.to_out.0.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.6.attn.to_q.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.6.attn.to_q.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.6.attn.to_v.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.6.attn.to_v.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.6.ff.net.0.proj.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.6.ff.net.0.proj.lora_B.weight": [12288, 64], + "transformer.transformer_blocks.6.ff.net.2.lora_A.weight": [64, 12288], + "transformer.transformer_blocks.6.ff.net.2.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.6.ff_context.net.0.proj.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.6.ff_context.net.0.proj.lora_B.weight": [12288, 64], + "transformer.transformer_blocks.6.ff_context.net.2.lora_A.weight": [64, 12288], + "transformer.transformer_blocks.6.ff_context.net.2.lora_B.weight": [3072, 64], + 
"transformer.transformer_blocks.6.norm1.linear.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.6.norm1.linear.lora_B.weight": [18432, 64], + "transformer.transformer_blocks.6.norm1_context.linear.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.6.norm1_context.linear.lora_B.weight": [18432, 64], + "transformer.transformer_blocks.7.attn.add_k_proj.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.7.attn.add_k_proj.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.7.attn.add_q_proj.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.7.attn.add_q_proj.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.7.attn.add_v_proj.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.7.attn.add_v_proj.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.7.attn.to_add_out.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.7.attn.to_add_out.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.7.attn.to_k.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.7.attn.to_k.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.7.attn.to_out.0.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.7.attn.to_out.0.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.7.attn.to_q.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.7.attn.to_q.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.7.attn.to_v.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.7.attn.to_v.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.7.ff.net.0.proj.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.7.ff.net.0.proj.lora_B.weight": [12288, 64], + "transformer.transformer_blocks.7.ff.net.2.lora_A.weight": [64, 12288], + "transformer.transformer_blocks.7.ff.net.2.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.7.ff_context.net.0.proj.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.7.ff_context.net.0.proj.lora_B.weight": [12288, 64], + "transformer.transformer_blocks.7.ff_context.net.2.lora_A.weight": [64, 12288], + "transformer.transformer_blocks.7.ff_context.net.2.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.7.norm1.linear.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.7.norm1.linear.lora_B.weight": [18432, 64], + "transformer.transformer_blocks.7.norm1_context.linear.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.7.norm1_context.linear.lora_B.weight": [18432, 64], + "transformer.transformer_blocks.8.attn.add_k_proj.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.8.attn.add_k_proj.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.8.attn.add_q_proj.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.8.attn.add_q_proj.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.8.attn.add_v_proj.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.8.attn.add_v_proj.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.8.attn.to_add_out.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.8.attn.to_add_out.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.8.attn.to_k.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.8.attn.to_k.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.8.attn.to_out.0.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.8.attn.to_out.0.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.8.attn.to_q.lora_A.weight": [64, 3072], + 
"transformer.transformer_blocks.8.attn.to_q.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.8.attn.to_v.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.8.attn.to_v.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.8.ff.net.0.proj.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.8.ff.net.0.proj.lora_B.weight": [12288, 64], + "transformer.transformer_blocks.8.ff.net.2.lora_A.weight": [64, 12288], + "transformer.transformer_blocks.8.ff.net.2.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.8.ff_context.net.0.proj.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.8.ff_context.net.0.proj.lora_B.weight": [12288, 64], + "transformer.transformer_blocks.8.ff_context.net.2.lora_A.weight": [64, 12288], + "transformer.transformer_blocks.8.ff_context.net.2.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.8.norm1.linear.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.8.norm1.linear.lora_B.weight": [18432, 64], + "transformer.transformer_blocks.8.norm1_context.linear.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.8.norm1_context.linear.lora_B.weight": [18432, 64], + "transformer.transformer_blocks.9.attn.add_k_proj.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.9.attn.add_k_proj.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.9.attn.add_q_proj.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.9.attn.add_q_proj.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.9.attn.add_v_proj.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.9.attn.add_v_proj.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.9.attn.to_add_out.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.9.attn.to_add_out.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.9.attn.to_k.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.9.attn.to_k.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.9.attn.to_out.0.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.9.attn.to_out.0.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.9.attn.to_q.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.9.attn.to_q.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.9.attn.to_v.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.9.attn.to_v.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.9.ff.net.0.proj.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.9.ff.net.0.proj.lora_B.weight": [12288, 64], + "transformer.transformer_blocks.9.ff.net.2.lora_A.weight": [64, 12288], + "transformer.transformer_blocks.9.ff.net.2.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.9.ff_context.net.0.proj.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.9.ff_context.net.0.proj.lora_B.weight": [12288, 64], + "transformer.transformer_blocks.9.ff_context.net.2.lora_A.weight": [64, 12288], + "transformer.transformer_blocks.9.ff_context.net.2.lora_B.weight": [3072, 64], + "transformer.transformer_blocks.9.norm1.linear.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.9.norm1.linear.lora_B.weight": [18432, 64], + "transformer.transformer_blocks.9.norm1_context.linear.lora_A.weight": [64, 3072], + "transformer.transformer_blocks.9.norm1_context.linear.lora_B.weight": [18432, 64], + "transformer.x_embedder.lora_A.weight": [64, 64], + "transformer.x_embedder.lora_B.weight": [3072, 64], +} diff --git 
index c7dfaff8812..2d30bd8a678 100644
--- a/tests/backend/patches/lora_conversions/test_flux_diffusers_lora_conversion_utils.py
+++ b/tests/backend/patches/lora_conversions/test_flux_diffusers_lora_conversion_utils.py
@@ -1,13 +1,13 @@
-import pytest
-import torch
 from unittest import mock
 
+import pytest
+import torch
 
 from invokeai.backend.patches.layers.utils import swap_shift_scale_for_linear_weight
 from invokeai.backend.patches.lora_conversions.flux_diffusers_lora_conversion_utils import (
+    approximate_flux_adaLN_lora_layer_from_diffusers_state_dict,
     is_state_dict_likely_in_flux_diffusers_format,
     lora_model_from_flux_diffusers_state_dict,
-    approximate_flux_adaLN_lora_layer_from_diffusers_state_dict,
 )
 from invokeai.backend.patches.lora_conversions.flux_lora_constants import FLUX_LORA_TRANSFORMER_PREFIX
 from tests.backend.patches.lora_conversions.lora_state_dicts.flux_dora_onetrainer_format import (
@@ -28,7 +28,14 @@
 from tests.backend.patches.lora_conversions.lora_state_dicts.utils import keys_to_mock_state_dict
 
 
-@pytest.mark.parametrize("sd_keys", [flux_diffusers_state_dict_keys, flux_diffusers_no_proj_mlp_state_dict_keys, flux_diffusers_with_norm_out_state_dict_keys])
+@pytest.mark.parametrize(
+    "sd_keys",
+    [
+        flux_diffusers_state_dict_keys,
+        flux_diffusers_no_proj_mlp_state_dict_keys,
+        flux_diffusers_with_norm_out_state_dict_keys,
+    ],
+)
 def test_is_state_dict_likely_in_flux_diffusers_format_true(sd_keys: dict[str, list[int]]):
     """Test that is_state_dict_likely_in_flux_diffusers_format() can identify a state dict in the Diffusers FLUX LoRA format."""
     # Construct a state dict that is in the Diffusers FLUX LoRA format.
@@ -48,7 +55,14 @@ def test_is_state_dict_likely_in_flux_diffusers_format_false(sd_keys: dict[str,
     assert not is_state_dict_likely_in_flux_diffusers_format(state_dict)
 
 
-@pytest.mark.parametrize("sd_keys", [flux_diffusers_state_dict_keys, flux_diffusers_no_proj_mlp_state_dict_keys, flux_diffusers_with_norm_out_state_dict_keys])
+@pytest.mark.parametrize(
+    "sd_keys",
+    [
+        flux_diffusers_state_dict_keys,
+        flux_diffusers_no_proj_mlp_state_dict_keys,
+        flux_diffusers_with_norm_out_state_dict_keys,
+    ],
+)
 def test_lora_model_from_flux_diffusers_state_dict(sd_keys: dict[str, list[int]]):
     """Test that lora_model_from_flux_diffusers_state_dict() can load a state dict in the Diffusers FLUX LoRA format."""
     # Construct a state dict that is in the Diffusers FLUX LoRA format.
@@ -84,12 +98,19 @@ def test_lora_model_from_flux_diffusers_state_dict_extra_keys_error():
         lora_model_from_flux_diffusers_state_dict(state_dict, alpha=8.0)
 
 
-@pytest.mark.parametrize("layer_sd_keys",[
-    {}, # no keys
-    {'lora_A.weight': [1024, 8], 'lora_B.weight': [8, 512]}, # wrong keys
-    {'lora_up.weight': [1024, 8],}, # missing key
-    {'lora_down.weight': [8, 512],}, # missing key
-])
+@pytest.mark.parametrize(
+    "layer_sd_keys",
+    [
+        {},  # no keys
+        {"lora_A.weight": [1024, 8], "lora_B.weight": [8, 512]},  # wrong keys
+        {
+            "lora_up.weight": [1024, 8],
+        },  # missing key
+        {
+            "lora_down.weight": [8, 512],
+        },  # missing key
+    ],
+)
 def test_approximate_adaLN_from_state_dict_should_only_accept_vanilla_LoRA_format(layer_sd_keys: dict[str, list[int]]):
     """Should only accept the valid state dict"""
     layer_state_dict = keys_to_mock_state_dict(layer_sd_keys)
@@ -98,10 +119,13 @@ def test_approximate_adaLN_from_state_dict_should_only_accept_vanilla_LoRA_forma
         approximate_flux_adaLN_lora_layer_from_diffusers_state_dict(layer_state_dict)
 
 
-@pytest.mark.parametrize("dtype, rtol", [
-    (torch.float32, 1e-4),
-    (torch.half, 1e-3),
-])
+@pytest.mark.parametrize(
+    "dtype, rtol",
+    [
+        (torch.float32, 1e-4),
+        (torch.half, 1e-3),
+    ],
+)
 def test_approximate_adaLN_from_state_dict_should_work(dtype: torch.dtype, rtol: float, rate: float = 0.99):
     """Test that we should approximate good enough adaLN layer from diffusers state dict.
     This should tolorance some kind of errorness respect to input dtype"""
@@ -113,10 +137,7 @@ def test_approximate_adaLN_from_state_dict_should_work(dtype: torch.dtype, rtol:
     up = torch.randn(input_dim, rank, dtype=dtype)
     down = torch.randn(rank, output_dim, dtype=dtype)
 
-    layer_state_dict = {
-        'lora_up.weight': up,
-        'lora_down.weight': down
-    }
+    layer_state_dict = {"lora_up.weight": up, "lora_down.weight": down}
 
     # XXX Layer patcher cast things to f32
     original = up.float() @ down.float()
@@ -132,15 +153,16 @@ def test_approximate_adaLN_from_state_dict_should_work(dtype: torch.dtype, rtol:
 
     assert close_rate > rate
 
+
 def test_adaLN_should_be_approximated_if_present_while_converting():
     """AdaLN layer should be approximated if existed inside given model"""
     state_dict = keys_to_mock_state_dict(flux_diffusers_with_norm_out_state_dict_keys)
 
-    adaLN_layer_key = 'final_layer.adaLN_modulation.1'
+    adaLN_layer_key = "final_layer.adaLN_modulation.1"
     prefixed_layer_key = FLUX_LORA_TRANSFORMER_PREFIX + adaLN_layer_key
 
     with mock.patch(
-        'invokeai.backend.patches.lora_conversions.flux_diffusers_lora_conversion_utils.approximate_flux_adaLN_lora_layer_from_diffusers_state_dict'
+        "invokeai.backend.patches.lora_conversions.flux_diffusers_lora_conversion_utils.approximate_flux_adaLN_lora_layer_from_diffusers_state_dict"
     ) as mock_approximate_func:
         model = lora_model_from_flux_diffusers_state_dict(state_dict, alpha=8.0)