ip-adapter plus is not working with full face #2400

Open
circuluspibo opened this issue Sep 19, 2024 · 6 comments
Assignees
avitial
Labels
PSE Escalate to PSE for further investigate, support_request

Comments

@circuluspibo

Hi. I am trying to use IP-Adapter with OpenVINO.
Two models work well (ip-adapter_sd15.bin, ip-adapter_sd15_light.bin),
but "ip-adapter-full-face_sd15.bin" does not. With some modification, the example can convert that model, but inference fails.

How can I use that model with SD 1.5?

@eaidova
Collaborator

eaidova commented Sep 19, 2024

@circuluspibo could you please provide more details about the error and about which modifications you performed?

@circuluspibo
Author

circuluspibo commented Sep 19, 2024

from pathlib import Path
from diffusers import AutoPipelineForText2Image
from transformers import CLIPVisionModelWithProjection
from diffusers.utils import load_image
from diffusers import LCMScheduler


stable_diffusion_id = "circulus/canvers-real-v3.9.1"
ip_adapter_id = "h94/IP-Adapter"
ip_adapter_weight_name = "ip-adapter-full-face_sd15.bin"  # "ip-adapter_sd15.bin" works; the failure appears when switching to this adapter
lcm_lora_id = "latent-consistency/lcm-lora-sdv1-5"
models_dir = Path("on-canvers-real-v3.9.1-ov-ip")
int8_model_path = Path("on-canvers-real-v3.9.1-ov-ip-int8")
from optimum.intel import OVConfig, OVQuantizer, OVStableDiffusionPipeline, OVWeightQuantizationConfig
from optimum.intel.openvino.configuration import OVQuantizationMethod

load_original_pipeline = not all(
    [
        (models_dir / model_name).exists()
        for model_name in [
            "text_encoder.xml",
            "image_encoder.xml",
            "unet.xml",
            "vae_decoder.xml",
            "vae_encoder.xml",
        ]
    ]
)


def get_pipeline_components(
    stable_diffusion_id,
    ip_adapter_id,
    ip_adapter_weight_name,
    lcm_lora_id,
    ip_adapter_scale=0.65,
):
    image_encoder = CLIPVisionModelWithProjection.from_pretrained("h94/IP-Adapter", subfolder="models/image_encoder")
    print(image_encoder)
    pipeline = AutoPipelineForText2Image.from_pretrained(stable_diffusion_id, image_encoder=image_encoder)
    pipeline.load_lora_weights(lcm_lora_id)
    pipeline.fuse_lora()
    pipeline.load_ip_adapter(ip_adapter_id, subfolder="models", weight_name=ip_adapter_weight_name)
    pipeline.set_ip_adapter_scale(ip_adapter_scale)
    scheduler = LCMScheduler.from_pretrained(stable_diffusion_id, subfolder="scheduler")
    return (
        pipeline.tokenizer,
        pipeline.feature_extractor,
        scheduler,
        pipeline.text_encoder,
        pipeline.image_encoder,
        pipeline.unet,
        pipeline.vae,
    )


if load_original_pipeline:
    (
        tokenizer,
        feature_extractor,
        scheduler,
        text_encoder,
        image_encoder,
        unet,
        vae,
    ) = get_pipeline_components(stable_diffusion_id, ip_adapter_id, ip_adapter_weight_name, lcm_lora_id)
    scheduler.save_pretrained(models_dir / "scheduler")
else:
    tokenizer, feature_extractor, scheduler, text_encoder, image_encoder, unet, vae = (
        None,
        None,
        None,
        None,
        None,
        None,
        None,
    )

import openvino as ov
import torch
import gc


def cleanup_torchscript_cache():
    """
    Helper for removing cached model representation
    """
    torch._C._jit_clear_class_registry()
    torch.jit._recursive.concrete_type_store = torch.jit._recursive.ConcreteTypeStore()
    torch.jit._state._clear_class_state()

IMAGE_ENCODER_PATH = models_dir / "image_encoder.xml"
UNET_PATH = models_dir / "unet.xml"
VAE_DECODER_PATH = models_dir / "vae_decoder.xml"
VAE_ENCODER_PATH = models_dir / "vae_encoder.xml"
TEXT_ENCODER_PATH = models_dir / "text_encoder.xml"

if not IMAGE_ENCODER_PATH.exists():
    with torch.no_grad():
        ov_model = ov.convert_model(
            image_encoder,
            example_input=torch.zeros((1, 3, 224, 224)),
            input=[-1, 3, 224, 224],
        )
    ov.save_model(ov_model, IMAGE_ENCODER_PATH)
    feature_extractor.save_pretrained(models_dir / "feature_extractor")
    del ov_model
    cleanup_torchscript_cache()


if not UNET_PATH.exists():
    inputs = {
        "sample": torch.randn((2, 4, 64, 64)),
        "timestep": torch.tensor(1),
        "encoder_hidden_states": torch.randn((2, 77, 768)),
        "added_cond_kwargs": {"image_embeds": torch.ones(( 2,1280))}, #torch.ones((2, 1024) -  original emveds change for avoid failing convert
    }

    print(unet)

    with torch.no_grad():
        ov_model = ov.convert_model(unet, example_input=inputs)
    # dictionary with added_cond_kwargs will be decomposed during conversion
    # in some cases decomposition may lead to losing data type and shape information
    # We need to recover it manually after the conversion
    ov_model.inputs[-1].get_node().set_element_type(ov.Type.f32)
    ov_model.validate_nodes_and_infer_types()
    ov.save_model(ov_model, UNET_PATH)
    del ov_model
    cleanup_torchscript_cache()

if not VAE_DECODER_PATH.exists():

    class VAEDecoderWrapper(torch.nn.Module):
        def __init__(self, vae):
            super().__init__()
            self.vae = vae

        def forward(self, latents):
            return self.vae.decode(latents)

    vae_decoder = VAEDecoderWrapper(vae)
    with torch.no_grad():
        ov_model = ov.convert_model(vae_decoder, example_input=torch.ones([1, 4, 64, 64]))
    ov.save_model(ov_model, VAE_DECODER_PATH)
    del ov_model
    cleanup_torchscript_cache()
    del vae_decoder

if not VAE_ENCODER_PATH.exists():

    class VAEEncoderWrapper(torch.nn.Module):
        def __init__(self, vae):
            super().__init__()
            self.vae = vae

        def forward(self, image):
            return self.vae.encode(x=image)["latent_dist"].sample()

    vae_encoder = VAEEncoderWrapper(vae)
    vae_encoder.eval()
    image = torch.zeros((1, 3, 512, 512))
    with torch.no_grad():
        ov_model = ov.convert_model(vae_encoder, example_input=image)
    ov.save_model(ov_model, VAE_ENCODER_PATH)
    del ov_model
    cleanup_torchscript_cache()


if not TEXT_ENCODER_PATH.exists():
    with torch.no_grad():
        ov_model = ov.convert_model(
            text_encoder,
            example_input=torch.ones([1, 77], dtype=torch.long),
            input=[
                (1, 77),
            ],
        )
    ov.save_model(ov_model, TEXT_ENCODER_PATH)
    del ov_model
    cleanup_torchscript_cache()
    tokenizer.save_pretrained(models_dir / "tokenizer")

The conversion succeeds, but inference fails. The image_embeds seem to differ between the original ip-adapter and the ip-adapter-plus series (1024 vs. 1280).
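
As a side note, a minimal sketch of where the shape mismatch likely comes from (assuming the stock h94/IP-Adapter image encoder, CLIP ViT-H/14): the plain adapters consume the pooled 1024-dim projection, while the plus/full-face adapters are trained on the encoder's penultimate hidden states (257 tokens of width 1280), so a flat (2, 1280) placeholder matches neither layout.

import torch
from transformers import CLIPVisionModelWithProjection

encoder = CLIPVisionModelWithProjection.from_pretrained(
    "h94/IP-Adapter", subfolder="models/image_encoder"
)

with torch.no_grad():
    out = encoder(torch.zeros((1, 3, 224, 224)), output_hidden_states=True)

# ip-adapter_sd15 / _light consume the pooled, projected embedding:
print(out.image_embeds.shape)       # torch.Size([1, 1024])

# The plus/full-face variants consume the penultimate hidden states,
# which their resampler then compresses into adapter tokens:
print(out.hidden_states[-2].shape)  # torch.Size([1, 257, 1280])

# So the example image_embeds for the full-face UNet export would
# presumably need shape (2, 257, 1280), not (2, 1280) or (2, 1024).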

@Iffa-Intel

Hi @circuluspibo,

Could you clarify:

  1. Which OpenVINO Notebook demo are you using?
  2. Is the model custom/modified or pre-trained?
  3. If custom/modified, could you share it with us for validation purposes?

@circuluspibo
Author

@Iffa-Intel

Iffa-Intel commented Oct 4, 2024

@circuluspibo do you have the .xml file? I notice there are only .bin files in that link.
Also, which OpenVINO Notebook demo code did you use to infer the model, or are you using custom inference code?

@Iffa-Intel Iffa-Intel added the PSE Escalate to PSE for further investigate label Oct 8, 2024
@avitial avitial self-assigned this Oct 10, 2024