Update llm_diffusion_serving_app
ravi9 committed Nov 7, 2024
1 parent db0fd66 commit 300adeb
Showing 5 changed files with 62 additions and 38 deletions.
8 changes: 6 additions & 2 deletions examples/usecases/llm_diffusion_serving_app/Readme.md
@@ -1,7 +1,11 @@

# Multi-Image Generation Streamlit App by chaining Llama & Stable Diffusion using TorchServe, torch.compile & OpenVINO
## Multi-Image Generation Streamlit App by chaining LLaMA & Stable Diffusion using TorchServe, torch.compile & OpenVINO

This Streamlit app is designed to generate multiple images based on a provided text prompt. It leverages [TorchServe](https://pytorch.org/serve/) for efficient model serving and management, and utilizes [Meta-LLaMA-3.2](https://huggingface.co/meta-llama) for prompt generation, and **Stable Diffusion** with [latent-consistency/lcm-sdxl](https://huggingface.co/latent-consistency/lcm-sdxl) and [Torch.compile using OpenVINO backend](https://docs.openvino.ai/2024/openvino-workflow/torch-compile.html) for image generation.
This Streamlit app is designed to generate multiple images based on a provided text prompt. Instead of using Stable Diffusion directly, this app chains LLaMA and Stable Diffusion to enhance the image generation process. Here’s how it works:
- The app takes a user prompt and uses [Meta-LLaMA-3.2](https://huggingface.co/meta-llama) to create multiple interesting and relevant prompts.
- These generated prompts are then sent to Stable Diffusion with the [latent-consistency/lcm-sdxl](https://huggingface.co/latent-consistency/lcm-sdxl) model to generate images.
- For performance optimization, the models are compiled with [torch.compile using the OpenVINO backend](https://docs.openvino.ai/2024/openvino-workflow/torch-compile.html).
- The application leverages [TorchServe](https://pytorch.org/serve/) for efficient model serving and management.

![Multi-Image Generation App Workflow](./docker/workflow-1.png)
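
To make the chaining concrete, here is a minimal client-side sketch of the flow. It is illustrative only: the endpoint names (`llama`, `stable-diffusion`), the default TorchServe inference port (8080), and the response formats are assumptions, not the app's exact API; the LLaMA handler is assumed to return a JSON list of prompt strings and the Stable Diffusion handler to return raw image bytes.

```python
import json
import requests

TS_INFERENCE = "http://localhost:8080/predictions"  # TorchServe inference API (default port)

def generate_images(user_prompt: str, num_images: int = 2) -> list[bytes]:
    # Step 1: ask the LLaMA endpoint to expand the user prompt into several prompts.
    llm_res = requests.post(f"{TS_INFERENCE}/llama", data=user_prompt)
    prompts = json.loads(llm_res.text)  # assumed: a JSON list of prompt strings

    # Step 2: send each generated prompt to the Stable Diffusion endpoint.
    images = []
    for prompt in prompts[:num_images]:
        sd_res = requests.post(f"{TS_INFERENCE}/stable-diffusion", data=prompt)
        images.append(sd_res.content)  # assumed: raw image bytes
    return images
```

Inside the model handlers, the OpenVINO optimization mentioned above typically reduces to a single call along the lines of `model = torch.compile(model, backend="openvino")` (after `import openvino.torch`), as described in the linked OpenVINO documentation.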

57 changes: 35 additions & 22 deletions examples/usecases/llm_diffusion_serving_app/docker/client_app.py
@@ -20,10 +20,10 @@
logger = logging.getLogger(__name__)

# App title
st.set_page_config(page_title="Image Generation with SDXL and OpenVINO")
st.set_page_config(page_title="Multi-image Gen App")

with st.sidebar:
st.title("Image Generation with SDXL and OpenVINO")
st.title("Image Generation with LLaMA, SDXL and OpenVINO")

st.session_state.model_sd_loaded = False

@@ -151,22 +151,31 @@ def preprocess_llm_input(user_prompt, num_images = 2):
### Response:
"""

# Get 'num_images-1' prompts as the user_prompt is included.
prompt_template_with_user_input = template.format(num_images-1, user_prompt)
prompt_template_with_user_input = template.format(num_images, user_prompt)

return prompt_template_with_user_input


def postprocess_llm_response(prompts, original_prompt=None, num_images=2):
def postprocess_llm_response(prompts, original_prompt=None, num_images=2, include_user_prompt=False):
# Parse the JSON string into a Python list
prompts = prompts.strip()
prompts = json.loads(prompts)
logging.info(f"LLM Model Responded with prompts: {prompts}")
logging.info(f"LLM Model Responded with prompts. Required: {num_images}, Generated: {len(prompts)}. Prompts: {prompts}")

if len(prompts) < num_images:
logging.info(f"Llama Model generated too few prompts! Required: {num_images}, Generated: {len(prompts)}")
else:
# prompts list ideally should contain num_images + 1 (original prompt)
# Trim the list to the desired number of prompts
if len(prompts) > num_images + 1:
prompts = prompts[:num_images + 1]

# Remove the original user prompt if desired
if include_user_prompt:
prompts = prompts[:num_images]
else:
prompts = prompts[1:]

# If the list is empty, return the original prompt
if not prompts:
prompts = [original_prompt]

return prompts
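
As a quick illustration of the updated post-processing (assuming, per the comment above, that the LLM echoes the original user prompt as the first list element):

```python
prompts = ["a cat", "a cat in a spacesuit", "a cat made of clouds", "a steampunk cat"]
# num_images=3, include_user_prompt=False -> the original prompt is dropped:
#   ["a cat in a spacesuit", "a cat made of clouds", "a steampunk cat"]
# num_images=3, include_user_prompt=True  -> the original prompt is kept and the list trimmed:
#   ["a cat", "a cat in a spacesuit", "a cat made of clouds"]
```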

@@ -203,23 +212,27 @@ def generate_llm_model_response(prompt_template_with_user_input, user_prompt):


# Client Page UI
st.title("Multi-Image Generation App with TorchServe and OpenVINO")
st.markdown("## Multi-Image Generation App with TorchServe and OpenVINO", unsafe_allow_html=True)

intro_container = st.container()
with intro_container:
st.markdown("""
The multi-image generation app generates similar image generation prompts using **LLaMA-3.2** and
these prompts are then processed in parallel by **Stable Diffusion**, which is optimized
using the **latent-consistency/lcm-sdxl** model and accelerated with **Torch.compile** using the
**OpenVINO** backend. This approach enables efficient and high-quality image generation,
offering users a selection of interpretations to choose from.
""")
This Streamlit app is designed to generate multiple images based on a provided text prompt.
Instead of using Stable Diffusion directly, this app chains LLaMA and Stable Diffusion.
It takes a user prompt and uses LLaMA to create multiple interesting and relevant prompts
which are then sent to Stable Diffusion to generate images. It leverages:
- [TorchServe](https://pytorch.org/serve/) for efficient model serving and management,
- [Meta-LLaMA-3.2](https://huggingface.co/meta-llama) for enhanced prompt generation,
- Stable Diffusion with [latent-consistency/lcm-sdxl](https://huggingface.co/latent-consistency/lcm-sdxl) for image generation,
- For performance optimization, the models are compiled with [torch.compile using the OpenVINO backend](https://docs.openvino.ai/2024/openvino-workflow/torch-compile.html).
""", unsafe_allow_html=True)
st.markdown("""<div style='background-color: #f0f0f0; font-size: 14px; padding: 10px; border: 1px solid #ddd; border-radius: 5px;'>
NOTE: Initial image generation may take longer due to model warm-up. Subsequent generations will be faster!
</div>""", unsafe_allow_html=True)
st.image("workflow-2.png")
st.markdown("""
**NOTE:** The initial image generations might take longer due to model initialization and warm-up.
Subsequent generations should be faster !
""")

user_prompt = st.text_input("Enter an Image Generation Prompt :")
user_prompt = st.text_input("Enter a prompt for image generation:")
include_user_prompt = st.checkbox("Include original prompt", value=False)

prompt_container = st.container()
status_container = st.container()
@@ -257,7 +270,7 @@ def display_prompts():
if num_images > 1:
prompt_template_with_user_input = preprocess_llm_input(user_prompt, num_images)
llm_prompts = generate_llm_model_response(prompt_template_with_user_input, user_prompt)
st.session_state.llm_prompts = postprocess_llm_response(llm_prompts, user_prompt, num_images)
st.session_state.llm_prompts = postprocess_llm_response(llm_prompts, user_prompt, num_images, include_user_prompt)

st.session_state.llm_time = time.time() - llm_start_time
display_prompts()
35 changes: 21 additions & 14 deletions examples/usecases/llm_diffusion_serving_app/docker/server_app.py
@@ -20,7 +20,7 @@
MODEL_SD = MODEL_NAME_SD.split("---")[1]

# App title
st.set_page_config(page_title="TorchServe Server")
st.set_page_config(page_title="Multi-image Gen App Control Center")

def start_server():
subprocess.run(
@@ -59,7 +59,7 @@ def stop_server():
def _register_model(url, MODEL_NAME):
res = requests.post(url)
if res.status_code != 200:
server_state_container.error(f"Error registering model: {MODEL_NAME}", icon="🚫")
server_state_container.error(f"Error {res.status_code}: Failed to register model: {MODEL_NAME}", icon="🚫")
st.session_state.started = True
return False

@@ -184,7 +184,7 @@ def get_sw_versions():
st.button("Stop Server", on_click=stop_server)
st.button(f"Register Models", on_click=register_models, args=([MODEL_NAME_LLM, MODEL_NAME_SD],))

st.subheader("SD Model parameters")
st.subheader("Stable Diffusion Model Config Parameters")

workers_sd = st.sidebar.number_input(
"Num Workers for Stable Diffusion",
@@ -210,17 +210,24 @@


# Server Page UI
st.title("Multi-Image Generation App Control Center")
image_container = st.container()
with image_container:
st.markdown("## Multi-Image Generation App Control Center", unsafe_allow_html=True)

intro_container = st.container()
with intro_container:
st.markdown("""
This Streamlit app is designed to generate multiple images based on a provided text prompt.
It leverages **TorchServe** for efficient model serving and management, and utilizes **LLaMA3.2**
for prompt generation, and **Stable Diffusion**
with **latent-consistency/lcm-sdxl** and **Torch.compile** using **OpenVINO backend** for image generation.
After Starting TorchServe and Registering models, go to Client App running at port 8085.
""")
This Streamlit app is designed to generate multiple images based on a provided text prompt.
Instead of using Stable Diffusion directly, this app chains LLaMA and Stable Diffusion.
It takes a user prompt and uses LLaMA to create multiple interesting and relevant prompts
which are then sent to Stable Diffusion to generate images. It leverages:
- [TorchServe](https://pytorch.org/serve/) for efficient model serving and management,
- [Meta-LLaMA-3.2](https://huggingface.co/meta-llama) for enhanced prompt generation,
- Stable Diffusion with [latent-consistency/lcm-sdxl](https://huggingface.co/latent-consistency/lcm-sdxl) for image generation,
- For performance optimization, the models are compiled with [torch.compile using the OpenVINO backend](https://docs.openvino.ai/2024/openvino-workflow/torch-compile.html).
""", unsafe_allow_html=True)
st.markdown("""<div style='background-color: #f0f0f0; font-size: 14px; padding: 10px; border: 1px solid #ddd; border-radius: 5px;'>
NOTE: After Starting TorchServe and Registering models, go to Client App running at port 8085.
</div>""", unsafe_allow_html=True)

st.image("workflow-1.png")

server_state_container = st.container()
@@ -231,7 +238,7 @@ def get_sw_versions():
elif st.session_state.stopped:
server_state_container.success("Stopped TorchServe", icon="🛑")
else:
server_state_container.write("TorchServe not started !")
server_state_container.warning("TorchServe not started !", icon="⚠️")

if st.session_state.registered[MODEL_NAME_LLM]:
server_state_container.success(f"Registered model {MODEL_NAME_LLM}", icon="✅")
(2 of the changed files could not be displayed.)
