
Commit c0f21bd

Added Jenkins stage for VLM tests.
Added a separate tag for VLM testing in Jenkins. Addressed comments.

Signed-off-by: quic-dhirajku <quic_dhirajku@quicinc.com>
1 parent 1dd46d4 commit c0f21bd

File tree

3 files changed (+47 -12 lines)


QEfficient/utils/test_utils.py (+6)
@@ -13,6 +13,12 @@
 
 # Processor class for InternVL models
 class InternProcessor:
+    """
+    InternVL models ship only an AutoTokenizer, so this class performs the processing tasks an AutoProcessor would normally handle.
+    The methods used here are borrowed from the original InternVL modelling files:
+    https://huggingface.co/OpenGVLab/InternVL2_5-1B/
+    """
+
     def __init__(self, model: nn.Module, tokenizer):
         self.model = model
         image_size = self.model.config.force_image_size or self.model.config.vision_config.image_size
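For context, a minimal usage sketch of the class documented above. The constructor signature (model, tokenizer) comes from the diff; loading InternVL via AutoModel with trust_remote_code=True is an assumption about the checkpoint, not something this commit shows.

    # A minimal sketch, assuming InternVL loads via AutoModel with
    # trust_remote_code=True (an assumption; the commit only shows the
    # InternProcessor constructor taking a model and a tokenizer).
    from transformers import AutoModel, AutoTokenizer

    from QEfficient.utils.test_utils import InternProcessor

    ckpt = "OpenGVLab/InternVL2_5-1B"
    model = AutoModel.from_pretrained(ckpt, trust_remote_code=True)
    tokenizer = AutoTokenizer.from_pretrained(ckpt, trust_remote_code=True)

    # InternProcessor derives its image size from the model config (see
    # __init__ above), standing in for the AutoProcessor InternVL lacks.
    processor = InternProcessor(model, tokenizer)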

scripts/Jenkinsfile (+18 -1)
@@ -24,6 +24,7 @@ pipeline {
                     pip install .[test] &&
                     pip install junitparser pytest-xdist &&
                     pip install librosa==0.10.2 soundfile==0.13.1 && # packages needed to load example for whisper testing
+                    pip install --extra-index-url https://download.pytorch.org/whl/cpu timm==1.0.14 torchvision==0.19.1+cpu einops==0.8.1 && # packages to load VLMs
                     rm -rf QEfficient"
                     '''
                 }
@@ -63,6 +64,22 @@ pipeline {
                 }
             }
         }
+        stage('Run Non-CLI QAIC MultiModal Tests') {
+            steps {
+                timeout(time: 60, unit: 'MINUTES') {
+                    sh '''
+                    sudo docker exec ${BUILD_TAG} bash -c "
+                    cd /efficient-transformers &&
+                    . preflight_qeff/bin/activate &&
+                    mkdir -p $PWD/Non_cli_qaic_multimodal &&
+                    export TOKENIZERS_PARALLELISM=false &&
+                    export QEFF_HOME=$PWD/Non_cli_qaic_multimodal &&
+                    pytest tests -m '(not cli) and (on_qaic) and (multimodal) and (not qnn)' -n 4 --junitxml=tests/tests_log6.xml &&
+                    deactivate"
+                    '''
+                }
+            }
+        }
     }
 }
 stage('CLI Tests') {
@@ -114,7 +131,7 @@ pipeline {
                     export TOKENIZERS_PARALLELISM=false &&
                     export QEFF_HOME=$PWD/Qnn_non_cli &&
                     pytest tests -m '(not cli) and (qnn) and (on_qaic)' --junitxml=tests/tests_log5.xml &&
-                    junitparser merge tests/tests_log1.xml tests/tests_log2.xml tests/tests_log3.xml tests/tests_log4.xml tests/tests_log5.xml tests/tests_log.xml &&
+                    junitparser merge tests/tests_log1.xml tests/tests_log2.xml tests/tests_log3.xml tests/tests_log4.xml tests/tests_log5.xml tests/tests_log6.xml tests/tests_log.xml &&
                     deactivate"
                     '''
                 }
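The new stage selects tests with -m '(not cli) and (on_qaic) and (multimodal) and (not qnn)'. For the multimodal tag to be usable without a PytestUnknownMarkWarning it must be registered; a minimal sketch using pytest's standard pytest_configure hook in a conftest.py (where this repo actually registers its markers is an assumption):

    # conftest.py -- a sketch; the hook and addinivalue_line API are standard
    # pytest, but the registration location (conftest.py vs. pytest.ini) is an
    # assumption, not shown in this commit.
    def pytest_configure(config):
        config.addinivalue_line(
            "markers", "multimodal: vision-language (VLM) model tests"
        )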

tests/transformers/models/test_image_text_to_text_models.py (+23 -11)
@@ -30,6 +30,17 @@
 HF_TOKEN = ""
 NEW_GENERATION_TOKENS = 10
 test_models_config = [
+    # CONFIG PARAMS NEEDED FOR A MODEL TO BE TESTED
+    # (
+    #     model_name,
+    #     batch_size,
+    #     prompt_len,
+    #     ctx_len,
+    #     img_size,
+    #     img_url,
+    #     text_prompt,
+    #     number of layers of the model,
+    # ),
     (
         "llava-hf/llava-1.5-7b-hf",
         1,
@@ -40,16 +51,16 @@
         "What does the label 15 represent? (1) lava (2) core (3) tunnel (4) ash cloud",
         1,
     ),
-    (
-        "meta-llama/Llama-3.2-11B-Vision-Instruct",
-        1,
-        32,
-        512,
-        560,
-        "https://huggingface.co/datasets/huggingface/documentation-images/resolve/0052a70beed5bf71b92610a43a52df6d286cd5f3/diffusers/rabbit.jpg",
-        "Explain this image",
-        4,
-    ),
+    # (
+    #     "meta-llama/Llama-3.2-11B-Vision-Instruct",
+    #     1,
+    #     32,
+    #     512,
+    #     560,
+    #     "https://huggingface.co/datasets/huggingface/documentation-images/resolve/0052a70beed5bf71b92610a43a52df6d286cd5f3/diffusers/rabbit.jpg",
+    #     "Explain this image",
+    #     4,
+    # ),
 ]
 
 intern_model_config = [
@@ -127,7 +138,6 @@ def check_image_text_to_text_pytorch_vs_kv_vs_ort_vs_ai100(
     config = AutoConfig.from_pretrained(
         model_config["model_name"], token=HF_TOKEN, trust_remote_code=True, padding=True
     )
-    config._attn_implementation = "eager"
     config = set_num_layers(config, n_layer=n_layer)
     model_hf, _ = load_image_text_to_text_model(config)
     processor = AutoProcessor.from_pretrained(model_name, token=HF_TOKEN, trust_remote_code=True, padding=True)
@@ -278,6 +288,7 @@ def check_intern_image_text_to_text_pytorch_vs_kv_vs_ort_vs_ai100(
 
 
 @pytest.mark.on_qaic
+@pytest.mark.multimodal
 @pytest.mark.parametrize(
     "model_name, batch_size, prompt_len, ctx_len, img_size, img_url, query, n_layer", test_models_config
 )
@@ -306,6 +317,7 @@ def test_image_text_to_text_pytorch_vs_kv_vs_ort_vs_ai100(
 
 
 @pytest.mark.on_qaic
+@pytest.mark.multimodal
 @pytest.mark.parametrize("model_name, batch_size, prompt_len, ctx_len, img_url, query, n_layer", intern_model_config)
 def test_image_text_to_text_intern_pytorch_vs_kv_vs_ort_vs_ai100(
     model_name, batch_size, prompt_len, ctx_len, img_url, query, n_layer
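To make the field order in the test_models_config comment block concrete, a hypothetical unpacking of one entry into named fields. The NamedTuple is our illustration only; the suite passes plain tuples to pytest.mark.parametrize, and every value below except the model name is a placeholder.

    from typing import NamedTuple

    # Hypothetical helper mirroring the commented field list; not part of this commit.
    class VLMTestCase(NamedTuple):
        model_name: str
        batch_size: int
        prompt_len: int
        ctx_len: int
        img_size: int
        img_url: str
        text_prompt: str
        n_layer: int

    # Placeholder values everywhere except the model name, which appears in the diff.
    case = VLMTestCase("llava-hf/llava-1.5-7b-hf", 1, 32, 128, 336,
                       "https://example.com/image.jpg", "Describe the image", 1)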

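Mirroring the new Jenkins stage, the multimodal subset can also be run locally through pytest's Python entry point; a minimal sketch with the marker expression copied from the stage above:

    import sys

    import pytest

    # Same marker expression as the 'Run Non-CLI QAIC MultiModal Tests' stage;
    # add "-n", "4" only if pytest-xdist is installed.
    if __name__ == "__main__":
        sys.exit(pytest.main(
            ["tests", "-m", "(not cli) and (on_qaic) and (multimodal) and (not qnn)"]
        ))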