diff --git a/setup.py b/setup.py index 2729e731f..4d87c1cca 100644 --- a/setup.py +++ b/setup.py @@ -60,9 +60,12 @@ "datasets", "accelerate>=0.20.3,!=1.1.0", "pynvml", - "compressed-tensors" - if version_info.build_type == "release" - else "compressed-tensors-nightly", + "pillow", + ( + "compressed-tensors" + if version_info.build_type == "release" + else "compressed-tensors-nightly" + ), ], extras_require={ "dev": [ diff --git a/tests/lmeval/configs/vl_fp8_dynamic_per_token.yaml b/tests/lmeval/configs/vl_fp8_dynamic_per_token.yaml index 3ae64f093..2b2bb6601 100644 --- a/tests/lmeval/configs/vl_fp8_dynamic_per_token.yaml +++ b/tests/lmeval/configs/vl_fp8_dynamic_per_token.yaml @@ -2,6 +2,7 @@ cadence: weekly model: Qwen/Qwen2-VL-2B-Instruct model_class: TraceableQwen2VLForConditionalGeneration scheme: FP8_DYNAMIC +seed: 42 #compressed model is sensitive to random seed lmeval: model: "hf-multimodal" model_args: @@ -10,7 +11,6 @@ lmeval: convert_img_format: True task: mmmu_val_economics num_fewshot: 0 - limit: 1000 batch_size: 8 metrics: acc,none: 0.333 diff --git a/tests/lmeval/configs/vl_int8_w8a8_dynamic_per_token.yaml b/tests/lmeval/configs/vl_int8_w8a8_dynamic_per_token.yaml index 22b5d8419..aa53c52c4 100644 --- a/tests/lmeval/configs/vl_int8_w8a8_dynamic_per_token.yaml +++ b/tests/lmeval/configs/vl_int8_w8a8_dynamic_per_token.yaml @@ -5,6 +5,7 @@ scheme: INT8_dyn_per_token recipe: tests/e2e/vLLM/recipes/INT8/recipe_int8_channel_weight_dynamic_per_token.yaml dataset_id: lmms-lab/flickr30k dataset_split: "test[:512]" +seed: 42 #compressed model is sensitive to random seed lmeval: model: "hf-multimodal" model_args: @@ -13,7 +14,6 @@ lmeval: convert_img_format: True task: mmmu_val_economics num_fewshot: 0 - limit: 1000 metrics: acc,none: 0.233 batch_size: 8 \ No newline at end of file diff --git a/tests/lmeval/configs/vl_w4a16_actorder_weight.yaml b/tests/lmeval/configs/vl_w4a16_actorder_weight.yaml index b7fa161c8..b28cf3dd7 100644 --- a/tests/lmeval/configs/vl_w4a16_actorder_weight.yaml +++ b/tests/lmeval/configs/vl_w4a16_actorder_weight.yaml @@ -5,6 +5,7 @@ recipe: tests/e2e/vLLM/recipes/actorder/recipe_w4a16_actorder_weight.yaml dataset_id: lmms-lab/flickr30k dataset_split: "test[:512]" scheme: W4A16_actorder_group +seed: 42 #compressed model is sensitive to random seed lmeval: model: "hf-multimodal" model_args: @@ -13,7 +14,6 @@ lmeval: convert_img_format: True task: mmmu_val_economics num_fewshot: 0 - limit: 1000 metrics: - acc,none: 0.4 + acc,none: 0.366 batch_size: 4 \ No newline at end of file diff --git a/tests/lmeval/test_lmeval.py b/tests/lmeval/test_lmeval.py index e5b9efcef..34db1356d 100644 --- a/tests/lmeval/test_lmeval.py +++ b/tests/lmeval/test_lmeval.py @@ -1,9 +1,11 @@ import os +import random import shutil from pathlib import Path import numpy import pytest +import torch import yaml from loguru import logger from pydantic import BaseModel @@ -73,6 +75,12 @@ def set_up(self): self.quant_type = eval_config.get("quant_type") self.save_dir = eval_config.get("save_dir") + seed = eval_config.get("seed", None) + if seed is not None: + random.seed(seed) + numpy.random.seed(seed) + torch.manual_seed(seed) + logger.info("========== RUNNING ==============") logger.info(self.scheme)