From 089c58f1a3dbae20294ebdfe6a6a25820b131817 Mon Sep 17 00:00:00 2001 From: Su Zhou Date: Fri, 29 Mar 2024 17:17:21 +0000 Subject: [PATCH 01/12] update mmcv mmdet --- src/autogluon/bench/frameworks/multimodal/setup.sh | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/autogluon/bench/frameworks/multimodal/setup.sh b/src/autogluon/bench/frameworks/multimodal/setup.sh index 286414fa..b5448c2d 100755 --- a/src/autogluon/bench/frameworks/multimodal/setup.sh +++ b/src/autogluon/bench/frameworks/multimodal/setup.sh @@ -36,5 +36,6 @@ python3 -m pip install -e core[all] python3 -m pip install -e features python3 -m pip install -e multimodal -python3 -m mim install "mmcv==2.0.1" -python3 -m pip install "mmdet==3.0.0" +python3 -m mim install "mmcv==2.1.0" +python3 -m pip install "mmdet==3.2.0" + From df7468cf7694a32a47e694283f8df3fa01a4703f Mon Sep 17 00:00:00 2001 From: Su Zhou Date: Fri, 29 Mar 2024 17:57:57 +0000 Subject: [PATCH 02/12] update job name --- .../bench/cloud/aws/batch_stack/lambdas/lambda_function.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/autogluon/bench/cloud/aws/batch_stack/lambdas/lambda_function.py b/src/autogluon/bench/cloud/aws/batch_stack/lambdas/lambda_function.py index c460d1bd..7b61e074 100644 --- a/src/autogluon/bench/cloud/aws/batch_stack/lambdas/lambda_function.py +++ b/src/autogluon/bench/cloud/aws/batch_stack/lambdas/lambda_function.py @@ -2,6 +2,7 @@ import itertools import logging import os +import re import zipfile import requests @@ -228,7 +229,10 @@ def generate_config_combinations(config, metrics_bucket, batch_job_queue, batch_ config_s3_path = upload_config(config_list=job_configs, bucket=metrics_bucket, benchmark_name=benchmark_name) env = [{"name": "config_file", "value": config_s3_path}] job_type = "array" if len(job_configs) > 1 else "single" - job_name = f"{benchmark_name}-{config['module']}-{config['framework']}-{config['constraint']}-{job_type}-job" + constraint = 
config.get("amlb_constraint") or config.get("constraint") + job_name = f"{benchmark_name}-{config['module']}-{config['framework']}-{constraint}-{job_type}-job" + job_name = re.sub(r'(?![-_])\W', '-', job_name)[:128] # AWS Bath Job name can only contain letters, numbers, "-" and "_" + parent_job_id = submit_batch_job( env=env, job_name=job_name, From 051ce28d0c77802bffeab3d49dbdc3a813516a12 Mon Sep 17 00:00:00 2001 From: Su Zhou Date: Fri, 29 Mar 2024 17:58:18 +0000 Subject: [PATCH 03/12] save primary metric as result --- src/autogluon/bench/frameworks/multimodal/exec.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/autogluon/bench/frameworks/multimodal/exec.py b/src/autogluon/bench/frameworks/multimodal/exec.py index b0f53847..9efda327 100644 --- a/src/autogluon/bench/frameworks/multimodal/exec.py +++ b/src/autogluon/bench/frameworks/multimodal/exec.py @@ -256,6 +256,9 @@ def run( framework, version = framework, ag_version metric_name = test_data.metric if metrics_func is None else metrics_func.name + primary_metric = metric_name[0] if isinstance(metric_name, list) else metric_name + result = scores[primary_metric] + if hasattr(train_data, "id"): id = f"id/{train_data.id}" else: @@ -268,7 +271,8 @@ def run( "version": version, "fold": 0, "type": predictor.problem_type, - "metric": metric_name, + "metric": primary_metric, + "result": result, "utc": utc_time, "training_duration": training_duration, "predict_duration": predict_duration, From 1ab8bbae46faabb51418c8261a23fc9741d61a8a Mon Sep 17 00:00:00 2001 From: Su Zhou Date: Fri, 29 Mar 2024 19:23:48 +0000 Subject: [PATCH 04/12] remove hash value requirement --- .../bench/datasets/multimodal_dataset.py | 32 ++++--------------- .../datasets/object_detection_dataset.py | 4 +-- 2 files changed, 8 insertions(+), 28 deletions(-) diff --git a/src/autogluon/bench/datasets/multimodal_dataset.py b/src/autogluon/bench/datasets/multimodal_dataset.py index 39ce15ac..47e99837 100644 --- 
a/src/autogluon/bench/datasets/multimodal_dataset.py +++ b/src/autogluon/bench/datasets/multimodal_dataset.py @@ -51,7 +51,7 @@ def __init__(self, split: str, dataset_name: str, data_info: dict): try: ext = os.path.splitext(data_info[split]["url"])[-1] self._path = os.path.join(get_data_home_dir(), dataset_name, f"{split}{ext}") - download(data_info[split]["url"], path=self._path, sha1_hash=data_info[split]["sha1sum"]) + download(data_info[split]["url"], path=self._path) if ext == ".csv": self._data = pd.read_csv(self._path) elif ext == ".pq": @@ -119,7 +119,6 @@ class Shopee(BaseImageDataset): _INFO = { "data": { "url": get_repo_url() + "vision_datasets/shopee.zip", - "sha1sum": "59dffcfd0921cf0aa97215550dee3d1e3de656ca", }, } _registry_name = "shopee" @@ -127,7 +126,7 @@ class Shopee(BaseImageDataset): def __init__(self, split="train"): self._split = split self._path = os.path.join(get_data_home_dir(), "shopee") - load_zip.unzip(self._INFO["data"]["url"], unzip_dir=self._path, sha1sum=self._INFO["data"]["sha1sum"]) + load_zip.unzip(self._INFO["data"]["url"], unzip_dir=self._path) self._base_folder = os.path.join(self._path, "shopee") try: data_path = os.path.join(self._base_folder, f"{self._split}.csv") @@ -175,7 +174,6 @@ class StanfordOnline(BaseMatcherDataset): _INFO = { "data": { "url": get_repo_url() + "Stanford_Online_Products.zip", - "sha1sum": "4951af1dfcceb54b9b8f2126e995668e1b139cec", }, } _registry_name = "stanford_online" @@ -183,7 +181,7 @@ class StanfordOnline(BaseMatcherDataset): def __init__(self, split="train"): self._split = split self._path = os.path.join(get_data_home_dir(), "Stanford_Online_Products") - load_zip.unzip(self._INFO["data"]["url"], unzip_dir=self._path, sha1sum=self._INFO["data"]["sha1sum"]) + load_zip.unzip(self._INFO["data"]["url"], unzip_dir=self._path) self._base_folder = os.path.join(self._path, "Stanford_Online_Products") try: self._data = pd.read_csv(os.path.join(self._base_folder, f"{self._split}.csv"), index_col=0) @@ 
-229,7 +227,7 @@ def problem_type(self): class Flickr30k(BaseMatcherDataset): _SOURCE = "https://paperswithcode.com/dataset/flickr30k" _INFO = { - "data": {"url": get_repo_url() + "flickr30k.zip", "sha1sum": "13d879429cff00022966324ab486d3317017d706"}, + "data": {"url": get_repo_url() + "flickr30k.zip"}, } _registry_name = "flickr30k" @@ -293,9 +291,8 @@ class SNLI(BaseMatcherDataset): _INFO = { "train": { "url": get_repo_url() + "snli/snli_train.csv", - "sha1sum": "2ebac97d99112f0817a0070dc48826f08ae2b42b", }, - "test": {"url": get_repo_url() + "snli/snli_test.csv", "sha1sum": "87d304ad75b3d64f0f58e316befc7aeba4729b8f"}, + "test": {"url": get_repo_url() + "snli/snli_test.csv"}, } _registry_name = "snli" @@ -303,7 +300,7 @@ def __init__(self, split="train"): self._split = split self._path = os.path.join(get_data_home_dir(), "snli", f"{split}.csv") try: - download(self._INFO[split]["url"], path=self._path, sha1_hash=self._INFO[split]["sha1sum"]) + download(self._INFO[split]["url"], path=self._path) self._data = pd.read_csv(self._path, delimiter="|") except Exception: logger.warn(f"The data split {self._split} is not available.") @@ -344,11 +341,9 @@ class MitMovies(BaseMultiModalDataset): _INFO = { "train": { "url": get_repo_url() + "ner/mit-movies/train_v2.csv", - "sha1sum": "6732ddd21040ab8cd14418f4970af280b4b38a7a", }, "test": { "url": get_repo_url() + "ner/mit-movies/test_v2.csv", - "sha1sum": "99040f5f9d4990f62498ad0deeebc472a97e7885", }, } _registry_name = "mit_movies" @@ -382,11 +377,9 @@ class WomenClothingReview(BaseMultiModalDataset): _INFO = { "train": { "url": get_repo_url() + "women_clothing_review/train.pq", - "sha1sum": "980023e4c063eae51adafc98482610a9a6a1878b", }, "test": { "url": get_repo_url() + "women_clothing_review/test.pq", - "sha1sum": "fbc84f757b8a08210a772613ca8342f3990eb1f7", }, } _registry_name = "women_clothing_review" @@ -437,11 +430,9 @@ class MelBourneAirBnb(BaseMultiModalDataset): _INFO = { "train": { "url": get_repo_url() + 
"airbnb_melbourne/train.pq", - "sha1sum": "49f7d95df663d1199e6d860102d5863e48765caf", }, "test": { "url": get_repo_url() + "airbnb_melbourne/test.pq", - "sha1sum": "c28611514b659295fe4b345c3995005719499946", }, } _registry_name = "melbourne_airbnb" @@ -506,11 +497,9 @@ class AEPricePrediction(BaseMultiModalDataset): _INFO = { "train": { "url": get_repo_url() + "ae_price_prediction/train.pq", - "sha1sum": "5b8a6327cc9429176d58af33ca3cc3480fe6c759", }, "test": { "url": get_repo_url() + "ae_price_prediction/test.pq", - "sha1sum": "7bebcaae48410386f610fd7a9c37ba0e89602858", }, } _registry_name = "ae_price_prediction" @@ -556,11 +545,9 @@ class IMDBGenrePrediction(BaseMultiModalDataset): _INFO = { "train": { "url": get_repo_url() + "imdb_genre_prediction/train.csv", - "sha1sum": "56d2d5e3b19663d033fdfb6e33e4eb9c79c67864", }, "test": { "url": get_repo_url() + "imdb_genre_prediction/test.csv", - "sha1sum": "0e435e917159542d725d21135cfa514ae936d2c1", }, } _registry_name = "imdb_genre_prediction" @@ -606,11 +593,9 @@ class JCPennyCategory(BaseMultiModalDataset): _INFO = { "train": { "url": get_repo_url() + "jc_penney_products/train.csv", - "sha1sum": "b59ce843ad05073a3fccf5ebc4840b3b0649f059", }, "test": { "url": get_repo_url() + "jc_penney_products/test.csv", - "sha1sum": "23bca284354deec13a11ef7bd726d35a01eb1332", }, } _registry_name = "jc_penney_products" @@ -656,11 +641,9 @@ class NewsPopularity(BaseMultiModalDataset): _INFO = { "train": { "url": get_repo_url() + "news_popularity2/train.csv", - "sha1sum": "390b15e77fa77a2722ce2d459a977034a9565f46", }, "test": { "url": get_repo_url() + "news_popularity2/test.csv", - "sha1sum": "297253bdca18f6aafbaee0262be430126c1f9044", }, } _registry_name = "news_popularity" @@ -706,11 +689,9 @@ class NewsChannel(BaseMultiModalDataset): _INFO = { "train": { "url": get_repo_url() + "news_channel/train.csv", - "sha1sum": "ab226210b6a878b449d01f33a195014c65c22311", }, "test": { "url": get_repo_url() + "news_channel/test.csv", - "sha1sum": 
"a71516784ce6e168bd9933e9ec50080f65cb05fd", }, } _registry_name = "news_channel" @@ -749,3 +730,4 @@ def metric(self): @property def problem_type(self): return _MULTICLASS + diff --git a/src/autogluon/bench/datasets/object_detection_dataset.py b/src/autogluon/bench/datasets/object_detection_dataset.py index 73fb5756..8ebbb86c 100644 --- a/src/autogluon/bench/datasets/object_detection_dataset.py +++ b/src/autogluon/bench/datasets/object_detection_dataset.py @@ -22,7 +22,7 @@ def __init__(self, split: str, dataset_name: str, data_info: dict): split (str): Specifies the dataset split. It should be one of the following options: 'train', 'val', 'test'. """ self._path = os.path.join(get_data_home_dir(), dataset_name) - load_zip.unzip(data_info["data"]["url"], unzip_dir=self._path, sha1sum=data_info["data"]["sha1sum"]) + load_zip.unzip(data_info["data"]["url"], unzip_dir=self._path) self._base_folder = os.path.join(self._path, dataset_name) self._data_path = os.path.join(self._base_folder, "Annotations", f"{split}_cocoformat.json") if not os.path.exists(self._data_path): @@ -54,7 +54,6 @@ class TinyMotorbike(BaseObjectDetectionDataset): _INFO = { "data": { "url": get_repo_url() + "object_detection_dataset/tiny_motorbike_coco.zip", - "sha1sum": "45c883b2feb0721d6eef29055fa28fb46b6e5346", }, } _registry_name = "tiny_motorbike" @@ -81,7 +80,6 @@ class Clipart(BaseObjectDetectionDataset): _INFO = { "data": { "url": get_repo_url() + "few_shot_object_detection/clipart.zip", - "sha1sum": "d25b2f905da597d7857297ac8e3efe4555e0bf32", }, } _registry_name = "clipart" From 694eb99d3260cbeafe93da511810dc811e30db5a Mon Sep 17 00:00:00 2001 From: Su Zhou Date: Fri, 29 Mar 2024 20:10:57 +0000 Subject: [PATCH 05/12] override predictor time_limit with constraint --- .../bench/frameworks/multimodal/exec.py | 8 ++++++++ .../multimodal/multimodal_benchmark.py | 4 ++++ src/autogluon/bench/runbenchmark.py | 18 +++++++++++++----- 3 files changed, 25 insertions(+), 5 deletions(-) diff --git 
a/src/autogluon/bench/frameworks/multimodal/exec.py b/src/autogluon/bench/frameworks/multimodal/exec.py index 9efda327..093ba847 100644 --- a/src/autogluon/bench/frameworks/multimodal/exec.py +++ b/src/autogluon/bench/frameworks/multimodal/exec.py @@ -45,6 +45,7 @@ def get_args(): "--custom_dataloader", type=str, default=None, help="Custom dataloader to use in the benchmark." ) parser.add_argument("--custom_metrics", type=str, default=None, help="Custom metrics to use in the benchmark.") + parser.add_argument("--time_limit", type=int, default=None, help="Time limit used to fit the predictor.") args = parser.parse_args() return args @@ -149,6 +150,7 @@ def run( params: Optional[dict] = None, custom_dataloader: Optional[dict] = None, custom_metrics: Optional[dict] = None, + time_limit: Optional[int] = None, ): """Runs the AutoGluon multimodal benchmark on a given dataset. @@ -219,6 +221,11 @@ def run( predictor = MultiModalPredictor(**predictor_args) + if time_limit is not None: + params["time_limit"] = time_limit + logger.warning(f"params[\"time_limit\"] is being overriden by time_limit specified in constraints.yaml. 
params[\"time_limit\"] = {time_limit}") + + fit_args = {"train_data": train_data.data, "tuning_data": val_data.data, **params} utc_time = datetime.utcnow().strftime("%Y-%m-%dT%H:%M:%S") @@ -300,4 +307,5 @@ def run( params=args.params, custom_dataloader=args.custom_dataloader, custom_metrics=args.custom_metrics, + time_limit=args.time_limit, ) diff --git a/src/autogluon/bench/frameworks/multimodal/multimodal_benchmark.py b/src/autogluon/bench/frameworks/multimodal/multimodal_benchmark.py index fa8efb43..291c0294 100644 --- a/src/autogluon/bench/frameworks/multimodal/multimodal_benchmark.py +++ b/src/autogluon/bench/frameworks/multimodal/multimodal_benchmark.py @@ -56,6 +56,7 @@ def run( params: Optional[dict] = None, custom_dataloader: Optional[dict] = None, custom_metrics: Optional[dict] = None, + time_limit: Optional[int] = None, ): """ Runs the benchmark on a given dataset. @@ -84,6 +85,7 @@ def run( metrics_path: path_to/metrics.py # relative path to WORKDIR function_name: custom_metrics_function **kwargs (of ) + time_limit (Optional[int], None): Time limit for predictor.fit() Returns: None @@ -116,6 +118,8 @@ def run( command += ["--custom_dataloader", json.dumps(custom_dataloader)] if custom_metrics is not None: command += ["--custom_metrics", json.dumps(custom_metrics)] + if time_limit is not None: + command += ["--time_limit", str(time_limit)] result = subprocess.run(command) if result.returncode != 0: sys.exit(1) diff --git a/src/autogluon/bench/runbenchmark.py b/src/autogluon/bench/runbenchmark.py index 1ed7b639..db982082 100644 --- a/src/autogluon/bench/runbenchmark.py +++ b/src/autogluon/bench/runbenchmark.py @@ -32,6 +32,7 @@ logging.basicConfig(level=logging.INFO) logger = logging.getLogger(__name__) AMLB_DEPENDENT_MODULES = ["tabular", "timeseries"] +INDEPENDENT_MODULES = ["multimodal"] with importlib.resources.path("autogluon.bench", "Dockerfile") as docker_file: project_path = os.path.join(os.path.dirname(docker_file)) @@ -48,7 +49,7 @@ def 
get_kwargs(module: str, configs: dict): A dictionary containing the keyword arguments to be used for setting up and running the benchmark. """ - if module == "multimodal": + if module in INDEPENDENT_MODULES: framework_configs = get_framework_configs(configs=configs) return { "setup_kwargs": { @@ -62,6 +63,7 @@ def get_kwargs(module: str, configs: dict): "params": framework_configs.get("params"), "custom_dataloader": configs.get("custom_dataloader"), "custom_metrics": configs.get("custom_metrics"), + "time_limit": configs.get("time_limit"), }, } elif module in AMLB_DEPENDENT_MODULES: @@ -341,11 +343,11 @@ def get_resource(configs: dict, resource_name: str): return resources -def update_resource_constraint(configs: dict): +def get_resource_constraint(configs: dict): constraint_name = configs.get("constraint", "test") constraints = get_resource(configs=configs, resource_name="multimodal_constraints") constraint_configs = constraints[constraint_name] - configs["cdk_context"].update(constraint_configs) + return constraint_configs def get_framework_configs(configs: dict): @@ -430,7 +432,7 @@ def run( _mount_dir(orig_path=original_path, new_path=path) os.environ["AMLB_USER_DIR"] = default_user_dir # For Docker build configs["amlb_user_dir"] = default_user_dir # For Lambda job config - elif module == "multimodal": + elif module in INDEPENDENT_MODULES: if configs.get("custom_dataloader") is not None: original_path, custom_dataloader_path = update_custom_dataloader(configs=configs) paths.append(custom_dataloader_path) @@ -443,8 +445,11 @@ def run( _umount_if_needed(custom_metrics_path) _mount_dir(orig_path=original_path, new_path=custom_metrics_path) - update_resource_constraint(configs=configs) + resource_constraint = get_resource_constraint(configs=configs) + configs["cdk_context"].update(resource_constraint) + framework_configs = get_framework_configs(configs=configs) + if configs.get("custom_resource_dir") is not None: custom_resource_path = os.path.join(project_path, 
"custom_configs", "resources") paths.append(custom_resource_path) @@ -532,6 +537,9 @@ def run( if amlb_user_dir and amlb_user_dir.startswith("s3://"): tmpdir = tempfile.TemporaryDirectory() configs["amlb_user_dir"] = download_dir_from_s3(s3_path=amlb_user_dir, local_path=tmpdir.name) + elif module in INDEPENDENT_MODULES: + resource_constraint = get_resource_constraint(configs=configs) + configs["time_limit"] = resource_constraint["TIME_LIMIT"] logger.info(f"Running benchmark {benchmark_name} at {benchmark_dir}.") From 9d5673f4aa0b1e71866e7bd445f14387a84b33f0 Mon Sep 17 00:00:00 2001 From: Su Zhou Date: Sat, 30 Mar 2024 00:38:24 +0000 Subject: [PATCH 06/12] update configs --- sample_configs/multimodal_local_configs.yaml | 8 ++++---- sample_configs/resources/multimodal_constraints.yaml | 2 +- sample_configs/resources/multimodal_frameworks.yaml | 6 +++--- 3 files changed, 8 insertions(+), 8 deletions(-) diff --git a/sample_configs/multimodal_local_configs.yaml b/sample_configs/multimodal_local_configs.yaml index 0bfadcd3..ce4099a0 100644 --- a/sample_configs/multimodal_local_configs.yaml +++ b/sample_configs/multimodal_local_configs.yaml @@ -4,14 +4,14 @@ mode: local # required benchmark_name: ag_bench # required root_dir: ag_bench_runs # optional, default = "ag_bench_runs" # METRICS_BUCKET: autogluon-benchmark-metrics # optional, required only if you want to upload metrics to S3 +constraint: 10m4x # Multimodal specific -framework: AutoGluon_stable # required -dataset_name: # required - melbourne_airbnb +framework: AutoGluon_branch # required +dataset_name: clipart #### Customizations #### -# custom_resource_dir: sample_configs/resources/ # path to custom multimodal_frameworks.yaml and multimodal_constraints.yaml +custom_resource_dir: sample_configs/resources/ # path to custom multimodal_frameworks.yaml and multimodal_constraints.yaml # custom_dataloader: # dataloader_file: sample_configs/dataloaders/vision_dataloader.py # relative path to WORKDIR # class_name: 
VisionDataLoader diff --git a/sample_configs/resources/multimodal_constraints.yaml b/sample_configs/resources/multimodal_constraints.yaml index 22defb3c..a88a6962 100644 --- a/sample_configs/resources/multimodal_constraints.yaml +++ b/sample_configs/resources/multimodal_constraints.yaml @@ -1,5 +1,5 @@ 10m4x: - TIME_LIMIT: 500 + TIME_LIMIT: 100 INSTANCE: g4dn.4xlarge # MAX_MACHINE_NUM: 20 # optional, default 20 # BLOCK_DEVICE_VOLUME: 100 # optional, default 100GB diff --git a/sample_configs/resources/multimodal_frameworks.yaml b/sample_configs/resources/multimodal_frameworks.yaml index 0c384263..ef572b6b 100644 --- a/sample_configs/resources/multimodal_frameworks.yaml +++ b/sample_configs/resources/multimodal_frameworks.yaml @@ -1,9 +1,9 @@ AutoGluon_branch: repo: https://github.com/autogluon/autogluon.git - version: stable_GA4_update + version: master params: # MultimodalPredictor.fit(params) presets: medium_quality - time_limit: 90 + time_limit: 500 hyperparameters: - optimization.max_epochs: 1 + optimization.max_epochs: 50 optimization.learning_rate: 0.005 From 84435bff1efe8d12e3c5e16a0214e38d1f6cd785 Mon Sep 17 00:00:00 2001 From: Su Zhou Date: Sat, 30 Mar 2024 00:45:37 +0000 Subject: [PATCH 07/12] lint --- .../bench/cloud/aws/batch_stack/lambdas/lambda_function.py | 4 +++- src/autogluon/bench/datasets/multimodal_dataset.py | 1 - src/autogluon/bench/frameworks/multimodal/exec.py | 5 +++-- 3 files changed, 6 insertions(+), 4 deletions(-) diff --git a/src/autogluon/bench/cloud/aws/batch_stack/lambdas/lambda_function.py b/src/autogluon/bench/cloud/aws/batch_stack/lambdas/lambda_function.py index 7b61e074..9db10528 100644 --- a/src/autogluon/bench/cloud/aws/batch_stack/lambdas/lambda_function.py +++ b/src/autogluon/bench/cloud/aws/batch_stack/lambdas/lambda_function.py @@ -231,7 +231,9 @@ def generate_config_combinations(config, metrics_bucket, batch_job_queue, batch_ job_type = "array" if len(job_configs) > 1 else "single" constraint = 
config.get("amlb_constraint") or config.get("constraint") job_name = f"{benchmark_name}-{config['module']}-{config['framework']}-{constraint}-{job_type}-job" - job_name = re.sub(r'(?![-_])\W', '-', job_name)[:128] # AWS Bath Job name can only contain letters, numbers, "-" and "_" + job_name = re.sub(r"(?![-_])\W", "-", job_name)[ + :128 + ] # AWS Batch Job name can only contain letters, numbers, "-" and "_" parent_job_id = submit_batch_job( env=env, diff --git a/src/autogluon/bench/datasets/multimodal_dataset.py b/src/autogluon/bench/datasets/multimodal_dataset.py index 47e99837..f9c2ec55 100644 --- a/src/autogluon/bench/datasets/multimodal_dataset.py +++ b/src/autogluon/bench/datasets/multimodal_dataset.py @@ -730,4 +730,3 @@ def metric(self): @property def problem_type(self): return _MULTICLASS - diff --git a/src/autogluon/bench/frameworks/multimodal/exec.py b/src/autogluon/bench/frameworks/multimodal/exec.py index 093ba847..93af6d59 100644 --- a/src/autogluon/bench/frameworks/multimodal/exec.py +++ b/src/autogluon/bench/frameworks/multimodal/exec.py @@ -223,8 +223,9 @@ def run( if time_limit is not None: params["time_limit"] = time_limit - logger.warning(f"params[\"time_limit\"] is being overriden by time_limit specified in constraints.yaml. params[\"time_limit\"] = {time_limit}") - + logger.warning( + f'params["time_limit"] is being overriden by time_limit specified in constraints.yaml. 
params["time_limit"] = {time_limit}' + ) fit_args = {"train_data": train_data.data, "tuning_data": val_data.data, **params} From 2062bf1eec07f8d80bab4ed077c85cc8285647ec Mon Sep 17 00:00:00 2001 From: Su Zhou Date: Sat, 30 Mar 2024 21:04:14 +0000 Subject: [PATCH 08/12] fix --- sample_configs/dataloaders/text_dataloader.py | 2 +- sample_configs/dataloaders/vision_dataloader.py | 2 +- src/autogluon/bench/resources/multimodal_frameworks.yaml | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/sample_configs/dataloaders/text_dataloader.py b/sample_configs/dataloaders/text_dataloader.py index 0c699d43..0fecfeb8 100644 --- a/sample_configs/dataloaders/text_dataloader.py +++ b/sample_configs/dataloaders/text_dataloader.py @@ -10,7 +10,7 @@ logger = logging.getLogger(__name__) -class TextDataLoaer: +class TextDataLoader: def __init__( self, dataset_name: str, diff --git a/sample_configs/dataloaders/vision_dataloader.py b/sample_configs/dataloaders/vision_dataloader.py index 8a629706..e87d5e7a 100644 --- a/sample_configs/dataloaders/vision_dataloader.py +++ b/sample_configs/dataloaders/vision_dataloader.py @@ -16,7 +16,7 @@ def path_expander(path, base_folder): logger = logging.getLogger(__name__) -class VisionDataLoaer: +class VisionDataLoader: def __init__(self, dataset_name: str, dataset_config_file: str, split: str = "train"): with open(dataset_config_file, "r") as f: config = yaml.safe_load(f) diff --git a/src/autogluon/bench/resources/multimodal_frameworks.yaml b/src/autogluon/bench/resources/multimodal_frameworks.yaml index 062a0541..4506a8b9 100644 --- a/src/autogluon/bench/resources/multimodal_frameworks.yaml +++ b/src/autogluon/bench/resources/multimodal_frameworks.yaml @@ -1,6 +1,6 @@ AutoGluon_stable: repo: https://github.com/autogluon/autogluon.git - version: master + version: stable params: # MultimodalPredictor.fit(params) presets: best_quality hyperparameters: From af937ef42ddc2ed86d9a56edf5cacec44a270c29 Mon Sep 17 00:00:00 2001 From: Su 
Zhou Date: Sat, 30 Mar 2024 00:59:56 +0000 Subject: [PATCH 09/12] update test --- tests/unittests/benchmark/test_runbenchmarks.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tests/unittests/benchmark/test_runbenchmarks.py b/tests/unittests/benchmark/test_runbenchmarks.py index d2800818..840e2827 100644 --- a/tests/unittests/benchmark/test_runbenchmarks.py +++ b/tests/unittests/benchmark/test_runbenchmarks.py @@ -106,7 +106,7 @@ def test_get_kwargs_multimodal(): expected_result = { "setup_kwargs": { "git_uri": "https://github.com/autogluon/autogluon.git", - "git_branch": "master", + "git_branch": "stable", }, "run_kwargs": { "dataset_name": "dataset", @@ -115,6 +115,7 @@ def test_get_kwargs_multimodal(): "params": {"presets": "best_quality", "hyperparameters": {"optimization.max_epochs": 10}}, "custom_dataloader": None, "custom_metrics": None, + "time_limit": None, }, } From 53ec68cdf8b1426e4cd534ea76ece54990ec559d Mon Sep 17 00:00:00 2001 From: Su Zhou Date: Sat, 30 Mar 2024 21:31:11 +0000 Subject: [PATCH 10/12] accept random seed --- src/autogluon/bench/frameworks/multimodal/exec.py | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/src/autogluon/bench/frameworks/multimodal/exec.py b/src/autogluon/bench/frameworks/multimodal/exec.py index 93af6d59..d8b043d2 100644 --- a/src/autogluon/bench/frameworks/multimodal/exec.py +++ b/src/autogluon/bench/frameworks/multimodal/exec.py @@ -4,10 +4,13 @@ import json import logging import os +import random import time from datetime import datetime from typing import Optional, Union +import numpy as np + from autogluon.bench.datasets.dataset_registry import multimodal_dataset_registry from autogluon.core.metrics import make_scorer from autogluon.multimodal import MultiModalPredictor @@ -28,6 +31,11 @@ def _flatten_dict(data): return flattened +def set_seed(seed): + np.random.seed(seed) + random.seed(seed) + + def get_args(): parser = argparse.ArgumentParser() @@ -183,6 +191,9 @@ def run( 
Returns: None """ + seed = params.get("seed", 42) + set_seed(seed) + train_data, val_data, test_data = load_dataset(dataset_name=dataset_name, custom_dataloader=custom_dataloader) try: label_column = train_data.label_columns[0] From 2f06aa9514d64ed11e42163707969a31d1121cd3 Mon Sep 17 00:00:00 2001 From: Su Zhou Date: Sat, 30 Mar 2024 21:32:10 +0000 Subject: [PATCH 11/12] split for test data --- src/autogluon/bench/frameworks/multimodal/exec.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/src/autogluon/bench/frameworks/multimodal/exec.py b/src/autogluon/bench/frameworks/multimodal/exec.py index d8b043d2..3d60f8d1 100644 --- a/src/autogluon/bench/frameworks/multimodal/exec.py +++ b/src/autogluon/bench/frameworks/multimodal/exec.py @@ -10,6 +10,7 @@ from typing import Optional, Union import numpy as np +from sklearn.model_selection import train_test_split from autogluon.bench.datasets.dataset_registry import multimodal_dataset_registry from autogluon.core.metrics import make_scorer @@ -195,6 +196,12 @@ def run( set_seed(seed) train_data, val_data, test_data = load_dataset(dataset_name=dataset_name, custom_dataloader=custom_dataloader) + if test_data.data is None: + logger.warning("No test data found, splitting test data from train data") + train_set, test_set = train_test_split(train_data.data, test_size=0.2, random_state=seed) + train_data.data = train_set + test_data.data = test_set + try: label_column = train_data.label_columns[0] except (AttributeError, IndexError): # Object Detection does not have label columns From ac03ae83e3b789279b9f85f154151c0131918e7f Mon Sep 17 00:00:00 2001 From: Su Zhou Date: Sun, 31 Mar 2024 03:19:56 +0000 Subject: [PATCH 12/12] update version --- pyproject.toml | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 4c6f6785..2391826c 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -41,8 +41,7 @@ dependencies = [ "constructs >=10.0.0,<10.1.289", "fsspec 
>=2023.5.0,<=2023.6.0", "matplotlib >=3.4,<3.8", - "pandas >=1.4.1,<2.2.0", - "pydantic>=1.10.4,<2.0", # https://github.com/ray-project/ray/issues/36990 + "pandas >=2.0.0,<2.2.0", "pyyaml >=5.4,<7", "ray[default] >=2.6.3,<2.7", "s3fs >=2023.5.0,<=2023.6.0",