From 1b67aed19731efbf1f597e0768f9ad7d7a564aef Mon Sep 17 00:00:00 2001 From: Lucas Pavanelli <86805709+lucas-aixplain@users.noreply.github.com> Date: Mon, 15 Jan 2024 12:10:57 -0300 Subject: [PATCH 01/19] Create bounds for FineTune hyperparameters (#103) * Test bound to hyperparameters * Update finetune llm hyperparameters * Remove option to use PEFT, always on use now --- .../factories/finetune_factory/__init__.py | 6 --- aixplain/modules/finetune/__init__.py | 7 --- aixplain/modules/finetune/hyperparameters.py | 50 +++++++++++++++---- aixplain/modules/finetune/peft.py | 10 ---- .../data/finetune_test_cost_estimation.json | 6 --- .../finetune/data/finetune_test_end2end.json | 4 +- 6 files changed, 43 insertions(+), 40 deletions(-) delete mode 100644 aixplain/modules/finetune/peft.py diff --git a/aixplain/factories/finetune_factory/__init__.py b/aixplain/factories/finetune_factory/__init__.py index 07b7b4c3..c171c0e6 100644 --- a/aixplain/factories/finetune_factory/__init__.py +++ b/aixplain/factories/finetune_factory/__init__.py @@ -28,7 +28,6 @@ from aixplain.modules.finetune import Finetune from aixplain.modules.finetune.cost import FinetuneCost from aixplain.modules.finetune.hyperparameters import Hyperparameters -from aixplain.modules.finetune.peft import Peft from aixplain.modules.dataset import Dataset from aixplain.modules.model import Model from aixplain.utils import config @@ -66,7 +65,6 @@ def create( model: Model, prompt_template: Optional[Text] = None, hyperparameters: Optional[Hyperparameters] = None, - peft: Optional[Peft] = None, train_percentage: Optional[float] = 100, dev_percentage: Optional[float] = 0, ) -> Finetune: @@ -78,7 +76,6 @@ def create( model (Model): Model to be fine-tuned. prompt_template (Text, optional): Fine-tuning prompt_template. Should reference columns in the dataset using format <>. Defaults to None. hyperparameters (Hyperparameters, optional): Hyperparameters for fine-tuning. Defaults to None. 
- peft (Peft, optional): PEFT (Parameter-Efficient Fine-Tuning) configuration. Defaults to None. train_percentage (float, optional): Percentage of training samples. Defaults to 100. dev_percentage (float, optional): Percentage of development samples. Defaults to 0. Returns: @@ -106,8 +103,6 @@ def create( parameters["prompt"] = prompt_template if hyperparameters is not None: parameters["hyperparameters"] = hyperparameters.to_dict() - if peft is not None: - parameters["peft"] = peft.to_dict() payload["parameters"] = parameters logging.info(f"Start service for POST Create FineTune - {url} - {headers} - {json.dumps(payload)}") r = _request_with_retry("post", url, headers=headers, json=payload) @@ -123,7 +118,6 @@ def create( dev_percentage=dev_percentage, prompt_template=prompt_template, hyperparameters=hyperparameters, - peft=peft, ) except Exception: error_message = f"Create FineTune: Error with payload {json.dumps(payload)}" diff --git a/aixplain/modules/finetune/__init__.py b/aixplain/modules/finetune/__init__.py index 6683ee9a..e1b63941 100644 --- a/aixplain/modules/finetune/__init__.py +++ b/aixplain/modules/finetune/__init__.py @@ -26,7 +26,6 @@ from urllib.parse import urljoin from aixplain.modules.finetune.cost import FinetuneCost from aixplain.modules.finetune.hyperparameters import Hyperparameters -from aixplain.modules.finetune.peft import Peft from aixplain.factories.model_factory import ModelFactory from aixplain.modules.asset import Asset from aixplain.modules.dataset import Dataset @@ -52,7 +51,6 @@ class Finetune(Asset): dev_percentage (float): Percentage of development samples. prompt_template (Text): Fine-tuning prompt_template. hyperparameters (Hyperparameters): Hyperparameters for fine-tuning. - peft (Peft): PEFT (Parameter-Efficient Fine-Tuning) configuration. additional_info (dict): Additional information to be saved with the FineTune. backend_url (str): URL of the backend. api_key (str): The TEAM API key used for authentication. 
@@ -72,7 +70,6 @@ def __init__( dev_percentage: Optional[float] = 0, prompt_template: Optional[Text] = None, hyperparameters: Optional[Hyperparameters] = None, - peft: Optional[Peft] = None, **additional_info, ) -> None: """Create a FineTune with the necessary information. @@ -90,7 +87,6 @@ def __init__( dev_percentage (float, optional): Percentage of development samples. Defaults to 0. prompt_template (Text, optional): Fine-tuning prompt_template. Should reference columns in the dataset using format <>. Defaults to None. hyperparameters (Hyperparameters, optional): Hyperparameters for fine-tuning. Defaults to None. - peft (Peft, optional): PEFT (Parameter-Efficient Fine-Tuning) configuration. Defaults to None. **additional_info: Additional information to be saved with the FineTune. """ super().__init__(id, name, description, supplier, version) @@ -101,7 +97,6 @@ def __init__( self.dev_percentage = dev_percentage self.prompt_template = prompt_template self.hyperparameters = hyperparameters - self.peft = peft self.additional_info = additional_info self.backend_url = config.BACKEND_URL self.api_key = config.TEAM_API_KEY @@ -134,8 +129,6 @@ def start(self) -> Model: parameters["prompt"] = self.prompt_template if self.hyperparameters is not None: parameters["hyperparameters"] = self.hyperparameters.to_dict() - if self.peft is not None: - parameters["peft"] = self.peft.to_dict() payload["parameters"] = parameters logging.info(f"Start service for POST Start FineTune - {url} - {headers} - {json.dumps(payload)}") r = _request_with_retry("post", url, headers=headers, json=payload) diff --git a/aixplain/modules/finetune/hyperparameters.py b/aixplain/modules/finetune/hyperparameters.py index 51dc9842..2874294e 100644 --- a/aixplain/modules/finetune/hyperparameters.py +++ b/aixplain/modules/finetune/hyperparameters.py @@ -1,8 +1,10 @@ from dataclasses import dataclass from dataclasses_json import dataclass_json +from enum import Enum +from typing import Text -class 
SchedulerType: +class SchedulerType(Text, Enum): LINEAR = "linear" COSINE = "cosine" COSINE_WITH_RESTARTS = "cosine_with_restarts" @@ -13,19 +15,49 @@ class SchedulerType: REDUCE_ON_PLATEAU = "reduce_lr_on_plateau" +EPOCHS_MAX_VALUE = 4 +MAX_SEQ_LENGTH_MAX_VALUE = 4096 +GENERATION_MAX_LENGTH_MAX_VALUE = 225 + + @dataclass_json @dataclass class Hyperparameters(object): - epochs: int = 4 - train_batch_size: int = 4 - eval_batch_size: int = 4 - learning_rate: float = 2e-5 + epochs: int = 1 + learning_rate: float = 1e-5 generation_max_length: int = 225 - tokenizer_batch_size: int = 256 - gradient_checkpointing: bool = False - gradient_accumulation_steps: int = 1 max_seq_length: int = 4096 warmup_ratio: float = 0.0 warmup_steps: int = 0 - early_stopping_patience: int = 1 lr_scheduler_type: SchedulerType = SchedulerType.LINEAR + + def __post_init__(self): + if not isinstance(self.epochs, int): + raise TypeError("epochs should be of type int") + + if not isinstance(self.learning_rate, float): + raise TypeError("learning_rate should be of type float") + + if not isinstance(self.generation_max_length, int): + raise TypeError("generation_max_length should be of type int") + + if not isinstance(self.max_seq_length, int): + raise TypeError("max_seq_length should be of type int") + + if not isinstance(self.warmup_ratio, float): + raise TypeError("warmup_ratio should be of type float") + + if not isinstance(self.warmup_steps, int): + raise TypeError("warmup_steps should be of type int") + + if not isinstance(self.lr_scheduler_type, SchedulerType): + raise TypeError("lr_scheduler_type should be of type SchedulerType") + + if self.epochs > EPOCHS_MAX_VALUE: + raise ValueError(f"epochs must be one less than {EPOCHS_MAX_VALUE}") + + if self.max_seq_length > MAX_SEQ_LENGTH_MAX_VALUE: + raise ValueError(f"max_seq_length must be less than {MAX_SEQ_LENGTH_MAX_VALUE}") + + if self.generation_max_length > GENERATION_MAX_LENGTH_MAX_VALUE: + raise ValueError(f"generation_max_length must be 
less than {GENERATION_MAX_LENGTH_MAX_VALUE}") diff --git a/aixplain/modules/finetune/peft.py b/aixplain/modules/finetune/peft.py deleted file mode 100644 index d17efecf..00000000 --- a/aixplain/modules/finetune/peft.py +++ /dev/null @@ -1,10 +0,0 @@ -from dataclasses import dataclass -from dataclasses_json import dataclass_json - - -@dataclass_json -@dataclass -class Peft(object): - peft_lora_r: int = 8 - peft_lora_alpha: int = 32 - peft_lora_dropout: float = 0.05 diff --git a/tests/functional/finetune/data/finetune_test_cost_estimation.json b/tests/functional/finetune/data/finetune_test_cost_estimation.json index a12ccdfb..46a89e02 100644 --- a/tests/functional/finetune/data/finetune_test_cost_estimation.json +++ b/tests/functional/finetune/data/finetune_test_cost_estimation.json @@ -1,11 +1,5 @@ [ {"model_name": "gpt2", "model_id": "64e615671567f848804985e1", "dataset_name": "Test text generation dataset"}, - {"model_name": "falcon 7b instruct", "model_id": "65519d57bf42e6037ab109d5", "dataset_name": "Test text generation dataset"}, - {"model_name": "bloomz 7b", "model_id": "6551ab17bf42e6037ab109e0", "dataset_name": "Test text generation dataset"}, - {"model_name": "MPT 7B", "model_id": "6551a72bbf42e6037ab109d9", "dataset_name": "Test text generation dataset"}, - {"model_name": "falcon 7b", "model_id": "6551bff9bf42e6037ab109e1", "dataset_name": "Test text generation dataset"}, - {"model_name": "mistral 7b", "model_id": "6551a9e7bf42e6037ab109de", "dataset_name": "Test text generation dataset"}, - {"model_name": "MPT 7B Storywriter", "model_id": "6551a870bf42e6037ab109db", "dataset_name": "Test text generation dataset"}, {"model_name": "llama 2 7b", "model_id": "6543cb991f695e72028e9428", "dataset_name": "Test text generation dataset"}, {"model_name": "Llama 2 7B Chat", "model_id": "65519ee7bf42e6037ab109d8", "dataset_name": "Test text generation dataset"} ] \ No newline at end of file diff --git a/tests/functional/finetune/data/finetune_test_end2end.json 
b/tests/functional/finetune/data/finetune_test_end2end.json index 9682efa2..287f2a00 100644 --- a/tests/functional/finetune/data/finetune_test_end2end.json +++ b/tests/functional/finetune/data/finetune_test_end2end.json @@ -1,7 +1,7 @@ [ { - "model_name": "gpt2", - "model_id": "64e615671567f848804985e1", + "model_name": "llama2 7b", + "model_id": "6543cb991f695e72028e9428", "dataset_name": "Test text generation dataset", "inference_data": "Hello!", "required_dev": true From 61359d8c11dd699360c32ae7daaecb025fcaaddd Mon Sep 17 00:00:00 2001 From: Thiago Castro Ferreira <85182544+thiago-aixplain@users.noreply.github.com> Date: Mon, 15 Jan 2024 17:37:56 -0300 Subject: [PATCH 02/19] Fixing pipeline general asset test (#106) --- tests/functional/general_assets/asset_functional_test.py | 5 ++++- .../functional/general_assets/data/asset_run_test_data.json | 2 +- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/tests/functional/general_assets/asset_functional_test.py b/tests/functional/general_assets/asset_functional_test.py index 56e05609..6a9dceda 100644 --- a/tests/functional/general_assets/asset_functional_test.py +++ b/tests/functional/general_assets/asset_functional_test.py @@ -40,7 +40,10 @@ def test_list(asset_name): def test_run(inputs, asset_name): asset_details = inputs[asset_name] AssetFactory = __get_asset_factory(asset_name) - asset = AssetFactory.get(asset_details["id"]) + if asset_name == "pipeline": + asset = AssetFactory.list(query=asset_details["name"])["results"][0] + else: + asset = AssetFactory.get(asset_details["id"]) payload = asset_details["data"] if type(payload) is dict: output = asset.run(**payload) diff --git a/tests/functional/general_assets/data/asset_run_test_data.json b/tests/functional/general_assets/data/asset_run_test_data.json index e7126a13..abe7a3e9 100644 --- a/tests/functional/general_assets/data/asset_run_test_data.json +++ b/tests/functional/general_assets/data/asset_run_test_data.json @@ -4,7 +4,7 @@ "data": "This is 
a test sentence." }, "pipeline": { - "id" : "64da138fa27cffd5e0c3c30d", + "name": "SingleNodePipeline", "data": "This is a test sentence." }, "metric": { From 8365218d1d1db3385ef6ed3399ef91611c0d18c2 Mon Sep 17 00:00:00 2001 From: Lucas Pavanelli <86805709+lucas-aixplain@users.noreply.github.com> Date: Mon, 29 Jan 2024 12:43:26 -0300 Subject: [PATCH 03/19] Update Finetuner functional tests (#112) --- .../finetune/data/finetune_test_cost_estimation.json | 1 - .../finetune/data/finetune_test_prompt_validator.json | 6 ++++-- tests/functional/finetune/finetune_functional_test.py | 2 +- 3 files changed, 5 insertions(+), 4 deletions(-) diff --git a/tests/functional/finetune/data/finetune_test_cost_estimation.json b/tests/functional/finetune/data/finetune_test_cost_estimation.json index 46a89e02..05b52c2e 100644 --- a/tests/functional/finetune/data/finetune_test_cost_estimation.json +++ b/tests/functional/finetune/data/finetune_test_cost_estimation.json @@ -1,5 +1,4 @@ [ - {"model_name": "gpt2", "model_id": "64e615671567f848804985e1", "dataset_name": "Test text generation dataset"}, {"model_name": "llama 2 7b", "model_id": "6543cb991f695e72028e9428", "dataset_name": "Test text generation dataset"}, {"model_name": "Llama 2 7B Chat", "model_id": "65519ee7bf42e6037ab109d8", "dataset_name": "Test text generation dataset"} ] \ No newline at end of file diff --git a/tests/functional/finetune/data/finetune_test_prompt_validator.json b/tests/functional/finetune/data/finetune_test_prompt_validator.json index cf92e978..94ee6ba8 100644 --- a/tests/functional/finetune/data/finetune_test_prompt_validator.json +++ b/tests/functional/finetune/data/finetune_test_prompt_validator.json @@ -1,12 +1,14 @@ [ { - "model_name": "GPT2", + "model_name": "llama2 7b", + "model_id": "6543cb991f695e72028e9428", "dataset_name": "Test text generation dataset", "prompt_template": "Source: <>\nReference: <>", "is_valid": true }, { - "model_name": "GPT2", + "model_name": "llama2 7b", + "model_id": 
"6543cb991f695e72028e9428", "dataset_name": "Test text generation dataset", "prompt_template": "Source: <>\nReference: <>", "is_valid": false diff --git a/tests/functional/finetune/finetune_functional_test.py b/tests/functional/finetune/finetune_functional_test.py index 90b8b7b2..67a0314f 100644 --- a/tests/functional/finetune/finetune_functional_test.py +++ b/tests/functional/finetune/finetune_functional_test.py @@ -110,7 +110,7 @@ def test_list_finetunable_models(list_input_map): def test_prompt_validator(validate_prompt_input_map): - model = ModelFactory.list(query=validate_prompt_input_map["model_name"], is_finetunable=True)["results"][0] + model = ModelFactory.get(validate_prompt_input_map["model_id"]) dataset_list = [DatasetFactory.list(query=validate_prompt_input_map["dataset_name"])["results"][0]] if validate_prompt_input_map["is_valid"]: finetune = FinetuneFactory.create( From 74af040da0243b9e87eff8fd22278759e53b897d Mon Sep 17 00:00:00 2001 From: mikelam-us-aixplain <131073216+mikelam-us-aixplain@users.noreply.github.com> Date: Mon, 29 Jan 2024 11:46:29 -0800 Subject: [PATCH 04/19] Hf deployment test (#115) * Started adding Hugging Face deployment to aiXplain SDK Signed-off-by: mikelam-us-aixplain * Added model status function to SDK Signed-off-by: mikelam-us-aixplain * Updating Signed-off-by: mikelam-us-aixplain * Updated CLI Signed-off-by: mikelam-us * Adding CLI Signed-off-by: mikelam-us-aixplain * Corrected request error Signed-off-by: mikelam-us-aixplain * Clearing out unnecessary information in return Signed-off-by: mikelam-us-aixplain * Adding status Signed-off-by: mikelam-us-aixplain * Simplifying status Signed-off-by: mikelam-us-aixplain * Adding tests and correcting tokens Signed-off-by: mikelam-us-aixplain * Added bad repo ID test Signed-off-by: mikelam-us-aixplain * Finished rough draft of tests Signed-off-by: mikelam-us-aixplain * Adding tests Signed-off-by: mikelam-us-aixplain * Fixing hf token Signed-off-by: mikelam-us-aixplain * Adding hf 
token Signed-off-by: mikelam-us-aixplain * Correcting first test Signed-off-by: mikelam-us-aixplain * Testing Signed-off-by: mikelam-us-aixplain * Adding config Signed-off-by: mikelam-us-aixplain * Added user doc Signed-off-by: mikelam-us-aixplain * Added gated model test Signed-off-by: mikelam-us-aixplain --------- Signed-off-by: mikelam-us-aixplain Signed-off-by: mikelam-us --- aixplain/cli_groups.py | 5 +- aixplain/factories/cli/model_factory_cli.py | 38 ++++++++++++ aixplain/factories/model_factory.py | 65 ++++++++++++++++++++ docs/user/user_doc.md | 11 ++++ pyproject.toml | 2 +- tests/functional/model/hf_onboarding_test.py | 51 +++++++++++++++ 6 files changed, 170 insertions(+), 2 deletions(-) create mode 100644 tests/functional/model/hf_onboarding_test.py diff --git a/aixplain/cli_groups.py b/aixplain/cli_groups.py index d61a6caa..c5f05826 100644 --- a/aixplain/cli_groups.py +++ b/aixplain/cli_groups.py @@ -21,7 +21,7 @@ CLI Runner """ import click -from aixplain.factories.cli.model_factory_cli import list_host_machines, list_functions, create_asset_repo, asset_repo_login, onboard_model +from aixplain.factories.cli.model_factory_cli import list_host_machines, list_functions, create_asset_repo, asset_repo_login, onboard_model, deploy_huggingface_model, get_huggingface_model_status @click.group('cli') def cli(): @@ -52,7 +52,10 @@ def onboard(): list.add_command(list_host_machines) list.add_command(list_functions) get.add_command(asset_repo_login) +get.add_command(get_huggingface_model_status) onboard.add_command(onboard_model) +onboard.add_command(deploy_huggingface_model) + def run_cli(): cli() \ No newline at end of file diff --git a/aixplain/factories/cli/model_factory_cli.py b/aixplain/factories/cli/model_factory_cli.py index 69f48d4d..264fadd9 100644 --- a/aixplain/factories/cli/model_factory_cli.py +++ b/aixplain/factories/cli/model_factory_cli.py @@ -135,3 +135,41 @@ def onboard_model(model_id: Text, image_tag: Text, image_hash: Text, ret_val = 
ModelFactory.onboard_model(model_id, image_tag, image_hash, api_key) ret_val_yaml = yaml.dump(ret_val) click.echo(ret_val_yaml) + +@click.command("hf-model") +@click.option("--name", help="User-defined name for Hugging Face model.") +@click.option("--hf-repo-id", help="Repository ID from Hugging Face in {supplier}/{model name} form.") +@click.option("--hf-token", help="Hugging Face token used to authenticate to this model.") +@click.option("--api-key", default=None, help="TEAM_API_KEY if not already set in environment.") +def deploy_huggingface_model(name: Text, hf_repo_id: Text, + hf_token: Optional[Text] = None, + api_key: Optional[Text] = None) -> None: + """CLI wrapper function for the DEPLOY_HUGGINGFACE_MODEL function in ModelFactory. + + Args: + name (Text): User-defined name for Hugging Face model. + api_key (Text, optional): Team API key. Defaults to None. + + Returns: + None + """ + ret_val = ModelFactory.deploy_huggingface_model(name, hf_repo_id, hf_token, api_key) + ret_val_yaml = yaml.dump(ret_val) + click.echo(ret_val_yaml) + +@click.command("hf-model-status") +@click.option("--model-id", help="Model ID from DEPLOY_HUGGINGFACE_MODEL.") +@click.option("--api-key", default=None, help="TEAM_API_KEY if not already set in environment.") +def get_huggingface_model_status(model_id: Text, api_key: Optional[Text] = None) -> None: + """CLI wrapper function for the GET_HUGGINGFACE_MODEL_STATUS function in ModelFactory. + + Args: + model_id (Text): Model ID obtained from DEPLOY_HUGGINGFACE_MODEL. + api_key (Text, optional): Team API key. Defaults to None. 
+ + Returns: + None + """ + ret_val = ModelFactory.get_huggingface_model_status(model_id, api_key) + ret_val_yaml = yaml.dump(ret_val) + click.echo(ret_val_yaml) \ No newline at end of file diff --git a/aixplain/factories/model_factory.py b/aixplain/factories/model_factory.py index cf75dd51..cd7de970 100644 --- a/aixplain/factories/model_factory.py +++ b/aixplain/factories/model_factory.py @@ -404,3 +404,68 @@ def onboard_model(cls, model_id: Text, image_tag: Text, image_hash: Text, api_ke message = "Your onboarding request has been submitted to an aiXplain specialist for finalization. We will notify you when the process is completed." logging.info(message) return response + + @classmethod + def deploy_huggingface_model(cls, name: Text, hf_repo_id: Text, hf_token: Optional[Text] = "", api_key: Optional[Text] = None) -> Dict: + """Onboards and deploys a Hugging Face large language model. + + Args: + name (Text): The user's name for the model. + hf_repo_id (Text): The Hugging Face repository ID for this model ({author}/{model name}). + hf_token (Text, optional): Hugging Face access token. Defaults to None. + api_key (Text, optional): Team API key. Defaults to None. 
+ Returns: + Dict: Backend response + """ + supplier, model_name = hf_repo_id.split("/") + deploy_url = urljoin(config.BACKEND_URL, f"sdk/model-onboarding/onboard") + if api_key: + headers = {"Authorization": f"Token {api_key}", "Content-Type": "application/json"} + else: + headers = {"Authorization": f"Token {config.TEAM_API_KEY}", "Content-Type": "application/json"} + body = { + "model": { + "name": name, + "description": "A user-deployed Hugging Face model", + "connectionType": ["synchronous"], + "function": "text-generation", + "documentationUrl": "aiXplain", + "sourceLanguage": "en", + }, + "source": "huggingface", + "onboardingParams": { + "hf_model_name": model_name, + "hf_supplier": supplier, + "hf_token": hf_token + } + } + response = _request_with_retry("post", deploy_url, headers=headers, json=body) + logging.debug(response.text) + response_dicts = json.loads(response.text) + return response_dicts + + @classmethod + def get_huggingface_model_status(cls, model_id: Text, api_key: Optional[Text] = None): + """Gets the on-boarding status of a Hugging Face model with ID MODEL_ID. + + Args: + model_id (Text): The model's ID as returned by DEPLOY_HUGGINGFACE_MODEL + api_key (Text, optional): Team API key. Defaults to None. 
+ Returns: + Dict: Backend response + """ + status_url = urljoin(config.BACKEND_URL, f"sdk/models/{model_id}") + if api_key: + headers = {"Authorization": f"Token {api_key}", "Content-Type": "application/json"} + else: + headers = {"Authorization": f"Token {config.TEAM_API_KEY}", "Content-Type": "application/json"} + response = _request_with_retry("get", status_url, headers=headers) + logging.debug(response.text) + response_dicts = json.loads(response.text) + ret_dict = { + "status": response_dicts["status"], + "name": response_dicts["name"], + "id": response_dicts["id"], + "pricing": response_dicts["pricing"] + } + return ret_dict \ No newline at end of file diff --git a/docs/user/user_doc.md b/docs/user/user_doc.md index 33ff00c1..e9cb882c 100644 --- a/docs/user/user_doc.md +++ b/docs/user/user_doc.md @@ -57,6 +57,17 @@ poll_url = start_response["url"] ## Poll to see current job status poll_response = model.poll(poll_url) ``` +### Deploying Hugging Face Large Language Models +You can deploy your very own Hugging Face large language models on our platform using the aiXplain SDK: +```console +$ aixplain onboard hf-model --name --hf-repo-id --hf-token [--api-key ] +``` +This command will return your model's ID. The on-boarding process will take 5 to 15 minutes, during which you can check the on-boarding status by running the following: +```console +$ aixplain get hf-model-status --model-id [--api-key ] +``` + +Once the on-boarding process has completed, you can use this newly-deployed large language model just like any other model on our platform. Note that our platform currently only supports language models up 7 billion parameters in size (~30 GB), so any attempts to deploy larger models will result in an error message. ### Uploading Models In addition to exploring and running models, the aiXplain SDK allows you to upload your own models to the aiXplain platform. 
This requires a working model image in line with the template specified [here](https://github.com/aixplain/model-interfaces/blob/main/docs/user/model_setup.md). [These](https://github.com/aixplain/model-interfaces/tree/main) are the interfaces with which you will be working. You will also be required to have an aiXplain account as well as a TEAM_API_KEY which should be set either as an environment variable or passed into each of the following functions. diff --git a/pyproject.toml b/pyproject.toml index ab7b901e..22320925 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -5,7 +5,7 @@ build-backend = "setuptools.build_meta" [tool.setuptools.packages.find] where = ["."] -include = ["aixplain"] +include = ["aixplain", "tests"] namespaces = true [project] diff --git a/tests/functional/model/hf_onboarding_test.py b/tests/functional/model/hf_onboarding_test.py new file mode 100644 index 00000000..5757fe07 --- /dev/null +++ b/tests/functional/model/hf_onboarding_test.py @@ -0,0 +1,51 @@ +__author__ = "michaellam" + +import time + +from aixplain.factories.model_factory import ModelFactory +from tests.test_utils import delete_asset +from aixplain.utils import config + +def test_deploy_model(): + # Start the deployment + model_name = "Test Model" + repo_id = "tiiuae/falcon-7b" + response = ModelFactory.deploy_huggingface_model(model_name, repo_id, "mock_key") + assert "id" in response.keys() + + # Check for status + model_id = response["id"] + num_retries = 120 + counter = 0 + while ModelFactory.get_huggingface_model_status(model_id)["status"].lower() != "onboarded": + time.sleep(10) + counter += 1 + if counter == num_retries: + assert ModelFactory.get_huggingface_model_status(model_id)["status"].lower() == "onboarded" + + # Clean up + delete_asset(model_id, config.TEAM_API_KEY) + +def test_nonexistent_model(): + # Start the deployment + model_name = "Test Model" + repo_id = "nonexistent-supplier/nonexistent-model" + response = 
ModelFactory.deploy_huggingface_model(model_name, repo_id, "mock_key") + assert response["statusCode"] == 400 + assert response["message"] == "err.unable_to_onboard_model" + +def test_size_limit(): + # Start the deployment + model_name = "Test Model" + repo_id = "tiiuae/falcon-40b" + response = ModelFactory.deploy_huggingface_model(model_name, repo_id, "mock_key") + assert response["statusCode"] == 400 + assert response["message"] == "err.unable_to_onboard_model" + +def test_gated_model(): + # Start the deployment + model_name = "Test Model" + repo_id = "meta-llama/Llama-2-7b-hf" + response = ModelFactory.deploy_huggingface_model(model_name, repo_id, "mock_key") + assert response["statusCode"] == 400 + assert response["message"] == "err.unable_to_onboard_model" \ No newline at end of file From 7adfdddab02b0b9bd433dbb35ed88673fe4be5a7 Mon Sep 17 00:00:00 2001 From: mikelam-us-aixplain <131073216+mikelam-us-aixplain@users.noreply.github.com> Date: Tue, 30 Jan 2024 11:09:16 -0800 Subject: [PATCH 05/19] Hf deployment test (#117) * Started adding Hugging Face deployment to aiXplain SDK Signed-off-by: mikelam-us-aixplain * Added model status function to SDK Signed-off-by: mikelam-us-aixplain * Updating Signed-off-by: mikelam-us-aixplain * Updated CLI Signed-off-by: mikelam-us * Adding CLI Signed-off-by: mikelam-us-aixplain * Corrected request error Signed-off-by: mikelam-us-aixplain * Clearing out unnecessary information in return Signed-off-by: mikelam-us-aixplain * Adding status Signed-off-by: mikelam-us-aixplain * Simplifying status Signed-off-by: mikelam-us-aixplain * Adding tests and correcting tokens Signed-off-by: mikelam-us-aixplain * Added bad repo ID test Signed-off-by: mikelam-us-aixplain * Finished rough draft of tests Signed-off-by: mikelam-us-aixplain * Adding tests Signed-off-by: mikelam-us-aixplain * Fixing hf token Signed-off-by: mikelam-us-aixplain * Adding hf token Signed-off-by: mikelam-us-aixplain * Correcting first test Signed-off-by: 
mikelam-us-aixplain * Testing Signed-off-by: mikelam-us-aixplain * Adding config Signed-off-by: mikelam-us-aixplain * Added user doc Signed-off-by: mikelam-us-aixplain * Added gated model test Signed-off-by: mikelam-us-aixplain * Merge dev to test (#113) * Create bounds for FineTune hyperparameters (#103) * Test bound to hyperparameters * Update finetune llm hyperparameters * Remove option to use PEFT, always on use now * Fixing pipeline general asset test (#106) * Update Finetuner functional tests (#112) --------- Co-authored-by: Thiago Castro Ferreira <85182544+thiago-aixplain@users.noreply.github.com> * Hf deployment test (#114) * Started adding Hugging Face deployment to aiXplain SDK Signed-off-by: mikelam-us-aixplain * Added model status function to SDK Signed-off-by: mikelam-us-aixplain * Updating Signed-off-by: mikelam-us-aixplain * Updated CLI Signed-off-by: mikelam-us * Adding CLI Signed-off-by: mikelam-us-aixplain * Corrected request error Signed-off-by: mikelam-us-aixplain * Clearing out unnecessary information in return Signed-off-by: mikelam-us-aixplain * Adding status Signed-off-by: mikelam-us-aixplain * Simplifying status Signed-off-by: mikelam-us-aixplain * Adding tests and correcting tokens Signed-off-by: mikelam-us-aixplain * Added bad repo ID test Signed-off-by: mikelam-us-aixplain * Finished rough draft of tests Signed-off-by: mikelam-us-aixplain * Adding tests Signed-off-by: mikelam-us-aixplain * Fixing hf token Signed-off-by: mikelam-us-aixplain * Adding hf token Signed-off-by: mikelam-us-aixplain * Correcting first test Signed-off-by: mikelam-us-aixplain * Testing Signed-off-by: mikelam-us-aixplain * Adding config Signed-off-by: mikelam-us-aixplain * Added user doc Signed-off-by: mikelam-us-aixplain * Added gated model test Signed-off-by: mikelam-us-aixplain --------- Signed-off-by: mikelam-us-aixplain Signed-off-by: mikelam-us * Adding HF token Signed-off-by: mikelam-us-aixplain --------- Signed-off-by: mikelam-us-aixplain Signed-off-by: 
mikelam-us Co-authored-by: Lucas Pavanelli <86805709+lucas-aixplain@users.noreply.github.com> Co-authored-by: Thiago Castro Ferreira <85182544+thiago-aixplain@users.noreply.github.com> --- aixplain/utils/config.py | 1 + tests/functional/model/hf_onboarding_test.py | 6 +++--- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/aixplain/utils/config.py b/aixplain/utils/config.py index a8596282..3bb0eb09 100644 --- a/aixplain/utils/config.py +++ b/aixplain/utils/config.py @@ -26,3 +26,4 @@ PIPELINE_API_KEY = os.getenv("PIPELINE_API_KEY", "") MODEL_API_KEY = os.getenv("MODEL_API_KEY", "") LOG_LEVEL = os.getenv("LOG_LEVEL", "INFO") +HF_TOKEN = os.getenv("HF_TOKEN", "") \ No newline at end of file diff --git a/tests/functional/model/hf_onboarding_test.py b/tests/functional/model/hf_onboarding_test.py index 5757fe07..b70b0580 100644 --- a/tests/functional/model/hf_onboarding_test.py +++ b/tests/functional/model/hf_onboarding_test.py @@ -10,7 +10,7 @@ def test_deploy_model(): # Start the deployment model_name = "Test Model" repo_id = "tiiuae/falcon-7b" - response = ModelFactory.deploy_huggingface_model(model_name, repo_id, "mock_key") + response = ModelFactory.deploy_huggingface_model(model_name, repo_id, config.HF_TOKEN) assert "id" in response.keys() # Check for status @@ -30,7 +30,7 @@ def test_nonexistent_model(): # Start the deployment model_name = "Test Model" repo_id = "nonexistent-supplier/nonexistent-model" - response = ModelFactory.deploy_huggingface_model(model_name, repo_id, "mock_key") + response = ModelFactory.deploy_huggingface_model(model_name, repo_id, config.HF_TOKEN) assert response["statusCode"] == 400 assert response["message"] == "err.unable_to_onboard_model" @@ -38,7 +38,7 @@ def test_size_limit(): # Start the deployment model_name = "Test Model" repo_id = "tiiuae/falcon-40b" - response = ModelFactory.deploy_huggingface_model(model_name, repo_id, "mock_key") + response = ModelFactory.deploy_huggingface_model(model_name, repo_id, 
config.HF_TOKEN) assert response["statusCode"] == 400 assert response["message"] == "err.unable_to_onboard_model" From 4c5358c2f412ff139bf4cfc38883592c20aab775 Mon Sep 17 00:00:00 2001 From: Thiago Castro Ferreira <85182544+thiago-aixplain@users.noreply.github.com> Date: Wed, 31 Jan 2024 21:15:26 -0300 Subject: [PATCH 06/19] Do not download textual URLs (#120) * Do not download textual URLs * Treat as string --------- Co-authored-by: Thiago Castro Ferreira --- .../processes/data_onboarding/process_text_files.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/aixplain/processes/data_onboarding/process_text_files.py b/aixplain/processes/data_onboarding/process_text_files.py index 8ead651e..48db3f4e 100644 --- a/aixplain/processes/data_onboarding/process_text_files.py +++ b/aixplain/processes/data_onboarding/process_text_files.py @@ -3,6 +3,7 @@ import logging import os import pandas as pd +import validators from aixplain.enums.file_type import FileType from aixplain.enums.storage_type import StorageType @@ -33,6 +34,15 @@ def process_text(content: str, storage_type: StorageType) -> Text: text = f.read() else: text = content + + # if the row is a textual URL (which should not be downloaded), tag it + if storage_type in [StorageType.FILE, StorageType.TEXT] and ( + str(text).startswith("s3://") + or str(text).startswith("http://") + or str(text).startswith("https://") + or validators.url(text) + ): + text = "DONOTDOWNLOAD" + str(text) return text From cb14b5a1dc34977493b1afacefa6374d57bd85ef Mon Sep 17 00:00:00 2001 From: Thiago Castro Ferreira <85182544+thiago-aixplain@users.noreply.github.com> Date: Mon, 5 Feb 2024 07:15:22 -0300 Subject: [PATCH 07/19] Enable api key parameter in data asset creation (#122) Co-authored-by: Thiago Castro Ferreira --- aixplain/factories/corpus_factory.py | 4 +++- aixplain/factories/dataset_factory.py | 12 +++++++----- .../processes/data_onboarding/onboard_functions.py | 8 ++++++-- 3 files changed, 16 insertions(+), 8 
deletions(-) diff --git a/aixplain/factories/corpus_factory.py b/aixplain/factories/corpus_factory.py index 4dbb981d..1f81ac4d 100644 --- a/aixplain/factories/corpus_factory.py +++ b/aixplain/factories/corpus_factory.py @@ -245,6 +245,7 @@ def create( functions: List[Function] = [], privacy: Privacy = Privacy.PRIVATE, error_handler: ErrorHandler = ErrorHandler.SKIP, + api_key: Optional[Text] = None ) -> Dict: """Asynchronous call to Upload a corpus to the user's dashboard. @@ -259,6 +260,7 @@ def create( functions (Optional[List[Function]], optional): AI functions for which the corpus may be used. Defaults to []. privacy (Optional[Privacy], optional): visibility of the corpus. Defaults to Privacy.PRIVATE. error_handler (ErrorHandler, optional): how to handle failed rows in the data asset. Defaults to ErrorHandler.SKIP. + api_key (Optional[Text]): team api key. Defaults to None. Returns: Dict: response dict @@ -351,7 +353,7 @@ def create( corpus_payload = onboard_functions.build_payload_corpus(corpus, [ref.id for ref in ref_data], error_handler) - response = onboard_functions.create_data_asset(corpus_payload) + response = onboard_functions.create_data_asset(payload=corpus_payload, api_key=api_key) if response["success"] is True: return_dict = {"status": response["status"], "asset_id": response["asset_id"]} else: diff --git a/aixplain/factories/dataset_factory.py b/aixplain/factories/dataset_factory.py index 04d04f47..5e69d572 100644 --- a/aixplain/factories/dataset_factory.py +++ b/aixplain/factories/dataset_factory.py @@ -280,8 +280,9 @@ def create( split_labels: Optional[List[Text]] = None, split_rate: Optional[List[float]] = None, error_handler: ErrorHandler = ErrorHandler.SKIP, - s3_link: Optional[str] = None, - aws_credentials: Optional[Dict[str, str]] = {"AWS_ACCESS_KEY_ID": None, "AWS_SECRET_ACCESS_KEY": None}, + s3_link: Optional[Text] = None, + aws_credentials: Optional[Dict[Text, Text]] = {"AWS_ACCESS_KEY_ID": None, "AWS_SECRET_ACCESS_KEY": None}, + 
api_key: Optional[Text] = None ) -> Dict: """Dataset Onboard @@ -302,8 +303,9 @@ def create( tags (List[Text], optional): datasets description tags. Defaults to []. privacy (Privacy, optional): dataset privacy. Defaults to Privacy.PRIVATE. error_handler (ErrorHandler, optional): how to handle failed rows in the data asset. Defaults to ErrorHandler.SKIP. - s3_link (Optional[str]): s3 url to files or directories - aws_credentials (Optional[Dict[str, str]]) : credentials for AWS and it should contains these two keys `AWS_ACCESS_KEY_ID` and `AWS_SECRET_ACCESS_KEY` + s3_link (Optional[Text]): s3 url to files or directories + aws_credentials (Optional[Dict[Text, Text]]) : credentials for AWS and it should contains these two keys `AWS_ACCESS_KEY_ID` and `AWS_SECRET_ACCESS_KEY` + api_key (Optional[Text]): team api key. Defaults to None. Returns: Dict: dataset onboard status """ @@ -485,7 +487,7 @@ def create( # len(dataset_payload["output"]) > 0 # ), "Data Asset Onboarding Error: Please specify the output data of your dataset." 
- response = onboard_functions.create_data_asset(dataset_payload, data_asset_type="dataset") + response = onboard_functions.create_data_asset(payload=dataset_payload, data_asset_type="dataset", api_key=api_key) if response["success"] is True: return_dict = {"status": response["status"], "asset_id": response["asset_id"]} else: diff --git a/aixplain/processes/data_onboarding/onboard_functions.py b/aixplain/processes/data_onboarding/onboard_functions.py index 0c038a1f..091458fd 100644 --- a/aixplain/processes/data_onboarding/onboard_functions.py +++ b/aixplain/processes/data_onboarding/onboard_functions.py @@ -288,17 +288,21 @@ def build_payload_dataset( return payload -def create_data_asset(payload: Dict, data_asset_type: Text = "corpus") -> Dict: +def create_data_asset(payload: Dict, data_asset_type: Text = "corpus", api_key: Optional[Text] = None) -> Dict: """Service to call onboard process in coreengine Args: payload (Dict): onboard payload data_asset_type (Text, optional): corpus or dataset. Defaults to "corpus". + api_key (Optional[Text]): team api key. Defaults to None. 
Returns: Dict: onboard status """ - team_key = config.TEAM_API_KEY + if api_key is not None: + team_key = api_key + else: + team_key = config.TEAM_API_KEY headers = {"Authorization": "token " + team_key} url = urljoin(config.BACKEND_URL, f"sdk/{data_asset_type}/onboard") From 66a043b8ce03a8b828c40a327bacfe3040cc3afd Mon Sep 17 00:00:00 2001 From: Lucas Pavanelli <86805709+lucas-aixplain@users.noreply.github.com> Date: Wed, 7 Feb 2024 19:29:21 -0300 Subject: [PATCH 08/19] Update Finetuner hyperparameters (#125) * Update Finetuner hyperparameters * Change hyperparameters error message --- aixplain/modules/finetune/hyperparameters.py | 21 +++++- tests/unit/hyperparameters_test.py | 75 ++++++++++++++++++++ 2 files changed, 93 insertions(+), 3 deletions(-) create mode 100644 tests/unit/hyperparameters_test.py diff --git a/aixplain/modules/finetune/hyperparameters.py b/aixplain/modules/finetune/hyperparameters.py index 2874294e..116454c8 100644 --- a/aixplain/modules/finetune/hyperparameters.py +++ b/aixplain/modules/finetune/hyperparameters.py @@ -16,6 +16,7 @@ class SchedulerType(Text, Enum): EPOCHS_MAX_VALUE = 4 +BATCH_SIZE_VALUES = [1, 2, 4, 8, 16, 32, 64, 128] MAX_SEQ_LENGTH_MAX_VALUE = 4096 GENERATION_MAX_LENGTH_MAX_VALUE = 225 @@ -24,6 +25,8 @@ class SchedulerType(Text, Enum): @dataclass class Hyperparameters(object): epochs: int = 1 + train_batch_size: int = 4 + eval_batch_size: int = 4 learning_rate: float = 1e-5 generation_max_length: int = 225 max_seq_length: int = 4096 @@ -35,6 +38,12 @@ def __post_init__(self): if not isinstance(self.epochs, int): raise TypeError("epochs should be of type int") + if not isinstance(self.train_batch_size, int): + raise TypeError("train_batch_size should be of type int") + + if not isinstance(self.eval_batch_size, int): + raise TypeError("eval_batch_size should be of type int") + if not isinstance(self.learning_rate, float): raise TypeError("learning_rate should be of type float") @@ -54,10 +63,16 @@ def __post_init__(self): 
raise TypeError("lr_scheduler_type should be of type SchedulerType") if self.epochs > EPOCHS_MAX_VALUE: - raise ValueError(f"epochs must be one less than {EPOCHS_MAX_VALUE}") + raise ValueError(f"epochs must be less or equal to {EPOCHS_MAX_VALUE}") + + if self.train_batch_size not in BATCH_SIZE_VALUES: + raise ValueError(f"train_batch_size must be one of the following values: {BATCH_SIZE_VALUES}") + + if self.eval_batch_size not in BATCH_SIZE_VALUES: + raise ValueError(f"eval_batch_size must be one of the following values: {BATCH_SIZE_VALUES}") if self.max_seq_length > MAX_SEQ_LENGTH_MAX_VALUE: - raise ValueError(f"max_seq_length must be less than {MAX_SEQ_LENGTH_MAX_VALUE}") + raise ValueError(f"max_seq_length must be less or equal to {MAX_SEQ_LENGTH_MAX_VALUE}") if self.generation_max_length > GENERATION_MAX_LENGTH_MAX_VALUE: - raise ValueError(f"generation_max_length must be less than {GENERATION_MAX_LENGTH_MAX_VALUE}") + raise ValueError(f"generation_max_length must be less or equal to {GENERATION_MAX_LENGTH_MAX_VALUE}") diff --git a/tests/unit/hyperparameters_test.py b/tests/unit/hyperparameters_test.py new file mode 100644 index 00000000..f22f0b5d --- /dev/null +++ b/tests/unit/hyperparameters_test.py @@ -0,0 +1,75 @@ +__author__ = "lucaspavanelli" + +""" +Copyright 2022 The aiXplain SDK authors + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+""" + +import uuid +import json +from dotenv import load_dotenv + +load_dotenv() +from aixplain.factories import FinetuneFactory +from aixplain.modules.finetune import Hyperparameters +from aixplain.modules.finetune.hyperparameters import ( + EPOCHS_MAX_VALUE, + BATCH_SIZE_VALUES, + MAX_SEQ_LENGTH_MAX_VALUE, + GENERATION_MAX_LENGTH_MAX_VALUE, +) + + +import pytest + + +def test_create(): + hyp = Hyperparameters() + assert hyp is not None + + +@pytest.mark.parametrize( + "params", + [ + {"epochs": "string"}, + {"train_batch_size": "string"}, + {"eval_batch_size": "string"}, + {"learning_rate": "string"}, + {"generation_max_length": "string"}, + {"max_seq_length": "string"}, + {"warmup_ratio": "string"}, + {"warmup_steps": "string"}, + {"lr_scheduler_type": "string"}, + ], +) +def test_create_invalid_type(params): + with pytest.raises(Exception) as exc_info: + Hyperparameters(**params) + assert exc_info.type is TypeError + + +@pytest.mark.parametrize( + "params", + [ + {"epochs": EPOCHS_MAX_VALUE + 1}, + {"train_batch_size": max(BATCH_SIZE_VALUES) + 1}, + {"eval_batch_size": max(BATCH_SIZE_VALUES) + 1}, + {"generation_max_length": GENERATION_MAX_LENGTH_MAX_VALUE + 1}, + {"max_seq_length": MAX_SEQ_LENGTH_MAX_VALUE + 1}, + ], +) +def test_create_invalid_values(params): + with pytest.raises(Exception) as exc_info: + Hyperparameters(**params) + assert exc_info.type is ValueError From 3bc04d3da6331bef58953159497f38ed952ce91d Mon Sep 17 00:00:00 2001 From: Lucas Pavanelli <86805709+lucas-aixplain@users.noreply.github.com> Date: Mon, 12 Feb 2024 23:16:41 -0300 Subject: [PATCH 09/19] Add new LLMs finetuner models (mistral and solar) (#128) --- .../finetune/data/finetune_test_cost_estimation.json | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tests/functional/finetune/data/finetune_test_cost_estimation.json b/tests/functional/finetune/data/finetune_test_cost_estimation.json index 05b52c2e..a9c8d10f 100644 --- 
a/tests/functional/finetune/data/finetune_test_cost_estimation.json +++ b/tests/functional/finetune/data/finetune_test_cost_estimation.json @@ -1,4 +1,6 @@ [ {"model_name": "llama 2 7b", "model_id": "6543cb991f695e72028e9428", "dataset_name": "Test text generation dataset"}, - {"model_name": "Llama 2 7B Chat", "model_id": "65519ee7bf42e6037ab109d8", "dataset_name": "Test text generation dataset"} + {"model_name": "Llama 2 7B Chat", "model_id": "65519ee7bf42e6037ab109d8", "dataset_name": "Test text generation dataset"}, + {"model_name": "mistral 7b", "model_id": "6551a9e7bf42e6037ab109de", "dataset_name": "Test text generation dataset"}, + {"model_name": "solar 10b", "model_id": "65b7baac1d5ea75105c14971", "dataset_name": "Test text generation dataset"} ] \ No newline at end of file From 58855c1a1ec0a2cf40f781bb53605267bdc03742 Mon Sep 17 00:00:00 2001 From: Thiago Castro Ferreira <85182544+thiago-aixplain@users.noreply.github.com> Date: Thu, 15 Feb 2024 18:54:35 -0300 Subject: [PATCH 10/19] Enabling dataset ID and model ID as parameters for finetuner creation (#131) Co-authored-by: Thiago Castro Ferreira --- .../factories/finetune_factory/__init__.py | 20 ++++++++++++++----- tests/unit/finetune_test.py | 6 +++++- 2 files changed, 20 insertions(+), 6 deletions(-) diff --git a/aixplain/factories/finetune_factory/__init__.py b/aixplain/factories/finetune_factory/__init__.py index c171c0e6..7b05b759 100644 --- a/aixplain/factories/finetune_factory/__init__.py +++ b/aixplain/factories/finetune_factory/__init__.py @@ -22,9 +22,11 @@ """ import logging -from typing import Dict, List, Optional, Text +from typing import Dict, List, Optional, Text, Union import json +from aixplain.factories.dataset_factory import DatasetFactory from aixplain.factories.finetune_factory.prompt_validator import validate_prompt +from aixplain.factories.model_factory import ModelFactory from aixplain.modules.finetune import Finetune from aixplain.modules.finetune.cost import FinetuneCost from 
aixplain.modules.finetune.hyperparameters import Hyperparameters @@ -61,8 +63,8 @@ def _create_cost_from_response(cls, response: Dict) -> FinetuneCost: def create( cls, name: Text, - dataset_list: List[Dataset], - model: Model, + dataset_list: List[Union[Dataset, Text]], + model: Union[Model, Text], prompt_template: Optional[Text] = None, hyperparameters: Optional[Hyperparameters] = None, train_percentage: Optional[float] = 100, @@ -72,8 +74,8 @@ def create( Args: name (Text): Name of the Finetune. - dataset_list (List[Dataset]): List of Datasets to be used for fine-tuning. - model (Model): Model to be fine-tuned. + dataset_list (List[Dataset]): List of Datasets (or dataset IDs) to be used for fine-tuning. + model (Model): Model (Model ID) to be fine-tuned. prompt_template (Text, optional): Fine-tuning prompt_template. Should reference columns in the dataset using format <>. Defaults to None. hyperparameters (Hyperparameters, optional): Hyperparameters for fine-tuning. Defaults to None. train_percentage (float, optional): Percentage of training samples. Defaults to 100. 
@@ -86,6 +88,14 @@ def create( assert ( train_percentage + dev_percentage <= 100 ), f"Create FineTune: Train percentage + dev percentage ({train_percentage + dev_percentage}) must be less than or equal to one" + + for i, dataset in enumerate(dataset_list): + if isinstance(dataset, str) is True: + dataset_list[i] = DatasetFactory.get(dataset_id=dataset) + + if isinstance(model, str) is True: + model = ModelFactory.get(model_id=model) + if prompt_template is not None: prompt_template = validate_prompt(prompt_template, dataset_list) try: diff --git a/tests/unit/finetune_test.py b/tests/unit/finetune_test.py index a0303825..5696572b 100644 --- a/tests/unit/finetune_test.py +++ b/tests/unit/finetune_test.py @@ -51,12 +51,16 @@ def percentage_exception_map(request): def test_create(): + model_map = read_data(MODEL_FILE) with requests_mock.Mocker() as mock: + test_model = "test_asset_id" + url = f"{MODEL_URL}/{test_model}" + mock.get(url, headers=FIXED_HEADER, json=model_map) cost_estimation_map = read_data(COST_ESTIMATION_FILE) mock.post(COST_ESTIMATION_URL, headers=FIXED_HEADER, json=cost_estimation_map) - test_model = Model("", "") finetune = FinetuneFactory.create("", [], test_model) assert finetune is not None + assert finetune.model.id == test_model assert finetune.cost.to_dict() == cost_estimation_map From 426213ed1593dad8864d4768caea66c06fc74bc8 Mon Sep 17 00:00:00 2001 From: Thiago Castro Ferreira <85182544+thiago-aixplain@users.noreply.github.com> Date: Fri, 16 Feb 2024 11:41:01 -0300 Subject: [PATCH 11/19] Fix supplier representation of a model (#132) * Fix supplier representation of a model * Fixing parameter typing --------- Co-authored-by: Thiago Castro Ferreira --- aixplain/modules/asset.py | 6 +++--- aixplain/modules/corpus.py | 3 +++ aixplain/modules/model.py | 13 ++++++++----- 3 files changed, 14 insertions(+), 8 deletions(-) diff --git a/aixplain/modules/asset.py b/aixplain/modules/asset.py index c5fb7f9f..34fea4e4 100644 --- a/aixplain/modules/asset.py 
+++ b/aixplain/modules/asset.py @@ -23,7 +23,7 @@ from aixplain.enums.license import License from aixplain.enums.supplier import Supplier from aixplain.enums.privacy import Privacy -from typing import Dict, Optional, Text, Tuple +from typing import Dict, Optional, Text, Union class Asset: @@ -32,7 +32,7 @@ def __init__( id: Text, name: Text, description: Text, - supplier: Tuple[Dict, Text, Supplier, int] = "aiXplain", + supplier: Union[Dict, Text, Supplier, int] = "aiXplain", version: Text = "1.0", license: Optional[License] = None, privacy: Privacy = Privacy.PRIVATE, @@ -44,7 +44,7 @@ def __init__( id (Text): ID of the Asset name (Text): Name of the Asset description (Text): Description of the Asset - supplier (Tuple[Text, Supplier, int], optional): supplier of the asset. Defaults to "aiXplain". + supplier (Union[Dict, Text, Supplier, int], optional): supplier of the asset. Defaults to "aiXplain". version (Optional[Text], optional): asset version. Defaults to "1.0". """ self.id = id diff --git a/aixplain/modules/corpus.py b/aixplain/modules/corpus.py index f023dc4b..b65664b6 100644 --- a/aixplain/modules/corpus.py +++ b/aixplain/modules/corpus.py @@ -82,6 +82,9 @@ def __init__( self.length = length self.kwargs = kwargs + def __repr__(self): + return f"" + def delete(self) -> None: """Delete Corpus service""" try: diff --git a/aixplain/modules/model.py b/aixplain/modules/model.py index a4264107..0804af29 100644 --- a/aixplain/modules/model.py +++ b/aixplain/modules/model.py @@ -27,7 +27,7 @@ import traceback from typing import List from aixplain.factories.file_factory import FileFactory -from aixplain.enums import Function +from aixplain.enums import Function, Supplier from aixplain.modules.asset import Asset from aixplain.utils import config from urllib.parse import urljoin @@ -44,7 +44,7 @@ class Model(Asset): description (Text, optional): description of the model. Defaults to "". api_key (Text, optional): API key of the Model. Defaults to None. 
url (Text, optional): endpoint of the model. Defaults to config.MODELS_RUN_URL. - supplier (Text, optional): model supplier. Defaults to "aiXplain". + supplier (Union[Dict, Text, Supplier, int], optional): supplier of the asset. Defaults to "aiXplain". version (Text, optional): version of the model. Defaults to "1.0". function (Text, optional): model AI function. Defaults to None. url (str): URL to run the model. @@ -58,7 +58,7 @@ def __init__( name: Text, description: Text = "", api_key: Optional[Text] = None, - supplier: Text = "aiXplain", + supplier: Union[Dict, Text, Supplier, int] = "aiXplain", version: Optional[Text] = None, function: Optional[Text] = None, is_subscribed: bool = False, @@ -71,7 +71,7 @@ def __init__( name (Text): Name of the Model description (Text, optional): description of the model. Defaults to "". api_key (Text, optional): API key of the Model. Defaults to None. - supplier (Text, optional): model supplier. Defaults to "aiXplain". + supplier (Union[Dict, Text, Supplier, int], optional): supplier of the asset. Defaults to "aiXplain". version (Text, optional): version of the model. Defaults to "1.0". function (Text, optional): model AI function. Defaults to None. is_subscribed (bool, optional): Is the user subscribed. Defaults to False. @@ -95,7 +95,10 @@ def to_dict(self) -> Dict: return {"id": self.id, "name": self.name, "supplier": self.supplier, "additional_info": clean_additional_info} def __repr__(self): - return f"" + try: + return f"" + except Exception: + return f"" def __polling(self, poll_url: Text, name: Text = "model_process", wait_time: float = 0.5, timeout: float = 300) -> Dict: """Keeps polling the platform to check whether an asynchronous call is done. 
From d62ad8353ff7edd4295d4d7c5eda5a5ae8a31f57 Mon Sep 17 00:00:00 2001 From: Thiago Castro Ferreira <85182544+thiago-aixplain@users.noreply.github.com> Date: Wed, 21 Feb 2024 15:29:18 -0300 Subject: [PATCH 12/19] Fixing indentation in documentation sample code (#134) Co-authored-by: Thiago Castro Ferreira --- docs/user/user_doc.md | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/docs/user/user_doc.md b/docs/user/user_doc.md index e9cb882c..00034240 100644 --- a/docs/user/user_doc.md +++ b/docs/user/user_doc.md @@ -219,11 +219,17 @@ You can also process an aiXplain data asset, being a Corpus or a Dataset, using ```python # Run Synchronously -result = pipeline.run(data="64acbad666608858f693a3a0", data_asset="64acbad666608858f693a39f") +result = pipeline.run( + data_asset="64acbad666608858f693a39f", + data="64acbad666608858f693a3a0" +) # Run Asynchronously ## Start async job -start_response = pipeline.run_async(data="64acbad666608858f693a3a0", data_asset="64acbad666608858f693a39f") +start_response = pipeline.run_async( + data_asset="64acbad666608858f693a39f", + data="64acbad666608858f693a3a0" +) poll_url = start_response["url"] ## Poll to see current job status poll_response = pipeline.poll(poll_url) From 2f107e78c7e3d84f3660a1d74026b2edc0c19cc4 Mon Sep 17 00:00:00 2001 From: Lucas Pavanelli <86805709+lucas-aixplain@users.noreply.github.com> Date: Fri, 1 Mar 2024 13:29:57 -0300 Subject: [PATCH 13/19] Update FineTune unit and functional tests (#136) --- aixplain/modules/finetune/hyperparameters.py | 10 +--------- .../data/finetune_test_cost_estimation.json | 12 +++++++++--- .../finetune/data/finetune_test_end2end.json | 16 +++++++++++++--- .../finetune/finetune_functional_test.py | 8 +++++++- tests/unit/finetune_test.py | 14 +++++++++++++- tests/unit/hyperparameters_test.py | 6 ------ 6 files changed, 43 insertions(+), 23 deletions(-) diff --git a/aixplain/modules/finetune/hyperparameters.py b/aixplain/modules/finetune/hyperparameters.py 
index 116454c8..915a5b27 100644 --- a/aixplain/modules/finetune/hyperparameters.py +++ b/aixplain/modules/finetune/hyperparameters.py @@ -16,9 +16,8 @@ class SchedulerType(Text, Enum): EPOCHS_MAX_VALUE = 4 -BATCH_SIZE_VALUES = [1, 2, 4, 8, 16, 32, 64, 128] +BATCH_SIZE_VALUES = [1, 2, 4, 8, 16, 32, 64] MAX_SEQ_LENGTH_MAX_VALUE = 4096 -GENERATION_MAX_LENGTH_MAX_VALUE = 225 @dataclass_json @@ -28,7 +27,6 @@ class Hyperparameters(object): train_batch_size: int = 4 eval_batch_size: int = 4 learning_rate: float = 1e-5 - generation_max_length: int = 225 max_seq_length: int = 4096 warmup_ratio: float = 0.0 warmup_steps: int = 0 @@ -47,9 +45,6 @@ def __post_init__(self): if not isinstance(self.learning_rate, float): raise TypeError("learning_rate should be of type float") - if not isinstance(self.generation_max_length, int): - raise TypeError("generation_max_length should be of type int") - if not isinstance(self.max_seq_length, int): raise TypeError("max_seq_length should be of type int") @@ -73,6 +68,3 @@ def __post_init__(self): if self.max_seq_length > MAX_SEQ_LENGTH_MAX_VALUE: raise ValueError(f"max_seq_length must be less or equal to {MAX_SEQ_LENGTH_MAX_VALUE}") - - if self.generation_max_length > GENERATION_MAX_LENGTH_MAX_VALUE: - raise ValueError(f"generation_max_length must be less or equal to {GENERATION_MAX_LENGTH_MAX_VALUE}") diff --git a/tests/functional/finetune/data/finetune_test_cost_estimation.json b/tests/functional/finetune/data/finetune_test_cost_estimation.json index a9c8d10f..80f4d331 100644 --- a/tests/functional/finetune/data/finetune_test_cost_estimation.json +++ b/tests/functional/finetune/data/finetune_test_cost_estimation.json @@ -1,6 +1,12 @@ [ - {"model_name": "llama 2 7b", "model_id": "6543cb991f695e72028e9428", "dataset_name": "Test text generation dataset"}, + {"model_name": "Llama 2 7b", "model_id": "6543cb991f695e72028e9428", "dataset_name": "Test text generation dataset"}, {"model_name": "Llama 2 7B Chat", "model_id": 
"65519ee7bf42e6037ab109d8", "dataset_name": "Test text generation dataset"}, - {"model_name": "mistral 7b", "model_id": "6551a9e7bf42e6037ab109de", "dataset_name": "Test text generation dataset"}, - {"model_name": "solar 10b", "model_id": "65b7baac1d5ea75105c14971", "dataset_name": "Test text generation dataset"} + {"model_name": "Mistral 7b", "model_id": "6551a9e7bf42e6037ab109de", "dataset_name": "Test text generation dataset"}, + {"model_name": "Solar 10b", "model_id": "65b7baac1d5ea75105c14971", "dataset_name": "Test text generation dataset"}, + {"model_name": "Falcon 7b", "model_id": "6551bff9bf42e6037ab109e1", "dataset_name": "Test text generation dataset"}, + {"model_name": "Falcon 7b Instruct", "model_id": "65519d57bf42e6037ab109d5", "dataset_name": "Test text generation dataset"}, + {"model_name": "MPT 7b", "model_id": "6551a72bbf42e6037ab109d9", "dataset_name": "Test text generation dataset"}, + {"model_name": "MPT 7b storywriter", "model_id": "6551a870bf42e6037ab109db", "dataset_name": "Test text generation dataset"}, + {"model_name": "BloomZ 7b", "model_id": "6551ab17bf42e6037ab109e0", "dataset_name": "Test text generation dataset"}, + {"model_name": "BloomZ 7b MT", "model_id": "656e80147ca71e334752d5a3", "dataset_name": "Test text generation dataset"} ] \ No newline at end of file diff --git a/tests/functional/finetune/data/finetune_test_end2end.json b/tests/functional/finetune/data/finetune_test_end2end.json index 287f2a00..ead1fd88 100644 --- a/tests/functional/finetune/data/finetune_test_end2end.json +++ b/tests/functional/finetune/data/finetune_test_end2end.json @@ -4,13 +4,23 @@ "model_id": "6543cb991f695e72028e9428", "dataset_name": "Test text generation dataset", "inference_data": "Hello!", - "required_dev": true + "required_dev": true, + "search_metadata": false }, { "model_name": "aiR", "model_id": "6499cc946eb5633de15d82a1", - "dataset_name": "Test search dataset", + "dataset_name": "Test search dataset metadata", "inference_data": "Hello!", 
- "required_dev": false + "required_dev": false, + "search_metadata": true + }, + { + "model_name": "vectara", + "model_id": "655e20f46eb563062a1aa301", + "dataset_name": "Test search dataset metadata", + "inference_data": "Hello!", + "required_dev": false, + "search_metadata": true } ] \ No newline at end of file diff --git a/tests/functional/finetune/finetune_functional_test.py b/tests/functional/finetune/finetune_functional_test.py index 67a0314f..0231d7cb 100644 --- a/tests/functional/finetune/finetune_functional_test.py +++ b/tests/functional/finetune/finetune_functional_test.py @@ -60,7 +60,7 @@ def validate_prompt_input_map(request): return request.param -def test_end2end_text_generation(run_input_map): +def test_end2end(run_input_map): model = ModelFactory.get(run_input_map["model_id"]) dataset_list = [DatasetFactory.list(query=run_input_map["dataset_name"])["results"][0]] train_percentage, dev_percentage = 100, 0 @@ -84,7 +84,13 @@ def test_end2end_text_generation(run_input_map): end = time.time() assert finetune_model.check_finetune_status() == "onboarded" result = finetune_model.run(run_input_map["inference_data"]) + print(f"Result: {result}") assert result is not None + if run_input_map["search_metadata"]: + assert "details" in result + assert len(result["details"]) > 0 + assert "metadata" in result["details"][0] + assert len(result["details"][0]["metadata"]) > 0 finetune_model.delete() diff --git a/tests/unit/finetune_test.py b/tests/unit/finetune_test.py index 5696572b..d95089ea 100644 --- a/tests/unit/finetune_test.py +++ b/tests/unit/finetune_test.py @@ -21,11 +21,13 @@ from dotenv import load_dotenv load_dotenv() +import requests import requests_mock from aixplain.utils import config from aixplain.factories import ModelFactory from aixplain.factories import FinetuneFactory from aixplain.modules import Model, Finetune +from aixplain.modules.finetune import Hyperparameters from aixplain.enums import Function import pytest @@ -58,11 +60,21 @@ def 
test_create(): mock.get(url, headers=FIXED_HEADER, json=model_map) cost_estimation_map = read_data(COST_ESTIMATION_FILE) mock.post(COST_ESTIMATION_URL, headers=FIXED_HEADER, json=cost_estimation_map) - finetune = FinetuneFactory.create("", [], test_model) + finetune = FinetuneFactory.create("", [], test_model, prompt_template="test", hyperparameters=Hyperparameters()) assert finetune is not None assert finetune.model.id == test_model assert finetune.cost.to_dict() == cost_estimation_map +def test_create_exception(): + model_map = read_data(MODEL_FILE) + with requests_mock.Mocker() as mock: + test_model = "test_asset_id" + url = f"{MODEL_URL}/{test_model}" + mock.get(url, headers=FIXED_HEADER, json=model_map) + mock.post(COST_ESTIMATION_URL, exc=requests.exceptions.ConnectTimeout) + finetune = FinetuneFactory.create("", [], test_model, prompt_template="test", hyperparameters=Hyperparameters()) + assert finetune is None + def test_create_train_dev_percentage(percentage_exception_map): with requests_mock.Mocker() as mock: diff --git a/tests/unit/hyperparameters_test.py b/tests/unit/hyperparameters_test.py index f22f0b5d..d64a3c0e 100644 --- a/tests/unit/hyperparameters_test.py +++ b/tests/unit/hyperparameters_test.py @@ -16,18 +16,14 @@ limitations under the License. 
""" -import uuid -import json from dotenv import load_dotenv load_dotenv() -from aixplain.factories import FinetuneFactory from aixplain.modules.finetune import Hyperparameters from aixplain.modules.finetune.hyperparameters import ( EPOCHS_MAX_VALUE, BATCH_SIZE_VALUES, MAX_SEQ_LENGTH_MAX_VALUE, - GENERATION_MAX_LENGTH_MAX_VALUE, ) @@ -46,7 +42,6 @@ def test_create(): {"train_batch_size": "string"}, {"eval_batch_size": "string"}, {"learning_rate": "string"}, - {"generation_max_length": "string"}, {"max_seq_length": "string"}, {"warmup_ratio": "string"}, {"warmup_steps": "string"}, @@ -65,7 +60,6 @@ def test_create_invalid_type(params): {"epochs": EPOCHS_MAX_VALUE + 1}, {"train_batch_size": max(BATCH_SIZE_VALUES) + 1}, {"eval_batch_size": max(BATCH_SIZE_VALUES) + 1}, - {"generation_max_length": GENERATION_MAX_LENGTH_MAX_VALUE + 1}, {"max_seq_length": MAX_SEQ_LENGTH_MAX_VALUE + 1}, ], ) From 32179d6f8e12da444cd5a4cdcd34dddf0940ca86 Mon Sep 17 00:00:00 2001 From: mikelam-us-aixplain <131073216+mikelam-us-aixplain@users.noreply.github.com> Date: Tue, 5 Mar 2024 12:50:19 -0800 Subject: [PATCH 14/19] Click fix (#140) * Merge to prod (#119) * Merge dev to test (#107) * Create bounds for FineTune hyperparameters (#103) * Test bound to hyperparameters * Update finetune llm hyperparameters * Remove option to use PEFT, always on use now * Fixing pipeline general asset test (#106) --------- Co-authored-by: Thiago Castro Ferreira <85182544+thiago-aixplain@users.noreply.github.com> * Development to Test (#109) * Create bounds for FineTune hyperparameters (#103) * Test bound to hyperparameters * Update finetune llm hyperparameters * Remove option to use PEFT, always on use now * Fixing pipeline general asset test (#106) --------- Co-authored-by: Lucas Pavanelli <86805709+lucas-aixplain@users.noreply.github.com> * Merge to test (#111) * Create bounds for FineTune hyperparameters (#103) * Test bound to hyperparameters * Update finetune llm hyperparameters * Remove option to use 
PEFT, always on use now * Fixing pipeline general asset test (#106) --------- Co-authored-by: Lucas Pavanelli <86805709+lucas-aixplain@users.noreply.github.com> Co-authored-by: Thiago Castro Ferreira <85182544+thiago-aixplain@users.noreply.github.com> * Merge dev to test (#113) * Create bounds for FineTune hyperparameters (#103) * Test bound to hyperparameters * Update finetune llm hyperparameters * Remove option to use PEFT, always on use now * Fixing pipeline general asset test (#106) * Update Finetuner functional tests (#112) --------- Co-authored-by: Thiago Castro Ferreira <85182544+thiago-aixplain@users.noreply.github.com> * Hf deployment test (#114) * Started adding Hugging Face deployment to aiXplain SDK Signed-off-by: mikelam-us-aixplain * Added model status function to SDK Signed-off-by: mikelam-us-aixplain * Updating Signed-off-by: mikelam-us-aixplain * Updated CLI Signed-off-by: mikelam-us * Adding CLI Signed-off-by: mikelam-us-aixplain * Corrected request error Signed-off-by: mikelam-us-aixplain * Clearing out unnecessary information in return Signed-off-by: mikelam-us-aixplain * Adding status Signed-off-by: mikelam-us-aixplain * Simplifying status Signed-off-by: mikelam-us-aixplain * Adding tests and correcting tokens Signed-off-by: mikelam-us-aixplain * Added bad repo ID test Signed-off-by: mikelam-us-aixplain * Finished rough draft of tests Signed-off-by: mikelam-us-aixplain * Adding tests Signed-off-by: mikelam-us-aixplain * Fixing hf token Signed-off-by: mikelam-us-aixplain * Adding hf token Signed-off-by: mikelam-us-aixplain * Correcting first test Signed-off-by: mikelam-us-aixplain * Testing Signed-off-by: mikelam-us-aixplain * Adding config Signed-off-by: mikelam-us-aixplain * Added user doc Signed-off-by: mikelam-us-aixplain * Added gated model test Signed-off-by: mikelam-us-aixplain --------- Signed-off-by: mikelam-us-aixplain Signed-off-by: mikelam-us * Hf deployment test (#118) * Started adding Hugging Face deployment to aiXplain SDK 
Signed-off-by: mikelam-us-aixplain * Added model status function to SDK Signed-off-by: mikelam-us-aixplain * Updating Signed-off-by: mikelam-us-aixplain * Updated CLI Signed-off-by: mikelam-us * Create bounds for FineTune hyperparameters (#103) * Test bound to hyperparameters * Update finetune llm hyperparameters * Remove option to use PEFT, always on use now * Fixing pipeline general asset test (#106) * Adding CLI Signed-off-by: mikelam-us-aixplain * Corrected request error Signed-off-by: mikelam-us-aixplain * Clearing out unnecessary information in return Signed-off-by: mikelam-us-aixplain * Adding status Signed-off-by: mikelam-us-aixplain * Simplifying status Signed-off-by: mikelam-us-aixplain * Adding tests and correcting tokens Signed-off-by: mikelam-us-aixplain * Added bad repo ID test Signed-off-by: mikelam-us-aixplain * Finished rough draft of tests Signed-off-by: mikelam-us-aixplain * Adding tests Signed-off-by: mikelam-us-aixplain * Fixing hf token Signed-off-by: mikelam-us-aixplain * Adding hf token Signed-off-by: mikelam-us-aixplain * Correcting first test Signed-off-by: mikelam-us-aixplain * Testing Signed-off-by: mikelam-us-aixplain * Adding config Signed-off-by: mikelam-us-aixplain * Added user doc Signed-off-by: mikelam-us-aixplain * Added gated model test Signed-off-by: mikelam-us-aixplain * Update Finetuner functional tests (#112) * Hf deployment test (#115) * Started adding Hugging Face deployment to aiXplain SDK Signed-off-by: mikelam-us-aixplain * Added model status function to SDK Signed-off-by: mikelam-us-aixplain * Updating Signed-off-by: mikelam-us-aixplain * Updated CLI Signed-off-by: mikelam-us * Adding CLI Signed-off-by: mikelam-us-aixplain * Corrected request error Signed-off-by: mikelam-us-aixplain * Clearing out unnecessary information in return Signed-off-by: mikelam-us-aixplain * Adding status Signed-off-by: mikelam-us-aixplain * Simplifying status Signed-off-by: mikelam-us-aixplain * Adding tests and correcting tokens 
Signed-off-by: mikelam-us-aixplain * Added bad repo ID test Signed-off-by: mikelam-us-aixplain * Finished rough draft of tests Signed-off-by: mikelam-us-aixplain * Adding tests Signed-off-by: mikelam-us-aixplain * Fixing hf token Signed-off-by: mikelam-us-aixplain * Adding hf token Signed-off-by: mikelam-us-aixplain * Correcting first test Signed-off-by: mikelam-us-aixplain * Testing Signed-off-by: mikelam-us-aixplain * Adding config Signed-off-by: mikelam-us-aixplain * Added user doc Signed-off-by: mikelam-us-aixplain * Added gated model test Signed-off-by: mikelam-us-aixplain --------- Signed-off-by: mikelam-us-aixplain Signed-off-by: mikelam-us * Adding HF token Signed-off-by: mikelam-us-aixplain --------- Signed-off-by: mikelam-us-aixplain Signed-off-by: mikelam-us Co-authored-by: Lucas Pavanelli <86805709+lucas-aixplain@users.noreply.github.com> Co-authored-by: Thiago Castro Ferreira <85182544+thiago-aixplain@users.noreply.github.com> --------- Signed-off-by: mikelam-us-aixplain Signed-off-by: mikelam-us Co-authored-by: Lucas Pavanelli <86805709+lucas-aixplain@users.noreply.github.com> Co-authored-by: Thiago Castro Ferreira <85182544+thiago-aixplain@users.noreply.github.com> Co-authored-by: mikelam-us-aixplain <131073216+mikelam-us-aixplain@users.noreply.github.com> * Merge to prod. 
(#130) * Merge dev to test (#107) * Create bounds for FineTune hyperparameters (#103) * Test bound to hyperparameters * Update finetune llm hyperparameters * Remove option to use PEFT, always on use now * Fixing pipeline general asset test (#106) --------- Co-authored-by: Thiago Castro Ferreira <85182544+thiago-aixplain@users.noreply.github.com> * Development to Test (#109) * Create bounds for FineTune hyperparameters (#103) * Test bound to hyperparameters * Update finetune llm hyperparameters * Remove option to use PEFT, always on use now * Fixing pipeline general asset test (#106) --------- Co-authored-by: Lucas Pavanelli <86805709+lucas-aixplain@users.noreply.github.com> * Merge to test (#111) * Create bounds for FineTune hyperparameters (#103) * Test bound to hyperparameters * Update finetune llm hyperparameters * Remove option to use PEFT, always on use now * Fixing pipeline general asset test (#106) --------- Co-authored-by: Lucas Pavanelli <86805709+lucas-aixplain@users.noreply.github.com> Co-authored-by: Thiago Castro Ferreira <85182544+thiago-aixplain@users.noreply.github.com> * Merge dev to test (#113) * Create bounds for FineTune hyperparameters (#103) * Test bound to hyperparameters * Update finetune llm hyperparameters * Remove option to use PEFT, always on use now * Fixing pipeline general asset test (#106) * Update Finetuner functional tests (#112) --------- Co-authored-by: Thiago Castro Ferreira <85182544+thiago-aixplain@users.noreply.github.com> * Hf deployment test (#114) * Started adding Hugging Face deployment to aiXplain SDK Signed-off-by: mikelam-us-aixplain * Added model status function to SDK Signed-off-by: mikelam-us-aixplain * Updating Signed-off-by: mikelam-us-aixplain * Updated CLI Signed-off-by: mikelam-us * Adding CLI Signed-off-by: mikelam-us-aixplain * Corrected request error Signed-off-by: mikelam-us-aixplain * Clearing out unnecessary information in return Signed-off-by: mikelam-us-aixplain * Adding status Signed-off-by: 
mikelam-us-aixplain * Simplifying status Signed-off-by: mikelam-us-aixplain * Adding tests and correcting tokens Signed-off-by: mikelam-us-aixplain * Added bad repo ID test Signed-off-by: mikelam-us-aixplain * Finished rough draft of tests Signed-off-by: mikelam-us-aixplain * Adding tests Signed-off-by: mikelam-us-aixplain * Fixing hf token Signed-off-by: mikelam-us-aixplain * Adding hf token Signed-off-by: mikelam-us-aixplain * Correcting first test Signed-off-by: mikelam-us-aixplain * Testing Signed-off-by: mikelam-us-aixplain * Adding config Signed-off-by: mikelam-us-aixplain * Added user doc Signed-off-by: mikelam-us-aixplain * Added gated model test Signed-off-by: mikelam-us-aixplain --------- Signed-off-by: mikelam-us-aixplain Signed-off-by: mikelam-us * Hf deployment test (#118) * Started adding Hugging Face deployment to aiXplain SDK Signed-off-by: mikelam-us-aixplain * Added model status function to SDK Signed-off-by: mikelam-us-aixplain * Updating Signed-off-by: mikelam-us-aixplain * Updated CLI Signed-off-by: mikelam-us * Create bounds for FineTune hyperparameters (#103) * Test bound to hyperparameters * Update finetune llm hyperparameters * Remove option to use PEFT, always on use now * Fixing pipeline general asset test (#106) * Adding CLI Signed-off-by: mikelam-us-aixplain * Corrected request error Signed-off-by: mikelam-us-aixplain * Clearing out unnecessary information in return Signed-off-by: mikelam-us-aixplain * Adding status Signed-off-by: mikelam-us-aixplain * Simplifying status Signed-off-by: mikelam-us-aixplain * Adding tests and correcting tokens Signed-off-by: mikelam-us-aixplain * Added bad repo ID test Signed-off-by: mikelam-us-aixplain * Finished rough draft of tests Signed-off-by: mikelam-us-aixplain * Adding tests Signed-off-by: mikelam-us-aixplain * Fixing hf token Signed-off-by: mikelam-us-aixplain * Adding hf token Signed-off-by: mikelam-us-aixplain * Correcting first test Signed-off-by: mikelam-us-aixplain * Testing Signed-off-by: 
mikelam-us-aixplain * Adding config Signed-off-by: mikelam-us-aixplain * Added user doc Signed-off-by: mikelam-us-aixplain * Added gated model test Signed-off-by: mikelam-us-aixplain * Update Finetuner functional tests (#112) * Hf deployment test (#115) * Started adding Hugging Face deployment to aiXplain SDK Signed-off-by: mikelam-us-aixplain * Added model status function to SDK Signed-off-by: mikelam-us-aixplain * Updating Signed-off-by: mikelam-us-aixplain * Updated CLI Signed-off-by: mikelam-us * Adding CLI Signed-off-by: mikelam-us-aixplain * Corrected request error Signed-off-by: mikelam-us-aixplain * Clearing out unnecessary information in return Signed-off-by: mikelam-us-aixplain * Adding status Signed-off-by: mikelam-us-aixplain * Simplifying status Signed-off-by: mikelam-us-aixplain * Adding tests and correcting tokens Signed-off-by: mikelam-us-aixplain * Added bad repo ID test Signed-off-by: mikelam-us-aixplain * Finished rough draft of tests Signed-off-by: mikelam-us-aixplain * Adding tests Signed-off-by: mikelam-us-aixplain * Fixing hf token Signed-off-by: mikelam-us-aixplain * Adding hf token Signed-off-by: mikelam-us-aixplain * Correcting first test Signed-off-by: mikelam-us-aixplain * Testing Signed-off-by: mikelam-us-aixplain * Adding config Signed-off-by: mikelam-us-aixplain * Added user doc Signed-off-by: mikelam-us-aixplain * Added gated model test Signed-off-by: mikelam-us-aixplain --------- Signed-off-by: mikelam-us-aixplain Signed-off-by: mikelam-us * Adding HF token Signed-off-by: mikelam-us-aixplain --------- Signed-off-by: mikelam-us-aixplain Signed-off-by: mikelam-us Co-authored-by: Lucas Pavanelli <86805709+lucas-aixplain@users.noreply.github.com> Co-authored-by: Thiago Castro Ferreira <85182544+thiago-aixplain@users.noreply.github.com> * Merge to test (#124) * Create bounds for FineTune hyperparameters (#103) * Test bound to hyperparameters * Update finetune llm hyperparameters * Remove option to use PEFT, always on use now * Fixing 
pipeline general asset test (#106) * Update Finetuner functional tests (#112) * Hf deployment test (#115) * Started adding Hugging Face deployment to aiXplain SDK Signed-off-by: mikelam-us-aixplain * Added model status function to SDK Signed-off-by: mikelam-us-aixplain * Updating Signed-off-by: mikelam-us-aixplain * Updated CLI Signed-off-by: mikelam-us * Adding CLI Signed-off-by: mikelam-us-aixplain * Corrected request error Signed-off-by: mikelam-us-aixplain * Clearing out unnecessary information in return Signed-off-by: mikelam-us-aixplain * Adding status Signed-off-by: mikelam-us-aixplain * Simplifying status Signed-off-by: mikelam-us-aixplain * Adding tests and correcting tokens Signed-off-by: mikelam-us-aixplain * Added bad repo ID test Signed-off-by: mikelam-us-aixplain * Finished rough draft of tests Signed-off-by: mikelam-us-aixplain * Adding tests Signed-off-by: mikelam-us-aixplain * Fixing hf token Signed-off-by: mikelam-us-aixplain * Adding hf token Signed-off-by: mikelam-us-aixplain * Correcting first test Signed-off-by: mikelam-us-aixplain * Testing Signed-off-by: mikelam-us-aixplain * Adding config Signed-off-by: mikelam-us-aixplain * Added user doc Signed-off-by: mikelam-us-aixplain * Added gated model test Signed-off-by: mikelam-us-aixplain --------- Signed-off-by: mikelam-us-aixplain Signed-off-by: mikelam-us * Hf deployment test (#117) * Started adding Hugging Face deployment to aiXplain SDK Signed-off-by: mikelam-us-aixplain * Added model status function to SDK Signed-off-by: mikelam-us-aixplain * Updating Signed-off-by: mikelam-us-aixplain * Updated CLI Signed-off-by: mikelam-us * Adding CLI Signed-off-by: mikelam-us-aixplain * Corrected request error Signed-off-by: mikelam-us-aixplain * Clearing out unnecessary information in return Signed-off-by: mikelam-us-aixplain * Adding status Signed-off-by: mikelam-us-aixplain * Simplifying status Signed-off-by: mikelam-us-aixplain * Adding tests and correcting tokens Signed-off-by: mikelam-us-aixplain 
* Added bad repo ID test Signed-off-by: mikelam-us-aixplain * Finished rough draft of tests Signed-off-by: mikelam-us-aixplain * Adding tests Signed-off-by: mikelam-us-aixplain * Fixing hf token Signed-off-by: mikelam-us-aixplain * Adding hf token Signed-off-by: mikelam-us-aixplain * Correcting first test Signed-off-by: mikelam-us-aixplain * Testing Signed-off-by: mikelam-us-aixplain * Adding config Signed-off-by: mikelam-us-aixplain * Added user doc Signed-off-by: mikelam-us-aixplain * Added gated model test Signed-off-by: mikelam-us-aixplain * Merge dev to test (#113) * Create bounds for FineTune hyperparameters (#103) * Test bound to hyperparameters * Update finetune llm hyperparameters * Remove option to use PEFT, always on use now * Fixing pipeline general asset test (#106) * Update Finetuner functional tests (#112) --------- Co-authored-by: Thiago Castro Ferreira <85182544+thiago-aixplain@users.noreply.github.com> * Hf deployment test (#114) * Started adding Hugging Face deployment to aiXplain SDK Signed-off-by: mikelam-us-aixplain * Added model status function to SDK Signed-off-by: mikelam-us-aixplain * Updating Signed-off-by: mikelam-us-aixplain * Updated CLI Signed-off-by: mikelam-us * Adding CLI Signed-off-by: mikelam-us-aixplain * Corrected request error Signed-off-by: mikelam-us-aixplain * Clearing out unnecessary information in return Signed-off-by: mikelam-us-aixplain * Adding status Signed-off-by: mikelam-us-aixplain * Simplifying status Signed-off-by: mikelam-us-aixplain * Adding tests and correcting tokens Signed-off-by: mikelam-us-aixplain * Added bad repo ID test Signed-off-by: mikelam-us-aixplain * Finished rough draft of tests Signed-off-by: mikelam-us-aixplain * Adding tests Signed-off-by: mikelam-us-aixplain * Fixing hf token Signed-off-by: mikelam-us-aixplain * Adding hf token Signed-off-by: mikelam-us-aixplain * Correcting first test Signed-off-by: mikelam-us-aixplain * Testing Signed-off-by: mikelam-us-aixplain * Adding config 
Signed-off-by: mikelam-us-aixplain * Added user doc Signed-off-by: mikelam-us-aixplain * Added gated model test Signed-off-by: mikelam-us-aixplain --------- Signed-off-by: mikelam-us-aixplain Signed-off-by: mikelam-us * Adding HF token Signed-off-by: mikelam-us-aixplain --------- Signed-off-by: mikelam-us-aixplain Signed-off-by: mikelam-us Co-authored-by: Lucas Pavanelli <86805709+lucas-aixplain@users.noreply.github.com> Co-authored-by: Thiago Castro Ferreira <85182544+thiago-aixplain@users.noreply.github.com> * Do not download textual URLs (#120) * Do not download textual URLs * Treat as string --------- Co-authored-by: Thiago Castro Ferreira * Enable api key parameter in data asset creation (#122) Co-authored-by: Thiago Castro Ferreira --------- Signed-off-by: mikelam-us-aixplain Signed-off-by: mikelam-us Co-authored-by: Lucas Pavanelli <86805709+lucas-aixplain@users.noreply.github.com> Co-authored-by: Thiago Castro Ferreira <85182544+thiago-aixplain@users.noreply.github.com> Co-authored-by: mikelam-us-aixplain <131073216+mikelam-us-aixplain@users.noreply.github.com> Co-authored-by: Thiago Castro Ferreira * Merge dev to test (#126) * Create bounds for FineTune hyperparameters (#103) * Test bound to hyperparameters * Update finetune llm hyperparameters * Remove option to use PEFT, always on use now * Fixing pipeline general asset test (#106) * Update Finetuner functional tests (#112) * Hf deployment test (#115) * Started adding Hugging Face deployment to aiXplain SDK Signed-off-by: mikelam-us-aixplain * Added model status function to SDK Signed-off-by: mikelam-us-aixplain * Updating Signed-off-by: mikelam-us-aixplain * Updated CLI Signed-off-by: mikelam-us * Adding CLI Signed-off-by: mikelam-us-aixplain * Corrected request error Signed-off-by: mikelam-us-aixplain * Clearing out unnecessary information in return Signed-off-by: mikelam-us-aixplain * Adding status Signed-off-by: mikelam-us-aixplain * Simplifying status Signed-off-by: mikelam-us-aixplain * Adding 
tests and correcting tokens Signed-off-by: mikelam-us-aixplain * Added bad repo ID test Signed-off-by: mikelam-us-aixplain * Finished rough draft of tests Signed-off-by: mikelam-us-aixplain * Adding tests Signed-off-by: mikelam-us-aixplain * Fixing hf token Signed-off-by: mikelam-us-aixplain * Adding hf token Signed-off-by: mikelam-us-aixplain * Correcting first test Signed-off-by: mikelam-us-aixplain * Testing Signed-off-by: mikelam-us-aixplain * Adding config Signed-off-by: mikelam-us-aixplain * Added user doc Signed-off-by: mikelam-us-aixplain * Added gated model test Signed-off-by: mikelam-us-aixplain --------- Signed-off-by: mikelam-us-aixplain Signed-off-by: mikelam-us * Hf deployment test (#117) * Started adding Hugging Face deployment to aiXplain SDK Signed-off-by: mikelam-us-aixplain * Added model status function to SDK Signed-off-by: mikelam-us-aixplain * Updating Signed-off-by: mikelam-us-aixplain * Updated CLI Signed-off-by: mikelam-us * Adding CLI Signed-off-by: mikelam-us-aixplain * Corrected request error Signed-off-by: mikelam-us-aixplain * Clearing out unnecessary information in return Signed-off-by: mikelam-us-aixplain * Adding status Signed-off-by: mikelam-us-aixplain * Simplifying status Signed-off-by: mikelam-us-aixplain * Adding tests and correcting tokens Signed-off-by: mikelam-us-aixplain * Added bad repo ID test Signed-off-by: mikelam-us-aixplain * Finished rough draft of tests Signed-off-by: mikelam-us-aixplain * Adding tests Signed-off-by: mikelam-us-aixplain * Fixing hf token Signed-off-by: mikelam-us-aixplain * Adding hf token Signed-off-by: mikelam-us-aixplain * Correcting first test Signed-off-by: mikelam-us-aixplain * Testing Signed-off-by: mikelam-us-aixplain * Adding config Signed-off-by: mikelam-us-aixplain * Added user doc Signed-off-by: mikelam-us-aixplain * Added gated model test Signed-off-by: mikelam-us-aixplain * Merge dev to test (#113) * Create bounds for FineTune hyperparameters (#103) * Test bound to hyperparameters * 
Update finetune llm hyperparameters * Remove option to use PEFT, always on use now * Fixing pipeline general asset test (#106) * Update Finetuner functional tests (#112) --------- Co-authored-by: Thiago Castro Ferreira <85182544+thiago-aixplain@users.noreply.github.com> * Hf deployment test (#114) * Started adding Hugging Face deployment to aiXplain SDK Signed-off-by: mikelam-us-aixplain * Added model status function to SDK Signed-off-by: mikelam-us-aixplain * Updating Signed-off-by: mikelam-us-aixplain * Updated CLI Signed-off-by: mikelam-us * Adding CLI Signed-off-by: mikelam-us-aixplain * Corrected request error Signed-off-by: mikelam-us-aixplain * Clearing out unnecessary information in return Signed-off-by: mikelam-us-aixplain * Adding status Signed-off-by: mikelam-us-aixplain * Simplifying status Signed-off-by: mikelam-us-aixplain * Adding tests and correcting tokens Signed-off-by: mikelam-us-aixplain * Added bad repo ID test Signed-off-by: mikelam-us-aixplain * Finished rough draft of tests Signed-off-by: mikelam-us-aixplain * Adding tests Signed-off-by: mikelam-us-aixplain * Fixing hf token Signed-off-by: mikelam-us-aixplain * Adding hf token Signed-off-by: mikelam-us-aixplain * Correcting first test Signed-off-by: mikelam-us-aixplain * Testing Signed-off-by: mikelam-us-aixplain * Adding config Signed-off-by: mikelam-us-aixplain * Added user doc Signed-off-by: mikelam-us-aixplain * Added gated model test Signed-off-by: mikelam-us-aixplain --------- Signed-off-by: mikelam-us-aixplain Signed-off-by: mikelam-us * Adding HF token Signed-off-by: mikelam-us-aixplain --------- Signed-off-by: mikelam-us-aixplain Signed-off-by: mikelam-us Co-authored-by: Lucas Pavanelli <86805709+lucas-aixplain@users.noreply.github.com> Co-authored-by: Thiago Castro Ferreira <85182544+thiago-aixplain@users.noreply.github.com> * Do not download textual URLs (#120) * Do not download textual URLs * Treat as string --------- Co-authored-by: Thiago Castro Ferreira * Enable api key 
parameter in data asset creation (#122) Co-authored-by: Thiago Castro Ferreira * Update Finetuner hyperparameters (#125) * Update Finetuner hyperparameters * Change hyperparameters error message --------- Signed-off-by: mikelam-us-aixplain Signed-off-by: mikelam-us Co-authored-by: Thiago Castro Ferreira <85182544+thiago-aixplain@users.noreply.github.com> Co-authored-by: mikelam-us-aixplain <131073216+mikelam-us-aixplain@users.noreply.github.com> Co-authored-by: Thiago Castro Ferreira * Merge dev to test (#129) * Create bounds for FineTune hyperparameters (#103) * Test bound to hyperparameters * Update finetune llm hyperparameters * Remove option to use PEFT, always on use now * Fixing pipeline general asset test (#106) * Update Finetuner functional tests (#112) * Hf deployment test (#115) * Started adding Hugging Face deployment to aiXplain SDK Signed-off-by: mikelam-us-aixplain * Added model status function to SDK Signed-off-by: mikelam-us-aixplain * Updating Signed-off-by: mikelam-us-aixplain * Updated CLI Signed-off-by: mikelam-us * Adding CLI Signed-off-by: mikelam-us-aixplain * Corrected request error Signed-off-by: mikelam-us-aixplain * Clearing out unnecessary information in return Signed-off-by: mikelam-us-aixplain * Adding status Signed-off-by: mikelam-us-aixplain * Simplifying status Signed-off-by: mikelam-us-aixplain * Adding tests and correcting tokens Signed-off-by: mikelam-us-aixplain * Added bad repo ID test Signed-off-by: mikelam-us-aixplain * Finished rough draft of tests Signed-off-by: mikelam-us-aixplain * Adding tests Signed-off-by: mikelam-us-aixplain * Fixing hf token Signed-off-by: mikelam-us-aixplain * Adding hf token Signed-off-by: mikelam-us-aixplain * Correcting first test Signed-off-by: mikelam-us-aixplain * Testing Signed-off-by: mikelam-us-aixplain * Adding config Signed-off-by: mikelam-us-aixplain * Added user doc Signed-off-by: mikelam-us-aixplain * Added gated model test Signed-off-by: mikelam-us-aixplain --------- Signed-off-by: 
mikelam-us-aixplain Signed-off-by: mikelam-us * Hf deployment test (#117) * Started adding Hugging Face deployment to aiXplain SDK Signed-off-by: mikelam-us-aixplain * Added model status function to SDK Signed-off-by: mikelam-us-aixplain * Updating Signed-off-by: mikelam-us-aixplain * Updated CLI Signed-off-by: mikelam-us * Adding CLI Signed-off-by: mikelam-us-aixplain * Corrected request error Signed-off-by: mikelam-us-aixplain * Clearing out unnecessary information in return Signed-off-by: mikelam-us-aixplain * Adding status Signed-off-by: mikelam-us-aixplain * Simplifying status Signed-off-by: mikelam-us-aixplain * Adding tests and correcting tokens Signed-off-by: mikelam-us-aixplain * Added bad repo ID test Signed-off-by: mikelam-us-aixplain * Finished rough draft of tests Signed-off-by: mikelam-us-aixplain * Adding tests Signed-off-by: mikelam-us-aixplain * Fixing hf token Signed-off-by: mikelam-us-aixplain * Adding hf token Signed-off-by: mikelam-us-aixplain * Correcting first test Signed-off-by: mikelam-us-aixplain * Testing Signed-off-by: mikelam-us-aixplain * Adding config Signed-off-by: mikelam-us-aixplain * Added user doc Signed-off-by: mikelam-us-aixplain * Added gated model test Signed-off-by: mikelam-us-aixplain * Merge dev to test (#113) * Create bounds for FineTune hyperparameters (#103) * Test bound to hyperparameters * Update finetune llm hyperparameters * Remove option to use PEFT, always on use now * Fixing pipeline general asset test (#106) * Update Finetuner functional tests (#112) --------- Co-authored-by: Thiago Castro Ferreira <85182544+thiago-aixplain@users.noreply.github.com> * Hf deployment test (#114) * Started adding Hugging Face deployment to aiXplain SDK Signed-off-by: mikelam-us-aixplain * Added model status function to SDK Signed-off-by: mikelam-us-aixplain * Updating Signed-off-by: mikelam-us-aixplain * Updated CLI Signed-off-by: mikelam-us * Adding CLI Signed-off-by: mikelam-us-aixplain * Corrected request error Signed-off-by: 
mikelam-us-aixplain * Clearing out unnecessary information in return Signed-off-by: mikelam-us-aixplain * Adding status Signed-off-by: mikelam-us-aixplain * Simplifying status Signed-off-by: mikelam-us-aixplain * Adding tests and correcting tokens Signed-off-by: mikelam-us-aixplain * Added bad repo ID test Signed-off-by: mikelam-us-aixplain * Finished rough draft of tests Signed-off-by: mikelam-us-aixplain * Adding tests Signed-off-by: mikelam-us-aixplain * Fixing hf token Signed-off-by: mikelam-us-aixplain * Adding hf token Signed-off-by: mikelam-us-aixplain * Correcting first test Signed-off-by: mikelam-us-aixplain * Testing Signed-off-by: mikelam-us-aixplain * Adding config Signed-off-by: mikelam-us-aixplain * Added user doc Signed-off-by: mikelam-us-aixplain * Added gated model test Signed-off-by: mikelam-us-aixplain --------- Signed-off-by: mikelam-us-aixplain Signed-off-by: mikelam-us * Adding HF token Signed-off-by: mikelam-us-aixplain --------- Signed-off-by: mikelam-us-aixplain Signed-off-by: mikelam-us Co-authored-by: Lucas Pavanelli <86805709+lucas-aixplain@users.noreply.github.com> Co-authored-by: Thiago Castro Ferreira <85182544+thiago-aixplain@users.noreply.github.com> * Do not download textual URLs (#120) * Do not download textual URLs * Treat as string --------- Co-authored-by: Thiago Castro Ferreira * Enable api key parameter in data asset creation (#122) Co-authored-by: Thiago Castro Ferreira * Update Finetuner hyperparameters (#125) * Update Finetuner hyperparameters * Change hyperparameters error message * Add new LLMs finetuner models (mistral and solar) (#128) --------- Signed-off-by: mikelam-us-aixplain Signed-off-by: mikelam-us Co-authored-by: Thiago Castro Ferreira <85182544+thiago-aixplain@users.noreply.github.com> Co-authored-by: mikelam-us-aixplain <131073216+mikelam-us-aixplain@users.noreply.github.com> Co-authored-by: Thiago Castro Ferreira --------- Signed-off-by: mikelam-us-aixplain Signed-off-by: mikelam-us Co-authored-by: Lucas 
Pavanelli <86805709+lucas-aixplain@users.noreply.github.com> Co-authored-by: Thiago Castro Ferreira <85182544+thiago-aixplain@users.noreply.github.com> Co-authored-by: mikelam-us-aixplain <131073216+mikelam-us-aixplain@users.noreply.github.com> Co-authored-by: Thiago Castro Ferreira * Downgraded click Signed-off-by: mikelam-us-aixplain --------- Signed-off-by: mikelam-us-aixplain Signed-off-by: mikelam-us Co-authored-by: ikxplain <88332269+ikxplain@users.noreply.github.com> Co-authored-by: Lucas Pavanelli <86805709+lucas-aixplain@users.noreply.github.com> Co-authored-by: Thiago Castro Ferreira <85182544+thiago-aixplain@users.noreply.github.com> Co-authored-by: Thiago Castro Ferreira --- aixplain/modules/finetune/hyperparameters.py | 2 +- pyproject.toml | 2 +- tests/unit/hyperparameters_test.py | 1 - 3 files changed, 2 insertions(+), 3 deletions(-) diff --git a/aixplain/modules/finetune/hyperparameters.py b/aixplain/modules/finetune/hyperparameters.py index 915a5b27..5bcba682 100644 --- a/aixplain/modules/finetune/hyperparameters.py +++ b/aixplain/modules/finetune/hyperparameters.py @@ -67,4 +67,4 @@ def __post_init__(self): raise ValueError(f"eval_batch_size must be one of the following values: {BATCH_SIZE_VALUES}") if self.max_seq_length > MAX_SEQ_LENGTH_MAX_VALUE: - raise ValueError(f"max_seq_length must be less or equal to {MAX_SEQ_LENGTH_MAX_VALUE}") + raise ValueError(f"max_seq_length must be less or equal to {MAX_SEQ_LENGTH_MAX_VALUE}") \ No newline at end of file diff --git a/pyproject.toml b/pyproject.toml index 22320925..0691c76c 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -49,7 +49,7 @@ dependencies = [ "python-dotenv>=1.0.0", "validators>=0.20.0", "filetype>=1.2.0", - "click>=8.1.7", + "click>=7.1.2,<8.0.0", "PyYAML>=6.0.1", "dataclasses-json==0.6.1" ] diff --git a/tests/unit/hyperparameters_test.py b/tests/unit/hyperparameters_test.py index 268c68a0..d64a3c0e 100644 --- a/tests/unit/hyperparameters_test.py +++ 
b/tests/unit/hyperparameters_test.py @@ -16,7 +16,6 @@ limitations under the License. """ - from dotenv import load_dotenv load_dotenv() From 34861a31418d46053e405bc786e68a1658e46afa Mon Sep 17 00:00:00 2001 From: Shreyas Sharma <85180538+shreyasXplain@users.noreply.github.com> Date: Tue, 19 Mar 2024 02:20:55 +0530 Subject: [PATCH 15/19] M 5905660469 enhance benchmark job response (#145) * added fetching score * add simplified benchmark job scores * add getting failuire rates * added explanations to benchmark * temp push 1 --- aixplain/factories/benchmark_factory.py | 22 ++++ aixplain/modules/benchmark_job.py | 129 ++++++++++++++++++++++++ aixplain/modules/metric.py | 2 +- pyproject.toml | 2 +- 4 files changed, 153 insertions(+), 2 deletions(-) diff --git a/aixplain/factories/benchmark_factory.py b/aixplain/factories/benchmark_factory.py index 88d2411b..57d4a833 100644 --- a/aixplain/factories/benchmark_factory.py +++ b/aixplain/factories/benchmark_factory.py @@ -26,6 +26,7 @@ import json import pandas as pd from pathlib import Path +from aixplain.enums.supplier import Supplier from aixplain.modules import Dataset, Metric, Model from aixplain.modules.benchmark_job import BenchmarkJob from aixplain.modules.benchmark import Benchmark @@ -237,3 +238,24 @@ def list_normalization_options(cls, metric: Metric, model: Model) -> List[str]: error_message = f"Listing Normalization Options: Error in getting Normalization Options: {e}" logging.error(error_message, exc_info=True) return [] + + @classmethod + def get_benchmark_job_scores(cls, job_id): + def __get_model_name(model_id): + model = ModelFactory.get(model_id) + supplier = str(model.supplier) + try: + if isinstance(supplier, Supplier): + name = f"{supplier.name}" + else: + name = f"{eval(supplier)['name']}" + except Exception as e: + logging.error(f"{e}") + name = f"{supplier}" + if model.version is not None: + name = f"{name}({model.version})" + return name + benchmarkJob = cls.get_job(job_id) + scores_df = 
benchmarkJob.get_scores() + scores_df["Model"] = scores_df["Model"].apply(lambda x: __get_model_name(x)) + return scores_df \ No newline at end of file diff --git a/aixplain/modules/benchmark_job.py b/aixplain/modules/benchmark_job.py index 8531127a..f0506787 100644 --- a/aixplain/modules/benchmark_job.py +++ b/aixplain/modules/benchmark_job.py @@ -92,3 +92,132 @@ def download_results_as_csv(self, save_path: Optional[Text] = None, return_dataf error_message = f"Downloading Benchmark Results: Error in Downloading Benchmark Results : {e}" logging.error(error_message, exc_info=True) raise Exception(error_message) + + def __simplify_scores(self, scores): + simplified_score_list = [] + for model_id, model_info in scores.items(): + model_scores = model_info["rawScores"] + # model = Mode + row = {"Model": model_id} + for score_info in model_scores: + row[score_info["longName"]] = score_info["average"] + simplified_score_list.append(row) + return simplified_score_list + + + + + def get_scores(self, return_simplified=True, return_as_dataframe=True): + ## Temp + temp_data = [ + {"Model":"Llama 2 7b", "Score": 0.714}, + {"Model":"Llama 2 7b (Finetuned)", "Score": 0.742}, + ] + return pd.DataFrame(temp_data) + + try: + resp = self._fetch_current_response(self.id) + iterations = resp.get("iterations", []) + scores = {} + for iteration_info in iterations: + model_id = iteration_info["pipeline"] + model_info = { + "creditsUsed" : round(iteration_info.get("credits", 0),5), + "timeSpent" : round(iteration_info.get("runtime", 0),2), + "status" : iteration_info["status"], + "rawScores" : iteration_info["scores"], + } + scores[model_id] = model_info + + if return_simplified: + simplified_scores = self.__simplify_scores(scores) + if return_as_dataframe: + simplified_scores = pd.DataFrame(simplified_scores) + return simplified_scores + else: + return scores + except Exception as e: + error_message = f"Benchmark scores: Error in Getting benchmark scores: {e}" + 
logging.error(error_message, exc_info=True) + raise Exception(error_message) + + + def get_failuire_rate(self, return_as_dataframe=True): + try: + scores = self.get_scores(return_simplified=False) + failure_rates = {} + for model_id, model_info in scores.items(): + if len(model_info["rawScores"]) == 0: + failure_rates[model_id] = 0 + continue + score_info = model_info["rawScores"][0] + num_succesful = score_info["count"] + num_failed = score_info["failedSegmentsCount"] + failuire_rate = (num_failed * 100) / (num_succesful+num_failed) + failure_rates[model_id] = failuire_rate + if return_as_dataframe: + df = pd.DataFrame() + df["Model"] = list(failure_rates.keys()) + df["Failuire Rate"] = list(failure_rates.values()) + return df + else: + return failure_rates + except Exception as e: + error_message = f"Benchmark scores: Error in Getting benchmark failuire rate: {e}" + logging.error(error_message, exc_info=True) + raise Exception(error_message) + + def get_all_explanations(self): + try: + resp = self._fetch_current_response(self) + raw_explanations = resp.get("explanation", {}) + if "metricInDependent" not in raw_explanations: + raw_explanations["metricInDependent"] = [] + if "metricDependent" not in raw_explanations: + raw_explanations["metricDependent"] = [] + return raw_explanations + except Exception as e: + error_message = f"Benchmark scores: Error in Getting benchmark explanations: {e}" + logging.error(error_message, exc_info=True) + raise Exception(error_message) + + def get_localized_explanations(self, metric_dependant: bool, group_by_task: bool = False): + try: + raw_explanations = self.get_all_explanations() + if metric_dependant: + localized_explanations = raw_explanations["metricDependent"] + if len(localized_explanations) == 0: + localized_explanations = {} + else: + grouped_explanations = {} + task_list = [] + first_explanation = localized_explanations[0] + for task in first_explanation: + if task not in ["scoreId", "datasetId"]: + 
task_list.append(task) + + if group_by_task: + for task in task_list: + task_explanation = {} + for explanation_item in localized_explanations: + item_task_explanation = explanation_item[task] + identifier = explanation_item["scoreId"] + task_explanation[identifier] = item_task_explanation + grouped_explanations[task] = task_explanation + else: + for explanation_item in localized_explanations: + identifier = explanation_item["scoreId"] + grouped_explanations[identifier] = explanation_item + localized_explanations = grouped_explanations + else: + localized_explanations = raw_explanations["metricInDependent"] + if len(localized_explanations) == 0: + localized_explanations = {} + else: + localized_explanations = localized_explanations[0] + return localized_explanations + + except Exception as e: + error_message = f"Benchmark scores: Error in Getting benchmark explanations: {e}" + logging.error(error_message, exc_info=True) + raise Exception(error_message) \ No newline at end of file diff --git a/aixplain/modules/metric.py b/aixplain/modules/metric.py index 8d8844f0..a20fac07 100644 --- a/aixplain/modules/metric.py +++ b/aixplain/modules/metric.py @@ -24,7 +24,7 @@ from typing import Optional, Text, List, Union from aixplain.modules.asset import Asset from aixplain.utils.file_utils import _request_with_retry -from aixplain.factories.model_factory import ModelFactory +# from aixplain.factories.model_factory import ModelFactory class Metric(Asset): diff --git a/pyproject.toml b/pyproject.toml index 0691c76c..9ad67878 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -10,7 +10,7 @@ namespaces = true [project] name = "aiXplain" -version = "0.2.4" +version = "0.2.5rc" description = "aiXplain SDK adds AI functions to software." 
readme = "README.md" requires-python = ">=3.5, <4" From 6fc964608ba770a6653068fc5e28cea8cb9eee90 Mon Sep 17 00:00:00 2001 From: Shreyas Sharma <85180538+shreyasXplain@users.noreply.github.com> Date: Tue, 19 Mar 2024 17:55:44 +0530 Subject: [PATCH 16/19] M 5905660469 enhance benchmark job response (#146) * added fetching score * add simplified benchmark job scores * add getting failuire rates * added explanations to benchmark * temp push 1 * undo temp change --- aixplain/modules/benchmark_job.py | 7 ------- 1 file changed, 7 deletions(-) diff --git a/aixplain/modules/benchmark_job.py b/aixplain/modules/benchmark_job.py index f0506787..7dae2d96 100644 --- a/aixplain/modules/benchmark_job.py +++ b/aixplain/modules/benchmark_job.py @@ -108,13 +108,6 @@ def __simplify_scores(self, scores): def get_scores(self, return_simplified=True, return_as_dataframe=True): - ## Temp - temp_data = [ - {"Model":"Llama 2 7b", "Score": 0.714}, - {"Model":"Llama 2 7b (Finetuned)", "Score": 0.742}, - ] - return pd.DataFrame(temp_data) - try: resp = self._fetch_current_response(self.id) iterations = resp.get("iterations", []) From b4e5b6795cab7ae31beab4a3e2ac0809d1e070f6 Mon Sep 17 00:00:00 2001 From: Thiago Castro Ferreira <85182544+thiago-aixplain@users.noreply.github.com> Date: Tue, 19 Mar 2024 11:31:20 -0300 Subject: [PATCH 17/19] New pipeline functional tests (#143) * New pipeline functional tests * Fixe on attribute name * Fixing test cases --------- Co-authored-by: Thiago Castro Ferreira --- tests/functional/pipelines/run_test.py | 58 ++++++++++++++++++++++++++ 1 file changed, 58 insertions(+) diff --git a/tests/functional/pipelines/run_test.py b/tests/functional/pipelines/run_test.py index 99e66a10..e7af6c4e 100644 --- a/tests/functional/pipelines/run_test.py +++ b/tests/functional/pipelines/run_test.py @@ -107,3 +107,61 @@ def test_run_multipipe_with_datasets(batchmode: bool): **{"batchmode": batchmode} ) assert response["status"] == "SUCCESS" + + 
+@pytest.mark.parametrize("batchmode", [True, False]) +def test_run_segment_reconstruct(batchmode: bool): + pipeline = PipelineFactory.list(query="Segmentation/Reconstruction Functional Test - DO NOT DELETE")["results"][0] + response = pipeline.run("https://aixplain-platform-assets.s3.amazonaws.com/samples/en/CPAC1x2.wav", **{"batchmode": batchmode}) + + assert response["status"] == "SUCCESS" + output = response["data"][0] + assert output["label"] == "Output 1" + + +@pytest.mark.parametrize("batchmode", [True, False]) +def test_run_metric(batchmode: bool): + pipeline = PipelineFactory.list(query="ASR Metric Functional Test - DO NOT DELETE")["results"][0] + response = pipeline.run({ + "AudioInput": "https://aixplain-platform-assets.s3.amazonaws.com/samples/en/CPAC1x2.wav", + "ReferenceInput": "https://aixplain-platform-assets.s3.amazonaws.com/samples/en/CPAC1x2.txt" + }, **{"batchmode": batchmode}) + + assert response["status"] == "SUCCESS" + assert len(response["data"]) == 2 + assert response["data"][0]["label"] in ["TranscriptOutput", "ScoreOutput"] + assert response["data"][1]["label"] in ["TranscriptOutput", "ScoreOutput"] + + +@pytest.mark.parametrize( + "batchmode,input_data,output_data", + [ + (True, "https://aixplain-platform-assets.s3.amazonaws.com/samples/en/CPAC1x2.wav", "AudioOutput"), + (False, "https://aixplain-platform-assets.s3.amazonaws.com/samples/en/CPAC1x2.wav", "AudioOutput"), + (True, "https://aixplain-platform-assets.s3.amazonaws.com/samples/en/CPAC1x2.txt", "TextOutput"), + (False, "https://aixplain-platform-assets.s3.amazonaws.com/samples/en/CPAC1x2.txt", "TextOutput") + ] +) +def test_run_router(batchmode: bool, input_data: str, output_data: str): + pipeline = PipelineFactory.list(query="Router Test - DO NOT DELETE")["results"][0] + response = pipeline.run(input_data, **{"batchmode": batchmode}) + + assert response["status"] == "SUCCESS" + assert response["data"][0]["label"] == output_data + + +@pytest.mark.parametrize( + 
"batchmode,input_data,output_data", + [ + (True, "I love it.", "PositiveOutput"), + (False, "I love it.", "PositiveOutput"), + (True, "I hate it.", "NegativeOutput"), + (False, "I hate it.", "NegativeOutput") + ] +) +def test_run_decision(batchmode: bool, input_data: str, output_data: str): + pipeline = PipelineFactory.list(query="Decision Test - DO NOT DELETE")["results"][0] + response = pipeline.run(input_data, **{"batchmode": batchmode}) + + assert response["status"] == "SUCCESS" + assert response["data"][0]["label"] == output_data \ No newline at end of file From 873cb6aee1c311b94a9ef7e33cb7b548284a86e3 Mon Sep 17 00:00:00 2001 From: Thiago Castro Ferreira <85182544+thiago-aixplain@users.noreply.github.com> Date: Thu, 21 Mar 2024 15:28:36 -0300 Subject: [PATCH 18/19] M 6107719447 check finetuner status (#133) * Finetune status object * New finetune status checker * Covering some None cases * Finetune status checker updates and new unit tests --------- Co-authored-by: Thiago Castro Ferreira Co-authored-by: Thiago Castro Ferreira Co-authored-by: Thiago Castro Ferreira --- aixplain/enums/asset_status.py | 43 +++++++++++++++ aixplain/modules/__init__.py | 1 + aixplain/modules/finetune/status.py | 36 +++++++++++++ aixplain/modules/metric.py | 2 + aixplain/modules/model.py | 53 ++++++++++++++++--- tests/unit/finetune_test.py | 28 +++++++--- .../finetune_status_response.json | 41 ++++++++++++++ .../finetune_status_response_2.json | 49 +++++++++++++++++ 8 files changed, 240 insertions(+), 13 deletions(-) create mode 100644 aixplain/enums/asset_status.py create mode 100644 aixplain/modules/finetune/status.py create mode 100644 tests/unit/mock_responses/finetune_status_response.json create mode 100644 tests/unit/mock_responses/finetune_status_response_2.json diff --git a/aixplain/enums/asset_status.py b/aixplain/enums/asset_status.py new file mode 100644 index 00000000..134af26e --- /dev/null +++ b/aixplain/enums/asset_status.py @@ -0,0 +1,43 @@ +__author__ = 
"thiagocastroferreira" + +""" +Copyright 2024 The aiXplain SDK authors + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. + +Author: Duraikrishna Selvaraju, Thiago Castro Ferreira, Shreyas Sharma and Lucas Pavanelli +Date: February 21st 2024 +Description: + Asset Enum +""" + +from enum import Enum +from typing import Text + +class AssetStatus(Text, Enum): + HIDDEN = 'hidden' + SCHEDULED = 'scheduled' + ONBOARDING = 'onboarding' + ONBOARDED = 'onboarded' + PENDING = 'pending' + FAILED = 'failed' + TRAINING = 'training' + REJECTED = 'rejected' + ENABLING = 'enabling' + DELETING = 'deleting' + DISABLED = 'disabled' + DELETED = 'deleted' + IN_PROGRESS = 'in_progress' + COMPLETED = 'completed' + CANCELING = 'canceling' + CANCELED = 'canceled' \ No newline at end of file diff --git a/aixplain/modules/__init__.py b/aixplain/modules/__init__.py index 0902eaf4..bb9e696b 100644 --- a/aixplain/modules/__init__.py +++ b/aixplain/modules/__init__.py @@ -29,5 +29,6 @@ from .model import Model from .pipeline import Pipeline from .finetune import Finetune, FinetuneCost +from .finetune.status import FinetuneStatus from .benchmark import Benchmark from .benchmark_job import BenchmarkJob diff --git a/aixplain/modules/finetune/status.py b/aixplain/modules/finetune/status.py new file mode 100644 index 00000000..4994ce55 --- /dev/null +++ b/aixplain/modules/finetune/status.py @@ -0,0 +1,36 @@ +__author__ = "thiagocastroferreira" + +""" +Copyright 2024 The aiXplain SDK authors + +Licensed under the Apache License, 
Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. + +Author: Duraikrishna Selvaraju, Thiago Castro Ferreira, Shreyas Sharma and Lucas Pavanelli +Date: February 21st 2024 +Description: + FinetuneStatus Class +""" + +from aixplain.enums.asset_status import AssetStatus +from dataclasses import dataclass +from dataclasses_json import dataclass_json +from typing import Optional, Text + +@dataclass_json +@dataclass +class FinetuneStatus(object): + status: "AssetStatus" + model_status: "AssetStatus" + epoch: Optional[float] = None + training_loss: Optional[float] = None + validation_loss: Optional[float] = None diff --git a/aixplain/modules/metric.py b/aixplain/modules/metric.py index a20fac07..a3e3d646 100644 --- a/aixplain/modules/metric.py +++ b/aixplain/modules/metric.py @@ -23,6 +23,7 @@ from typing import Optional, Text, List, Union from aixplain.modules.asset import Asset + from aixplain.utils.file_utils import _request_with_retry # from aixplain.factories.model_factory import ModelFactory @@ -92,6 +93,7 @@ def run(self, hypothesis: Optional[Union[str, List[str]]]=None, source: Optional source (Optional[Union[str, List[str]]], optional): Can give a single source or a list of sources for metric calculation. Defaults to None. reference (Optional[Union[str, List[str]]], optional): Can give a single reference or a list of references for metric calculation. Defaults to None.
""" + from aixplain.factories.model_factory import ModelFactory model = ModelFactory.get(self.id) payload = { "function": self.function, diff --git a/aixplain/modules/model.py b/aixplain/modules/model.py index 0804af29..fc3a82cd 100644 --- a/aixplain/modules/model.py +++ b/aixplain/modules/model.py @@ -20,7 +20,6 @@ Description: Model Class """ - import time import json import logging @@ -251,23 +250,65 @@ def run_async(self, data: Union[Text, Dict], name: Text = "model_process", param response["error"] = msg return response - def check_finetune_status(self): + def check_finetune_status(self, after_epoch: Optional[int] = None): """Check the status of the FineTune model. + Args: + after_epoch (Optional[int], optional): status after a given epoch. Defaults to None. + Raises: Exception: If the 'TEAM_API_KEY' is not provided. Returns: - str: The status of the FineTune model. + FinetuneStatus: The status of the FineTune model. """ + from aixplain.enums.asset_status import AssetStatus + from aixplain.modules.finetune.status import FinetuneStatus headers = {"x-api-key": self.api_key, "Content-Type": "application/json"} + resp = None try: - url = urljoin(self.backend_url, f"sdk/models/{self.id}") + url = urljoin(self.backend_url, f"sdk/finetune/{self.id}/ml-logs") logging.info(f"Start service for GET Check FineTune status Model - {url} - {headers}") r = _request_with_retry("get", url, headers=headers) resp = r.json() - status = resp["status"] - logging.info(f"Response for GET Check FineTune status Model - Id {self.id} / Status {status}.") + finetune_status = AssetStatus(resp["finetuneStatus"]) + model_status = AssetStatus(resp["modelStatus"]) + logs = sorted(resp["logs"], key=lambda x: float(x["epoch"])) + + target_epoch = None + if after_epoch is not None: + logs = [log for log in logs if float(log["epoch"]) > after_epoch] + if len(logs) > 0: + target_epoch = float(logs[0]["epoch"]) + elif len(logs) > 0: + target_epoch = float(logs[-1]["epoch"]) + + if target_epoch is not 
None: + log = None + for log_ in logs: + if int(log_["epoch"]) == target_epoch: + if log is None: + log = log_ + else: + if log_["trainLoss"] is not None: + log["trainLoss"] = log_["trainLoss"] + if log_["evalLoss"] is not None: + log["evalLoss"] = log_["evalLoss"] + + status = FinetuneStatus( + status=finetune_status, + model_status=model_status, + epoch=float(log["epoch"]) if "epoch" in log and log["epoch"] is not None else None, + training_loss=float(log["trainLoss"]) if "trainLoss" in log and log["trainLoss"] is not None else None, + validation_loss=float(log["evalLoss"]) if "evalLoss" in log and log["evalLoss"] is not None else None, + ) + else: + status = FinetuneStatus( + status=finetune_status, + model_status=model_status, + ) + + logging.info(f"Response for GET Check FineTune status Model - Id {self.id} / Status {status.status.value}.") return status except Exception as e: message = "" diff --git a/tests/unit/finetune_test.py b/tests/unit/finetune_test.py index d95089ea..13287c32 100644 --- a/tests/unit/finetune_test.py +++ b/tests/unit/finetune_test.py @@ -29,6 +29,7 @@ from aixplain.modules import Model, Finetune from aixplain.modules.finetune import Hyperparameters from aixplain.enums import Function +from urllib.parse import urljoin import pytest @@ -37,6 +38,8 @@ COST_ESTIMATION_FILE = "tests/unit/mock_responses/cost_estimation_response.json" FINETUNE_URL = f"{config.BACKEND_URL}/sdk/finetune" FINETUNE_FILE = "tests/unit/mock_responses/finetune_response.json" +FINETUNE_STATUS_FILE = "tests/unit/mock_responses/finetune_status_response.json" +FINETUNE_STATUS_FILE_2 = "tests/unit/mock_responses/finetune_status_response_2.json" PERCENTAGE_EXCEPTION_FILE = "tests/unit/data/create_finetune_percentage_exception.json" MODEL_FILE = "tests/unit/mock_responses/model_response.json" MODEL_URL = f"{config.BACKEND_URL}/sdk/models" @@ -106,16 +109,27 @@ def test_start(): assert fine_tuned_model is not None assert fine_tuned_model.id == model_map["id"] - -def 
test_check_finetuner_status(): - model_map = read_data(MODEL_FILE) +@pytest.mark.parametrize( + "input_path,after_epoch,training_loss,validation_loss", + [ + (FINETUNE_STATUS_FILE, None, 0.4, 0.0217), + (FINETUNE_STATUS_FILE, 1, 0.2, 0.0482), + (FINETUNE_STATUS_FILE_2, None, 2.657801408034, 2.596168756485), + (FINETUNE_STATUS_FILE_2, 0, None, 2.684150457382) + ] +) +def test_check_finetuner_status(input_path, after_epoch, training_loss, validation_loss): + model_map = read_data(input_path) asset_id = "test_id" with requests_mock.Mocker() as mock: test_model = Model(asset_id, "") - url = f"{MODEL_URL}/{asset_id}" + url = urljoin(config.BACKEND_URL, f"sdk/finetune/{asset_id}/ml-logs") mock.get(url, headers=FIXED_HEADER, json=model_map) - status = test_model.check_finetune_status() - assert status == model_map["status"] + status = test_model.check_finetune_status(after_epoch=after_epoch) + assert status.status.value == model_map["finetuneStatus"] + assert status.model_status.value == model_map["modelStatus"] + assert status.training_loss == training_loss + assert status.validation_loss == validation_loss @pytest.mark.parametrize("is_finetunable", [True, False]) @@ -132,4 +146,4 @@ def test_list_finetunable_models(is_finetunable): model_list = result["results"] assert len(model_list) > 0 for model_index in range(len(model_list)): - assert model_list[model_index].id == list_map["items"][model_index]["id"] + assert model_list[model_index].id == list_map["items"][model_index]["id"] \ No newline at end of file diff --git a/tests/unit/mock_responses/finetune_status_response.json b/tests/unit/mock_responses/finetune_status_response.json new file mode 100644 index 00000000..9647b164 --- /dev/null +++ b/tests/unit/mock_responses/finetune_status_response.json @@ -0,0 +1,41 @@ +{ + "finetuneStatus": "onboarding", + "modelStatus": "onboarded", + "logs": [ + { + "epoch": 1, + "learningRate": 9.938725490196079e-05, + "trainLoss": 0.1, + "evalLoss": 0.1106, + "step": 10 + }, + { + 
"epoch": 2, + "learningRate": 9.877450980392157e-05, + "trainLoss": 0.2, + "evalLoss": 0.0482, + "step": 20 + }, + { + "epoch": 3, + "learningRate": 9.816176470588235e-05, + "trainLoss": 0.3, + "evalLoss": 0.0251, + "step": 30 + }, + { + "epoch": 4, + "learningRate": 9.754901960784314e-05, + "trainLoss": 0.9, + "evalLoss": 0.0228, + "step": 40 + }, + { + "epoch": 5, + "learningRate": 9.693627450980392e-05, + "trainLoss": 0.4, + "evalLoss": 0.0217, + "step": 50 + } + ] +} \ No newline at end of file diff --git a/tests/unit/mock_responses/finetune_status_response_2.json b/tests/unit/mock_responses/finetune_status_response_2.json new file mode 100644 index 00000000..ea5814a0 --- /dev/null +++ b/tests/unit/mock_responses/finetune_status_response_2.json @@ -0,0 +1,49 @@ +{ + "id": "65fb26268fe9153a6c9c29c4", + "finetuneStatus": "in_progress", + "modelStatus": "training", + "logs": [ + { + "epoch": 1, + "learningRate": null, + "trainLoss": null, + "validationLoss": null, + "step": null, + "evalLoss": 2.684150457382, + "totalFlos": null, + "evalRuntime": 12.4129, + "trainRuntime": null, + "evalStepsPerSecond": 0.322, + "trainStepsPerSecond": null, + "evalSamplesPerSecond": 16.112 + }, + { + "epoch": 2, + "learningRate": null, + "trainLoss": null, + "validationLoss": null, + "step": null, + "evalLoss": 2.596168756485, + "totalFlos": null, + "evalRuntime": 11.8249, + "trainRuntime": null, + "evalStepsPerSecond": 0.338, + "trainStepsPerSecond": null, + "evalSamplesPerSecond": 16.913 + }, + { + "epoch": 2, + "learningRate": null, + "trainLoss": 2.657801408034, + "validationLoss": null, + "step": null, + "evalLoss": null, + "totalFlos": 11893948284928, + "evalRuntime": null, + "trainRuntime": 221.7946, + "evalStepsPerSecond": null, + "trainStepsPerSecond": 0.117, + "evalSamplesPerSecond": null + } + ] +} \ No newline at end of file From c2a2cfdaa81a4e5c6c1caed606429fd3c2c3a8ec Mon Sep 17 00:00:00 2001 From: Shreyas Sharma <85180538+shreyasXplain@users.noreply.github.com> Date: 
Fri, 22 Mar 2024 18:08:38 +0530 Subject: [PATCH 19/19] Update metric.py (#147) --- aixplain/modules/metric.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/aixplain/modules/metric.py b/aixplain/modules/metric.py index a3e3d646..04a0bdd7 100644 --- a/aixplain/modules/metric.py +++ b/aixplain/modules/metric.py @@ -25,7 +25,7 @@ from aixplain.modules.asset import Asset from aixplain.utils.file_utils import _request_with_retry -# from aixplain.factories.model_factory import ModelFactory +from aixplain.factories.model_factory import ModelFactory class Metric(Asset):