diff --git a/src/sagemaker/huggingface/llm_utils.py b/src/sagemaker/huggingface/llm_utils.py
index de5e624dbc..974cffcddf 100644
--- a/src/sagemaker/huggingface/llm_utils.py
+++ b/src/sagemaker/huggingface/llm_utils.py
@@ -65,6 +65,13 @@ def get_huggingface_llm_image_uri(
             image_scope="inference",
             inference_tool="neuronx",
         )
+    if backend == "huggingface-tei":
+        return image_uris.retrieve(
+            "huggingface-tei",
+            region=region,
+            version=version,
+            image_scope="inference",
+        )
     if backend == "lmi":
         version = version or "0.24.0"
         return image_uris.retrieve(framework="djl-deepspeed", region=region, version=version)
diff --git a/src/sagemaker/image_uri_config/huggingface-tei.json b/src/sagemaker/image_uri_config/huggingface-tei.json
new file mode 100644
index 0000000000..b7c597df18
--- /dev/null
+++ b/src/sagemaker/image_uri_config/huggingface-tei.json
@@ -0,0 +1,59 @@
+{
+    "inference": {
+        "processors": [
+            "gpu"
+        ],
+        "version_aliases": {
+            "1.2": "1.2.3"
+        },
+        "versions": {
+            "1.2.3": {
+                "py_versions": [
+                    "py310"
+                ],
+                "registries": {
+                    "af-south-1": "510948584623",
+                    "ap-east-1": "651117190479",
+                    "ap-northeast-1": "354813040037",
+                    "ap-northeast-2": "366743142698",
+                    "ap-northeast-3": "867004704886",
+                    "ap-south-1": "720646828776",
+                    "ap-south-2": "628508329040",
+                    "ap-southeast-1": "121021644041",
+                    "ap-southeast-2": "783357654285",
+                    "ap-southeast-3": "951798379941",
+                    "ap-southeast-4": "106583098589",
+                    "ca-central-1": "341280168497",
+                    "ca-west-1": "190319476487",
+                    "cn-north-1": "450853457545",
+                    "cn-northwest-1": "451049120500",
+                    "eu-central-1": "492215442770",
+                    "eu-central-2": "680994064768",
+                    "eu-north-1": "662702820516",
+                    "eu-south-1": "978288397137",
+                    "eu-south-2": "104374241257",
+                    "eu-west-1": "141502667606",
+                    "eu-west-2": "764974769150",
+                    "eu-west-3": "659782779980",
+                    "il-central-1": "898809789911",
+                    "me-central-1": "272398656194",
+                    "me-south-1": "801668240914",
+                    "sa-east-1": "737474898029",
+                    "us-east-1": "683313688378",
+                    "us-east-2": "257758044811",
+                    "us-gov-east-1": "237065988967",
+                    "us-gov-west-1": "414596584902",
+                    "us-iso-east-1": "833128469047",
+                    "us-isob-east-1": "281123927165",
+                    "us-west-1": "746614075791",
+                    "us-west-2": "246618743249"
+                },
+                "tag_prefix": "2.0.1-tei1.2.3",
+                "repository": "tei",
+                "container_version": {
+                    "gpu": "cu122-ubuntu22.04"
+                }
+            }
+        }
+    }
+}
\ No newline at end of file
diff --git a/tests/unit/sagemaker/image_uris/test_huggingface_llm.py b/tests/unit/sagemaker/image_uris/test_huggingface_llm.py
index 5a9662c164..b1e8e8253e 100644
--- a/tests/unit/sagemaker/image_uris/test_huggingface_llm.py
+++ b/tests/unit/sagemaker/image_uris/test_huggingface_llm.py
@@ -18,6 +18,11 @@ from tests.unit.sagemaker.image_uris import expected_uris, conftest
 
 
 LMI_VERSIONS = ["0.24.0"]
+TEI_VERSIONS_MAPPING = {
+    "gpu": {
+        "1.2.3": "2.0.1-tei1.2.3-gpu-py310-cu122-ubuntu22.04",
+    },
+}
 HF_VERSIONS_MAPPING = {
     "gpu": {
         "0.6.0": "2.0.0-tgi0.6.0-gpu-py39-cu118-ubuntu20.04",
@@ -68,6 +73,25 @@ def test_huggingface_uris(load_config):
             assert expected == uri
 
 
+@pytest.mark.parametrize("load_config", ["huggingface-tei.json"], indirect=True)
+def test_huggingface_tei_uris(load_config):
+    VERSIONS = load_config["inference"]["versions"]
+    device = load_config["inference"]["processors"][0]
+    backend = "huggingface-tei"
+    for version in VERSIONS:
+        ACCOUNTS = load_config["inference"]["versions"][version]["registries"]
+        for region in ACCOUNTS.keys():
+            uri = get_huggingface_llm_image_uri(backend, region=region, version=version)
+            expected = expected_uris.huggingface_llm_framework_uri(
+                "tei",
+                ACCOUNTS[region],
+                version,
+                TEI_VERSIONS_MAPPING[device][version],
+                region=region,
+            )
+            assert expected == uri
+
+
 @pytest.mark.parametrize("load_config", ["huggingface-llm.json"], indirect=True)
 def test_lmi_uris(load_config):
     VERSIONS = load_config["inference"]["versions"]