Skip to content

Commit f1bc6cc

Browse files
haixiw and mufaddal-rohawala
authored and committed
feat: onboard tei image config to pysdk (aws#4681)
* feat: onboard tei image config to pysdk
* fix formatting issue
* minor fix func name
* fix unit tests

---------

Co-authored-by: Mufaddal Rohawala <89424143+mufaddal-rohawala@users.noreply.github.com>
1 parent 2eec027 commit f1bc6cc

File tree

3 files changed

+90
-0
lines changed

3 files changed

+90
-0
lines changed

src/sagemaker/huggingface/llm_utils.py

+7
Original file line number | Diff line number | Diff line change
@@ -65,6 +65,13 @@ def get_huggingface_llm_image_uri(
6565
image_scope="inference",
6666
inference_tool="neuronx",
6767
)
68+
if backend == "huggingface-tei":
69+
return image_uris.retrieve(
70+
"huggingface-tei",
71+
region=region,
72+
version=version,
73+
image_scope="inference",
74+
)
6875
if backend == "lmi":
6976
version = version or "0.24.0"
7077
return image_uris.retrieve(framework="djl-deepspeed", region=region, version=version)
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,59 @@
1+
{
2+
"inference": {
3+
"processors": [
4+
"gpu"
5+
],
6+
"version_aliases": {
7+
"1.2": "1.2.3"
8+
},
9+
"versions": {
10+
"1.2.3": {
11+
"py_versions": [
12+
"py310"
13+
],
14+
"registries": {
15+
"af-south-1": "510948584623",
16+
"ap-east-1": "651117190479",
17+
"ap-northeast-1": "354813040037",
18+
"ap-northeast-2": "366743142698",
19+
"ap-northeast-3": "867004704886",
20+
"ap-south-1": "720646828776",
21+
"ap-south-2": "628508329040",
22+
"ap-southeast-1": "121021644041",
23+
"ap-southeast-2": "783357654285",
24+
"ap-southeast-3": "951798379941",
25+
"ap-southeast-4": "106583098589",
26+
"ca-central-1": "341280168497",
27+
"ca-west-1": "190319476487",
28+
"cn-north-1": "450853457545",
29+
"cn-northwest-1": "451049120500",
30+
"eu-central-1": "492215442770",
31+
"eu-central-2": "680994064768",
32+
"eu-north-1": "662702820516",
33+
"eu-south-1": "978288397137",
34+
"eu-south-2": "104374241257",
35+
"eu-west-1": "141502667606",
36+
"eu-west-2": "764974769150",
37+
"eu-west-3": "659782779980",
38+
"il-central-1": "898809789911",
39+
"me-central-1": "272398656194",
40+
"me-south-1": "801668240914",
41+
"sa-east-1": "737474898029",
42+
"us-east-1": "683313688378",
43+
"us-east-2": "257758044811",
44+
"us-gov-east-1": "237065988967",
45+
"us-gov-west-1": "414596584902",
46+
"us-iso-east-1": "833128469047",
47+
"us-isob-east-1": "281123927165",
48+
"us-west-1": "746614075791",
49+
"us-west-2": "246618743249"
50+
},
51+
"tag_prefix": "2.0.1-tei1.2.3",
52+
"repository": "tei",
53+
"container_version": {
54+
"gpu": "cu122-ubuntu22.04"
55+
}
56+
}
57+
}
58+
}
59+
}

tests/unit/sagemaker/image_uris/test_huggingface_llm.py

+24
Original file line number | Diff line number | Diff line change
@@ -18,6 +18,11 @@
1818
from tests.unit.sagemaker.image_uris import expected_uris, conftest
1919

2020
LMI_VERSIONS = ["0.24.0"]
21+
TEI_VERSIONS_MAPPING = {
22+
"gpu": {
23+
"1.2.3": "2.0.1-tei1.2.3-gpu-py310-cu122-ubuntu22.04",
24+
},
25+
}
2126
HF_VERSIONS_MAPPING = {
2227
"gpu": {
2328
"0.6.0": "2.0.0-tgi0.6.0-gpu-py39-cu118-ubuntu20.04",
@@ -68,6 +73,25 @@ def test_huggingface_uris(load_config):
6873
assert expected == uri
6974

7075

76+
@pytest.mark.parametrize("load_config", ["huggingface-tei.json"], indirect=True)
def test_huggingface_tei_uris(load_config):
    """Verify the TEI image URI for every version and region in the config.

    For each (version, region) pair listed under ``inference.versions`` of the
    loaded config, the URI returned by ``get_huggingface_llm_image_uri`` for
    the ``huggingface-tei`` backend must equal the URI built by
    ``expected_uris.huggingface_llm_framework_uri`` from the registry account
    and the tag recorded in ``TEI_VERSIONS_MAPPING``.
    """
    inference_config = load_config["inference"]
    # The first listed processor selects the expected-tag table to compare against.
    processor = inference_config["processors"][0]

    for tei_version, version_config in inference_config["versions"].items():
        for aws_region, account_id in version_config["registries"].items():
            got_uri = get_huggingface_llm_image_uri(
                "huggingface-tei", region=aws_region, version=tei_version
            )
            want_uri = expected_uris.huggingface_llm_framework_uri(
                "tei",
                account_id,
                tei_version,
                TEI_VERSIONS_MAPPING[processor][tei_version],
                region=aws_region,
            )
            assert want_uri == got_uri
93+
94+
7195
@pytest.mark.parametrize("load_config", ["huggingface-llm.json"], indirect=True)
7296
def test_lmi_uris(load_config):
7397
VERSIONS = load_config["inference"]["versions"]

0 commit comments

Comments
 (0)