Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

chore(components): Add test machine spec support to preview.llm pipelines #10616

Merged
merged 1 commit into from
Mar 27, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@

@dsl.component(base_image=_image.GCPC_IMAGE_TAG, install_kfp_package=False)
def resolve_machine_spec(
accelerator_type: str = '',
accelerator_type: str = 'GPU',
use_test_spec: bool = False,
) -> NamedTuple(
'MachineSpec',
Expand All @@ -37,7 +37,8 @@ def resolve_machine_spec(
accelerator_type: One of 'TPU' or 'GPU'. If 'TPU' is specified, tuning
components run in europe-west4. Otherwise tuning components run in
us-central1 on GPUs. Default is 'GPU'.
use_test_spec: Whether to use a lower resource machine for testing.
use_test_spec: Whether to use a lower resource machine for testing. If True,
a machine with the specified `accelerator_type` is provisioned.

Returns:
Machine spec.
Expand All @@ -61,14 +62,27 @@ def resolve_machine_spec(
accelerator_count=32,
tuning_location='europe-west4',
)
else:
elif accelerator_type == 'GPU':
return outputs(
machine_type='a2-highgpu-1g',
accelerator_type='NVIDIA_TESLA_A100',
accelerator_count=1,
tuning_location='us-central1',
)
elif accelerator_type == 'TPU':
elif accelerator_type == 'CPU':
return outputs(
machine_type='e2-standard-16',
accelerator_type='ACCELERATOR_TYPE_UNSPECIFIED',
accelerator_count=0,
tuning_location='us-central1',
)
else:
raise ValueError(
f'Unsupported test accelerator_type {accelerator_type}. Must be one '
'of TPU, GPU or CPU.'
)

if accelerator_type == 'TPU':
return outputs(
machine_type='cloud-tpu',
accelerator_type='TPU_V3',
Expand All @@ -82,10 +96,11 @@ def resolve_machine_spec(
accelerator_count=8,
tuning_location='us-central1',
)
raise ValueError(
f'Unsupported accelerator type {accelerator_type}. Must be one of'
'TPU or GPU.'
)
else:
raise ValueError(
f'Unsupported accelerator_type {accelerator_type}. Must be one of'
'TPU or GPU.'
)


@dsl.component(base_image=_image.GCPC_IMAGE_TAG, install_kfp_package=False)
Expand Down Expand Up @@ -114,7 +129,7 @@ def resolve_refined_image_uri(
Raises:
ValueError: if an unsupported accelerator type is provided.
"""
if not accelerator_type:
if not accelerator_type or accelerator_type == 'ACCELERATOR_TYPE_UNSPECIFIED':
accelerator_postfix = 'cpu'
elif 'TPU' in accelerator_type:
accelerator_postfix = 'tpu'
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -88,7 +88,14 @@ def validate_pipeline(
f' {supported_pipeline_regions}.'
)

valid_cmek_config = location == 'us-central1' and accelerator_type == 'GPU'
valid_cmek_accelerator_types = {
'GPU',
'CPU', # Only used for testing.
}
valid_cmek_config = (
location == 'us-central1'
and accelerator_type in valid_cmek_accelerator_types
)
if encryption_spec_key_name and not valid_cmek_config:
raise ValueError(
'encryption_spec_key_name (CMEK) is only supported for GPU training'
Expand Down
Loading