Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.

Already on GitHub? Sign in to your account

Refactoring - Split core.py into smaller files #363

Merged
merged 1 commit into from
Feb 20, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
# editor and IDE paraphernalia
.idea/
.vscode/

*__pycache__*
tmp/
Expand Down
4 changes: 0 additions & 4 deletions pytype-conf.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -11,12 +11,8 @@ exclude =
src/xpk/commands
src/xpk/core/tests
src/xpk/core/__init__.py
src/xpk/core/app_profile.py
src/xpk/core/blueprint.py
src/xpk/core/cluster_private.py
src/xpk/core/commands.py
src/xpk/core/core.py
src/xpk/core/job_template.py
src/xpk/core/kjob.py
src/xpk/core/kueue.py
src/xpk/core/nap.py
Expand Down
6 changes: 3 additions & 3 deletions src/xpk/commands/batch.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,12 +16,12 @@

from argparse import Namespace

from ..core.commands import run_command_for_value
from ..core.gcloud_context import add_zone_and_project
from ..core.kjob import AppProfileDefaults
from ..core.kueue import LOCAL_QUEUE_NAME
from ..utils.console import xpk_exit, xpk_print
from .common import set_cluster_command
from ..core.core import add_zone_and_project
from ..core.kjob import AppProfileDefaults
from ..core.commands import run_command_for_value
from .kind import set_local_cluster_command


Expand Down
46 changes: 22 additions & 24 deletions src/xpk/commands/cluster.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,58 +14,56 @@
limitations under the License.
"""

from ..core.commands import run_command_for_value, run_command_with_updates
from ..core.core import (
VERTEX_TENSORBOARD_FEATURE_FLAG,
add_zone_and_project,
create_cluster_configmaps,
create_cluster_network_config,
create_vertex_tensorboard,
delete_cluster_subnets,
from tabulate import tabulate

from ..core.capacity import H100_DEVICE_TYPE
from ..core.cluster import (
get_all_clusters_programmatic,
get_cluster_credentials,
get_gke_control_plane_version,
get_gke_node_pool_version,
get_gke_server_config,
h100_device_type,
install_nccl_on_cluster,
run_gke_node_pool_create_command,
set_jobset_on_cluster,
set_up_cluster_network_for_gpu,
setup_k8s_env,
update_cluster_with_gcsfuse_driver_if_necessary,
update_cluster_with_workload_identity_if_necessary,
zone_to_region,
get_user_input,
)
from ..core.cluster_private import authorize_private_cluster_access_if_necessary
from ..core.kjob import (
verify_kjob_installed,
prepare_kjob,
apply_kjob_crds,
from ..core.commands import run_command_for_value, run_command_with_updates
from ..core.config import VERTEX_TENSORBOARD_FEATURE_FLAG
from ..core.gcloud_context import (
add_zone_and_project,
get_gke_control_plane_version,
get_gke_server_config,
zone_to_region,
)
from ..core.kjob import apply_kjob_crds, prepare_kjob, verify_kjob_installed
from ..core.kueue import (
cluster_preheat_yml,
install_kueue_crs,
install_kueue_on_cluster,
wait_for_kueue_available,
)
from ..core.nap import enable_autoprovisioning_on_cluster
from ..core.network import (
create_cluster_network_config,
delete_cluster_subnets,
set_up_cluster_network_for_gpu,
)
from ..core.nodepool import get_gke_node_pool_version, run_gke_node_pool_create_command
from ..core.ray import install_ray_cluster
from ..core.resources import create_cluster_configmaps
from ..core.storage import install_storage_crd
from ..core.system_characteristics import (
AcceleratorType,
AcceleratorTypeToAcceleratorCharacteristics,
SystemCharacteristics,
get_system_characteristics,
)
from ..core.vertex import create_vertex_tensorboard
from ..core.workload import get_workload_list
from ..utils.console import get_user_input, xpk_exit, xpk_print
from ..utils.file import write_tmp_file
from ..utils.console import xpk_exit, xpk_print
from . import cluster_gcluster

from tabulate import tabulate


def cluster_create(args) -> None:
"""Function around cluster creation.
Expand Down Expand Up @@ -148,7 +146,7 @@ def cluster_create(args) -> None:
if set_up_cluster_network_code != 0:
xpk_exit(set_up_cluster_network_code)

if system.device_type == h100_device_type:
if system.device_type == H100_DEVICE_TYPE:
xpk_print('Creating Network Config for cluster')
create_cluster_network_config_code = create_cluster_network_config(args)
if create_cluster_network_config_code != 0:
Expand Down
16 changes: 12 additions & 4 deletions src/xpk/commands/cluster_gcluster.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,16 +14,24 @@
limitations under the License.
"""

from ..core.blueprint.blueprint_generator import BlueprintGenerator, BlueprintGeneratorOutput, supported_device_types, a3mega_device_type, a3ultra_device_type
import os

from ..core.blueprint.blueprint_generator import (
BlueprintGenerator,
BlueprintGeneratorOutput,
a3mega_device_type,
a3ultra_device_type,
supported_device_types,
)
from ..core.capacity import get_capacity_type
from ..core.docker_manager import DockerManager
from ..core.gcloud_context import zone_to_region
from ..core.gcluster_manager import GclusterManager
from ..core.core import zone_to_region, get_capacity_type
from ..utils.console import xpk_exit, xpk_print
from ..utils.network import all_IPs_cidr
from ..utils.file import ensure_directory_exists
from ..utils.network import all_IPs_cidr
from ..utils.objects import hash_string
from .common import set_cluster_command
import os

blueprints_path = os.path.abspath('xpkclusters/blueprints')
gcluster_working_dir = os.path.abspath('xpkclusters/gcluster-out')
Expand Down
2 changes: 1 addition & 1 deletion src/xpk/commands/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
"""

from ..core.commands import run_command_with_updates_retry
from ..core.core import zone_to_region
from ..core.gcloud_context import zone_to_region
from ..utils.console import xpk_print


Expand Down
18 changes: 8 additions & 10 deletions src/xpk/commands/info.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,19 +14,17 @@
limitations under the License.
"""

from ..utils.console import xpk_exit, xpk_print
from ..core.kueue import verify_kueuectl
from .common import set_cluster_command
from ..core.commands import (
run_command_for_value,
)
from ..core.core import (
add_zone_and_project,
)
import json
from tabulate import tabulate
from argparse import Namespace

from tabulate import tabulate

from ..core.commands import run_command_for_value
from ..core.gcloud_context import add_zone_and_project
from ..core.kueue import verify_kueuectl
from ..utils.console import xpk_exit, xpk_print
from .common import set_cluster_command

table_fmt = 'plain'


Expand Down
12 changes: 4 additions & 8 deletions src/xpk/commands/inspector.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,17 +14,13 @@
limitations under the License.
"""

from ..core.cluster import get_cluster_credentials
from ..core.commands import run_command_for_value
from ..core.core import (
CLUSTER_METADATA_CONFIGMAP,
CLUSTER_RESOURCES_CONFIGMAP,
add_zone_and_project,
get_cluster_credentials,
zone_to_region,
)
from ..core.gcloud_context import add_zone_and_project, zone_to_region
from ..core.kueue import CLUSTER_QUEUE_NAME, LOCAL_QUEUE_NAME
from ..utils.file import append_tmp_file, write_tmp_file
from ..core.resources import CLUSTER_METADATA_CONFIGMAP, CLUSTER_RESOURCES_CONFIGMAP
from ..utils.console import xpk_exit, xpk_print
from ..utils.file import append_tmp_file, write_tmp_file
from .workload import get_workload_list


Expand Down
16 changes: 9 additions & 7 deletions src/xpk/commands/job.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,16 +14,18 @@
limitations under the License.
"""

from .common import set_cluster_command
from .kind import set_local_cluster_command
from ..core.commands import run_command_for_value, run_command_with_updates
from ..utils.console import xpk_exit, xpk_print
from ..core.kjob import AppProfileDefaults
from ..core.core import add_zone_and_project
from ruamel.yaml import YAML
import re
import sys

from ruamel.yaml import YAML

from ..core.commands import run_command_for_value, run_command_with_updates
from ..core.gcloud_context import add_zone_and_project
from ..core.kjob import AppProfileDefaults
from ..utils.console import xpk_exit, xpk_print
from .common import set_cluster_command
from .kind import set_local_cluster_command


def job_info(args):
"""Run commands obtaining information about a job given by name.
Expand Down
2 changes: 1 addition & 1 deletion src/xpk/commands/kind.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
run_command_for_value,
run_command_with_updates,
)
from ..core.core import (
from ..core.cluster import (
set_jobset_on_cluster,
)
from ..core.kjob import (
Expand Down
6 changes: 3 additions & 3 deletions src/xpk/commands/run.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,12 +16,12 @@

from argparse import Namespace

from ..core.commands import run_command_with_full_controls
from ..core.gcloud_context import add_zone_and_project
from ..core.kjob import AppProfileDefaults
from ..core.kueue import LOCAL_QUEUE_NAME
from ..utils.console import xpk_exit, xpk_print
from .common import set_cluster_command
from ..core.core import add_zone_and_project
from ..core.kjob import AppProfileDefaults
from ..core.commands import run_command_with_full_controls
from .kind import set_local_cluster_command


Expand Down
2 changes: 1 addition & 1 deletion src/xpk/commands/storage.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@
from kubernetes import client as k8s_client
from kubernetes.client.rest import ApiException

from ..core.core import (
from ..core.cluster import (
setup_k8s_env,
update_cluster_with_gcsfuse_driver_if_necessary,
update_cluster_with_workload_identity_if_necessary,
Expand Down
2 changes: 1 addition & 1 deletion src/xpk/commands/version.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
limitations under the License.
"""

from ..core.core import __version__
from ..core.config import __version__
from ..utils.console import xpk_print


Expand Down
Loading
Loading