Skip to content
New issue

Have a question about this project? # for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “#”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? # to your account

Fix issue #4856 by copying environment variables #5115

Merged
merged 5 commits into from
Apr 8, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
50 changes: 17 additions & 33 deletions src/sagemaker/workflow/notebook_job_step.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,49 +13,33 @@
"""The notebook job step definitions for workflow."""
from __future__ import absolute_import

import os
import re
import shutil
import os
from typing import Dict, List, Optional, Union

from typing import (
List,
Optional,
Union,
Dict,
from sagemaker import vpc_utils
from sagemaker.config.config_schema import (
NOTEBOOK_JOB_ROLE_ARN,
NOTEBOOK_JOB_S3_KMS_KEY_ID,
NOTEBOOK_JOB_S3_ROOT_URI,
NOTEBOOK_JOB_VOLUME_KMS_KEY_ID,
NOTEBOOK_JOB_VPC_CONFIG_SECURITY_GROUP_IDS,
NOTEBOOK_JOB_VPC_CONFIG_SUBNETS,
)

from sagemaker.s3 import S3Uploader
from sagemaker.s3_utils import s3_path_join
from sagemaker.session import get_execution_role
from sagemaker.utils import Tags, _tmpdir, format_tags, name_from_base, resolve_value_from_config
from sagemaker.workflow.entities import PipelineVariable, RequestType
from sagemaker.workflow.execution_variables import ExecutionVariables
from sagemaker.workflow.functions import Join
from sagemaker.workflow.properties import Properties
from sagemaker.workflow.retry import RetryPolicy
from sagemaker.workflow.steps import (
Step,
ConfigurableRetryStep,
StepTypeEnum,
)
from sagemaker.workflow.step_collections import StepCollection
from sagemaker.workflow.step_outputs import StepOutput

from sagemaker.workflow.entities import (
RequestType,
PipelineVariable,
)
from sagemaker.workflow.steps import ConfigurableRetryStep, Step, StepTypeEnum
from sagemaker.workflow.utilities import _collect_parameters, load_step_compilation_context
from sagemaker.session import get_execution_role

from sagemaker.s3_utils import s3_path_join
from sagemaker.s3 import S3Uploader
from sagemaker.utils import _tmpdir, name_from_base, resolve_value_from_config, format_tags, Tags
from sagemaker import vpc_utils

from sagemaker.config.config_schema import (
NOTEBOOK_JOB_ROLE_ARN,
NOTEBOOK_JOB_S3_ROOT_URI,
NOTEBOOK_JOB_S3_KMS_KEY_ID,
NOTEBOOK_JOB_VOLUME_KMS_KEY_ID,
NOTEBOOK_JOB_VPC_CONFIG_SUBNETS,
NOTEBOOK_JOB_VPC_CONFIG_SECURITY_GROUP_IDS,
)


# disable E1101 as collect_parameters decorator sets the attributes
Expand Down Expand Up @@ -374,7 +358,7 @@ def _prepare_env_variables(self):
execution mechanism.
"""

job_envs = self.environment_variables if self.environment_variables else {}
job_envs = dict(self.environment_variables or {})
system_envs = {
"AWS_DEFAULT_REGION": self._region_from_session,
"SM_JOB_DEF_VERSION": "1.0",
Expand Down
63 changes: 62 additions & 1 deletion tests/unit/sagemaker/workflow/test_notebook_job_step.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,11 +12,13 @@
# language governing permissions and limitations under the License.
from __future__ import absolute_import

import os
import unittest

from mock import Mock, patch

from sagemaker.workflow.notebook_job_step import NotebookJobStep
from sagemaker.workflow.functions import Join
from sagemaker.workflow.notebook_job_step import NotebookJobStep

REGION = "us-west-2"
PIPELINE_NAME = "test-pipeline-name"
Expand Down Expand Up @@ -573,3 +575,62 @@ def _create_step_with_required_fields(self):
image_uri=IMAGE_URI,
kernel_name=KERNEL_NAME,
)

def test_environment_variables_not_shared(self):
"""Test that environment variables are not shared between NotebookJob steps"""
# Setup shared environment variables
shared_env_vars = {"test": "test"}

# Create two steps with the same environment variables dictionary
step1 = NotebookJobStep(
name="step1",
input_notebook=INPUT_NOTEBOOK,
image_uri=IMAGE_URI,
kernel_name=KERNEL_NAME,
environment_variables=shared_env_vars,
)

step2 = NotebookJobStep(
name="step2",
input_notebook=INPUT_NOTEBOOK,
image_uri=IMAGE_URI,
kernel_name=KERNEL_NAME,
environment_variables=shared_env_vars,
)

# Get the arguments for both steps
step1_args = step1.arguments
step2_args = step2.arguments

# Verify that the environment variables are different objects
self.assertIsNot(
step1_args["Environment"],
step2_args["Environment"],
"Environment dictionaries should be different objects",
)

# Verify that modifying one step's environment doesn't affect the other
step1_env = step1_args["Environment"]
step2_env = step2_args["Environment"]

# Both should have the original test value
self.assertEqual(step1_env["test"], "test")
self.assertEqual(step2_env["test"], "test")

# Modify step1's environment
step1_env["test"] = "modified"

# Verify step2's environment remains unchanged
self.assertEqual(step2_env["test"], "test")

# Verify notebook names are correct for each step
self.assertEqual(
step1_env["SM_INPUT_NOTEBOOK_NAME"],
os.path.basename(INPUT_NOTEBOOK),
"Step 1 should have its own notebook name",
)
self.assertEqual(
step2_env["SM_INPUT_NOTEBOOK_NAME"],
os.path.basename(INPUT_NOTEBOOK),
"Step 2 should have its own notebook name",
)