From 489b00da16cb2d411cedd83f17c3db65da815fdd Mon Sep 17 00:00:00 2001 From: Nathan Park Date: Mon, 17 Feb 2025 13:55:01 -0800 Subject: [PATCH 1/5] Fix hyperparameter strategy docs --- src/sagemaker/tuner.py | 46 ++++++++++++++++++------------------------ 1 file changed, 20 insertions(+), 26 deletions(-) diff --git a/src/sagemaker/tuner.py b/src/sagemaker/tuner.py index 4b0f38f36f..fa8f9b8555 100644 --- a/src/sagemaker/tuner.py +++ b/src/sagemaker/tuner.py @@ -18,21 +18,20 @@ import inspect import json import logging - from enum import Enum -from typing import Union, Dict, Optional, List, Set +from typing import Dict, List, Optional, Set, Union import sagemaker from sagemaker.amazon.amazon_estimator import ( - RecordSet, AmazonAlgorithmEstimatorBase, FileSystemRecordSet, + RecordSet, ) from sagemaker.amazon.hyperparameter import Hyperparameter as hp # noqa from sagemaker.analytics import HyperparameterTuningJobAnalytics from sagemaker.deprecations import removed_function -from sagemaker.estimator import Framework, EstimatorBase -from sagemaker.inputs import TrainingInput, FileSystemInput +from sagemaker.estimator import EstimatorBase, Framework +from sagemaker.inputs import FileSystemInput, TrainingInput from sagemaker.job import _Job from sagemaker.jumpstart.utils import ( add_jumpstart_uri_tags, @@ -44,18 +43,17 @@ IntegerParameter, ParameterRange, ) -from sagemaker.workflow.entities import PipelineVariable -from sagemaker.workflow.pipeline_context import runnable_by_pipeline - from sagemaker.session import Session from sagemaker.utils import ( + Tags, base_from_name, base_name_from_image, + format_tags, name_from_base, to_string, - format_tags, - Tags, ) +from sagemaker.workflow.entities import PipelineVariable +from sagemaker.workflow.pipeline_context import runnable_by_pipeline AMAZON_ESTIMATOR_MODULE = "sagemaker" AMAZON_ESTIMATOR_CLS_NAMES = { @@ -133,15 +131,12 @@ def __init__( if warm_start_type not in list(WarmStartTypes): raise ValueError( - "Invalid type: {}, valid warm start types are: {}".format( - warm_start_type, list(WarmStartTypes) - ) + f"Invalid type: {warm_start_type}, " + f"valid warm start types are: {list(WarmStartTypes)}" ) if not parents: - raise ValueError( - "Invalid parents: {}, parents should not be None/empty".format(parents) - ) + raise ValueError(f"Invalid parents: {parents}, parents should not be None/empty") self.type = warm_start_type self.parents = set(parents) @@ -1455,9 +1450,7 @@ def _get_best_training_job(self): return tuning_job_describe_result["BestTrainingJob"] except KeyError: raise Exception( - "Best training job not available for tuning job: {}".format( - self.latest_tuning_job.name - ) + f"Best training job not available for tuning job: {self.latest_tuning_job.name}" ) def _ensure_last_tuning_job(self): @@ -1920,8 +1913,11 @@ def create( :meth:`~sagemaker.tuner.HyperparameterTuner.fit` method launches. If not specified, a default job name is generated, based on the training image name and current timestamp. - strategy (str): Strategy to be used for hyperparameter estimations - (default: 'Bayesian'). + strategy (str or PipelineVariable): Strategy to be used for hyperparameter estimations. + More information about different strategies: + https://docs.aws.amazon.com/sagemaker/latest/dg/automatic-model-tuning-how-it-works.html. + Available options are: 'Bayesian', 'Random', 'Hyperband', + 'Grid' (default: 'Bayesian') strategy_config (dict): The configuration for a training job launched by a hyperparameter tuning job. 
completion_criteria_config (dict): The configuration for tuning job completion criteria. @@ -2080,21 +2076,19 @@ def _validate_dict_argument(cls, name, value, allowed_keys, require_same_keys=Fa return if not isinstance(value, dict): - raise ValueError( - "Argument '{}' must be a dictionary using {} as keys".format(name, allowed_keys) - ) + raise ValueError(f"Argument '{name}' must be a dictionary using {allowed_keys} as keys") value_keys = sorted(value.keys()) if require_same_keys: if value_keys != allowed_keys: raise ValueError( - "The keys of argument '{}' must be the same as {}".format(name, allowed_keys) + f"The keys of argument '{name}' must be the same as {allowed_keys}" ) else: if not set(value_keys).issubset(set(allowed_keys)): raise ValueError( - "The keys of argument '{}' must be a subset of {}".format(name, allowed_keys) + f"The keys of argument '{name}' must be a subset of {allowed_keys}" ) def _add_estimator( From 0627fd34fbc852b7cc16efa7b93ca7564d348918 Mon Sep 17 00:00:00 2001 From: Nathan Park Date: Wed, 19 Feb 2025 09:05:08 -0800 Subject: [PATCH 2/5] Fix py version docs WIP --- src/sagemaker/fw_utils.py | 26 ++++++++++++-------------- 1 file changed, 12 insertions(+), 14 deletions(-) diff --git a/src/sagemaker/fw_utils.py b/src/sagemaker/fw_utils.py index 84d2f1eb1c..a743ead9ee 100644 --- a/src/sagemaker/fw_utils.py +++ b/src/sagemaker/fw_utils.py @@ -17,23 +17,22 @@ import logging import os import re -import time import shutil import tempfile +import time from collections import namedtuple -from typing import List, Optional, Union, Dict +from typing import Dict, List, Optional, Union + from packaging import version import sagemaker.image_uris +import sagemaker.utils +from sagemaker.deprecations import deprecation_warn_base, renamed_kwargs, renamed_warning from sagemaker.instance_group import InstanceGroup from sagemaker.s3_utils import s3_path_join from sagemaker.session_settings import SessionSettings -import sagemaker.utils from sagemaker.workflow import is_pipeline_variable - -from sagemaker.deprecations import renamed_warning, renamed_kwargs from sagemaker.workflow.entities import PipelineVariable -from sagemaker.deprecations import deprecation_warn_base logger = logging.getLogger(__name__) @@ -691,7 +690,7 @@ def validate_smdistributed( instance_type (str): A string representing the type of training instance selected. framework_name (str): A string representing the name of framework selected. framework_version (str): A string representing the framework version selected. - py_version (str): A string representing the python version selected. + py_version (str): A string representing the python version selected. Ex: `py38, py39, py310, py311` distribution (dict): A dictionary with information to enable distributed training. (Defaults to None if distributed training is not enabled.) For example: @@ -763,7 +762,7 @@ def _validate_smdataparallel_args( instance_type (str): A string representing the type of training instance selected. Ex: `ml.p3.16xlarge` framework_name (str): A string representing the name of framework selected. Ex: `tensorflow` framework_version (str): A string representing the framework version selected. Ex: `2.3.1` - py_version (str): A string representing the python version selected. Ex: `py3` + py_version (str): A string representing the python version selected. Ex: `py38, py39, py310, py311` distribution (dict): A dictionary with information to enable distributed training. (Defaults to None if distributed training is not enabled.) 
Ex: @@ -846,7 +845,7 @@ def validate_distribution( instance_groups ([InstanceGroup]): A list contains instance groups used for training. framework_name (str): A string representing the name of framework selected. framework_version (str): A string representing the framework version selected. - py_version (str): A string representing the python version selected. + py_version (str): A string representing the python version selected. Ex: `py38, py39, py310, py311` image_uri (str): A string representing a Docker image URI. kwargs(dict): Additional kwargs passed to this function @@ -1009,7 +1008,7 @@ def validate_torch_distributed_distribution( } } framework_version (str): A string representing the framework version selected. - py_version (str): A string representing the python version selected. + py_version (str): A string representing the python version selected. Ex: `py38, py39, py310, py311` image_uri (str): A string representing a Docker image URI. entry_point (str or PipelineVariable): The absolute or relative path to the local Python source file that should be executed as the entry point to @@ -1162,7 +1161,7 @@ def validate_version_or_image_args(framework_version, py_version, image_uri): Args: framework_version (str): The version of the framework. - py_version (str): The version of Python. + py_version (str): A string representing the python version selected. Ex: `py38, py39, py310, py311` image_uri (str): The URI of the image. Raises: @@ -1194,9 +1193,8 @@ def create_image_uri( instance_type (str): SageMaker instance type. Used to determine device type (cpu/gpu/family-specific optimized). framework_version (str): The version of the framework. - py_version (str): Optional. Python version. If specified, should be one - of 'py2' or 'py3'. If not specified, image uri will not include a - python component. + py_version (str): Optional. Python version Ex: `py38, py39, py310, py311`. + If not specified, image uri will not include a python component. account (str): AWS account that contains the image. (default: '520713654638') accelerator_type (str): SageMaker Elastic Inference accelerator type. From 96e9f1caf10480f2a6daff84d858b6291e450704 Mon Sep 17 00:00:00 2001 From: Nathan Park Date: Thu, 20 Feb 2025 11:58:24 -0800 Subject: [PATCH 3/5] More formatting --- src/sagemaker/fw_utils.py | 22 +++++++-------- src/sagemaker/huggingface/estimator.py | 14 ++++------ src/sagemaker/processing.py | 38 +++++++++++++------------- 3 files changed, 34 insertions(+), 40 deletions(-) diff --git a/src/sagemaker/fw_utils.py b/src/sagemaker/fw_utils.py index a743ead9ee..e348f4c9f0 100644 --- a/src/sagemaker/fw_utils.py +++ b/src/sagemaker/fw_utils.py @@ -10,7 +10,7 @@ # distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF # ANY KIND, either express or implied. See the License for the specific # language governing permissions and limitations under the License. -"""Utility methods used by framework classes""" +"""Utility methods used by framework classes.""" from __future__ import absolute_import import json @@ -40,6 +40,7 @@ UploadedCode = namedtuple("UploadedCode", ["s3_prefix", "script_name"]) """sagemaker.fw_utils.UploadedCode: An object containing the S3 prefix and script name. + This is for the source code used for the entry point with an ``Estimator``. It can be instantiated with positional or keyword arguments. 
""" @@ -210,7 +211,7 @@ def validate_source_code_input_against_pipeline_variables( git_config: Optional[Dict[str, str]] = None, enable_network_isolation: Union[bool, PipelineVariable] = False, ): - """Validate source code input against pipeline variables + """Validate source code input against pipeline variables. Args: entry_point (str or PipelineVariable): The path to the local Python source file that @@ -480,7 +481,7 @@ def tar_and_upload_dir( def _list_files_to_compress(script, directory): - """Placeholder docstring""" + """Placeholder docstring.""" if directory is None: return [script] @@ -619,8 +620,6 @@ def warn_if_parameter_server_with_multi_gpu(training_instance_type, distribution "enabled": True } } - - """ if training_instance_type == "local" or distribution is None: return @@ -645,7 +644,8 @@ def warn_if_parameter_server_with_multi_gpu(training_instance_type, distribution def profiler_config_deprecation_warning( profiler_config, image_uri, framework_name, framework_version ): - """Put out a deprecation message for if framework profiling is specified TF >= 2.12 and PT >= 2.0""" + """Put out a deprecation message for if framework profiling is specified TF >= 2.12 and PT >= + 2.0.""" if profiler_config is None or profiler_config.framework_profile_params is None: return @@ -952,7 +952,7 @@ def validate_distribution( def validate_distribution_for_instance_type(instance_type, distribution): - """Check if the provided distribution strategy is supported for the instance_type + """Check if the provided distribution strategy is supported for the instance_type. Args: instance_type (str): A string representing the type of training instance selected. @@ -1071,7 +1071,7 @@ def validate_torch_distributed_distribution( def _is_gpu_instance(instance_type): - """Returns bool indicating whether instance_type supports GPU + """Returns bool indicating whether instance_type supports GPU. Args: instance_type (str): Name of the instance_type to check against. @@ -1090,7 +1090,7 @@ def _is_gpu_instance(instance_type): def _is_trainium_instance(instance_type): - """Returns bool indicating whether instance_type is a Trainium instance + """Returns bool indicating whether instance_type is a Trainium instance. Args: instance_type (str): Name of the instance_type to check against. @@ -1106,7 +1106,7 @@ def _is_trainium_instance(instance_type): def python_deprecation_warning(framework, latest_supported_version): - """Placeholder docstring""" + """Placeholder docstring.""" return PYTHON_2_DEPRECATION_WARNING.format( framework=framework, latest_supported_version=latest_supported_version ) @@ -1120,7 +1120,6 @@ def _region_supports_debugger(region_name): Returns: bool: Whether or not the region supports Amazon SageMaker Debugger. - """ return region_name.lower() not in DEBUGGER_UNSUPPORTED_REGIONS @@ -1133,7 +1132,6 @@ def _region_supports_profiler(region_name): Returns: bool: Whether or not the region supports Amazon SageMaker Debugger profiling feature. 
- """ return region_name.lower() not in PROFILER_UNSUPPORTED_REGIONS diff --git a/src/sagemaker/huggingface/estimator.py b/src/sagemaker/huggingface/estimator.py index f3e655f1f1..70cc17b209 100644 --- a/src/sagemaker/huggingface/estimator.py +++ b/src/sagemaker/huggingface/estimator.py @@ -15,17 +15,13 @@ import logging import re -from typing import Optional, Union, Dict +from typing import Dict, Optional, Union -from sagemaker.estimator import Framework, EstimatorBase -from sagemaker.fw_utils import ( - framework_name_from_image, - validate_distribution, -) +from sagemaker.estimator import EstimatorBase, Framework +from sagemaker.fw_utils import framework_name_from_image, validate_distribution from sagemaker.huggingface.model import HuggingFaceModel -from sagemaker.vpc_utils import VPC_CONFIG_DEFAULT - from sagemaker.huggingface.training_compiler.config import TrainingCompilerConfig +from sagemaker.vpc_utils import VPC_CONFIG_DEFAULT from sagemaker.workflow.entities import PipelineVariable logger = logging.getLogger("sagemaker") @@ -66,7 +62,7 @@ def __init__( Args: py_version (str): Python version you want to use for executing your model training code. Defaults to ``None``. Required unless ``image_uri`` is provided. If - using PyTorch, the current supported version is ``py36``. If using TensorFlow, + using PyTorch, the current supported version is ``py39``. If using TensorFlow, the current supported version is ``py37``. entry_point (str or PipelineVariable): Path (absolute or relative) to the Python source file which should be executed as the entry point to training. diff --git a/src/sagemaker/processing.py b/src/sagemaker/processing.py index 2946cb2540..d8674f269d 100644 --- a/src/sagemaker/processing.py +++ b/src/sagemaker/processing.py @@ -18,51 +18,51 @@ """ from __future__ import absolute_import +import logging import os import pathlib -import logging +import re +from copy import copy from textwrap import dedent from typing import Dict, List, Optional, Union -from copy import copy -import re import attr - from six.moves.urllib.parse import urlparse from six.moves.urllib.request import url2pathname + from sagemaker import s3 +from sagemaker.apiutils._base_types import ApiObject from sagemaker.config import ( + PROCESSING_JOB_ENABLE_NETWORK_ISOLATION_PATH, + PROCESSING_JOB_ENVIRONMENT_PATH, + PROCESSING_JOB_INTER_CONTAINER_ENCRYPTION_PATH, PROCESSING_JOB_KMS_KEY_ID_PATH, + PROCESSING_JOB_ROLE_ARN_PATH, PROCESSING_JOB_SECURITY_GROUP_IDS_PATH, PROCESSING_JOB_SUBNETS_PATH, - PROCESSING_JOB_ENABLE_NETWORK_ISOLATION_PATH, PROCESSING_JOB_VOLUME_KMS_KEY_ID_PATH, - PROCESSING_JOB_ROLE_ARN_PATH, - PROCESSING_JOB_INTER_CONTAINER_ENCRYPTION_PATH, - PROCESSING_JOB_ENVIRONMENT_PATH, ) +from sagemaker.dataset_definition.inputs import DatasetDefinition, S3Input from sagemaker.job import _Job from sagemaker.local import LocalSession from sagemaker.network import NetworkConfig +from sagemaker.s3 import S3Uploader +from sagemaker.session import Session from sagemaker.utils import ( + Tags, base_name_from_image, + check_and_get_run_experiment_config, + format_tags, get_config_value, name_from_base, - check_and_get_run_experiment_config, - resolve_value_from_config, resolve_class_attribute_from_config, - Tags, - format_tags, + resolve_value_from_config, ) -from sagemaker.session import Session from sagemaker.workflow import is_pipeline_variable +from sagemaker.workflow.entities import PipelineVariable +from sagemaker.workflow.execution_variables import ExecutionVariables from 
sagemaker.workflow.functions import Join from sagemaker.workflow.pipeline_context import runnable_by_pipeline -from sagemaker.workflow.execution_variables import ExecutionVariables -from sagemaker.workflow.entities import PipelineVariable -from sagemaker.dataset_definition.inputs import S3Input, DatasetDefinition -from sagemaker.apiutils._base_types import ApiObject -from sagemaker.s3 import S3Uploader logger = logging.getLogger(__name__) @@ -1465,7 +1465,7 @@ def __init__( instance_type (str or PipelineVariable): The type of EC2 instance to use for processing, for example, 'ml.c4.xlarge'. py_version (str): Python version you want to use for executing your - model training code. One of 'py2' or 'py3'. Defaults to 'py3'. Value + model training code. Ex `py38, py39, py310, py311`. Value is ignored when ``image_uri`` is provided. image_uri (str or PipelineVariable): The URI of the Docker image to use for the processing jobs (default: None). From 5d179a81fe6620b8b7b1fa3bad7f691a15a3b55d Mon Sep 17 00:00:00 2001 From: Nathan Park Date: Thu, 20 Feb 2025 12:01:17 -0800 Subject: [PATCH 4/5] Fix too long lines --- src/sagemaker/fw_utils.py | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/src/sagemaker/fw_utils.py b/src/sagemaker/fw_utils.py index e348f4c9f0..69163f0e25 100644 --- a/src/sagemaker/fw_utils.py +++ b/src/sagemaker/fw_utils.py @@ -585,7 +585,6 @@ def model_code_key_prefix(code_location_key_prefix, model_name, image): The location returned is a potential concatenation of 2 parts 1. code_location_key_prefix if it exists 2. model_name or a name derived from the image - Args: code_location_key_prefix (str): the s3 key prefix from code_location model_name (str): the name of the model @@ -690,7 +689,8 @@ def validate_smdistributed( instance_type (str): A string representing the type of training instance selected. framework_name (str): A string representing the name of framework selected. framework_version (str): A string representing the framework version selected. - py_version (str): A string representing the python version selected. Ex: `py38, py39, py310, py311` + py_version (str): A string representing the python version selected. + Ex: `py38, py39, py310, py311` distribution (dict): A dictionary with information to enable distributed training. (Defaults to None if distributed training is not enabled.) For example: @@ -762,7 +762,8 @@ def _validate_smdataparallel_args( instance_type (str): A string representing the type of training instance selected. Ex: `ml.p3.16xlarge` framework_name (str): A string representing the name of framework selected. Ex: `tensorflow` framework_version (str): A string representing the framework version selected. Ex: `2.3.1` - py_version (str): A string representing the python version selected. Ex: `py38, py39, py310, py311` + py_version (str): A string representing the python version selected. + Ex: `py38, py39, py310, py311` distribution (dict): A dictionary with information to enable distributed training. (Defaults to None if distributed training is not enabled.) Ex: @@ -845,7 +846,8 @@ def validate_distribution( instance_groups ([InstanceGroup]): A list contains instance groups used for training. framework_name (str): A string representing the name of framework selected. framework_version (str): A string representing the framework version selected. - py_version (str): A string representing the python version selected. Ex: `py38, py39, py310, py311` + py_version (str): A string representing the python version selected. 
+ Ex: `py38, py39, py310, py311` image_uri (str): A string representing a Docker image URI. kwargs(dict): Additional kwargs passed to this function @@ -1008,7 +1010,8 @@ def validate_torch_distributed_distribution( } } framework_version (str): A string representing the framework version selected. - py_version (str): A string representing the python version selected. Ex: `py38, py39, py310, py311` + py_version (str): A string representing the python version selected. + Ex: `py38, py39, py310, py311` image_uri (str): A string representing a Docker image URI. entry_point (str or PipelineVariable): The absolute or relative path to the local Python source file that should be executed as the entry point to @@ -1159,7 +1162,8 @@ def validate_version_or_image_args(framework_version, py_version, image_uri): Args: framework_version (str): The version of the framework. - py_version (str): A string representing the python version selected. Ex: `py38, py39, py310, py311` + py_version (str): A string representing the python version selected. + Ex: `py38, py39, py310, py311` image_uri (str): The URI of the image. Raises: From 0299688043d5a958020ab7cbf3f07bb5eec507cf Mon Sep 17 00:00:00 2001 From: Nathan Park Date: Thu, 20 Feb 2025 14:13:36 -0800 Subject: [PATCH 5/5] Fix codestyle --- src/sagemaker/fw_utils.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/sagemaker/fw_utils.py b/src/sagemaker/fw_utils.py index 69163f0e25..0e4e582261 100644 --- a/src/sagemaker/fw_utils.py +++ b/src/sagemaker/fw_utils.py @@ -643,8 +643,7 @@ def warn_if_parameter_server_with_multi_gpu(training_instance_type, distribution def profiler_config_deprecation_warning( profiler_config, image_uri, framework_name, framework_version ): - """Put out a deprecation message for if framework profiling is specified TF >= 2.12 and PT >= - 2.0.""" + """Deprecation message if framework profiling is specified TF >= 2.12 and PT >= 2.0.""" if profiler_config is None or profiler_config.framework_profile_params is None: return
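
Reviewer note (illustrative only, not part of the patches above): patches 1–4 only touch docstrings, so nothing behavioral changes, but the sketch below shows how the documented values would typically be passed — the `strategy` options from the updated `HyperparameterTuner.create` docs and the `pyXX` style `py_version` string from the fw_utils docs. The role ARN, S3 paths, framework version, metric regex, and hyperparameter range are placeholder assumptions chosen for illustration; they are not taken from this change.

    from sagemaker.pytorch import PyTorch
    from sagemaker.tuner import ContinuousParameter, HyperparameterTuner

    # Placeholder estimator: role, instance type, and versions are assumptions,
    # chosen only to illustrate the documented `py_version` format (py38-py311).
    estimator = PyTorch(
        entry_point="train.py",
        role="arn:aws:iam::123456789012:role/ExampleSageMakerRole",  # hypothetical
        framework_version="2.0",
        py_version="py310",
        instance_count=1,
        instance_type="ml.m5.xlarge",
    )

    # `strategy` accepts 'Bayesian' (default), 'Random', 'Hyperband', or 'Grid',
    # as described by the updated docstring in patch 1.
    tuner = HyperparameterTuner(
        estimator=estimator,
        objective_metric_name="validation:loss",
        objective_type="Minimize",
        metric_definitions=[
            {"Name": "validation:loss", "Regex": "validation loss: ([0-9\\.]+)"}  # assumed log format
        ],
        hyperparameter_ranges={"lr": ContinuousParameter(1e-5, 1e-2)},
        strategy="Hyperband",
        max_jobs=10,
        max_parallel_jobs=2,
    )

    tuner.fit({"training": "s3://example-bucket/train"})  # hypothetical S3 path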