diff --git a/.gitignore b/.gitignore index 09935a1dc9..1ac75202b2 100644 --- a/.gitignore +++ b/.gitignore @@ -18,6 +18,9 @@ scratch/ *.egg examples/tensorflow/distributed_mnist/data *.iml + +# Sphinx documentation +docs/_build/ doc/_build doc/_static doc/_templates diff --git a/.readthedocs.yaml b/.readthedocs.yaml new file mode 100644 index 0000000000..41a568b194 --- /dev/null +++ b/.readthedocs.yaml @@ -0,0 +1,24 @@ +# Read the Docs configuration file +# See https://docs.readthedocs.io/en/stable/config-file/v2.html for details + +version: 2 + +build: + os: ubuntu-22.04 + tools: + python: "3.10" + +sphinx: + configuration: docs/conf.py + +formats: + - pdf + - epub + +python: + install: + - requirements: docs/requirements.txt + - method: pip + path: . + extra_requirements: + - docs \ No newline at end of file diff --git a/docs/Makefile b/docs/Makefile new file mode 100644 index 0000000000..fe8e88c6e1 --- /dev/null +++ b/docs/Makefile @@ -0,0 +1,12 @@ +SPHINXOPTS ?= +SPHINXBUILD ?= sphinx-build +SOURCEDIR = . 
+BUILDDIR = _build + +help: + @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) + +.PHONY: help Makefile + +%: Makefile + @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) diff --git a/docs/_static/custom.css b/docs/_static/custom.css new file mode 100644 index 0000000000..f618f6c891 --- /dev/null +++ b/docs/_static/custom.css @@ -0,0 +1,174 @@ +/* What's New Section Styling */ +.whats-new-container { + display: grid; + grid-template-columns: repeat(auto-fit, minmax(300px, 1fr)); + gap: 20px; + margin: 30px 0; +} + +.new-feature-card { + background: var(--pst-color-surface); + border: 1px solid var(--pst-color-border); + border-radius: 12px; + padding: 25px; + box-shadow: 0 4px 12px rgba(0, 0, 0, 0.1); + transition: transform 0.2s ease, box-shadow 0.2s ease; + position: relative; + overflow: hidden; + color: var(--pst-color-text-base); +} + +[data-theme="dark"] .new-feature-card { + box-shadow: 0 4px 12px rgba(0, 0, 0, 0.3); +} + +.new-feature-card:hover { + transform: translateY(-2px); + box-shadow: 0 8px 25px rgba(0, 0, 0, 0.15); +} + +[data-theme="dark"] .new-feature-card:hover { + box-shadow: 0 8px 25px rgba(0, 0, 0, 0.4); +} + +.new-feature-card h3 { + margin-top: 0; + margin-bottom: 15px; + font-size: 1.3em; + font-weight: 600; + color: inherit; +} + +.new-feature-card ul { + margin: 15px 0; + padding-left: 20px; +} + +.new-feature-card li { + margin-bottom: 8px; + line-height: 1.5; +} + +/* Feedback Component */ +.feedback-container { + border-top: 1px solid var(--pst-color-border); + margin-top: 40px; + padding: 24px; + background-color: var(--pst-color-surface); + border-radius: 8px; + border: 1px solid var(--pst-color-border); +} + +.feedback-question { + font-size: 20px; + font-weight: 500; + color: var(--pst-color-text-base); + margin-bottom: 20px; + text-align: left; +} + +.feedback-buttons { + display: flex; + justify-content: flex-start; + gap: 12px; +} + +.feedback-btn { + display: flex; + align-items: 
center; + gap: 8px; + padding: 8px 16px; + border: 1px solid var(--pst-color-border); + border-radius: 6px; + background: var(--pst-color-surface); + color: var(--pst-color-text-muted); + font-size: 14px; + cursor: pointer; + transition: all 0.2s ease; +} + +.feedback-btn:hover { + border-color: var(--pst-color-primary); + background: var(--pst-color-surface); +} + +.feedback-btn.selected { + border-color: var(--pst-color-primary); + background: var(--pst-color-primary-bg); + color: var(--pst-color-primary); +} + +.feedback-btn svg { + width: 16px; + height: 16px; +} + +.feedback-thanks { + display: none; + color: #059669; + font-size: 14px; + text-align: center; + margin-top: 16px; +} + +/* Feedback Options */ +.feedback-options { + display: none; + margin-top: 20px; +} + +.feedback-more { + display: flex; + flex-direction: column; + gap: 12px; +} + +.feedback-github-link { + color: #0366d6; + text-decoration: none; + font-size: 14px; + padding: 10px 20px; + border: 1px solid #0366d6; + border-radius: 6px; + transition: all 0.2s; + font-weight: 500; + align-self: flex-start; +} + +.feedback-github-link:hover { + background-color: #0366d6; + color: white; + text-decoration: none; +} + +header { + background-color: var(--pst-color-background); /* theme variable so the sticky header follows light/dark mode */ + + box-shadow: 0 1px 2px rgba(0, 0, 0, 0.05); + position: sticky; + top: 0; + z-index: 50; +} + +h1 { + font-size: 1.875rem; + font-weight: 700; + color: var(--pst-color-text-base); +} + +h2 { + font-size: 1.5rem; + font-weight: 700; + color: var(--pst-color-text-base); +} + +h3 { + font-size: 1.25rem; + font-weight: 500; + color: var(--pst-color-text-base); +} + +p { + font-size: 1.0rem; + color: var(--pst-color-text-base); +} diff --git a/docs/_static/feedback.js b/docs/_static/feedback.js new file mode 100644 index 0000000000..a0e9874aa0 --- /dev/null +++ b/docs/_static/feedback.js @@ -0,0 +1,71 @@ +// Feedback functionality +document.addEventListener('DOMContentLoaded', function() { + // Add feedback component to the very bottom of each page + 
const article = document.querySelector('article[role="main"]') || document.querySelector('.bd-article') || document.querySelector('main'); + if (article) { + const feedbackHTML = ` +
+
Was this page helpful?
+
+ + +
+
+ +
+
Thank you for your feedback!
+
+ `; + + article.insertAdjacentHTML('beforeend', feedbackHTML); + + // Add click handlers + const feedbackBtns = document.querySelectorAll('.feedback-btn'); + const thanksMessage = document.querySelector('.feedback-thanks'); + const feedbackOptions = document.querySelector('.feedback-options'); + + feedbackBtns.forEach(btn => { + btn.addEventListener('click', function() { + const feedback = this.dataset.feedback; + + // Remove selected class from all buttons + feedbackBtns.forEach(b => b.classList.remove('selected')); + + // Add selected class to clicked button + this.classList.add('selected'); + + if (feedback === 'yes') { + // Hide options and show thanks + feedbackOptions.style.display = 'none'; + thanksMessage.style.display = 'block'; + + // Send positive feedback + if (typeof gtag !== 'undefined') { + gtag('event', 'page_feedback', { + 'feedback_value': 'positive', + 'page_location': window.location.href + }); + } + } else { + // Show GitHub link for negative feedback + feedbackOptions.style.display = 'block'; + thanksMessage.style.display = 'none'; + } + }); + }); + } +}); diff --git a/docs/api/index.rst b/docs/api/index.rst new file mode 100644 index 0000000000..cb1b9ca5a6 --- /dev/null +++ b/docs/api/index.rst @@ -0,0 +1,13 @@ +API Reference +============= + +Complete API documentation for SageMaker Python SDK V3. + +.. toctree:: + :maxdepth: 2 + :caption: API Reference + + sagemaker_core + sagemaker_train + sagemaker_serve + sagemaker_mlops diff --git a/docs/api/sagemaker_core.rst b/docs/api/sagemaker_core.rst new file mode 100644 index 0000000000..bbad9631ab --- /dev/null +++ b/docs/api/sagemaker_core.rst @@ -0,0 +1,60 @@ +SageMaker Core +============== + +Core SageMaker resources and utilities for managing AWS SageMaker services. + +.. currentmodule:: sagemaker.core + +Core Resources +-------------- + +.. automodule:: sagemaker.core.resources + :members: + :undoc-members: + :show-inheritance: + +Session Management +------------------ + +.. 
automodule:: sagemaker.core.session_settings + :members: + :undoc-members: + +Configuration +------------- + +.. automodule:: sagemaker.core.config_schema + :members: + :undoc-members: + +Processing +---------- + +.. automodule:: sagemaker.core.processing + :members: + :undoc-members: + +Transformers +------------ + +.. automodule:: sagemaker.core.transformer + :members: + :undoc-members: + +Utilities +--------- + +.. automodule:: sagemaker.core.common_utils + :members: + :undoc-members: + +.. automodule:: sagemaker.core.image_uris + :members: + :undoc-members: + +Exceptions +---------- + +.. automodule:: sagemaker.core.exceptions + :members: + :undoc-members: diff --git a/docs/api/sagemaker_mlops.rst b/docs/api/sagemaker_mlops.rst new file mode 100644 index 0000000000..f67879111d --- /dev/null +++ b/docs/api/sagemaker_mlops.rst @@ -0,0 +1,30 @@ +SageMaker MLOps +=============== + +MLOps capabilities including pipelines, workflows, and model management. + +.. currentmodule:: sagemaker.mlops + +Pipeline Management +------------------- + +.. automodule:: sagemaker.mlops + :members: + :undoc-members: + :show-inheritance: + +Workflow Management +------------------- + +.. automodule:: sagemaker.mlops.workflow + :members: + :undoc-members: + :show-inheritance: + +Local Development +----------------- + +.. automodule:: sagemaker.mlops.local + :members: + :undoc-members: + :show-inheritance: diff --git a/docs/api/sagemaker_serve.rst b/docs/api/sagemaker_serve.rst new file mode 100644 index 0000000000..471b79530b --- /dev/null +++ b/docs/api/sagemaker_serve.rst @@ -0,0 +1,14 @@ +SageMaker Serve +=============== + +Model serving and inference capabilities for deploying and managing ML models. + +.. currentmodule:: sagemaker.serve + +Model Deployment +---------------- + +.. 
automodule:: sagemaker.serve + :members: + :undoc-members: + :show-inheritance: diff --git a/docs/api/sagemaker_train.rst b/docs/api/sagemaker_train.rst new file mode 100644 index 0000000000..d54b720a92 --- /dev/null +++ b/docs/api/sagemaker_train.rst @@ -0,0 +1,30 @@ +SageMaker Train +=============== + +Training capabilities including model training, hyperparameter tuning, and distributed training. + +.. currentmodule:: sagemaker.train + +Model Training +-------------- + +.. automodule:: sagemaker.train + :members: + :undoc-members: + :show-inheritance: + +Distributed Training +-------------------- + +.. automodule:: sagemaker.train.distributed + :members: + :undoc-members: + :show-inheritance: + +Model Evaluation +---------------- + +.. automodule:: sagemaker.train.evaluate + :members: + :undoc-members: + :show-inheritance: diff --git a/docs/conf.py b/docs/conf.py new file mode 100644 index 0000000000..d9e282694f --- /dev/null +++ b/docs/conf.py @@ -0,0 +1,86 @@ +import os +import sys +from datetime import datetime + +# Add the source directories to Python path +sys.path.insert(0, os.path.abspath('.')) +sys.path.insert(0, os.path.abspath('../sagemaker-core/src')) +sys.path.insert(0, os.path.abspath('../sagemaker-train/src')) +sys.path.insert(0, os.path.abspath('../sagemaker-serve/src')) +sys.path.insert(0, os.path.abspath('../sagemaker-mlops/src')) + +project = 'SageMaker Python SDK V3' +copyright = f'{datetime.now().year}, Amazon Web Services' +author = 'Amazon Web Services' + +extensions = [ + 'sphinx.ext.autodoc', + 'sphinx.ext.autosummary', + 'sphinx.ext.napoleon', + 'sphinx.ext.viewcode', + 'sphinx.ext.intersphinx', + 'myst_nb', + 'sphinx_book_theme', + 'sphinx_design', + 'sphinx_copybutton' +] + +templates_path = ['_templates'] +exclude_patterns = [ + '_build', + 'Thumbs.db', + '.DS_Store', + 'sagemaker-core/docs/*', + 'sagemaker-core/CHANGELOG.md', + 'sagemaker-core/CONTRIBUTING.md', +] + +# Suppress specific warnings +suppress_warnings = [ + 
'myst.header', # Suppress header level warnings from notebooks + 'toc.not_readable', # Suppress toctree warnings for symlinked files +] + +html_theme = 'sphinx_book_theme' +html_theme_options = { + 'repository_url': 'https://github.com/aws/sagemaker-python-sdk', + 'use_repository_button': True, + 'use_issues_button': True, + 'use_edit_page_button': False, + 'path_to_docs': 'docs/', + 'show_navbar_depth': 2, + 'show_toc_level': 2, + 'collapse_navbar': True, + 'announcement': 'This is V3 documentation. View V2 docs', +} + +html_static_path = ['_static'] +html_css_files = ['custom.css'] +html_js_files = ['feedback.js'] + +html_context = { + 'display_github': True, + 'github_user': 'aws', + 'github_repo': 'sagemaker-python-sdk', + 'github_version': 'master', + 'conf_py_path': '/docs/', + 'version_warning': True, + 'version_warning_text': 'This is the V3 documentation. For V2 documentation, visit the legacy docs.', +} + +nb_execution_mode = 'off' +nb_execution_allow_errors = True + +# Autodoc configuration +autodoc_default_options = { + 'members': True, + 'undoc-members': True, + 'show-inheritance': True, +} + +# Generate autosummary stubs +autosummary_generate = True + +# Don't mock imports - let them fail gracefully and show what's available +autodoc_mock_imports = [] +suppress_warnings.append('autodoc.import_error')  # extend the list defined earlier; re-assigning would drop the myst/toc suppressions diff --git a/docs/index.rst b/docs/index.rst new file mode 100644 index 0000000000..da965af497 --- /dev/null +++ b/docs/index.rst @@ -0,0 +1,38 @@ +SageMaker Python SDK V3 +========================= + +.. include:: overview.rst + +.. toctree:: + :maxdepth: 2 + :caption: Getting Started + :hidden: + + overview + installation + quickstart + +.. toctree:: + :maxdepth: 2 + :caption: V3 Capabilities + :hidden: + + model_customization/index + training/index + inference/index + ml_ops/index + sagemaker_core/index + +.. toctree:: + :maxdepth: 2 + :caption: Release + :hidden: + + releasenote + +.. 
toctree:: + :maxdepth: 2 + :caption: API Reference + :hidden: + + api/index diff --git a/docs/inference/index.rst b/docs/inference/index.rst new file mode 100644 index 0000000000..67f6978213 --- /dev/null +++ b/docs/inference/index.rst @@ -0,0 +1,197 @@ +Deploy Models for Inference +============================ + +SageMaker Python SDK V3 transforms model deployment and inference with the unified **ModelBuilder** class, replacing the complex framework-specific model classes from V2. This modern approach provides a consistent interface for all inference scenarios while maintaining the flexibility and performance you need. + +Key Benefits of V3 Inference +---------------------------- + +* **Unified Interface**: Single ``ModelBuilder`` class replaces multiple framework-specific model classes +* **Simplified Deployment**: Object-oriented API with intelligent defaults for endpoint configuration +* **Enhanced Performance**: Optimized inference pipelines with automatic scaling and load balancing +* **Multi-Modal Support**: Deploy models for real-time, batch, and serverless inference scenarios + +Quick Start Example +------------------- + +Here's how inference has evolved from V2 to V3: + +**SageMaker Python SDK V2:** + +.. code-block:: python + + from sagemaker.model import Model + from sagemaker.predictor import Predictor + + model = Model( + image_uri="my-inference-image", + model_data="s3://my-bucket/model.tar.gz", + role="arn:aws:iam::123456789012:role/SageMakerRole" + ) + predictor = model.deploy( + initial_instance_count=1, + instance_type="ml.m5.xlarge" + ) + result = predictor.predict(data) + +**SageMaker Python SDK V3:** + +.. 
code-block:: python + + from sagemaker.serve import ModelBuilder + + model_builder = ModelBuilder( + model="my-model", + model_path="s3://my-bucket/model.tar.gz" + ) + + model = model_builder.build(model_name="my-deployed-model") + + endpoint = model_builder.deploy( + endpoint_name="my-endpoint", + instance_type="ml.m5.xlarge", + initial_instance_count=1 + ) + + result = endpoint.invoke( + body=data, + content_type="application/json" + ) + +ModelBuilder Overview +--------------------- + +The ``ModelBuilder`` class is the cornerstone of SageMaker Python SDK V3 inference, providing a unified interface for all deployment scenarios. This single class replaces the complex web of framework-specific model classes from V2, offering: + +**Unified Deployment Interface** + One class handles PyTorch, TensorFlow, Scikit-learn, XGBoost, HuggingFace, and custom containers + +**Intelligent Optimization** + Automatically optimizes model serving configuration based on your model characteristics + +**Flexible Deployment Options** + Support for real-time endpoints, batch transform, and serverless inference + +**Seamless Integration** + Works seamlessly with SageMaker features like auto-scaling, multi-model endpoints, and A/B testing + +.. 
code-block:: python + + from sagemaker.serve import ModelBuilder + + model_builder = ModelBuilder( + model="your-model", + model_path="s3://your-bucket/model-artifacts", + role="your-sagemaker-role" + ) + + model = model_builder.build(model_name="my-model") + + endpoint = model_builder.deploy( + endpoint_name="my-endpoint", + instance_type="ml.m5.xlarge", + initial_instance_count=1 + ) + + response = endpoint.invoke( + body={"inputs": "your-input-data"}, + content_type="application/json" + ) + +Inference Capabilities +---------------------- + +Model Optimization Support +~~~~~~~~~~~~~~~~~~~~~~~~~~ + +V3 introduces powerful model optimization capabilities for enhanced performance: + +* **SageMaker Neo** - Optimize models for specific hardware targets +* **TensorRT Integration** - Accelerate deep learning inference on NVIDIA GPUs +* **ONNX Runtime** - Cross-platform model optimization and acceleration +* **Quantization Support** - Reduce model size and improve inference speed + +**Model Optimization Example:** + +.. 
code-block:: python + + from sagemaker.serve import ModelBuilder + + # Create ModelBuilder with optimization settings + model_builder = ModelBuilder( + model="huggingface-bert-base", + role="your-sagemaker-role" + ) + + # Build and deploy with optimization + model = model_builder.build(model_name="optimized-bert") + endpoint = model_builder.deploy( + endpoint_name="bert-endpoint", + instance_type="ml.inf1.xlarge", + initial_instance_count=1 + ) + +Key Inference Features +~~~~~~~~~~~~~~~~~~~~~~ + +* **Multi-Model Endpoints** - Host multiple models on a single endpoint with automatic model loading and unloading for cost optimization +* **Auto-Scaling Integration** - Automatically scale endpoint capacity based on traffic patterns with configurable scaling policies +* **A/B Testing Support** - Deploy multiple model variants with traffic splitting for safe model updates and performance comparison +* **Batch Transform Jobs** - Process large datasets efficiently with automatic data partitioning and parallel processing +* **Serverless Inference** - Pay-per-request pricing with automatic scaling from zero to handle variable workloads + +Supported Inference Scenarios +----------------------------- + +Deployment Types +~~~~~~~~~~~~~~~~ + +* **Real-Time Endpoints** - Low-latency inference for interactive applications +* **Batch Transform** - High-throughput processing for large datasets +* **Serverless Inference** - Cost-effective inference for variable workloads +* **Multi-Model Endpoints** - Host multiple models on shared infrastructure + +Framework Support +~~~~~~~~~~~~~~~~~ + +* **PyTorch** - Deep learning models with dynamic computation graphs +* **TensorFlow** - Production-ready machine learning models at scale +* **Scikit-learn** - Classical machine learning algorithms +* **XGBoost** - Gradient boosting models for structured data +* **HuggingFace** - Pre-trained transformer models for NLP tasks +* **Custom Containers** - Bring your own inference logic and dependencies + 
+Advanced Features +~~~~~~~~~~~~~~~~~ + +* **Model Monitoring** - Track model performance and data drift in production +* **Endpoint Security** - VPC support, encryption, and IAM-based access control +* **Multi-AZ Deployment** - High availability with automatic failover +* **Custom Inference Logic** - Implement preprocessing, postprocessing, and custom prediction logic + +Migration from V2 +------------------ + +If you're migrating from V2, the key changes are: + +* Replace framework-specific model classes (PyTorchModel, TensorFlowModel, etc.) with ``ModelBuilder`` +* Use structured configuration objects instead of parameter dictionaries +* Leverage the new ``invoke()`` method instead of ``predict()`` for a more consistent API +* Take advantage of built-in optimization and auto-scaling features + +Inference Examples +------------------ + +Explore comprehensive inference examples that demonstrate V3 capabilities: + +.. toctree:: + :maxdepth: 1 + + Custom InferenceSpec <../v3-examples/inference-examples/inference-spec-example> + ModelBuilder with JumpStart models <../v3-examples/inference-examples/jumpstart-example> + Optimize a JumpStart model <../v3-examples/inference-examples/optimize-example> + Train-to-Inference E2E <../v3-examples/inference-examples/train-inference-e2e-example> + JumpStart E2E <../v3-examples/inference-examples/jumpstart-e2e-training-example> + Local Container Mode <../v3-examples/inference-examples/local-mode-example> + Deploy HuggingFace Models <../v3-examples/inference-examples/huggingface-example> + ModelBuilder in In-Process mode <../v3-examples/inference-examples/in-process-mode-example> diff --git a/docs/installation.rst b/docs/installation.rst new file mode 100644 index 0000000000..446fc57ef1 --- /dev/null +++ b/docs/installation.rst @@ -0,0 +1,150 @@ +Installation +============ + +This guide covers how to install SageMaker Python SDK V3 and set up your development environment. 
+ +Quick Installation +------------------ + +Install the latest version of SageMaker Python SDK V3: + +.. code-block:: bash + + pip install sagemaker + +Prerequisites +--------------- + +**Python Version** + SageMaker Python SDK V3 supports Python 3.9, 3.10, 3.11, and 3.12 + +**Operating Systems** + - Linux + - macOS + +Installation Methods +---------------------- + +Standard Installation +~~~~~~~~~~~~~~~~~~~~~ + +Install the complete SageMaker Python SDK V3: + +.. code-block:: bash + + pip install sagemaker + +Modular Installation +~~~~~~~~~~~~~~~~~~~~ + +Install specific components based on your needs: + +.. code-block:: bash + + # Core functionality only + pip install sagemaker-core + + # Training capabilities + pip install sagemaker-train + + # Inference capabilities + pip install sagemaker-serve + + # ML Operations + pip install sagemaker-mlops + +Virtual Environment (Recommended) +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Create an isolated environment for your SageMaker projects: + +.. code-block:: bash + + # Using venv + python -m venv sagemaker-v3-env + source sagemaker-v3-env/bin/activate + pip install sagemaker + + # Using conda + conda create -n sagemaker-v3 python=3.10 + conda activate sagemaker-v3 + pip install sagemaker + +Development Installation +~~~~~~~~~~~~~~~~~~~~~~~~ + +Install the SageMaker Python SDK directly from source when you want to: + +- develop locally, +- test your own modifications, or +- work with the latest unreleased features. + +.. code-block:: bash + + git clone https://github.com/aws/sagemaker-python-sdk.git + cd sagemaker-python-sdk + pip install -e . + +This installs the top-level sagemaker package in editable mode, so any changes you make to its code are picked up immediately without needing to reinstall. 
+ +**Working with SDK Submodules** + +The repository contains additional installable components such as: + +- sagemaker-core +- sagemaker-train +- sagemaker-serve +- sagemaker-mlops + +If you plan to modify code inside one of these packages, install that submodule in editable mode as well: + +.. code-block:: bash + + cd sagemaker-core + pip install -e . + +Repeat for any other submodule you are actively developing. + +Editable installations operate at the Python package level, not the Git repository level. Installing the relevant submodule ensures your local changes are reflected during development. + +Verification +------------ + +Verify your installation: + +.. code-block:: python + + from sagemaker.core.helper.session_helper import Session + from importlib.metadata import version + print(f"SageMaker SDK version: {version('sagemaker')}") + + session = Session() + print(f"Default bucket: {session.default_bucket()}") + print(f"Region: {session.boto_region_name}") + +Upgrading from V2 +----------------- + +If you have SageMaker Python SDK V2 installed: + +.. code-block:: bash + + # Upgrade to V3 + pip install --upgrade sagemaker + + # Or install V3 in a new environment (recommended) + python -m venv sagemaker-v3-env + source sagemaker-v3-env/bin/activate + pip install sagemaker + +**Note:** V3 introduces breaking changes. See the :doc:`overview` page for migration guidance. + +Next Steps +---------- + +After installation: + +1. **Configure AWS credentials** if you haven't already +2. **Read the** :doc:`overview` **to understand V3 changes** +3. **Try the** :doc:`quickstart` **guide** +4. 
**Explore** :doc:`training/index`, :doc:`inference/index`, and other capabilities diff --git a/docs/ml_ops/index.rst b/docs/ml_ops/index.rst new file mode 100644 index 0000000000..9c7e49b025 --- /dev/null +++ b/docs/ml_ops/index.rst @@ -0,0 +1,215 @@ +Implement MLOps +=============== + +SageMaker Python SDK V3 provides comprehensive MLOps capabilities for building, deploying, and managing machine learning workflows at scale. This includes advanced pipeline orchestration, model monitoring, data quality checks, and automated deployment strategies for production ML systems. + +Key Benefits of V3 ML Operations +-------------------------------- + +* **Unified Pipeline Interface**: Streamlined workflow orchestration with intelligent step dependencies +* **Advanced Monitoring**: Built-in model quality, data drift, and bias detection capabilities +* **Automated Governance**: Model registry integration with approval workflows and lineage tracking +* **Production-Ready**: Enterprise-grade features for compliance, security, and scalability + +Quick Start Example +------------------- + +Here's how ML Operations workflows are simplified in V3: + +**Traditional Pipeline Approach:** + +.. code-block:: python + + from sagemaker.workflow.pipeline import Pipeline + from sagemaker.workflow.steps import TrainingStep, ProcessingStep + from sagemaker.sklearn.processing import SKLearnProcessor + + # Complex setup with multiple framework-specific classes + processor = SKLearnProcessor( + framework_version="0.23-1", + role=role, + instance_type="ml.m5.xlarge", + instance_count=1 + ) + + processing_step = ProcessingStep( + name="PreprocessData", + processor=processor, + # ... many configuration parameters + ) + +**SageMaker V3 MLOps Approach:** + +.. 
code-block:: python + + from sagemaker.mlops import Pipeline, ProcessingStep + from sagemaker.mlops.configs import ProcessingConfig + + # Simplified configuration with intelligent defaults + pipeline = Pipeline(name="ml-workflow") + + processing_step = ProcessingStep( + name="preprocess-data", + processing_config=ProcessingConfig( + image_uri="sklearn-processing-image", + instance_type="ml.m5.xlarge" + ), + inputs={"raw_data": "s3://bucket/raw-data"}, + outputs={"processed_data": "s3://bucket/processed-data"} + ) + + pipeline.add_step(processing_step) + +MLOps Pipeline Overview +----------------------- + +SageMaker V3 MLOps provides a unified interface for building and managing end-to-end machine learning workflows: + +**Pipeline Orchestration** + Intelligent step dependencies with automatic resource management and error handling + +**Model Registry Integration** + Seamless model versioning, approval workflows, and deployment automation + +**Quality Monitoring** + Built-in data quality, model performance, and bias detection capabilities + +**Governance and Compliance** + Comprehensive lineage tracking, audit trails, and approval mechanisms + +.. 
code-block:: python + + from sagemaker.mlops import Pipeline, TrainingStep, ModelStep, EndpointStep + from sagemaker.mlops.configs import ModelConfig, EndpointConfig + + # Create comprehensive ML pipeline + pipeline = Pipeline(name="production-ml-pipeline") + + # Training step + training_step = TrainingStep( + name="train-model", + training_config=TrainingConfig( + algorithm_specification={ + "training_image": "your-training-image" + } + ) + ) + + # Model registration step + model_step = ModelStep( + name="register-model", + model_config=ModelConfig( + model_package_group_name="production-models", + approval_status="PendingManualApproval" + ), + depends_on=[training_step] + ) + + # Deployment step + endpoint_step = EndpointStep( + name="deploy-model", + endpoint_config=EndpointConfig( + instance_type="ml.m5.xlarge", + initial_instance_count=1 + ), + depends_on=[model_step] + ) + + pipeline.add_steps([training_step, model_step, endpoint_step]) + +MLOps Capabilities +------------------ + +Advanced Pipeline Features +~~~~~~~~~~~~~~~~~~~~~~~~~~ + +V3 introduces powerful pipeline capabilities for production ML workflows: + +* **Conditional Execution** - Dynamic pipeline paths based on data quality checks and model performance +* **Parallel Processing** - Automatic parallelization of independent pipeline steps for faster execution +* **Resource Optimization** - Intelligent resource allocation and cost optimization across pipeline steps +* **Failure Recovery** - Automatic retry mechanisms and checkpoint-based recovery for robust workflows + +**Advanced Pipeline Example:** + +.. 
code-block:: python + + from sagemaker.mlops import Pipeline, ConditionStep, ParallelStep + from sagemaker.mlops.conditions import ModelAccuracyCondition + + pipeline = Pipeline(name="advanced-ml-pipeline") + + # Conditional model deployment based on accuracy + accuracy_condition = ModelAccuracyCondition( + threshold=0.85, + metric_name="validation:accuracy" + ) + + condition_step = ConditionStep( + name="check-model-quality", + condition=accuracy_condition, + if_steps=[deploy_to_production_step], + else_steps=[retrain_model_step] + ) + + pipeline.add_step(condition_step) + +Key MLOps Features +~~~~~~~~~~~~~~~~~~ + +* **Model Registry Integration** - Centralized model versioning with automated approval workflows and deployment tracking +* **Data Quality Monitoring** - Continuous monitoring of data drift, schema changes, and statistical anomalies in production +* **Model Performance Tracking** - Real-time monitoring of model accuracy, latency, and business metrics with alerting +* **Bias Detection and Fairness** - Built-in bias detection across protected attributes with automated reporting and remediation +* **Automated Retraining** - Trigger-based model retraining based on performance degradation or data drift detection + +Supported MLOps Scenarios +------------------------- + +Pipeline Types +~~~~~~~~~~~~~~ + +* **Training Pipelines** - End-to-end model training with data preprocessing, feature engineering, and validation +* **Inference Pipelines** - Real-time and batch inference workflows with preprocessing and postprocessing +* **Data Processing Pipelines** - ETL workflows for feature engineering, data validation, and preparation +* **Model Deployment Pipelines** - Automated deployment with A/B testing, canary releases, and rollback capabilities + +Monitoring and Governance +~~~~~~~~~~~~~~~~~~~~~~~~~ + +* **Model Monitoring** - Continuous tracking of model performance, data quality, and operational metrics +* **Compliance Reporting** - Automated generation of 
audit reports for regulatory compliance and governance +* **Lineage Tracking** - Complete data and model lineage from raw data to production predictions +* **Access Control** - Fine-grained permissions and approval workflows for model deployment and updates + +Integration Patterns +~~~~~~~~~~~~~~~~~~~~ + +* **CI/CD Integration** - Seamless integration with GitHub Actions, Jenkins, and other CI/CD platforms +* **Event-Driven Workflows** - Trigger pipelines based on data availability, model performance, or business events +* **Multi-Environment Deployment** - Automated promotion of models across development, staging, and production environments + +Migration from V2 +------------------ + +If you're migrating MLOps workflows from V2, the key improvements are: + +* **Simplified Pipeline Definition**: Unified interface replaces complex framework-specific configurations +* **Enhanced Monitoring**: Built-in model and data quality monitoring replaces custom solutions +* **Improved Governance**: Integrated model registry and approval workflows streamline compliance +* **Better Resource Management**: Automatic resource optimization and cost management across workflows + +ML Operations Examples +---------------------- + +Explore comprehensive MLOps examples that demonstrate V3 capabilities: + +.. 
toctree:: + :maxdepth: 1 + + ../v3-examples/ml-ops-examples/v3-sagemaker-clarify + ../v3-examples/ml-ops-examples/v3-pipeline-train-create-registry + ../v3-examples/ml-ops-examples/v3-transform-job-example + ../v3-examples/ml-ops-examples/v3-hyperparameter-tuning-example/v3-hyperparameter-tuning-example + ../v3-examples/ml-ops-examples/v3-model-registry-example/v3-model-registry-example + ../v3-examples/ml-ops-examples/v3-processing-job-pytorch/v3-pytorch-processing-example diff --git a/docs/model_customization/deployment.rst b/docs/model_customization/deployment.rst new file mode 100644 index 0000000000..662b3436d2 --- /dev/null +++ b/docs/model_customization/deployment.rst @@ -0,0 +1,14 @@ +Model Deployment +================= + +Deploy your fine-tuned models using: + + * **SageMaker Inference Endpoints** - Real-time and batch inference + * **Amazon Bedrock Custom Model Import** - Integrate with Bedrock services + +.. toctree:: + :maxdepth: 1 + :hidden: + + ../../v3-examples/model-customization-examples/bedrock-modelbuilder-deployment + ../../v3-examples/model-customization-examples/model_builder_deployment_notebook \ No newline at end of file diff --git a/docs/model_customization/evaluation.rst b/docs/model_customization/evaluation.rst new file mode 100644 index 0000000000..14814cfcaf --- /dev/null +++ b/docs/model_customization/evaluation.rst @@ -0,0 +1,15 @@ +Model Evaluation Job Submission +================================= + +Launch evaluation jobs with three options: + + * **LLM as a Judge (LLMAJ) Evaluation** - Use large language models to assess model outputs + * **Custom Scorer Evaluation** - Apply previously defined evaluator functions + * **Benchmark Evaluation** - Run standardized performance benchmarks + +.. 
toctree:: + :maxdepth: 1 + + ../../v3-examples/model-customization-examples/llm_as_judge_demo + ../../v3-examples/model-customization-examples/custom_scorer_demo + ../../v3-examples/model-customization-examples/benchmark_demo \ No newline at end of file diff --git a/docs/model_customization/index.rst b/docs/model_customization/index.rst new file mode 100644 index 0000000000..7440a89d3f --- /dev/null +++ b/docs/model_customization/index.rst @@ -0,0 +1,188 @@ +Customizing Models +=================== + +.. raw:: html + +
+ 🆕 V3 EXCLUSIVE FEATURE
+ Model customization with specialized trainers is available only in SageMaker Python SDK V3, built from the ground up for foundation model fine-tuning. + It shortens the complex process of customizing AI models from months to days with a guided UI and serverless infrastructure that removes operational overhead. Whether you are building legal research applications, customer service chatbots, or domain-specific AI agents, this feature accelerates your path from proof-of-concept to production deployment. +
+ +Key Benefits +------------- + +* **Serverless Training**: Fully managed compute infrastructure that abstracts away all infrastructure complexity, allowing you to focus purely on model development +* **Advanced Customization Techniques**: Comprehensive set of methods including supervised fine-tuning (SFT), direct preference optimization (DPO), reinforcement learning with verifiable rewards (RLVR), and reinforcement learning with AI feedback (RLAIF) +* **AI Model Customization Assets**: Integrated datasets and evaluators for training, refining, and evaluating custom models +* **Production Ready**: Built-in evaluation, monitoring, and deployment capabilities with automatic resource management + +Key Concepts +------------ + +**Serverless Training** + A fully managed compute infrastructure that abstracts away all infrastructure complexity, allowing you to focus purely on model development. This includes automatic provisioning of GPU instances (P5, P4de, P4d, G5) based on model size and training requirements, pre-optimized training recipes that incorporate best practices for each customization technique, real-time monitoring with live metrics and logs accessible through the UI, and automatic cleanup of resources after training completion to optimize costs. + +**Model Customization Techniques** + Comprehensive set of advanced methods including supervised fine-tuning (SFT), direct preference optimization (DPO), reinforcement learning with verifiable rewards (RLVR), and reinforcement learning with AI feedback (RLAIF). + +**Logged Model** + A specialized version of a base foundation model that has been adapted to a specific use case by training it on your own data, resulting in an AI model that retains the general capabilities of the original foundation model while adding domain-specific knowledge, terminology, style, or behavior tailored to your requirements. 
+ +**AI Model Customization Assets** + Resources and artifacts used to train, refine, and evaluate custom models during the model customization process. These assets include: + + * **Datasets**: Collections of training examples (prompt-response pairs, domain-specific text, or labeled data) used to fine-tune a foundation model to learn specific behaviors, knowledge, or styles + * **Evaluators**: Mechanisms for assessing and improving model performance through either reward functions (code-based logic that scores model outputs based on specific criteria, used in RLVR training and custom scorer evaluation) or reward prompts (natural language instructions that guide an LLM to judge the quality of model responses, used in RLAIF training and LLM-as-a-judge evaluation) + +Getting Started +--------------- + +Prerequisites and Setup +~~~~~~~~~~~~~~~~~~~~~~~ + +Before you begin, complete the following prerequisites: + +1. **SageMaker AI Domain Setup**: Onboard to a SageMaker AI domain with Studio access. If you don't have permissions to set Studio as the default experience for your domain, contact your administrator. + +2. **AWS CLI Configuration**: Update the AWS CLI and configure your credentials: + + .. code-block:: bash + + # Update AWS CLI + pip install --upgrade awscli + + # Configure credentials + aws configure + +3. **IAM Permissions**: Attach the following AWS managed policies to your execution role: + + * ``AmazonSageMakerFullAccess`` - Full access to SageMaker resources + * ``AmazonSageMakerPipelinesIntegrations`` - For pipeline operations + * ``AmazonSageMakerModelRegistryFullAccess`` - For model registry features + +4. **Additional IAM Permissions**: Add the following inline policy to your SageMaker domain execution role: + + .. 
code-block:: json + + { + "Version": "2012-10-17", + "Statement": [ + { + "Sid": "LambdaCreateDeletePermission", + "Effect": "Allow", + "Action": [ + "lambda:CreateFunction", + "lambda:DeleteFunction", + "lambda:InvokeFunction" + ], + "Resource": [ + "arn:aws:lambda:*:*:function:*SageMaker*", + "arn:aws:lambda:*:*:function:*sagemaker*", + "arn:aws:lambda:*:*:function:*Sagemaker*" + ] + }, + { + "Sid": "BedrockDeploy", + "Effect": "Allow", + "Action": [ + "bedrock:CreateModelImportJob", + "bedrock:GetModelImportJob", + "bedrock:GetImportedModel" + ], + "Resource": ["*"] + }, + { + "Sid": "AIRegistry", + "Effect": "Allow", + "Action": [ + "sagemaker:CreateHub", + "sagemaker:DeleteHub", + "sagemaker:DescribeHub", + "sagemaker:ListHubs", + "sagemaker:ImportHubContent", + "sagemaker:DeleteHubContent", + "sagemaker:UpdateHubContent", + "sagemaker:ListHubContents", + "sagemaker:ListHubContentVersions", + "sagemaker:DescribeHubContent" + ], + "Resource": "*" + } + ] + } + +Creating Assets for Model Customization +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +**Using the SageMaker Python SDK:** + +.. code-block:: python + + from sagemaker.train.common import CustomizationTechnique + from sagemaker.assets import DataSet + + # Create a dataset asset + dataset = DataSet.create( + name="demo-sft-dataset", + data_location="s3://your-bucket/dataset/training_dataset.jsonl", + customization_technique=CustomizationTechnique.SFT, + wait=True + ) + + print(f"Dataset ARN: {dataset.arn}") + +Quick Start Example +------------------- + +**Model Customization via SDK:** + +.. 
code-block:: python + + from sagemaker.train import DPOTrainer + from sagemaker.train.common import TrainingType + + # Submit a DPO model customization job + trainer = DPOTrainer( + model="meta-llama/Llama-2-7b-hf", + training_type=TrainingType.LORA, + model_package_group_name="my-custom-models", + training_dataset="s3://my-bucket/preference-data.jsonl", + s3_output_path="s3://my-bucket/output/", + sagemaker_session=sagemaker_session, + role=role_arn + ) + + # Start training + training_job = trainer.train() + + +Supported Model Types and Use Cases +----------------------------------- + +**Foundation Models** + * Large Language Models (LLaMA, GPT, BERT, T5) + * Conversational AI models and dialogue systems + * Domain-specific models (legal, medical, financial, technical) + * Multimodal models for vision-language understanding + +**Customization Scenarios** + * Task-specific adaptation (summarization, QA, classification) + * Instruction following and multi-step reasoning + * Safety and alignment improvements + * Style and persona customization + +**Advanced Techniques** + * **LoRA (Low-Rank Adaptation)** - Parameter-efficient fine-tuning with minimal memory requirements + * **Full Fine-Tuning** - Complete model parameter updates for maximum customization + * **Preference Learning** - Train models using human feedback and preference data + * **Reinforcement Learning** - Advanced alignment techniques for improved model behavior + +.. 
toctree:: + :maxdepth: 2 + :hidden: + + open_weight_model_customization + nova + + diff --git a/docs/model_customization/model_customization.rst b/docs/model_customization/model_customization.rst new file mode 100644 index 0000000000..79910d2aa0 --- /dev/null +++ b/docs/model_customization/model_customization.rst @@ -0,0 +1,25 @@ +AI Model Customization Job Submission +====================================== + +SageMaker Python SDK V3 provides four specialized trainer classes for different model customization approaches: + +**SFTTrainer (Supervised Fine-Tuning)** + Traditional fine-tuning with labeled datasets for task-specific adaptation + +**DPOTrainer (Direct Preference Optimization)** + Fine-tune models using human preference data without reinforcement learning complexity + +**RLAIFTrainer (Reinforcement Learning from AI Feedback)** + Use AI-generated feedback to improve model behavior and alignment + +**RLVRTrainer (Reinforcement Learning from Verifiable Rewards)** + Fine-tune with verifiable reward signals for objective optimization + +.. toctree:: + :maxdepth: 1 + :hidden: + + SFT Finetuning <../../v3-examples/model-customization-examples/sft_finetuning_example_notebook_pysdk_prod_v3> + DPOTrainer Finetuning <../../v3-examples/model-customization-examples/dpo_trainer_example_notebook_v3_prod> + RLAIF Finetuning <../../v3-examples/model-customization-examples/rlaif_finetuning_example_notebook_v3_prod> + RLVR Finetuning <../../v3-examples/model-customization-examples/rlvr_finetuning_example_notebook_v3_prod> diff --git a/docs/model_customization/nova.rst b/docs/model_customization/nova.rst new file mode 100644 index 0000000000..6ef72bbc77 --- /dev/null +++ b/docs/model_customization/nova.rst @@ -0,0 +1,16 @@ +Amazon Nova Foundation Model +============================== + +Adapt Amazon Nova foundation models to your specific use cases through fine-tuning, prompt engineering, and RAG techniques. 
+ +**What you'll accomplish:** +- Fine-tune Nova models on your domain data +- Deploy customized models for production inference +- Evaluate model performance with your metrics + +**Quick start:** Follow the complete walkthrough in the notebook below. + +.. toctree:: + :maxdepth: 1 + + ../../v3-examples/model-customization-examples/sm-studio-nova-training-job-sample-notebook \ No newline at end of file diff --git a/docs/model_customization/open_weight_model_customization.rst b/docs/model_customization/open_weight_model_customization.rst new file mode 100644 index 0000000000..0560afb7e3 --- /dev/null +++ b/docs/model_customization/open_weight_model_customization.rst @@ -0,0 +1,12 @@ +Open weight model customization +================================ + +This section walks you through the process to get started with open weight model customization. + +.. toctree:: + :maxdepth: 1 + + ../../v3-examples/model-customization-examples/ai_registry_example + model_customization + evaluation + deployment diff --git a/docs/model_customization/registry.rst b/docs/model_customization/registry.rst new file mode 100644 index 0000000000..b418ab28e1 --- /dev/null +++ b/docs/model_customization/registry.rst @@ -0,0 +1,18 @@ +Dataset and Evaluator Assets +============================= + +Manage datasets, evaluators, and model artifacts through SageMaker's AI Registry for organized model development workflows. + +**What you'll accomplish:** +- Register and version training datasets +- Create custom evaluators and reward functions +- Track model lineage and metadata + +**Prerequisites:** SageMaker permissions, prepared datasets, understanding of evaluation metrics + +**Use cases:** Dataset versioning, custom evaluation workflows, model governance and compliance tracking. + +.. toctree:: + :maxdepth: 1 + + diff --git a/docs/overview.rst b/docs/overview.rst new file mode 100644 index 0000000000..9b7d70b6e0 --- /dev/null +++ b/docs/overview.rst @@ -0,0 +1,162 @@ +Overview +======== + +.. 
note:: + SageMaker Python SDK V3 contains breaking changes from V2. See the `V3.0.0 release notes `_ for details, or visit the `V2 documentation `_. + +SageMaker Python SDK v3.0 introduces a modern, modular API for training, fine-tuning, deploying, and managing models on Amazon SageMaker. This release replaces legacy interfaces such as Estimator, Model, and Predictor with unified classes like ModelTrainer and ModelBuilder, reducing boilerplate and simplifying workflows. V3 also introduces comprehensive fine-tuning support with new trainer classes for foundation models. + +What's New in V3 +----------------- + +.. raw:: html + +
+
+

Model Customization (V3 Exclusive)

+

Revolutionary foundation model fine-tuning with specialized trainers:

+
    +
  • SFTTrainer - Supervised fine-tuning for task-specific adaptation
  • +
  • DPOTrainer - Direct preference optimization without RL complexity
  • +
  • RLAIFTrainer - Reinforcement learning from AI feedback
  • +
  • RLVRTrainer - Reinforcement learning from verifiable rewards
  • +
+

These trainers bring advanced techniques such as LoRA, preference optimization, and RLHF that are not available in V2.

+
+ +
+

Modular Architecture

+

Separate PyPI packages for specialized capabilities:

+
    +
  • sagemaker-core - Low-level SageMaker resource management
  • +
  • sagemaker-train - Unified training with ModelTrainer
  • +
  • sagemaker-serve - Simplified inference with ModelBuilder
  • +
  • sagemaker-mlops - ML operations and pipeline management
  • +
+

Single classes replace multiple framework-specific implementations:

+
    +
  • ModelTrainer replaces PyTorchEstimator, TensorFlowEstimator, SKLearnEstimator, etc.
  • +
  • ModelBuilder replaces PyTorchModel, TensorFlowModel, SKLearnModel, etc.
  • +
+
+
+ +Capabilities +============== + +Training with ModelTrainer +--------------------------- + +Unified training interface replacing framework-specific estimators with intelligent defaults and streamlined workflows: + +.. code-block:: python + + from sagemaker.train import ModelTrainer + from sagemaker.train.configs import InputData + + trainer = ModelTrainer( + training_image="your-training-image", + role="your-sagemaker-role" + ) + + train_data = InputData( + channel_name="training", + data_source="s3://your-bucket/train-data" + ) + + training_job = trainer.train(input_data_config=[train_data]) + +:doc:`Learn more about Training ` + +Inference with ModelBuilder +---------------------------- + +Simplified model deployment and inference with the ModelBuilder + +.. code-block:: python + + from sagemaker.serve import ModelBuilder + + model_builder = ModelBuilder( + model="your-model", + model_path="s3://your-bucket/model-artifacts" + ) + + model = model_builder.build(model_name="my-model") + endpoint = model_builder.deploy( + endpoint_name="my-endpoint", + instance_type="ml.m5.xlarge", + initial_instance_count=1 + ) + result = endpoint.invoke( + body={"inputs": "your-input-data"}, + content_type="application/json" + ) + +:doc:`Learn more about Inference ` + +ML Operations +------------- + +Comprehensive MLOps capabilities for building, deploying, and managing machine learning workflows at scale: + +.. code-block:: python + + from sagemaker.mlops import Pipeline, TrainingStep, ModelStep + + pipeline = Pipeline(name="production-ml-pipeline") + + training_step = TrainingStep( + name="train-model", + training_config=TrainingConfig( + algorithm_specification={ + "training_image": "your-training-image" + } + ) + ) + + pipeline.add_step(training_step) + +:doc:`Learn more about ML Operations ` + +SageMaker Core +-------------- + +Low-level, object-oriented access to Amazon SageMaker resources with intelligent defaults and type safety: + +.. 
code-block:: python + + from sagemaker.core.resources import TrainingJob + + training_job = TrainingJob.create( + training_job_name="my-training-job", + role_arn="arn:aws:iam::123456789012:role/SageMakerRole", + input_data_config=[{ + "channel_name": "training", + "data_source": "s3://my-bucket/train" + }] + ) + +:doc:`Learn more about SageMaker Core ` + +Getting Started +=============== + +Installation +------------ + +:doc:`Install SageMaker Python SDK V3 ` to get started + +Migration from V2 +------------------ + +Key changes when migrating from V2: + +* Replace Estimator classes with ``ModelTrainer`` +* Replace Model classes with ``ModelBuilder`` +* Use structured config objects instead of parameter dictionaries + +Next Steps +----------- + +**Get Started**: Follow the :doc:`quickstart` guide for a hands-on introduction diff --git a/docs/quickstart.rst b/docs/quickstart.rst new file mode 100644 index 0000000000..934b915bb3 --- /dev/null +++ b/docs/quickstart.rst @@ -0,0 +1,164 @@ +Quickstart +=========== + +Get started with SageMaker Python SDK V3 in minutes. This guide walks you through the essential steps to train and deploy your first model. + +Prerequisites +------------- + +* Python 3.9+ installed +* AWS account with appropriate permissions +* AWS credentials configured + +Installation +------------ + +Install SageMaker Python SDK V3: + +.. code-block:: bash + + pip install sagemaker + +Basic Setup +----------- + +Import the SDK and create a session: + +.. code-block:: python + + from sagemaker.core.helper.session_helper import Session, get_execution_role + + # Create a SageMaker session + session = Session() + role = get_execution_role() + + print(f"Using role: {role}") + print(f"Default bucket: {session.default_bucket()}") + +Training Your First Model +------------------------- + +Train a custom PyTorch model using the unified ModelTrainer: + +.. 
code-block:: python + + from sagemaker.train import ModelTrainer + from sagemaker.train.configs import SourceCode + + # Create ModelTrainer with custom code + model_trainer = ModelTrainer( + training_image="763104351884.dkr.ecr.us-west-2.amazonaws.com/pytorch-training:1.13.1-cpu-py39", + source_code=SourceCode( + source_dir="./training_code", + entry_script="train.py", + requirements="requirements.txt" + ), + role=role + ) + + # Start training (this will create a training job) + training_job = model_trainer.train() + print(f"Training completed: {training_job.name}") + +Deploying Your Model +-------------------- + +Deploy the trained model using the V3 workflow: build() → deploy() → invoke(): + +.. code-block:: python + + from sagemaker.serve.model_builder import ModelBuilder + from sagemaker.serve.builder.schema_builder import SchemaBuilder + from sagemaker.serve.utils.types import ModelServer + + # Create schema for model input/output + sample_input = [[0.1, 0.2, 0.3, 0.4]] + sample_output = [[0.8, 0.2]] + schema_builder = SchemaBuilder(sample_input, sample_output) + + # Create ModelBuilder from training job + model_builder = ModelBuilder( + model=training_job, # Pass the trained ModelTrainer to use its trained model artifacts + schema_builder=schema_builder, + model_server=ModelServer.TORCHSERVE, + role=role + ) + + # Build the model + model = model_builder.build(model_name="my-pytorch-model") + + # Deploy to endpoint + endpoint = model_builder.deploy( + endpoint_name="my-endpoint", + instance_type="ml.m5.large", + initial_instance_count=1 + ) + + print(f"Endpoint deployed: {endpoint.endpoint_name}") + +Making Predictions +------------------ + +Use your deployed model to make predictions: + +.. 
code-block:: python + + import json + + # Sample tensor data for prediction + test_data = [[0.5, 0.3, 0.2, 0.1]] + + # Make a prediction + result = endpoint.invoke( + body=json.dumps(test_data), + content_type="application/json" + ) + + # Parse the result + prediction = json.loads(result.body.read().decode('utf-8')) + print(f"Prediction: {prediction}") + +Cleanup +------- + +Don't forget to clean up resources to avoid charges: + +.. code-block:: python + + # Delete the endpoint + endpoint.delete() + + print("Endpoint deleted") + +Foundation Model Fine-Tuning +---------------------------- + +Try V3's new foundation model fine-tuning capabilities: + +.. code-block:: python + + from sagemaker.train import SFTTrainer + from sagemaker.train.common import TrainingType + + # Fine-tune a foundation model + sft_trainer = SFTTrainer( + model="huggingface-textgeneration-gpt2", + training_type=TrainingType.LORA, + training_dataset="s3://your-bucket/training-data.jsonl", + role=role + ) + + # Start fine-tuning + fine_tuning_job = sft_trainer.train() + print(f"Fine-tuning job: {fine_tuning_job.name}") + +Next Steps +---------- + +Now that you've completed the quickstart: + +1. **Explore Training**: Learn more about :doc:`training/index` capabilities +2. **Try Inference**: Discover advanced :doc:`inference/index` features +3. **Model Customization**: Experiment with :doc:`model_customization/index` +4. **Build Pipelines**: Create workflows with :doc:`ml_ops/index` +5. 
**Use SageMaker Core**: Access low-level resources with :doc:`sagemaker_core/index` diff --git a/docs/releasenote.rst b/docs/releasenote.rst new file mode 100644 index 0000000000..dcc469dcd9 --- /dev/null +++ b/docs/releasenote.rst @@ -0,0 +1,12 @@ +Release Notes +=============== + +Support plan +------------- +* Major versions: 12 months after next major release + +* Minor versions: 6 months after next minor release + +* Patch versions: No guaranteed support (upgrade to latest patch) + +Latest release notes can be found at: https://github.com/aws/sagemaker-python-sdk/releases \ No newline at end of file diff --git a/docs/requirements.txt b/docs/requirements.txt new file mode 100644 index 0000000000..38471025b4 --- /dev/null +++ b/docs/requirements.txt @@ -0,0 +1,8 @@ +sphinx>=4.0 +sphinx-book-theme +myst-parser +myst-nb +sphinx-copybutton +sphinx-autobuild +nbsphinx +sphinx-design diff --git a/docs/sagemaker-core b/docs/sagemaker-core new file mode 120000 index 0000000000..3f575bb5b1 --- /dev/null +++ b/docs/sagemaker-core @@ -0,0 +1 @@ +../sagemaker-core \ No newline at end of file diff --git a/docs/sagemaker_core/index.rst b/docs/sagemaker_core/index.rst new file mode 100644 index 0000000000..899156eef4 --- /dev/null +++ b/docs/sagemaker_core/index.rst @@ -0,0 +1,200 @@ +SageMaker Core +============== + +SageMaker Core provides low-level, object-oriented access to Amazon SageMaker resources with intelligent defaults and streamlined workflows. This foundational layer offers direct control over SageMaker services while maintaining the simplicity and power you need for advanced use cases. 
+ +Key Benefits of SageMaker Core +------------------------------ + +* **Direct Resource Access**: Low-level control over SageMaker resources with full API coverage +* **Object-Oriented Design**: Intuitive resource abstractions that map directly to AWS APIs +* **Intelligent Defaults**: Automatic configuration of optimal settings based on resource requirements +* **Type Safety**: Strong typing and validation for better development experience + +Quick Start Example +------------------- + +Here's how SageMaker Core simplifies resource management: + +**Traditional Boto3 Approach:** + +.. code-block:: python + + import boto3 + + client = boto3.client('sagemaker') + response = client.create_training_job( + TrainingJobName='my-training-job', + RoleArn='arn:aws:iam::123456789012:role/SageMakerRole', + InputDataConfig=[{ + 'ChannelName': 'training', + 'DataSource': { + 'S3DataSource': { + 'S3DataType': 'S3Prefix', + 'S3Uri': 's3://my-bucket/train', + 'S3DataDistributionType': 'FullyReplicated' + } + } + }], + # ... many more required parameters + ) + +**SageMaker Core Approach:** + +.. 
code-block:: python + + from sagemaker.core.resources import TrainingJob + from sagemaker.core.shapes import TrainingJobConfig + + training_job = TrainingJob.create( + training_job_name="my-training-job", + role_arn="arn:aws:iam::123456789012:role/SageMakerRole", + input_data_config=[{ + "channel_name": "training", + "data_source": "s3://my-bucket/train" + }] + ) + +SageMaker Core Overview +----------------------- + +SageMaker Core serves as the foundation for all SageMaker Python SDK V3 operations, providing direct access to SageMaker resources through an object-oriented interface: + +**Resource Abstractions** + Direct mapping to SageMaker resources like TrainingJob, Model, Endpoint, and ProcessingJob + +**Intelligent Configuration** + Automatically fills in required parameters with sensible defaults while allowing full customization + +**Type-Safe Operations** + Strong typing and validation prevent common configuration errors + +**Seamless Integration** + Works as the foundation layer for higher-level SDK components + +.. 
code-block:: python + + from sagemaker.core.resources import Endpoint, Model + from sagemaker.core.shapes import EndpointConfig + + # Create a model resource + model = Model.create( + model_name="my-model", + primary_container={ + "image": "your-inference-image", + "model_data_url": "s3://your-bucket/model.tar.gz" + }, + execution_role_arn="your-sagemaker-role" + ) + + # Deploy to an endpoint + endpoint = Endpoint.create( + endpoint_name="my-endpoint", + endpoint_config_name="my-config", + model_name=model.model_name + ) + + # Make predictions + response = endpoint.invoke_endpoint( + body=b'{"instances": [1, 2, 3, 4]}', + content_type="application/json" + ) + +Core Capabilities +----------------- + +Resource Management +~~~~~~~~~~~~~~~~~~ + +SageMaker Core provides comprehensive resource management capabilities: + +* **Training Jobs** - Create, monitor, and manage training workloads with full parameter control +* **Models** - Define and register models with custom inference logic and container configurations +* **Endpoints** - Deploy real-time inference endpoints with auto-scaling and monitoring +* **Processing Jobs** - Run data processing and feature engineering workloads at scale + +**Resource Lifecycle Management:** + +.. 
code-block:: python + + from sagemaker.core.resources import ProcessingJob + + # Create processing job + processing_job = ProcessingJob.create( + processing_job_name="data-preprocessing", + app_specification={ + "image_uri": "your-processing-image", + "container_entrypoint": ["python", "preprocess.py"] + }, + processing_inputs=[{ + "input_name": "raw-data", + "s3_input": { + "s3_uri": "s3://your-bucket/raw-data", + "local_path": "/opt/ml/processing/input" + } + }], + processing_outputs=[{ + "output_name": "processed-data", + "s3_output": { + "s3_uri": "s3://your-bucket/processed-data", + "local_path": "/opt/ml/processing/output" + } + }] + ) + +Key Core Features +~~~~~~~~~~~~~~~~ + +* **Direct API Access** - Full coverage of SageMaker APIs with object-oriented abstractions for better usability +* **Intelligent Defaults** - Automatic parameter inference and validation reduces boilerplate while maintaining flexibility +* **Resource Chaining** - Seamlessly connect resources together for complex workflows and dependencies +* **Monitoring Integration** - Built-in support for CloudWatch metrics, logging, and resource status tracking +* **Error Handling** - Comprehensive error handling with detailed feedback for troubleshooting and debugging + +Supported Core Scenarios +------------------------ + +Resource Types +~~~~~~~~~~~~~ + +* **Training Resources** - TrainingJob, HyperParameterTuningJob, AutoMLJob +* **Inference Resources** - Model, EndpointConfig, Endpoint, Transform +* **Processing Resources** - ProcessingJob, FeatureGroup, Pipeline +* **Monitoring Resources** - ModelQualityJobDefinition, DataQualityJobDefinition + +Advanced Features +~~~~~~~~~~~~~~~~ + +* **Batch Operations** - Efficiently manage multiple resources with batch create, update, and delete operations +* **Resource Tagging** - Comprehensive tagging support for cost allocation, governance, and resource organization +* **Cross-Region Support** - Deploy and manage resources across multiple AWS regions 
with unified interface +* **Custom Configurations** - Override any default behavior with custom configurations and parameters + +Integration Patterns +~~~~~~~~~~~~~~~~~~~ + +* **Pipeline Integration** - Use Core resources as building blocks for SageMaker Pipelines +* **Event-Driven Workflows** - Integrate with AWS Lambda and EventBridge for automated workflows +* **Multi-Account Deployments** - Deploy resources across multiple AWS accounts with proper IAM configuration + +Migration from Boto3 +-------------------- + +If you're migrating from direct Boto3 usage, the key benefits are: + +* **Simplified Interface**: Object-oriented resources replace complex dictionary-based API calls +* **Intelligent Defaults**: Automatic parameter inference reduces configuration overhead +* **Type Safety**: Strong typing prevents common configuration errors +* **Better Error Messages**: More descriptive error handling and validation feedback + +SageMaker Core Examples +---------------------- + +Explore comprehensive SageMaker Core examples: + +.. toctree:: + :maxdepth: 1 + + ../sagemaker-core/example_notebooks/get_started + ../sagemaker-core/example_notebooks/sagemaker_core_overview + ../sagemaker-core/example_notebooks/intelligent_defaults_and_logging diff --git a/docs/training/index.rst b/docs/training/index.rst new file mode 100644 index 0000000000..5ba8bbd7a1 --- /dev/null +++ b/docs/training/index.rst @@ -0,0 +1,142 @@ +Model Training +=============== + +SageMaker Python SDK V3 revolutionizes machine learning training with the unified **ModelTrainer** class, replacing the complex framework-specific estimators from V2. This modern approach provides a consistent interface across all training scenarios while maintaining the power and flexibility you need. 
+ +Key Benefits of V3 Training +--------------------------- + +* **Unified Interface**: Single ``ModelTrainer`` class replaces multiple framework-specific estimators +* **Simplified Configuration**: Object-oriented API with auto-generated configs aligned with AWS APIs +* **Reduced Boilerplate**: Streamlined workflows with intuitive interfaces + +Quick Start Example +------------------- + +Here's how training has evolved from V2 to V3: + +**SageMaker Python SDK V2:** + +.. code-block:: python + + from sagemaker.estimator import Estimator + + estimator = Estimator( + image_uri="my-training-image", + role="arn:aws:iam::123456789012:role/SageMakerRole", + instance_count=1, + instance_type="ml.m5.xlarge", + output_path="s3://my-bucket/output" + ) + estimator.fit({"training": "s3://my-bucket/train"}) + +**SageMaker Python SDK V3:** + +.. code-block:: python + + from sagemaker.train import ModelTrainer + from sagemaker.train.configs import InputData + + trainer = ModelTrainer( + training_image="my-training-image", + role="arn:aws:iam::123456789012:role/SageMakerRole" + ) + + train_data = InputData( + channel_name="training", + data_source="s3://my-bucket/train" + ) + + trainer.train(input_data_config=[train_data]) + +ModelTrainer Overview +--------------------- + +The ``ModelTrainer`` class is the cornerstone of SageMaker Python SDK V3, providing a unified interface for all training scenarios. This single class replaces the complex web of framework-specific estimators from V2, offering: + +**Unified Training Interface** + One class handles PyTorch, TensorFlow, Scikit-learn, XGBoost, and custom containers + +**Intelligent Defaults** + Automatically configures optimal settings based on your training requirements + +**Flexible Configuration** + Object-oriented design with structured configs that align with AWS APIs + +**Seamless Integration** + Works seamlessly with SageMaker features like distributed training, spot instances, and hyperparameter tuning + +.. 
code-block:: python + + from sagemaker.train import ModelTrainer + from sagemaker.train.configs import InputData, ResourceConfig + + # Create trainer with intelligent defaults + trainer = ModelTrainer( + training_image="your-training-image", + role="your-sagemaker-role" + ) + + # Configure training data + train_data = InputData( + channel_name="training", + data_source="s3://your-bucket/train-data" + ) + + # Start training + training_job = trainer.train( + input_data_config=[train_data], + resource_config=ResourceConfig( + instance_type="ml.m5.xlarge", + instance_count=1 + ) + ) + +Framework Support +~~~~~~~~~~~~~~~~~ + +* **PyTorch** - Deep learning with dynamic computation graphs +* **TensorFlow** - Production-ready machine learning at scale +* **Scikit-learn** - Classical machine learning algorithms +* **XGBoost** - Gradient boosting for structured data +* **Custom Containers** - Bring your own training algorithms + +Training Types +~~~~~~~~~~~~~~ + +* **Single Instance Training** - Cost-effective training for smaller models +* **Multi-Instance Training** - Distributed training for large-scale models +* **Spot Instance Training** - Cost optimization with managed spot instances +* **Local Mode Training** - Development and debugging on local infrastructure + +Advanced Features +~~~~~~~~~~~~~~~~~ + +* **Automatic Model Tuning** - Hyperparameter optimization at scale +* **Distributed Training** - Multi-node, multi-GPU training strategies +* **Checkpointing** - Resume training from saved states +* **Early Stopping** - Prevent overfitting with intelligent stopping criteria + +Migration from V2 +------------------ + +If you're migrating from V2, the key changes are: + +* Replace framework-specific estimators (PyTorchEstimator, TensorFlowEstimator, etc.) 
with ``ModelTrainer`` +* Use structured ``InputData`` configs instead of dictionary-based input specifications +* Leverage the new object-oriented API for cleaner, more maintainable code + +Training Examples +----------------- + +Explore comprehensive training examples that demonstrate V3 capabilities: + +.. toctree:: + :maxdepth: 1 + + Local Container mode <../v3-examples/training-examples/local-training-example> + Distributed Local Training <../v3-examples/training-examples/distributed-local-training-example> + Hyperparameter Training <../v3-examples/training-examples/hyperparameter-training-example> + Training with JumpStart Models <../v3-examples/training-examples/jumpstart-training-example> + Custom Distributed Training <../v3-examples/training-examples/custom-distributed-training-example> + AWS Batch for Training <../v3-examples/training-examples/aws_batch/sm-training-queues_getting_started_with_model_trainer> diff --git a/docs/v3-examples b/docs/v3-examples new file mode 120000 index 0000000000..c3785e8a41 --- /dev/null +++ b/docs/v3-examples @@ -0,0 +1 @@ +../v3-examples \ No newline at end of file diff --git a/v3-examples/ml-ops-examples/v3-model-registry-example/v3-model-registry-example.ipynb b/v3-examples/ml-ops-examples/v3-model-registry-example/v3-model-registry-example.ipynb index 14a94432fb..d307261bbf 100644 --- a/v3-examples/ml-ops-examples/v3-model-registry-example/v3-model-registry-example.ipynb +++ b/v3-examples/ml-ops-examples/v3-model-registry-example/v3-model-registry-example.ipynb @@ -1,9 +1,19 @@ { "cells": [ + { + "cell_type": "markdown", + "id": "01b01fe1", + "metadata": {}, + "source": [ + "# Model Registry Operations with XGBoost\n", + "\n", + "Register XGBoost models to SageMaker Model Registry, create models from existing registry entries, and manage model approval workflows." 
+ ] + }, { "cell_type": "code", "execution_count": null, - "id": "01b01fe1", + "id": "743f455f", "metadata": {}, "outputs": [], "source": [ diff --git a/v3-examples/ml-ops-examples/v3-pipeline-train-create-registry.ipynb b/v3-examples/ml-ops-examples/v3-pipeline-train-create-registry.ipynb index 526b38d487..0b46e48550 100644 --- a/v3-examples/ml-ops-examples/v3-pipeline-train-create-registry.ipynb +++ b/v3-examples/ml-ops-examples/v3-pipeline-train-create-registry.ipynb @@ -1,9 +1,18 @@ { "cells": [ + { + "cell_type": "markdown", + "id": "a3df823b-3b2b-4e72-9d0b-2d12f9f6297a", + "metadata": {}, + "source": [ + "# E2E ML Pipeline with Model Registry\n", + "Build a SageMaker Pipeline that processes data, trains a model, and registers it to the Model Registry" + ] + }, { "cell_type": "code", "execution_count": null, - "id": "a3df823b-3b2b-4e72-9d0b-2d12f9f6297a", + "id": "cc01df68", "metadata": {}, "outputs": [], "source": [ diff --git a/v3-examples/ml-ops-examples/v3-processing-job-pytorch/v3-pytorch-processing-example.ipynb b/v3-examples/ml-ops-examples/v3-processing-job-pytorch/v3-pytorch-processing-example.ipynb index a1d87530c1..3f757cd9c5 100644 --- a/v3-examples/ml-ops-examples/v3-processing-job-pytorch/v3-pytorch-processing-example.ipynb +++ b/v3-examples/ml-ops-examples/v3-processing-job-pytorch/v3-pytorch-processing-example.ipynb @@ -1,9 +1,17 @@ { "cells": [ + { + "cell_type": "markdown", + "id": "ec1d0a45", + "metadata": {}, + "source": [ + "# SageMaker V3 PyTorch Processing" + ] + }, { "cell_type": "code", "execution_count": null, - "id": "ec1d0a45", + "id": "6bae3323", "metadata": {}, "outputs": [], "source": [ diff --git a/v3-examples/ml-ops-examples/v3-processing-job-sklearn.ipynb b/v3-examples/ml-ops-examples/v3-processing-job-sklearn.ipynb index f75b5b70fe..9ff552f0e8 100644 --- a/v3-examples/ml-ops-examples/v3-processing-job-sklearn.ipynb +++ b/v3-examples/ml-ops-examples/v3-processing-job-sklearn.ipynb @@ -1,41 +1,21 @@ { "cells": [ { - 
"cell_type": "code", - "execution_count": 1, + "cell_type": "markdown", "id": "af1c5656", "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-core/src/sagemaker/core/shapes/shapes.py:2539: UserWarning: Field name \"json\" in \"MonitoringDatasetFormat\" shadows an attribute in parent \"Base\"\n", - " class MonitoringDatasetFormat(Base):\n" - ] - }, - { - "data": { - "text/html": [ - "
[11/18/25 06:56:43] INFO     Found credentials in shared credentials file: ~/.aws/credentials   credentials.py:1364\n",
-       "
\n" - ], - "text/plain": [ - "\u001b[2;36m[11/18/25 06:56:43]\u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m Found credentials in shared credentials file: ~\u001b[38;2;225;0;225m/.aws/\u001b[0m\u001b[38;2;225;0;225mcredentials\u001b[0m \u001b]8;id=890998;file:///Volumes/workplace/sagemaker-python-sdk-staging/mlops-venv/lib/python3.12/site-packages/botocore/credentials.py\u001b\\\u001b[2mcredentials.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=155466;file:///Volumes/workplace/sagemaker-python-sdk-staging/mlops-venv/lib/python3.12/site-packages/botocore/credentials.py#1364\u001b\\\u001b[2m1364\u001b[0m\u001b]8;;\u001b\\\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "sagemaker.config INFO - Not applying SDK defaults from location: /Library/Application Support/sagemaker/config.yaml\n", - "sagemaker.config INFO - Not applying SDK defaults from location: /Users/zeidmo/Library/Application Support/sagemaker/config.yaml\n" - ] - } - ], + "source": [ + "# Data Preprocessing with Scikit-learn Processing Job\n", + "\n", + "Run a Scikit-learn processing job to preprocess the Abalone dataset into train, validation, and test splits." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "7181fa90", + "metadata": {}, + "outputs": [], "source": [ "from sagemaker.core.processing import (\n", " ScriptProcessor,\n", @@ -54,24 +34,10 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": null, "id": "3942b640", "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
[11/18/25 06:56:45] INFO     Found credentials in shared credentials file: ~/.aws/credentials   credentials.py:1364\n",
-       "
\n" - ], - "text/plain": [ - "\u001b[2;36m[11/18/25 06:56:45]\u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m Found credentials in shared credentials file: ~\u001b[38;2;225;0;225m/.aws/\u001b[0m\u001b[38;2;225;0;225mcredentials\u001b[0m \u001b]8;id=320044;file:///Volumes/workplace/sagemaker-python-sdk-staging/mlops-venv/lib/python3.12/site-packages/botocore/credentials.py\u001b\\\u001b[2mcredentials.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=76749;file:///Volumes/workplace/sagemaker-python-sdk-staging/mlops-venv/lib/python3.12/site-packages/botocore/credentials.py#1364\u001b\\\u001b[2m1364\u001b[0m\u001b]8;;\u001b\\\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], + "outputs": [], "source": [ "\n", "sagemaker_session = Session()\n", @@ -114,7 +80,7 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": null, "id": "7e14a586", "metadata": {}, "outputs": [], @@ -124,18 +90,10 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": null, "id": "8ae3ecaa", "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Overwriting code/preprocess.py\n" - ] - } - ], + "outputs": [], "source": [ "%%writefile code/preprocess.py\n", "\n", @@ -266,26 +224,10 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": null, "id": "d414442a", "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
[11/18/25 06:58:25] INFO     Creating processing-job with name                                    processing.py:598\n",
-       "                             v3-job-1110-sklearn-preprocess-job-2025-11-18-14-58-25-375                            \n",
-       "
\n" - ], - "text/plain": [ - "\u001b[2;36m[11/18/25 06:58:25]\u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m Creating processing-job with name \u001b]8;id=362812;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-core/src/sagemaker/core/processing.py\u001b\\\u001b[2mprocessing.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=120987;file:///Volumes/workplace/sagemaker-python-sdk-staging/sagemaker-core/src/sagemaker/core/processing.py#598\u001b\\\u001b[2m598\u001b[0m\u001b]8;;\u001b\\\n", - "\u001b[2;36m \u001b[0m v3-job-\u001b[1;36m1110\u001b[0m-sklearn-preprocess-job-\u001b[1;36m2025\u001b[0m-11-18-14-58-25-375 \u001b[2m \u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], + "outputs": [], "source": [ "# Process the training data step using a python script.\n", "# Split the training data set into train, test, and validation datasets\n", @@ -353,33 +295,14 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": null, "id": "f8b6425d", "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "'InProgress'" - ] - }, - "execution_count": 9, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "sklearn_processor.latest_job.refresh()\n", "sklearn_processor.latest_job.processing_job_status" ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "b6c621dc", - "metadata": {}, - "outputs": [], - "source": [] } ], "metadata": { diff --git a/v3-examples/ml-ops-examples/v3-sagemaker-clarify.ipynb b/v3-examples/ml-ops-examples/v3-sagemaker-clarify.ipynb index 6feca5fb5c..d0f8eddaf2 100644 --- a/v3-examples/ml-ops-examples/v3-sagemaker-clarify.ipynb +++ b/v3-examples/ml-ops-examples/v3-sagemaker-clarify.ipynb @@ -11,23 +11,9 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
[11/08/25 18:39:19] INFO     Found credentials in shared credentials file: ~/.aws/credentials   credentials.py:1364\n",
-       "
\n" - ], - "text/plain": [ - "\u001b[2;36m[11/08/25 18:39:19]\u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m Found credentials in shared credentials file: ~\u001b[38;2;225;0;225m/.aws/\u001b[0m\u001b[38;2;225;0;225mcredentials\u001b[0m \u001b]8;id=479005;file:///Users/mollyhe/.pyenv/versions/3.12.2/lib/python3.12/site-packages/botocore/credentials.py\u001b\\\u001b[2mcredentials.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=274417;file:///Users/mollyhe/.pyenv/versions/3.12.2/lib/python3.12/site-packages/botocore/credentials.py#1364\u001b\\\u001b[2m1364\u001b[0m\u001b]8;;\u001b\\\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], + "outputs": [], "source": [ "import sys\n", "import pandas as pd\n", @@ -61,25 +47,9 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Dataset shape: (1000, 12)\n", - "Target distribution: target\n", - "1 503\n", - "0 497\n", - "Name: count, dtype: int64\n", - "Gender distribution: gender\n", - "0.0 598\n", - "1.0 402\n", - "Name: count, dtype: int64\n" - ] - } - ], + "outputs": [], "source": [ "# Create synthetic dataset\n", "X, y = make_classification(\n", @@ -113,17 +83,9 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Model accuracy: 0.920\n" - ] - } - ], + "outputs": [], "source": [ "# Split data\n", "X_train, X_test, y_train, y_test = train_test_split(\n", @@ -146,44 +108,9 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
[11/08/25 18:39:30] INFO     Found credentials in shared credentials file: ~/.aws/credentials   credentials.py:1364\n",
-       "
\n" - ], - "text/plain": [ - "\u001b[2;36m[11/08/25 18:39:30]\u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m Found credentials in shared credentials file: ~\u001b[38;2;225;0;225m/.aws/\u001b[0m\u001b[38;2;225;0;225mcredentials\u001b[0m \u001b]8;id=514072;file:///Users/mollyhe/.pyenv/versions/3.12.2/lib/python3.12/site-packages/botocore/credentials.py\u001b\\\u001b[2mcredentials.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=337498;file:///Users/mollyhe/.pyenv/versions/3.12.2/lib/python3.12/site-packages/botocore/credentials.py#1364\u001b\\\u001b[2m1364\u001b[0m\u001b]8;;\u001b\\\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
[11/08/25 18:39:31] INFO     Found credentials in shared credentials file: ~/.aws/credentials   credentials.py:1364\n",
-       "
\n" - ], - "text/plain": [ - "\u001b[2;36m[11/08/25 18:39:31]\u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m Found credentials in shared credentials file: ~\u001b[38;2;225;0;225m/.aws/\u001b[0m\u001b[38;2;225;0;225mcredentials\u001b[0m \u001b]8;id=820880;file:///Users/mollyhe/.pyenv/versions/3.12.2/lib/python3.12/site-packages/botocore/credentials.py\u001b\\\u001b[2mcredentials.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=366626;file:///Users/mollyhe/.pyenv/versions/3.12.2/lib/python3.12/site-packages/botocore/credentials.py#1364\u001b\\\u001b[2m1364\u001b[0m\u001b]8;;\u001b\\\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Data uploaded to: s3://sagemaker-us-west-2-529088288990/clarify-test/data/test_data.csv\n", - "Output will be saved to: s3://sagemaker-us-west-2-529088288990/clarify-test/output\n" - ] - } - ], + "outputs": [], "source": [ "# Setup S3 paths\n", "session = Session()\n", @@ -219,17 +146,9 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Configurations created successfully\n" - ] - } - ], + "outputs": [], "source": [ "# Data configuration\n", "data_config = DataConfig(\n", @@ -266,30 +185,9 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
[11/08/25 18:39:35] INFO     Ignoring unnecessary instance type: None.                            image_uris.py:529\n",
-       "
\n" - ], - "text/plain": [ - "\u001b[2;36m[11/08/25 18:39:35]\u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m Ignoring unnecessary instance type: \u001b[3;38;2;225;0;225mNone\u001b[0m. \u001b]8;id=687743;file:///Users/mollyhe/.pyenv/versions/3.12.2/lib/python3.12/site-packages/sagemaker/utils/image_uris.py\u001b\\\u001b[2mimage_uris.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=190982;file:///Users/mollyhe/.pyenv/versions/3.12.2/lib/python3.12/site-packages/sagemaker/utils/image_uris.py#529\u001b\\\u001b[2m529\u001b[0m\u001b]8;;\u001b\\\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Clarify processor created with role: arn:aws:iam::529088288990:role/Admin\n" - ] - } - ], + "outputs": [], "source": [ "# Create Clarify processor\n", "clarify_processor = SageMakerClarifyProcessor(\n", @@ -311,57 +209,9 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
[11/08/25 18:39:36] INFO     Analysis Config: {'dataset_type': 'text/csv', 'headers': ['feature_0', clarify.py:1992\n",
-       "                             'feature_1', 'feature_2', 'feature_3', 'feature_4', 'feature_5',                      \n",
-       "                             'feature_6', 'feature_7', 'feature_8', 'feature_9', 'gender',                         \n",
-       "                             'target'], 'label': 'target', 'label_values_or_threshold': [1],                       \n",
-       "                             'facet': [{'name_or_index': 'gender', 'value_or_threshold': [1]}],                    \n",
-       "                             'methods': {'report': {'name': 'report', 'title': 'Analysis Report'},                 \n",
-       "                             'pre_training_bias': {'methods': ['CI', 'DPL']}}}                                     \n",
-       "
\n" - ], - "text/plain": [ - "\u001b[2;36m[11/08/25 18:39:36]\u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m Analysis Config: \u001b[1m{\u001b[0m\u001b[38;2;0;135;0m'dataset_type'\u001b[0m: \u001b[38;2;0;135;0m'text/csv'\u001b[0m, \u001b[38;2;0;135;0m'headers'\u001b[0m: \u001b[1m[\u001b[0m\u001b[38;2;0;135;0m'feature_0'\u001b[0m, \u001b]8;id=70589;file:///Users/mollyhe/.pyenv/versions/3.12.2/lib/python3.12/site-packages/sagemaker/utils/clarify.py\u001b\\\u001b[2mclarify.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=487060;file:///Users/mollyhe/.pyenv/versions/3.12.2/lib/python3.12/site-packages/sagemaker/utils/clarify.py#1992\u001b\\\u001b[2m1992\u001b[0m\u001b]8;;\u001b\\\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m'feature_1'\u001b[0m, \u001b[38;2;0;135;0m'feature_2'\u001b[0m, \u001b[38;2;0;135;0m'feature_3'\u001b[0m, \u001b[38;2;0;135;0m'feature_4'\u001b[0m, \u001b[38;2;0;135;0m'feature_5'\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m'feature_6'\u001b[0m, \u001b[38;2;0;135;0m'feature_7'\u001b[0m, \u001b[38;2;0;135;0m'feature_8'\u001b[0m, \u001b[38;2;0;135;0m'feature_9'\u001b[0m, \u001b[38;2;0;135;0m'gender'\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m'target'\u001b[0m\u001b[1m]\u001b[0m, \u001b[38;2;0;135;0m'label'\u001b[0m: \u001b[38;2;0;135;0m'target'\u001b[0m, \u001b[38;2;0;135;0m'label_values_or_threshold'\u001b[0m: \u001b[1m[\u001b[0m\u001b[1;36m1\u001b[0m\u001b[1m]\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m'facet'\u001b[0m: \u001b[1m[\u001b[0m\u001b[1m{\u001b[0m\u001b[38;2;0;135;0m'name_or_index'\u001b[0m: \u001b[38;2;0;135;0m'gender'\u001b[0m, \u001b[38;2;0;135;0m'value_or_threshold'\u001b[0m: \u001b[1m[\u001b[0m\u001b[1;36m1\u001b[0m\u001b[1m]\u001b[0m\u001b[1m}\u001b[0m\u001b[1m]\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m'methods'\u001b[0m: 
\u001b[1m{\u001b[0m\u001b[38;2;0;135;0m'report'\u001b[0m: \u001b[1m{\u001b[0m\u001b[38;2;0;135;0m'name'\u001b[0m: \u001b[38;2;0;135;0m'report'\u001b[0m, \u001b[38;2;0;135;0m'title'\u001b[0m: \u001b[38;2;0;135;0m'Analysis Report'\u001b[0m\u001b[1m}\u001b[0m, \u001b[2m \u001b[0m\n", - "\u001b[2;36m \u001b[0m \u001b[38;2;0;135;0m'pre_training_bias'\u001b[0m: \u001b[1m{\u001b[0m\u001b[38;2;0;135;0m'methods'\u001b[0m: \u001b[1m[\u001b[0m\u001b[38;2;0;135;0m'CI'\u001b[0m, \u001b[38;2;0;135;0m'DPL'\u001b[0m\u001b[1m]\u001b[0m\u001b[1m}\u001b[0m\u001b[1m}\u001b[0m\u001b[1m}\u001b[0m \u001b[2m \u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
                    INFO     Creating processing-job with name                                    processing.py:598\n",
-       "                             Clarify-Pretraining-Bias-2025-11-09-02-39-36-699                                      \n",
-       "
\n" - ], - "text/plain": [ - "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[1;38;2;0;105;255mINFO \u001b[0m Creating processing-job with name \u001b]8;id=100415;file:///Users/mollyhe/.pyenv/versions/3.12.2/lib/python3.12/site-packages/sagemaker/utils/processing.py\u001b\\\u001b[2mprocessing.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=219709;file:///Users/mollyhe/.pyenv/versions/3.12.2/lib/python3.12/site-packages/sagemaker/utils/processing.py#598\u001b\\\u001b[2m598\u001b[0m\u001b]8;;\u001b\\\n", - "\u001b[2;36m \u001b[0m Clarify-Pretraining-Bias-\u001b[1;36m2025\u001b[0m-11-09-02-39-36-699 \u001b[2m \u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "✅ Pre-training bias analysis job submitted successfully\n" - ] - } - ], + "outputs": [], "source": [ "# Run pre-training bias analysis (no model needed)\n", "try:\n", @@ -381,15 +231,7 @@ "cell_type": "code", "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Status: Completed\n" - ] - } - ], + "outputs": [], "source": [ "# You can go to SageMaker AI console -> Processing jobs and check the job status\n", "# Or you can run the below command\n", @@ -408,19 +250,9 @@ }, { "cell_type": "code", - "execution_count": 14, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "✅ Bias analysis config generated successfully\n", - "Config keys: ['dataset_type', 'headers', 'label', 'label_values_or_threshold', 'facet', 'methods']\n", - "✅ All required keys present in config\n" - ] - } - ], + "outputs": [], "source": [ "# Test the internal config generation\n", "from sagemaker.core.clarify import _AnalysisConfigGenerator\n", @@ -458,17 +290,9 @@ }, { "cell_type": "code", - "execution_count": 15, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stdout", - 
"output_type": "stream", - "text": [ - "✅ Schema validation passed\n" - ] - } - ], + "outputs": [], "source": [ "# Test schema validation\n", "from sagemaker.core.clarify import ANALYSIS_CONFIG_SCHEMA_V1_0\n", diff --git a/v3-examples/ml-ops-examples/v3-transform-job-example.ipynb b/v3-examples/ml-ops-examples/v3-transform-job-example.ipynb index fb667dac0f..9720b7c82f 100644 --- a/v3-examples/ml-ops-examples/v3-transform-job-example.ipynb +++ b/v3-examples/ml-ops-examples/v3-transform-job-example.ipynb @@ -1,5 +1,15 @@ { "cells": [ + { + "cell_type": "markdown", + "id": "9fff3165", + "metadata": {}, + "source": [ + "# Batch Transform Job with XGBoost Model\n", + "\n", + "Deploy an XGBoost model and run batch inference using SageMaker Transform Job to generate predictions on validation data." + ] + }, { "cell_type": "code", "execution_count": null, diff --git a/v3-examples/model-customization-examples/ai_registry_example.ipynb b/v3-examples/model-customization-examples/ai_registry_example.ipynb index c33f2ff41f..912ecd8885 100644 --- a/v3-examples/model-customization-examples/ai_registry_example.ipynb +++ b/v3-examples/model-customization-examples/ai_registry_example.ipynb @@ -1,30 +1,38 @@ { "cells": [ { - "cell_type": "code", + "cell_type": "markdown", "id": "initial_id", "metadata": {}, + "source": [ + "# Creating assets for model customization" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "9dccc2c3", + "metadata": {}, + "outputs": [], "source": [ "from rich.pretty import pprint\n", "\n", "from sagemaker.ai_registry.air_constants import REWARD_FUNCTION, REWARD_PROMPT\n", "from sagemaker.ai_registry.dataset import DataSet, CustomizationTechnique\n", "from sagemaker.ai_registry.evaluator import Evaluator" - ], - "outputs": [], - "execution_count": null + ] }, { - "metadata": {}, "cell_type": "code", - "outputs": [], "execution_count": null, + "id": "665a0e71fef89bde", + "metadata": {}, + "outputs": [], "source": [ "# Configure AWS 
credentials and region\n", "#! ada credentials update --provider=isengard --account=<> --role=Admin --profile=default --once\n", "#! aws configure set region us-west-2" - ], - "id": "665a0e71fef89bde" + ] }, { "cell_type": "markdown", @@ -47,8 +55,10 @@ }, { "cell_type": "code", + "execution_count": null, "id": "2234f21780b91625", "metadata": {}, + "outputs": [], "source": [ "\n", "# 1. S3 Data source\n", @@ -58,57 +68,55 @@ " # or use local filepath as source.\n", " # customization_technique=CustomizationTechnique.SFT\n", " )" - ], - "outputs": [], - "execution_count": null + ] }, { "cell_type": "code", + "execution_count": null, "id": "ee2980471f8ae0c0", "metadata": {}, + "outputs": [], "source": [ "# Refreshes status from hub\n", "dataset.refresh()\n", "pprint(dataset.__dict__)" - ], - "outputs": [], - "execution_count": null + ] }, { "cell_type": "code", + "execution_count": null, "id": "30c1b17ad232110b", "metadata": {}, + "outputs": [], "source": [ "versions = dataset.get_versions()\n", "pprint(versions.__dict__)" - ], - "outputs": [], - "execution_count": null + ] }, { "cell_type": "code", + "execution_count": null, "id": "332be046d91fcefc", "metadata": {}, + "outputs": [], "source": [ "# delete specific version\n", "dataset.delete(version=\"0.0.4\")\n", "#dataset.delete(version=\"use a version from versions\")\n", "#pprint(versions)\n", "# specified deleted version should not be part of output" - ], - "outputs": [], - "execution_count": null + ] }, { "cell_type": "code", + "execution_count": null, "id": "510d1a015e7a565c", "metadata": {}, + "outputs": [], "source": [ "# deletes all versions of this dataset by default\n", "dataset.delete()" - ], - "outputs": [], - "execution_count": null + ] }, { "cell_type": "markdown", @@ -120,16 +128,16 @@ }, { "cell_type": "code", + "execution_count": null, "id": "d89a8741dd64f92e", "metadata": {}, + "outputs": [], "source": [ "#Optional max_results argument for pagination or else use default config\n", "datasets = 
DataSet.get_all(max_results=2)\n", "for dataset in datasets:\n", " pprint(dataset)" - ], - "outputs": [], - "execution_count": null + ] }, { "cell_type": "markdown", @@ -141,21 +149,23 @@ }, { "cell_type": "code", + "execution_count": null, "id": "572d4184cf42c7fa", "metadata": {}, + "outputs": [], "source": [ "# Use a dataset from iterator\n", "dataset = next(DataSet.get_all(max_results=2))\n", "for dataset in datasets:\n", " pprint(dataset.__dict__)" - ], - "outputs": [], - "execution_count": null + ] }, { "cell_type": "code", + "execution_count": null, "id": "ae056f626cd7e931", "metadata": {}, + "outputs": [], "source": [ "# Use a dataset by name\n", "dataset = DataSet.get(name=\"sdkv3-gen-ds2\")\n", @@ -163,31 +173,29 @@ "\n", "# We can do CRUD operation on this DataSet\n", "# e.g. dataset.delete()" - ], - "outputs": [], - "execution_count": null + ] }, { "cell_type": "code", + "execution_count": null, "id": "44d7a8150b4b7846", "metadata": {}, + "outputs": [], "source": [ "#Create a new version of this dataset\n", "dataset.create_version(source=\"s3:///datasets/test_ds\")" - ], - "outputs": [], - "execution_count": null + ] }, { "cell_type": "code", + "execution_count": null, "id": "ba3ae7101c5281de", "metadata": {}, + "outputs": [], "source": [ "versions = dataset.get_versions()\n", "pprint(versions)" - ], - "outputs": [], - "execution_count": null + ] }, { "cell_type": "markdown", @@ -199,8 +207,10 @@ }, { "cell_type": "code", + "execution_count": null, "id": "2d0ff33265d2c8dd", "metadata": {}, + "outputs": [], "source": [ "# Method : Lambda\n", "evaluator = Evaluator.create(\n", @@ -209,14 +219,14 @@ " type=REWARD_FUNCTION\n", "\n", ")" - ], - "outputs": [], - "execution_count": null + ] }, { "cell_type": "code", + "execution_count": null, "id": "ab2896e0b68b9384", "metadata": {}, + "outputs": [], "source": [ "# Method : BYOC\n", "\n", @@ -225,14 +235,14 @@ " source=\"/path_to_local/eval_lambda_1.py\",\n", " type = REWARD_FUNCTION\n", ")" - ], - "outputs": 
[], - "execution_count": null + ] }, { "cell_type": "code", + "execution_count": null, "id": "813243a997e3946b", "metadata": {}, + "outputs": [], "source": [ "# Reward Prompt\n", "evaluator = Evaluator.create(\n", @@ -240,101 +250,99 @@ " source=\"/path_to_local/custom_prompt.jinja\",\n", " type = REWARD_PROMPT\n", ")" - ], - "outputs": [], - "execution_count": null + ] }, { "cell_type": "code", + "execution_count": null, "id": "a7aef9b8a54766eb", "metadata": {}, + "outputs": [], "source": [ "# Optional wait, by default we have wait = True during create call.\n", "evaluator.wait()" - ], - "outputs": [], - "execution_count": null + ] }, { "cell_type": "code", + "execution_count": null, "id": "13ff6d34eab34a07", "metadata": {}, + "outputs": [], "source": [ "evaluator.refresh()\n", "pprint(evaluator)" - ], - "outputs": [], - "execution_count": null + ] }, { "cell_type": "code", + "execution_count": null, "id": "345214df-f320-4de0-ba97-860429f1f5bb", "metadata": {}, + "outputs": [], "source": [ "# Optional max_results for pagination\n", "evaluators = Evaluator.get_all(max_results=2)\n", "for evaluator in evaluators:\n", " pprint(evaluator)" - ], - "outputs": [], - "execution_count": null + ] }, { "cell_type": "code", + "execution_count": null, "id": "b0f2cb26d5bb9a08", "metadata": {}, + "outputs": [], "source": [ "# Get evaluators by type\n", "evaluators = Evaluator.get_all(type='RewardPrompt', max_results=2)\n", "for evaluator in evaluators:\n", " pprint(evaluator)" - ], - "outputs": [], - "execution_count": null + ] }, { "cell_type": "code", + "execution_count": null, "id": "1c62ec2f94eb9ac5", "metadata": {}, + "outputs": [], "source": [ "# Get an evaluator by name\n", "evaluator = Evaluator.get(name=\"sdk-new-rf11\")\n", "pprint(evaluator)" - ], - "outputs": [], - "execution_count": null + ] }, { "cell_type": "code", + "execution_count": null, "id": "b1a2154e870e623c", "metadata": {}, + "outputs": [], "source": [ 
"evaluator.create_version(source=evaluator.reference)" - ], - "outputs": [], - "execution_count": null + ] }, { "cell_type": "code", + "execution_count": null, "id": "72faf70127208509", "metadata": {}, + "outputs": [], "source": [ "versions = evaluator.get_versions()\n", "pprint(versions)" - ], - "outputs": [], - "execution_count": null + ] }, { "cell_type": "code", + "execution_count": null, "id": "0dc1107a-126b-4484-9639-07ba5de4ade6", "metadata": {}, + "outputs": [], "source": [ "# delete evaluator, option version argument or delete all versions.\n", "evaluator.delete()" - ], - "outputs": [], - "execution_count": null + ] } ], "metadata": { diff --git a/v3-examples/model-customization-examples/bedrock-modelbuilder-deployment.ipynb b/v3-examples/model-customization-examples/bedrock-modelbuilder-deployment.ipynb index cac62ffc56..013fb8002a 100644 --- a/v3-examples/model-customization-examples/bedrock-modelbuilder-deployment.ipynb +++ b/v3-examples/model-customization-examples/bedrock-modelbuilder-deployment.ipynb @@ -8,10 +8,10 @@ ] }, { - "metadata": {}, "cell_type": "code", - "outputs": [], "execution_count": null, + "metadata": {}, + "outputs": [], "source": [ "# Configure AWS credentials and region\n", "#! 
ada credentials update --provider=isengard --account=<> --role=Admin --profile=default --once\n", @@ -20,7 +20,9 @@ }, { "cell_type": "code", + "execution_count": null, "metadata": {}, + "outputs": [], "source": [ "# Setup\n", "import boto3\n", @@ -29,26 +31,26 @@ "import random\n", "from sagemaker.core.resources import TrainingJob\n", "from sagemaker.serve.bedrock_model_builder import BedrockModelBuilder" - ], - "outputs": [], - "execution_count": null + ] }, { "cell_type": "code", + "execution_count": null, "metadata": {}, + "outputs": [], "source": [ "# Configuration\n", "TRAINING_JOB_NAME = 'meta-textgeneration-llama-3-2-1b-instruct-sft-20251123162832'\n", "ROLE_ARN = \"arn:aws:iam::<>:role/Admin\"\n", "REGION = 'us-west-2'\n", "BUCKET = 'open-models-testing-pdx'" - ], - "outputs": [], - "execution_count": null + ] }, { "cell_type": "code", + "execution_count": null, "metadata": {}, + "outputs": [], "source": [ "# Step 1: Get training job and prepare model path\n", "training_job = TrainingJob.get(training_job_name=TRAINING_JOB_NAME)\n", @@ -58,13 +60,13 @@ "base_s3_path = training_job.model_artifacts.s3_model_artifacts\n", "hf_model_path = base_s3_path.rstrip('/') + '/checkpoints/hf_merged/'\n", "print(f\"Using HF model path: {hf_model_path}\")" - ], - "outputs": [], - "execution_count": null + ] }, { "cell_type": "code", + "execution_count": null, "metadata": {}, + "outputs": [], "source": [ "# Step 2: Verify required files exist\n", "s3_client = boto3.client('s3', region_name=REGION)\n", @@ -79,13 +81,13 @@ " print(f\"✅ {file}\")\n", " except:\n", " print(f\"❌ {file} - MISSING\")" - ], - "outputs": [], - "execution_count": null + ] }, { "cell_type": "code", + "execution_count": null, "metadata": {}, + "outputs": [], "source": [ "# Step 3: Create missing tokenizer files if needed\n", "def ensure_tokenizer_files():\n", @@ -103,13 +105,13 @@ " print(\"✅ Created added_tokens.json\")\n", "\n", "ensure_tokenizer_files()" - ], - "outputs": [], - "execution_count": 
null + ] }, { "cell_type": "code", + "execution_count": null, "metadata": {}, + "outputs": [], "source": [ "# Debug: Check what's actually in the S3 bucket\n", "print(\"Checking S3 structure...\")\n", @@ -159,13 +161,13 @@ " print(\"No files found in hf_merged directory\")\n", "except Exception as e:\n", " print(f\"Error: {e}\")" - ], - "outputs": [], - "execution_count": null + ] }, { "cell_type": "code", + "execution_count": null, "metadata": {}, + "outputs": [], "source": [ "# Step 4: Create Bedrock model builder and deploy\n", "job_name = f\"bedrock-import-{random.randint(1000, 9999)}-{int(time.time())}\"\n", @@ -185,13 +187,13 @@ "\n", "job_arn = deployment_result['jobArn']\n", "print(f\"Import job started: {job_arn}\")" - ], - "outputs": [], - "execution_count": null + ] }, { "cell_type": "code", + "execution_count": null, "metadata": {}, + "outputs": [], "source": [ "# Step 5: Wait for import to complete\n", "bedrock_client = boto3.client('bedrock', region_name=REGION)\n", @@ -214,13 +216,13 @@ " break\n", " \n", " time.sleep(30)" - ], - "outputs": [], - "execution_count": null + ] }, { "cell_type": "code", + "execution_count": null, "metadata": {}, + "outputs": [], "source": [ "# Step 6: Test inference with correct format\n", "if 'imported_model_arn' in locals():\n", @@ -267,31 +269,31 @@ " print(\"❌ Both formats failed. 
Check model documentation for correct format.\")\n", "else:\n", " print(\"❌ Import failed, cannot test inference\")" - ], - "outputs": [], - "execution_count": null + ] }, { "cell_type": "code", + "execution_count": null, "metadata": {}, + "outputs": [], "source": [ "# Optional: List all imported models\n", "models = bedrock_client.list_imported_models()\n", "print(\"\\nAll imported models:\")\n", "for model in models['modelSummaries']:\n", " print(f\"- {model['modelName']}: {model['modelArn']}\")" - ], - "outputs": [], - "execution_count": null + ] }, { + "cell_type": "code", + "execution_count": null, "metadata": { "ExecuteTime": { "end_time": "2025-11-27T01:09:24.972978Z", "start_time": "2025-11-27T01:09:18.454635Z" } }, - "cell_type": "code", + "outputs": [], "source": [ "from pprint import pprint\n", "from sagemaker.core.resources import TrainingJob\n", @@ -300,73 +302,18 @@ "training_job = TrainingJob.get(training_job_name=\"kssharda-sft-lora-lite-2-ui-run-2bn3c-<>8\",\n", " region=\"us-east-1\")\n", "pprint(training_job.model_artifacts.s3_model_artifacts)\n" - ], - "outputs": [ - { - "data": { - "text/plain": [ - "\u001B[2;36m[11/26/25 17:09:22]\u001B[0m\u001B[2;36m \u001B[0m\u001B[1;38;2;0;105;255mINFO \u001B[0m Found credentials in shared credentials file: ~\u001B[38;2;225;0;225m/.aws/\u001B[0m\u001B[38;2;225;0;225mcredentials\u001B[0m \u001B]8;id=147201;file:///Users/nargokul/workspace/sagemaker-python-sdk-staging-1/venv/lib/python3.12/site-packages/botocore/credentials.py\u001B\\\u001B[2mcredentials.py\u001B[0m\u001B]8;;\u001B\\\u001B[2m:\u001B[0m\u001B]8;id=746538;file:///Users/nargokul/workspace/sagemaker-python-sdk-staging-1/venv/lib/python3.12/site-packages/botocore/credentials.py#1392\u001B\\\u001B[2m1392\u001B[0m\u001B]8;;\u001B\\\n" - ], - "text/html": [ - "
[11/26/25 17:09:22] INFO     Found credentials in shared credentials file: ~/.aws/credentials   credentials.py:1392\n",
-       "
\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "sagemaker.config INFO - Not applying SDK defaults from location: /Library/Application Support/sagemaker/config.yaml\n", - "sagemaker.config INFO - Not applying SDK defaults from location: /Users/nargokul/Library/Application Support/sagemaker/config.yaml\n" - ] - }, - { - "data": { - "text/plain": [ - "\u001B[2;36m[11/26/25 17:09:24]\u001B[0m\u001B[2;36m \u001B[0m\u001B[1;38;2;0;105;255mINFO \u001B[0m Runs on sagemaker us-east-\u001B[1;36m1\u001B[0m, region:us-east-\u001B[1;36m1\u001B[0m \u001B]8;id=46858;file:///Users/nargokul/workspace/sagemaker-python-sdk-staging-1/sagemaker-core/src/sagemaker/core/utils/utils.py\u001B\\\u001B[2mutils.py\u001B[0m\u001B]8;;\u001B\\\u001B[2m:\u001B[0m\u001B]8;id=786052;file:///Users/nargokul/workspace/sagemaker-python-sdk-staging-1/sagemaker-core/src/sagemaker/core/utils/utils.py#354\u001B\\\u001B[2m354\u001B[0m\u001B]8;;\u001B\\\n" - ], - "text/html": [ - "
[11/26/25 17:09:24] INFO     Runs on sagemaker us-east-1, region:us-east-1                             utils.py:354\n",
-       "
\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/plain": [ - "\u001B[2;36m \u001B[0m\u001B[2;36m \u001B[0m\u001B[1;38;2;0;105;255mINFO \u001B[0m Found credentials in shared credentials file: ~\u001B[38;2;225;0;225m/.aws/\u001B[0m\u001B[38;2;225;0;225mcredentials\u001B[0m \u001B]8;id=763694;file:///Users/nargokul/workspace/sagemaker-python-sdk-staging-1/venv/lib/python3.12/site-packages/botocore/credentials.py\u001B\\\u001B[2mcredentials.py\u001B[0m\u001B]8;;\u001B\\\u001B[2m:\u001B[0m\u001B]8;id=33577;file:///Users/nargokul/workspace/sagemaker-python-sdk-staging-1/venv/lib/python3.12/site-packages/botocore/credentials.py#1392\u001B\\\u001B[2m1392\u001B[0m\u001B]8;;\u001B\\\n" - ], - "text/html": [ - "
                    INFO     Found credentials in shared credentials file: ~/.aws/credentials   credentials.py:1392\n",
-       "
\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "'s3://nova-studio-output-data/sft/final/kssharda-sft-lora-lite-2-ui-run-2bn3c-<>8/output/model'\n" - ] - } - ], - "execution_count": 1 + ] }, { + "cell_type": "code", + "execution_count": null, "metadata": { "ExecuteTime": { "end_time": "2025-11-27T01:09:30.542741Z", "start_time": "2025-11-27T01:09:28.668735Z" } }, - "cell_type": "code", + "outputs": [], "source": [ "\n", "bedrock_model_builder = BedrockModelBuilder(\n", @@ -376,149 +323,20 @@ "bedrock_model_builder.deploy(job_name = \"nargokul-26-01\",\n", " custom_model_name = \"nargokul-26-01\",\n", " role_arn=\"arn:aws:iam::<>:role/Admin\")" - ], - "outputs": [ - { - "data": { - "text/plain": [ - "\u001B[2;36m[11/26/25 17:09:28]\u001B[0m\u001B[2;36m \u001B[0m\u001B[1;38;2;0;105;255mINFO \u001B[0m Found credentials in shared credentials file: ~\u001B[38;2;225;0;225m/.aws/\u001B[0m\u001B[38;2;225;0;225mcredentials\u001B[0m \u001B]8;id=892830;file:///Users/nargokul/workspace/sagemaker-python-sdk-staging-1/venv/lib/python3.12/site-packages/botocore/credentials.py\u001B\\\u001B[2mcredentials.py\u001B[0m\u001B]8;;\u001B\\\u001B[2m:\u001B[0m\u001B]8;id=908475;file:///Users/nargokul/workspace/sagemaker-python-sdk-staging-1/venv/lib/python3.12/site-packages/botocore/credentials.py#1392\u001B\\\u001B[2m1392\u001B[0m\u001B]8;;\u001B\\\n" - ], - "text/html": [ - "
[11/26/25 17:09:28] INFO     Found credentials in shared credentials file: ~/.aws/credentials   credentials.py:1392\n",
-       "
\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/plain": [ - "\u001B[2;36m[11/26/25 17:09:29]\u001B[0m\u001B[2;36m \u001B[0m\u001B[1;38;2;0;105;255mINFO \u001B[0m S3 artifacts path: \u001B]8;id=340743;file:///Users/nargokul/workspace/sagemaker-python-sdk-staging-1/sagemaker-serve/src/sagemaker/serve/bedrock_model_builder.py\u001B\\\u001B[2mbedrock_model_builder.py\u001B[0m\u001B]8;;\u001B\\\u001B[2m:\u001B[0m\u001B]8;id=618013;file:///Users/nargokul/workspace/sagemaker-python-sdk-staging-1/sagemaker-serve/src/sagemaker/serve/bedrock_model_builder.py#209\u001B\\\u001B[2m209\u001B[0m\u001B]8;;\u001B\\\n", - "\u001B[2;36m \u001B[0m s3:\u001B[38;2;225;0;225m/\u001B[0m\u001B[38;2;225;0;225m/nova-studio-output-data/sft/final/kssharda-sft-lora-\u001B[0m \u001B[2m \u001B[0m\n", - "\u001B[2;36m \u001B[0m \u001B[38;2;225;0;225mlite-2-ui-run-2bn3c-<>8/output/\u001B[0m\u001B[38;2;225;0;225mmodel\u001B[0m \u001B[2m \u001B[0m\n" - ], - "text/html": [ - "
[11/26/25 17:09:29] INFO     S3 artifacts path:                                        bedrock_model_builder.py:209\n",
-       "                             s3://nova-studio-output-data/sft/final/kssharda-sft-lora-                             \n",
-       "                             lite-2-ui-run-2bn3c-<>8/output/model                                        \n",
-       "
\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/plain": [ - "\u001B[2;36m \u001B[0m\u001B[2;36m \u001B[0m\u001B[1;38;2;0;105;255mINFO \u001B[0m Manifest path: \u001B]8;id=541474;file:///Users/nargokul/workspace/sagemaker-python-sdk-staging-1/sagemaker-serve/src/sagemaker/serve/bedrock_model_builder.py\u001B\\\u001B[2mbedrock_model_builder.py\u001B[0m\u001B]8;;\u001B\\\u001B[2m:\u001B[0m\u001B]8;id=80220;file:///Users/nargokul/workspace/sagemaker-python-sdk-staging-1/sagemaker-serve/src/sagemaker/serve/bedrock_model_builder.py#216\u001B\\\u001B[2m216\u001B[0m\u001B]8;;\u001B\\\n", - "\u001B[2;36m \u001B[0m s3:\u001B[38;2;225;0;225m/\u001B[0m\u001B[38;2;225;0;225m/nova-studio-output-data/sft/final/kssharda-sft-lora-\u001B[0m \u001B[2m \u001B[0m\n", - "\u001B[2;36m \u001B[0m \u001B[38;2;225;0;225mlite-2-ui-run-2bn3c-<>8/output/output/\u001B[0m\u001B[38;2;225;0;225mmanifest.\u001B[0m \u001B[2m \u001B[0m\n", - "\u001B[2;36m \u001B[0m \u001B[38;2;225;0;225mjson\u001B[0m \u001B[2m \u001B[0m\n" - ], - "text/html": [ - "
                    INFO     Manifest path:                                            bedrock_model_builder.py:216\n",
-       "                             s3://nova-studio-output-data/sft/final/kssharda-sft-lora-                             \n",
-       "                             lite-2-ui-run-2bn3c-<>8/output/output/manifest.                             \n",
-       "                             json                                                                                  \n",
-       "
\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/plain": [ - "\u001B[2;36m \u001B[0m\u001B[2;36m \u001B[0m\u001B[1;38;2;0;105;255mINFO \u001B[0m Looking for manifest at \u001B]8;id=356570;file:///Users/nargokul/workspace/sagemaker-python-sdk-staging-1/sagemaker-serve/src/sagemaker/serve/bedrock_model_builder.py\u001B\\\u001B[2mbedrock_model_builder.py\u001B[0m\u001B]8;;\u001B\\\u001B[2m:\u001B[0m\u001B]8;id=618595;file:///Users/nargokul/workspace/sagemaker-python-sdk-staging-1/sagemaker-serve/src/sagemaker/serve/bedrock_model_builder.py#223\u001B\\\u001B[2m223\u001B[0m\u001B]8;;\u001B\\\n", - "\u001B[2;36m \u001B[0m s3:\u001B[38;2;225;0;225m/\u001B[0m\u001B[38;2;225;0;225m/nova-studio-output-data/sft/final/kssharda-sft-lora-\u001B[0m \u001B[2m \u001B[0m\n", - "\u001B[2;36m \u001B[0m \u001B[38;2;225;0;225mlite-2-ui-run-2bn3c-<>8/output/output/\u001B[0m\u001B[38;2;225;0;225mmanifest.\u001B[0m \u001B[2m \u001B[0m\n", - "\u001B[2;36m \u001B[0m \u001B[38;2;225;0;225mjson\u001B[0m \u001B[2m \u001B[0m\n" - ], - "text/html": [ - "
                    INFO     Looking for manifest at                                   bedrock_model_builder.py:223\n",
-       "                             s3://nova-studio-output-data/sft/final/kssharda-sft-lora-                             \n",
-       "                             lite-2-ui-run-2bn3c-<>8/output/output/manifest.                             \n",
-       "                             json                                                                                  \n",
-       "
\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/plain": [ - "\u001B[2;36m \u001B[0m\u001B[2;36m \u001B[0m\u001B[1;38;2;0;105;255mINFO \u001B[0m Manifest content: \u001B[1m{\u001B[0m\u001B[38;2;0;135;0m'checkpoint_s3_bucket'\u001B[0m: \u001B]8;id=291479;file:///Users/nargokul/workspace/sagemaker-python-sdk-staging-1/sagemaker-serve/src/sagemaker/serve/bedrock_model_builder.py\u001B\\\u001B[2mbedrock_model_builder.py\u001B[0m\u001B]8;;\u001B\\\u001B[2m:\u001B[0m\u001B]8;id=238165;file:///Users/nargokul/workspace/sagemaker-python-sdk-staging-1/sagemaker-serve/src/sagemaker/serve/bedrock_model_builder.py#229\u001B\\\u001B[2m229\u001B[0m\u001B]8;;\u001B\\\n", - "\u001B[2;36m \u001B[0m \u001B[38;2;0;135;0m's3://customer-escrow-<>-smtj-3ff597fc/kssharda\u001B[0m \u001B[2m \u001B[0m\n", - "\u001B[2;36m \u001B[0m \u001B[38;2;0;135;0m-sft-lora-lite-2-ui-run-2bn3c-<>8/step_4'\u001B[0m, \u001B[2m \u001B[0m\n", - "\u001B[2;36m \u001B[0m \u001B[38;2;0;135;0m'intermediate_checkpoints'\u001B[0m: \u001B[2m \u001B[0m\n", - "\u001B[2;36m \u001B[0m \u001B[1m[\u001B[0m\u001B[38;2;0;135;0m's3://customer-escrow-<>-smtj-3ff597fc/ksshard\u001B[0m \u001B[2m \u001B[0m\n", - "\u001B[2;36m \u001B[0m \u001B[38;2;0;135;0ma-sft-lora-lite-2-ui-run-2bn3c-<>8/step_3'\u001B[0m\u001B[1m]\u001B[0m\u001B[1m}\u001B[0m \u001B[2m \u001B[0m\n" - ], - "text/html": [ - "
                    INFO     Manifest content: {'checkpoint_s3_bucket':                bedrock_model_builder.py:229\n",
-       "                             's3://customer-escrow-<>-smtj-3ff597fc/kssharda                             \n",
-       "                             -sft-lora-lite-2-ui-run-2bn3c-<>8/step_4',                                  \n",
-       "                             'intermediate_checkpoints':                                                           \n",
-       "                             ['s3://customer-escrow-<>-smtj-3ff597fc/ksshard                             \n",
-       "                             a-sft-lora-lite-2-ui-run-2bn3c-<>8/step_3']}                                \n",
-       "
\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/plain": [ - "\u001B[2;36m \u001B[0m\u001B[2;36m \u001B[0m\u001B[1;38;2;0;105;255mINFO \u001B[0m Checkpoint URI: \u001B]8;id=545156;file:///Users/nargokul/workspace/sagemaker-python-sdk-staging-1/sagemaker-serve/src/sagemaker/serve/bedrock_model_builder.py\u001B\\\u001B[2mbedrock_model_builder.py\u001B[0m\u001B]8;;\u001B\\\u001B[2m:\u001B[0m\u001B]8;id=779715;file:///Users/nargokul/workspace/sagemaker-python-sdk-staging-1/sagemaker-serve/src/sagemaker/serve/bedrock_model_builder.py#236\u001B\\\u001B[2m236\u001B[0m\u001B]8;;\u001B\\\n", - "\u001B[2;36m \u001B[0m s3:\u001B[38;2;225;0;225m/\u001B[0m\u001B[38;2;225;0;225m/customer-escrow-<>-smtj-3ff597fc/kssharda-\u001B[0m \u001B[2m \u001B[0m\n", - "\u001B[2;36m \u001B[0m \u001B[38;2;225;0;225msft-lora-lite-2-ui-run-2bn3c-<>8/\u001B[0m\u001B[38;2;225;0;225mstep_4\u001B[0m \u001B[2m \u001B[0m\n" - ], - "text/html": [ - "
                    INFO     Checkpoint URI:                                           bedrock_model_builder.py:236\n",
-       "                             s3://customer-escrow-<>-smtj-3ff597fc/kssharda-                             \n",
-       "                             sft-lora-lite-2-ui-run-2bn3c-<>8/step_4                                     \n",
-       "
\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/plain": [ - "{'ResponseMetadata': {'RequestId': '95bc35c0-0f8e-48cb-95e2-00fb77b17b4d',\n", - " 'HTTPStatusCode': 202,\n", - " 'HTTPHeaders': {'date': 'Thu, 27 Nov 2025 01:09:30 GMT',\n", - " 'content-type': 'application/json',\n", - " 'content-length': '88',\n", - " 'connection': 'keep-alive',\n", - " 'x-amzn-requestid': '95bc35c0-0f8e-48cb-95e2-00fb77b17b4d'},\n", - " 'RetryAttempts': 0},\n", - " 'modelArn': 'arn:aws:bedrock:us-east-1:<>:custom-model/imported/pl4keb8mfank'}" - ] - }, - "execution_count": 2, - "metadata": {}, - "output_type": "execute_result" - } - ], - "execution_count": 2 + ] }, { - "metadata": {}, "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], "source": [ "from sagemaker.ai_registry.dataset import DataSet\n", "\n", "dataset = DataSet.get(name=\"arn:aws:sagemaker:us-east-1:<>:hub-content/MDG6N5CA58D0IJMC1OPJOPIKOS2VPPLP0AM6UBOT9D73B8A34HTG/DataSet/nova-2-0-sft-dataset/1.0.0\")\n", "\n", "pprint(dataset.__dict__)" - ], - "outputs": [], - "execution_count": null + ] } ], "metadata": { diff --git a/v3-examples/model-customization-examples/benchmark_demo.ipynb b/v3-examples/model-customization-examples/benchmark_demo.ipynb index 442d93b690..557e01ec35 100644 --- a/v3-examples/model-customization-examples/benchmark_demo.ipynb +++ b/v3-examples/model-customization-examples/benchmark_demo.ipynb @@ -16,7 +16,7 @@ "## Step 1: Discover Available Benchmarks\n", "\n", "Discover the benchmark properties and available options:\n", - "https://docs.aws.amazon.com/sagemaker/latest/dg/nova-model-evaluation.html" + "[Nova Model Evaluation](https://docs.aws.amazon.com/sagemaker/latest/dg/nova-model-evaluation.html)" ] }, { diff --git a/v3-examples/model-customization-examples/sm-studio-nova-training-job-sample-notebook.ipynb b/v3-examples/model-customization-examples/sm-studio-nova-training-job-sample-notebook.ipynb new file mode 100644 
index 0000000000..20c51e562e --- /dev/null +++ b/v3-examples/model-customization-examples/sm-studio-nova-training-job-sample-notebook.ipynb @@ -0,0 +1,1087 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "babaeb90", + "metadata": { + "editable": true, + "jumpStartAlterations": [ + "novaTrainingJobNotebookHeaderMarkdown" + ], + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "source": [ + "# Model Customization using SageMaker Training Job" + ] + }, + { + "cell_type": "markdown", + "id": "a16fc6c1-c18f-4a06-ae98-36b12ec72ab3", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "source": [ + "This notebook provides an end-to-end walkthrough for creating a SageMaker Training job using a SageMaker Nova model and deploying it for inference." + ] + }, + { + "cell_type": "markdown", + "id": "940f9af2-cb1e-40be-839d-48db014d67f1", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "source": [ + "## Setup and Dependencies" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "84cf410f", + "metadata": {}, + "outputs": [], + "source": [ + "!pip install --upgrade sagemaker --quiet # restart the kernel after running this cell" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "234f7398-fd6b-4d02-a406-0491924c461d", + "metadata": {}, + "outputs": [], + "source": [ + "\n", + "import os\n", + "import json\n", + "import boto3\n", + "from rich.pretty import pprint\n", + "from sagemaker.core.helper.session_helper import Session\n", + "\n", + "REGION = boto3.Session().region_name\n", + "sm_client = boto3.client(\"sagemaker\", region_name=REGION)\n", + "\n", + "# Create SageMaker session\n", + "sagemaker_session = Session(sagemaker_client=sm_client)\n", + "\n", + "print(f\"Region: {REGION}\")\n", + "\n", + "# For MLFlow native metrics in Trainer wait, run the line below with the appropriate region\n", + 
"os.environ[\"SAGEMAKER_MLFLOW_CUSTOM_ENDPOINT\"] = f\"https://mlflow.sagemaker.{REGION}.app.aws\"" + ] + }, + { + "cell_type": "markdown", + "id": "b9bf5959", + "metadata": {}, + "source": [ + "#### Create Training Dataset\n", + "The section below provides sample code to create the training dataset ARN" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "39aaeb1d", + "metadata": {}, + "outputs": [], + "source": [ + "from sagemaker.ai_registry.dataset import DataSet\n", + "from sagemaker.ai_registry.dataset_utils import CustomizationTechnique\n", + "\n", + "# Register dataset in SageMaker AI Registry. This creates a versioned dataset that can be referenced by ARN\n", + "dataset = DataSet.create(\n", + " name=\"demo-sft-dataset\",\n", + " source=\"s3://your-bucket/dataset/training_dataset.jsonl\", # source can be S3 or local path\n", + " #customization_technique=CustomizationTechnique.SFT, # or DPO or RLVR\n", + " # Optional technique name for minimal dataset format check.\n", + " wait=True\n", + ")\n", + "\n", + "print(f\"TRAINING_DATASET ARN: {dataset.arn}\")\n", + "# TRAINING_DATASET = dataset.arn" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ea22bd22", + "metadata": {}, + "outputs": [], + "source": [ + "# Required Configs\n", + "BASE_MODEL = \"\"\n", + "\n", + "# MODEL_PACKAGE_GROUP_NAME is same as CUSTOM_MODEL_NAME\n", + "MODEL_PACKAGE_GROUP_NAME = \"\"\n", + "\n", + "TRAINING_DATASET = \"\"\n", + "\n", + "S3_OUTPUT_PATH = \"\"\n", + "\n", + "ROLE_ARN = \"\"" + ] + }, + { + "cell_type": "markdown", + "id": "259aca67d3d3863b", + "metadata": {}, + "source": [ + "#### Create Model Package Group" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "90a1069d19eeee7", + "metadata": {}, + "outputs": [], + "source": [ + "from sagemaker.core.resources import ModelPackageGroup\n", + "model_package_group = ModelPackageGroup.create(\n", + " model_package_group_name=MODEL_PACKAGE_GROUP_NAME,\n", + " 
model_package_group_description='' # Required Description\n", + ")" + ] + }, + { + "cell_type": "markdown", + "id": "1c931764", + "metadata": {}, + "source": [ + "## Part 1: Fine-tuning\n", + "\n", + "### Step 1: Creating the Trainer" + ] + }, + { + "cell_type": "markdown", + "id": "f23e67f7", + "metadata": { + "editable": true, + "jumpStartAlterations": [ + "trainerSelection" + ], + "slideshow": { + "slide_type": "" + }, + "tags": [], + "trainer_type": "" + }, + "source": [ + "#### Choose one of the following trainer techniques:\n", + "- **Option 1: SFT Trainer (Supervised Fine-Tuning)** \n", + "- **Option 2: RLVR Trainer (Reinforcement Learning with Verifiable Rewards)** \n", + "- **Option 3: DPO Trainer (Direct Preference Optimization)** \n", + "\n", + "**Instructions:** Run only ONE of the trainers, not all of them." + ] + }, + { + "cell_type": "markdown", + "id": "32fd436b", + "metadata": { + "editable": true, + "jumpStartAlterations": [ + "trainerSelection" + ], + "slideshow": { + "slide_type": "" + }, + "tags": [], + "trainer_type": "SFT" + }, + "source": [ + "#### Create SFT Trainer (Supervised Fine-Tuning)\n", + "\n", + "##### Key Parameters:\n", + "* `model`: base_model id on Sagemaker Hubcontent that is available to finetune (or) ModelPackage artifacts\n", + "* `training_type`: Choose from TrainingType Enum(sagemaker.train.common) either LORA or FULL. 
(optional)\n", + "* `model_package_group`: ModelPackage group name or ModelPackageGroup (optional)\n", + "* `mlflow_resource_arn`: MLFlow app ARN to track the training job (optional)\n", + "* `mlflow_experiment_name`: MLFlow app experiment name(str) (optional)\n", + "* `mlflow_run_name`: MLFlow app run name(str) (optional)\n", + "* `training_dataset`: Training Dataset - either Dataset ARN or S3 Path of the dataset (Please note these are required for a training job to run, can be either provided via Trainer or .train()) (optional)\n", + "* `validation_dataset`: Validation Dataset - either Dataset ARN or S3 Path of the dataset (optional)\n", + "* `s3_output_path`: S3 path for the trained model artifacts (optional)\n", + "* `base_job_name` : Unique job name (optional)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "062953d8", + "metadata": { + "editable": true, + "jumpStartAlterations": [ + "trainerSelection" + ], + "slideshow": { + "slide_type": "" + }, + "tags": [], + "trainer_type": "SFT" + }, + "outputs": [], + "source": [ + "from sagemaker.train.sft_trainer import SFTTrainer\n", + "from sagemaker.train.common import TrainingType\n", + "\n", + "trainer = SFTTrainer(\n", + " model=BASE_MODEL,\n", + " training_type=TrainingType.LORA,\n", + " model_package_group=model_package_group,\n", + " training_dataset=TRAINING_DATASET,\n", + " s3_output_path=S3_OUTPUT_PATH,\n", + " sagemaker_session=sagemaker_session,\n", + " role=ROLE_ARN\n", + ")" + ] + }, + { + "cell_type": "markdown", + "id": "cd93226c", + "metadata": { + "editable": true, + "jumpStartAlterations": [ + "trainerSelection" + ], + "slideshow": { + "slide_type": "" + }, + "tags": [], + "trainer_type": "" + }, + "source": [ + "### OR" + ] + }, + { + "cell_type": "markdown", + "id": "1b5603ee", + "metadata": { + "editable": true, + "jumpStartAlterations": [ + "trainerSelection" + ], + "slideshow": { + "slide_type": "" + }, + "tags": [], + "trainer_type": "RLVR" + }, + "source": [ + "#### 
Create RLVRTrainer (Reinforcement Learning with Verifiable Rewards)\n", + "\n", + "##### Key Parameters:\n", + "* `model`: base_model id on Sagemaker Hubcontent that is available to finetune (or) ModelPackage artifacts\n", + "* `custom_reward_function`: Custom reward function/Evaluator ARN (optional)\n", + "* `model_package_group`: ModelPackage group name or ModelPackageGroup (optional)\n", + "* `mlflow_resource_arn`: MLFlow app ARN to track the training job (optional)\n", + "* `mlflow_experiment_name`: MLFlow app experiment name(str) (optional)\n", + "* `mlflow_run_name`: MLFlow app run name(str) (optional)\n", + "* `training_dataset`: Training Dataset - either Dataset ARN or S3 Path of the dataset (Please note these are required for a training job to run, can be either provided via Trainer or .train()) (optional)\n", + "* `validation_dataset`: Validation Dataset - either Dataset ARN or S3 Path of the dataset (optional)\n", + "* `s3_output_path`: S3 path for the trained model artifacts (optional)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "5aa51a5f", + "metadata": { + "editable": true, + "jumpStartAlterations": [ + "trainerSelection" + ], + "slideshow": { + "slide_type": "" + }, + "tags": [], + "trainer_type": "RLVR" + }, + "outputs": [], + "source": [ + "from sagemaker.train.rlvr_trainer import RLVRTrainer\n", + "\n", + "trainer = RLVRTrainer(\n", + " model=BASE_MODEL,\n", + " model_package_group=model_package_group,\n", + " training_dataset=TRAINING_DATASET,\n", + " s3_output_path=S3_OUTPUT_PATH,\n", + " sagemaker_session=sagemaker_session,\n", + " role=ROLE_ARN\n", + ")" + ] + }, + { + "cell_type": "markdown", + "id": "a61dbe9f", + "metadata": { + "editable": true, + "jumpStartAlterations": [ + "trainerSelection" + ], + "slideshow": { + "slide_type": "" + }, + "tags": [], + "trainer_type": "" + }, + "source": [ + "### OR" + ] + }, + { + "cell_type": "markdown", + "id": "88f8bfde", + "metadata": { + "editable": true, + 
"jumpStartAlterations": [ + "trainerSelection" + ], + "slideshow": { + "slide_type": "" + }, + "tags": [], + "trainer_type": "DPO" + }, + "source": [ + "#### Create DPO Trainer (Direct Preference Optimization)\n", + "\n", + "Direct Preference Optimization (DPO) is a method for training language models to follow human preferences. Unlike traditional RLHF (Reinforcement Learning from Human Feedback), DPO directly optimizes the model using preference pairs without needing a reward model.\n", + "\n", + "##### Key Parameters:\n", + "- `model` Base model to fine-tune (from SageMaker Hub)\n", + "- `training_type` Fine-tuning method (LoRA recommended for efficiency)\n", + "- `training_dataset` ARN of the registered preference dataset\n", + "- `model_package_group` Where to store the fine-tuned model\n", + "- `mlflow_resource_arn` MLflow tracking server for experiment logging " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b74c57f8", + "metadata": { + "editable": true, + "jumpStartAlterations": [ + "trainerSelection" + ], + "slideshow": { + "slide_type": "" + }, + "tags": [], + "trainer_type": "DPO" + }, + "outputs": [], + "source": [ + "from sagemaker.train.dpo_trainer import DPOTrainer\n", + "from sagemaker.train.common import TrainingType\n", + "\n", + "trainer = DPOTrainer(\n", + " model=BASE_MODEL,\n", + " training_type=TrainingType.LORA,\n", + " model_package_group=model_package_group,\n", + " training_dataset=TRAINING_DATASET,\n", + " s3_output_path=S3_OUTPUT_PATH,\n", + " sagemaker_session=sagemaker_session,\n", + " role=ROLE_ARN\n", + ")" + ] + }, + { + "cell_type": "markdown", + "id": "514186e9", + "metadata": {}, + "source": [ + "### Step 2: Get Finetuning Options and Modify" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2f6eeb5e", + "metadata": {}, + "outputs": [], + "source": [ + "print(\"Default Finetuning Options:\")\n", + "pprint(trainer.hyperparameters.to_dict())\n", + "\n", + "# Modify options like object 
attributes\n", + "trainer.hyperparameters.learning_rate = 0.0002\n", + "\n", + "print(\"\\nModified/User defined Options:\")\n", + "pprint(trainer.hyperparameters.to_dict())" + ] + }, + { + "cell_type": "markdown", + "id": "18f4e5df", + "metadata": {}, + "source": [ + "### Step 3: Start Training" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "31690f41", + "metadata": {}, + "outputs": [], + "source": [ + "training_job = trainer.train(wait=True)\n", + "\n", + "TRAINING_JOB_NAME = training_job.training_job_name\n", + "\n", + "pprint(training_job)" + ] + }, + { + "cell_type": "markdown", + "id": "60b77a45", + "metadata": {}, + "source": [ + "### Step 4: Describe Training job" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "9277fde0", + "metadata": {}, + "outputs": [], + "source": [ + "from sagemaker.core.resources import TrainingJob\n", + "\n", + "response = TrainingJob.get(training_job_name=TRAINING_JOB_NAME)\n", + "pprint(response)" + ] + }, + { + "cell_type": "markdown", + "id": "evaluation-section", + "metadata": {}, + "source": [ + "# Part 2: Model Evaluation\n", + "\n", + "This section demonstrates the basic user-facing flow for creating and managing evaluation jobs" + ] + }, + { + "cell_type": "markdown", + "id": "cleanup-pipeline", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "source": [ + "## Step 1: Create BenchmarkEvaluator\n", + "\n", + "Create a BenchmarkEvaluator instance with the desired benchmark. 
The evaluator will use Jinja2 templates to render a complete pipeline definition.\n", + "\n", + "### Key Parameters:\n", + "- `benchmark`: Benchmark type from the Benchmark enum\n", + "- `model`: Model ARN from SageMaker hub content\n", + "- `s3_output_path`: S3 location for evaluation outputs\n", + "- `mlflow_resource_arn`: MLflow tracking server ARN for experiment tracking (optional)\n", + "- `model_package_group`: Model package group ARN (optional)\n", + "- `source_model_package`: Source model package ARN (optional)\n", + "- `model_artifact`: ARN of model artifact for lineage tracking (auto-inferred from source_model_package) (optional)\n", + "\n", + "**Note:** When you call `evaluate()`, the system will start an evaluation job. The evaluator will:\n", + "1. Build template context with all required parameters\n", + "2. Render the pipeline definition from `DETERMINISTIC_TEMPLATE` using Jinja2\n", + "3. Create or update the pipeline with the rendered definition\n", + "4. Start the pipeline execution with empty parameters (all values pre-substituted) " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "delete-existing-pipeline", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "from sagemaker.train.evaluate import BenchMarkEvaluator\n", + "from sagemaker.train.evaluate import get_benchmarks, get_benchmark_properties\n", + "from rich.pretty import pprint\n", + "import logging\n", + "logging.basicConfig(\n", + " level=logging.INFO,\n", + " format='%(levelname)s - %(name)s - %(message)s'\n", + ")\n", + "\n", + "# Get available benchmarks\n", + "Benchmark = get_benchmarks()\n", + "pprint(list(Benchmark))\n", + "\n", + "# Print properties for a specific benchmark\n", + "pprint(get_benchmark_properties(benchmark=Benchmark.GEN_QA))\n", + "\n", + "\n", + "# Create evaluator with GEN_QA benchmark\n", + "evaluator = BenchMarkEvaluator(\n", + " benchmark=Benchmark.GEN_QA,\n", + " 
model=BASE_MODEL,\n", + " s3_output_path=S3_OUTPUT_PATH,\n", + ")\n", + "\n", + "pprint(evaluator)" + ] + }, + { + "cell_type": "markdown", + "id": "run-evaluation", + "metadata": {}, + "source": [ + "## Step 2: Run Evaluation" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "start-evaluation", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "# Run evaluation\n", + "execution = evaluator.evaluate()\n", + "\n", + "print(f\"Evaluation job started!\")\n", + "print(f\"Job ARN: {execution.arn}\")\n", + "print(f\"Job Name: {execution.name}\")\n", + "print(f\"Status: {execution.status.overall_status}\")\n", + "\n", + "pprint(execution)" + ] + }, + { + "cell_type": "markdown", + "id": "a3de8255-9f98-444a-99a6-cfe7cc2584af", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "source": [ + "## Step 3: Monitor Execution" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "monitor-evaluation", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "execution.refresh()\n", + "\n", + "print(f\"Current status: {execution.status}\")\n", + "\n", + "# Display individual step statuses\n", + "if execution.status.step_details:\n", + " print(\"\\nStep Details:\")\n", + " for step in execution.status.step_details:\n", + " print(f\" {step.name}: {step.status}\")" + ] + }, + { + "cell_type": "markdown", + "id": "2ebac85a-adee-4f18-935d-478037c7a1f3", + "metadata": {}, + "source": [ + "## Step 4: Wait for Completion\n", + "\n", + "Wait for the pipeline to complete. 
This provides rich progress updates in Jupyter notebooks:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "74b51cca-2024-4276-b05d-48f52e527c06", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "execution.wait(target_status=\"Succeeded\", poll=5, timeout=3600)\n", + "\n", + "print(f\"\\nFinal Status: {execution.status.overall_status}\")" + ] + }, + { + "cell_type": "markdown", + "id": "0d153370-213a-41d0-8a95-f4ffccf8f9aa", + "metadata": {}, + "source": [ + "## Step 5: View Results\n", + "\n", + "Display the evaluation results in a formatted table:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2f470824-7740-48bb-9282-a7b9d0407fff", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "execution.show_results()" + ] + }, + { + "cell_type": "markdown", + "id": "92bda96d-5be7-408f-9b47-ae46772ac03e", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "source": [ + "## Part 3. Deploying the Model to Bedrock for inference\n", + "\n", + "Trained model artifacts and checkpoints are stored in your designated escrow S3 bucket. You can access the training checkpoint location from the `describe_training_job` response.\n", + "\n", + "By calling `create_custom_model` API, you can create your custom model referencing the model artifacts stored in your S3 escrow bucket." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "784304f4-eb4f-48c8-b572-e5a18c5a9929", + "metadata": {}, + "outputs": [], + "source": [ + "import boto3\n", + "import json\n", + "from urllib.parse import urlparse\n", + "\n", + "bedrock_custom_model_name = \"\" # customize as needed\n", + "\n", + "describe_training_response = sm_client.describe_training_job(TrainingJobName=TRAINING_JOB_NAME)\n", + "\n", + "training_output_s3_uri = describe_training_response['OutputDataConfig']['S3OutputPath']\n", + "\n", + "def get_s3_manifest(training_output_s3_uri):\n", + " try:\n", + " s3_client = boto3.client('s3')\n", + " parsed_uri = urlparse(training_output_s3_uri)\n", + " bucket = parsed_uri.netloc\n", + " key = parsed_uri.path.lstrip('/')\n", + " manifest_key = f\"{key.rstrip('/')}/{TRAINING_JOB_NAME}/output/output/manifest.json\"\n", + "\n", + " print(f\"Fetching manifest from s3://{bucket}/{manifest_key}\")\n", + " response = s3_client.get_object(Bucket=bucket, Key=manifest_key)\n", + "\n", + " manifest_content = response['Body'].read().decode('utf-8')\n", + " manifest = json.loads(manifest_content)\n", + " if 'checkpoint_s3_bucket' not in manifest:\n", + " raise ValueError(\"Checkpoint location not found in manifest\")\n", + " print(f\"Successfully retrieved checkpoint S3 URI: {manifest['checkpoint_s3_bucket']}\")\n", + " return manifest['checkpoint_s3_bucket']\n", + " except s3_client.exceptions.NoSuchKey:\n", + " raise FileNotFoundError(f\"Manifest file not found at s3://{bucket}/{manifest_key}\")\n", + " except json.JSONDecodeError as e:\n", + " raise ValueError(f\"Failed to parse manifest JSON: {str(e)}\")\n", + " except Exception as e:\n", + " raise Exception(f\"Error fetching manifest: {str(e)}\")\n", + "\n", + "s3_checkpoint_path = get_s3_manifest(training_output_s3_uri)\n", + "\n", + "\n", + "bedrock_client = boto3.Session().client(service_name=\"bedrock\", region_name=REGION)\n", + "\n", + "\n", + "s3_checkpoint_path = 
describe_training_response[\"CheckpointConfig\"][\"S3Uri\"]\n", + "\n", + "try:\n", + " response = bedrock_client.create_custom_model(\n", + " modelName=bedrock_custom_model_name,\n", + " modelSourceConfig={\"s3DataSource\": {\"s3Uri\": s3_checkpoint_path}},\n", + " roleArn=ROLE_ARN,\n", + " # Optionally, add modelTags here\n", + " )\n", + " print(\"Custom model ARN:\", response[\"modelArn\"])\n", + "except Exception as e:\n", + " print(f\"An unexpected error occurred: {e}\")" + ] + }, + { + "cell_type": "markdown", + "id": "f0dc4211-788c-4e5d-844a-58176ac69cea", + "metadata": {}, + "source": [ + "To monitor the job, use the `get_custom_model` operation to retrieve the job status. Please allow some time for the job to complete as this can take up to 20 minutes." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3795cd13-57fd-44f7-b2e2-9f51f2df74c4", + "metadata": {}, + "outputs": [], + "source": [ + "import time\n", + "while True:\n", + " custom_model_response = bedrock_client.get_custom_model(modelIdentifier=bedrock_custom_model_name)\n", + " model_status = custom_model_response[\"modelStatus\"]\n", + " print(f\"Custom model status: {model_status}\")\n", + " if model_status == \"Active\":\n", + " break\n", + " elif model_status in [\"Failed\"]:\n", + " raise Exception(f\"Custom model creation failed with status: {model_status}\")\n", + " time.sleep(30)\n", + "print(\"Custom model is ACTIVE.\")\n", + "custom_model_response" + ] + }, + { + "cell_type": "markdown", + "id": "c4ecb46f-26ac-463e-b644-c8eb65173ac2", + "metadata": {}, + "source": [ + "After you create a custom model, you can set up inference using one of the following options:\n", + "1. 
**Purchase Provisioned Throughput** – Purchase Provisioned Throughput for your model to set up dedicated compute capacity with guaranteed throughput for consistent performance and lower latency.\n", + "For more information about Provisioned Throughput, see [Increase model invocation capacity with Provisioned Throughput in Amazon Bedrock](https://docs.aws.amazon.com/bedrock/latest/userguide/prov-throughput.html). For more information about using custom models with Provisioned Throughput, see [Purchase Provisioned Throughput for a custom model](https://docs.aws.amazon.com/bedrock/latest/userguide/custom-model-use-pt.html).\n", + "2. **Deploy custom model for on-demand inference (only LoRA fine-tuned Amazon Nova models)** – To set up on-demand inference, you deploy the custom model with a custom model deployment. After you deploy the model, you invoke it using the ARN for the custom model deployment. With on-demand inference, you only pay for what you use and you don't need to set up provisioned compute resources.\n", + "For more information about deploying custom models for on-demand inference, see [Deploy a custom model for on-demand inference](https://docs.aws.amazon.com/bedrock/latest/userguide/deploy-custom-model-on-demand.html)."
+ ] + }, + { + "cell_type": "markdown", + "id": "f7b52193-3624-4485-84c7-86b7f5d0e7fb", + "metadata": {}, + "source": [ + "#### Deploy custom model for inference by using Provisioned Throughput" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "5f0064c4-7302-47de-a125-a0f07d4e13ad", + "metadata": {}, + "outputs": [], + "source": [ + "provisioned_model_name = \"test-provisioned-model\"\n", + "custom_model_id = custom_model_response[\"modelArn\"]\n", + "\n", + "try:\n", + " response = bedrock_client.create_provisioned_model_throughput(\n", + " modelId=custom_model_id, provisionedModelName=provisioned_model_name, modelUnits=1\n", + " )\n", + " provisioned_model_arn = response[\"provisionedModelArn\"]\n", + " print(\"Provisioned model ARN:\", provisioned_model_arn)\n", + "except Exception as e:\n", + " print(f\"An unexpected error occurred: {e}\")" + ] + }, + { + "cell_type": "markdown", + "id": "ef1671ec-81d7-4fc7-89a5-7ee5a2e8b67b", + "metadata": {}, + "source": [ + "Wait for provisioned model to become ACTIVE" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "efef8325-fade-49ef-a2bd-ec0215a56f25", + "metadata": {}, + "outputs": [], + "source": [ + "while True:\n", + " response = bedrock_client.get_provisioned_model_throughput(\n", + " provisionedModelId=provisioned_model_arn\n", + " )\n", + " model_status = response[\"status\"]\n", + " print(f\"Provisioned model status: {model_status}\")\n", + " if model_status == \"InService\":\n", + " break\n", + " elif model_status in [\"Failed\"]:\n", + " raise Exception(f\"Provisioned model failed with status: {model_status}\")\n", + " time.sleep(30)\n", + "print(\"Provisioned model is in service.\")\n", + "response" + ] + }, + { + "cell_type": "markdown", + "id": "68a1443e-ef01-4ee1-9f8d-10e9ec3a55a3", + "metadata": {}, + "source": [ + "Finally, you can invoke the model like any other Bedrock-hosted model using the invoke-model API" + ] + }, + { + "cell_type": "code", + 
"execution_count": null, + "id": "80f8ecee-9a5f-46a5-97dc-72a2e6a7c693", + "metadata": {}, + "outputs": [], + "source": [ + "# Invoke model (Inference)\n", + "bedrock_runtime = boto3.client(\"bedrock-runtime\", region_name=REGION)\n", + "\n", + "request_body = {\n", + " \"inferenceConfig\": {\"max_new_tokens\": 1000, \"temperature\": 0.7, \"top_p\": 0.9},\n", + " \"messages\": [\n", + " {\n", + " \"role\": \"user\",\n", + " \"content\": [\n", + " {\"text\": \"Tell me about Amazon Bedrock in less than 100 words.\"}\n", + " ],\n", + " }\n", + " ],\n", + "}\n", + "\n", + "response = bedrock_runtime.invoke_model(\n", + " modelId=provisioned_model_arn,\n", + " body=json.dumps(request_body),\n", + " contentType=\"application/json\",\n", + " accept=\"application/json\",\n", + ")\n", + "\n", + "response_body = json.loads(response[\"body\"].read())\n", + "print(response_body[\"output\"][\"message\"][\"content\"][0][\"text\"])" + ] + }, + { + "cell_type": "markdown", + "id": "b51edee9-6663-4863-a5f2-c72e9cfe7e9e", + "metadata": {}, + "source": [ + "#### Deploy custom model for On-Demand Inference\n", + "**Important Note:** On-demand inference is currently supported only for LoRA-based fine-tuned models.\n", + "\n", + "Once the custom model has reached Active Status, deploy it for on-demand inference by creating custom model deployment." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2d8beda2-c96d-40e5-ac86-ff2a58eadd40", + "metadata": {}, + "outputs": [], + "source": [ + "model_deployment_name = \"\"\n", + "custom_model_arn=custom_model_response[\"modelArn\"]\n", + "try:\n", + " response = bedrock_client.create_custom_model_deployment(\n", + " modelDeploymentName=model_deployment_name,\n", + " modelArn=custom_model_arn,\n", + " description=\"\",\n", + " tags=[\n", + " {\n", + " \"key\":\"\",\n", + " \"value\":\"\"\n", + " }\n", + " ]\n", + " )\n", + " custom_model_deployment_arn = response[\"customModelDeploymentArn\"]\n", + " print(\"Custom model deployment ARN:\", custom_model_deployment_arn)\n", + "except Exception as e:\n", + " print(f\"An unexpected error occurred: {e}\")\n", + "response" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "863102f7-4e5d-4f64-945d-df5be269b307", + "metadata": {}, + "outputs": [], + "source": [ + "while True:\n", + " response = bedrock_client.get_custom_model_deployment(customModelDeploymentIdentifier=custom_model_deployment_arn)\n", + " model_status = response[\"status\"]\n", + " print(f\"Custom model deployment status: {model_status}\")\n", + " if model_status == \"Active\":\n", + " break\n", + " elif model_status in [\"Failed\"]:\n", + " raise Exception(f\"Custom model deployment failed with status: {model_status}\")\n", + " time.sleep(30)\n", + "print(\"Custom model is ACTIVE.\")\n", + "response" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "79b99c7a-aa7f-409a-a988-9ed618e856e9", + "metadata": {}, + "outputs": [], + "source": [ + "bedrock_runtime = boto3.client(\"bedrock-runtime\", region_name=REGION)\n", + "\n", + "# invoke a deployed custom model using Converse API\n", + "response = bedrock_runtime.converse(\n", + " modelId=custom_model_deployment_arn,\n", + " messages=[\n", + " {\n", + " \"role\": \"user\",\n", + " \"content\": [\n", + " {\n", + " \"text\": \"Tell me about Amazon 
Bedrock in less than 100 words.\",\n", + " }\n", + " ]\n", + " }\n", + " ]\n", + " )\n", + "result = response.get('output')\n", + "print(result)\n", + "\n", + "# invoke a deployed custom model using InvokeModel API\n", + "request_body = {\n", + " \"schemaVersion\": \"messages-v1\",\n", + " \"messages\": [{\"role\": \"user\", \n", + " \"content\": [{\"text\": \"Tell me about Amazon Bedrock in less than 100 words.\"}]}],\n", + " \"system\": [{\"text\": \"What is amazon bedrock?\"}],\n", + " \"inferenceConfig\": {\"maxTokens\": 500, \n", + " \"topP\": 0.9, \n", + " \"temperature\": 0.0\n", + " }\n", + "}\n", + "body = json.dumps(request_body)\n", + "response = bedrock_runtime.invoke_model(\n", + " modelId=custom_model_deployment_arn,\n", + " body=body\n", + " )\n", + "\n", + "# Extract and print the response text\n", + "model_response = json.loads(response[\"body\"].read())\n", + "response_text = model_response[\"output\"][\"message\"][\"content\"][0][\"text\"]\n", + "print(response_text)" + ] + }, + { + "cell_type": "markdown", + "id": "1b80972e-7f59-4357-9b23-74c1d3877342", + "metadata": {}, + "source": [ + "### Cleanup\n", + "Delete the resources that were created to stop incurring charges." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f83840f7-1279-4192-a13f-a05bef8fb3e4", + "metadata": {}, + "outputs": [], + "source": [ + "# Delete provisioned model throughput\n", + "print(f\"Deleting provisioned model throughput: {provisioned_model_arn}\")\n", + "try:\n", + " bedrock_client.delete_provisioned_model_throughput(\n", + " provisionedModelId=provisioned_model_name\n", + " )\n", + " print(\"Provisioned model throughput deleted successfully.\")\n", + "except Exception as e:\n", + " print(f\"Error deleting provisioned throughput: {e}\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "41188290-dc41-4231-95f7-d371aa77fb1c", + "metadata": {}, + "outputs": [], + "source": [ + "# Delete custom model deployment if you have used on-demand inference.\n", + "print(f\"Deleting custom model deployment: {custom_model_deployment_arn}\")\n", + "try:\n", + " bedrock_client.delete_custom_model_deployment(\n", + " customModelDeploymentIdentifier=custom_model_deployment_arn\n", + " )\n", + " print(\"Custom model deployment deleted successfully.\")\n", + "except Exception as e:\n", + " print(f\"Error deleting custom model deployment: {e}\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ff1805d7-14b6-4d6b-a331-1924fbae346b", + "metadata": {}, + "outputs": [], + "source": [ + "# Delete custom model\n", + "print(f\"Deleting custom model: {custom_model_id}\")\n", + "try:\n", + " bedrock_client.delete_custom_model(modelIdentifier=custom_model_id)\n", + " print(\"Custom model deleted successfully.\")\n", + "except Exception as e:\n", + " print(f\"Error deleting custom model: {e}\")" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": 
"python", + "pygments_lexer": "ipython3", + "version": "3.11.11" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +}