From 6a661c09b9f052a0d060563a9bd57bfa3ee78791 Mon Sep 17 00:00:00 2001 From: adishaa Date: Wed, 31 Dec 2025 12:51:08 -0800 Subject: [PATCH 1/3] feat: Add comprehensive V3 documentation with enhanced UI - Add complete documentation structure for SageMaker Python SDK V3 - Implement eye-catching card-based UI for 'What's New' section - Position model customization as flagship V3 feature with prominent highlighting - Add comprehensive content for training, inference, ML ops, and core modules - Include collapsed navigation with model customization emphasis - Set up ReadTheDocs configuration and Sphinx documentation system - Add installation guide, quickstart, and overview pages - Link to staging repository examples and notebooks --- .gitignore | 3 + .readthedocs.yaml | 20 +++ docs/Makefile | 12 ++ docs/_static/custom.css | 150 ++++++++++++++++++++ docs/api/index.rst | 4 + docs/conf.py | 59 ++++++++ docs/index.rst | 31 +++++ docs/inference/index.rst | 186 +++++++++++++++++++++++++ docs/installation.rst | 170 +++++++++++++++++++++++ docs/ml_ops/index.rst | 215 +++++++++++++++++++++++++++++ docs/model_customization/index.rst | 211 ++++++++++++++++++++++++++++ docs/overview.rst | 186 +++++++++++++++++++++++++ docs/quickstart.rst | 163 ++++++++++++++++++++++ docs/requirements.txt | 9 ++ docs/sagemaker-core | 1 + docs/sagemaker_core/index.rst | 200 +++++++++++++++++++++++++++ docs/training/index.rst | 184 ++++++++++++++++++++++++ docs/v3-examples | 1 + 18 files changed, 1805 insertions(+) create mode 100644 .readthedocs.yaml create mode 100644 docs/Makefile create mode 100644 docs/_static/custom.css create mode 100644 docs/api/index.rst create mode 100644 docs/conf.py create mode 100644 docs/index.rst create mode 100644 docs/inference/index.rst create mode 100644 docs/installation.rst create mode 100644 docs/ml_ops/index.rst create mode 100644 docs/model_customization/index.rst create mode 100644 docs/overview.rst create mode 100644 docs/quickstart.rst create mode 100644 
docs/requirements.txt create mode 120000 docs/sagemaker-core create mode 100644 docs/sagemaker_core/index.rst create mode 100644 docs/training/index.rst create mode 120000 docs/v3-examples diff --git a/.gitignore b/.gitignore index 09935a1dc9..1ac75202b2 100644 --- a/.gitignore +++ b/.gitignore @@ -18,6 +18,9 @@ scratch/ *.egg examples/tensorflow/distributed_mnist/data *.iml + +# Sphinx documentation +docs/_build/ doc/_build doc/_static doc/_templates diff --git a/.readthedocs.yaml b/.readthedocs.yaml new file mode 100644 index 0000000000..efe6929714 --- /dev/null +++ b/.readthedocs.yaml @@ -0,0 +1,20 @@ +version: 2 + +build: + os: ubuntu-22.04 + tools: + python: "3.10" + +python: + install: + - method: pip + path: . + - requirements: docs/requirements.txt + +sphinx: + configuration: docs/conf.py + fail_on_warning: false + +formats: + - pdf + - epub diff --git a/docs/Makefile b/docs/Makefile new file mode 100644 index 0000000000..fe8e88c6e1 --- /dev/null +++ b/docs/Makefile @@ -0,0 +1,12 @@ +SPHINXOPTS ?= +SPHINXBUILD ?= sphinx-build +SOURCEDIR = . 
+BUILDDIR = _build + +help: + @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) + +.PHONY: help Makefile + +%: Makefile + @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) diff --git a/docs/_static/custom.css b/docs/_static/custom.css new file mode 100644 index 0000000000..917774591c --- /dev/null +++ b/docs/_static/custom.css @@ -0,0 +1,150 @@ +/* What's New Section Styling */ +.whats-new-container { + display: grid; + grid-template-columns: repeat(auto-fit, minmax(300px, 1fr)); + gap: 20px; + margin: 30px 0; +} + +.new-feature-card { + background: linear-gradient(135deg, #f8f9fa 0%, #e9ecef 100%); + border: 1px solid #dee2e6; + border-radius: 12px; + padding: 25px; + box-shadow: 0 4px 12px rgba(0, 0, 0, 0.1); + transition: transform 0.2s ease, box-shadow 0.2s ease; + position: relative; + overflow: hidden; +} + +.new-feature-card:hover { + transform: translateY(-2px); + box-shadow: 0 8px 25px rgba(0, 0, 0, 0.15); +} + +.new-feature-card.exclusive { + background: linear-gradient(135deg, #ff6b6b 0%, #4ecdc4 100%); + color: white; + border: 2px solid #ff4757; +} + +.new-feature-card.exclusive::before { + content: "V3 EXCLUSIVE"; + position: absolute; + top: 10px; + right: -30px; + background: #ff4757; + color: white; + padding: 5px 40px; + font-size: 0.75em; + font-weight: bold; + transform: rotate(45deg); + box-shadow: 0 2px 5px rgba(0, 0, 0, 0.2); +} + +.feature-icon { + font-size: 2.5em; + margin-bottom: 15px; + display: block; +} + +.new-feature-card h3 { + margin-top: 0; + margin-bottom: 15px; + font-size: 1.3em; + font-weight: 600; +} + +.new-feature-card.exclusive h3 { + color: white; +} + +.new-feature-card ul { + margin: 15px 0; + padding-left: 20px; +} + +.new-feature-card li { + margin-bottom: 8px; + line-height: 1.5; +} + +.new-feature-card code { + background: rgba(0, 0, 0, 0.1); + padding: 2px 6px; + border-radius: 4px; + font-size: 0.9em; +} + +.new-feature-card.exclusive code { + background: rgba(255, 255, 
255, 0.2); + color: white; +} + +/* Collapse navigation tree items by default */ +.bd-toc .toctree-l1 > ul { + display: none; +} + +.bd-toc .toctree-l1.current > ul { + display: block; +} + +.bd-toc .toctree-l2 > ul { + display: none; +} + +.bd-toc .toctree-l2.current > ul { + display: block; +} + +/* Highlight model customization in navigation */ +.bd-toc a[href*="model_customization"], +.toctree-l1 a[href*="model_customization"] { + background: linear-gradient(90deg, rgba(255, 107, 107, 0.15), rgba(78, 205, 196, 0.15)); + border-left: 4px solid #ff6b6b; + padding-left: 12px; + font-weight: 600; + border-radius: 4px; + margin: 2px 0; + display: block; +} + +.bd-toc a[href*="model_customization"]:before, +.toctree-l1 a[href*="model_customization"]:before { + content: "⭐ "; + color: #ff4757; + font-size: 0.9em; +} + +header { + background-color: white; + + box-shadow: 0 1px 2px rgba(0, 0, 0, 0.05); + position: sticky; + top: 0; + z-index: 50; +} + +h1 { + font-size: 1.875rem; + font-weight: 700; + color: #111827; +} + +h2 { + font-size: 1.5rem; + font-weight: 700; + color: #111827; +} + +h3 { + font-size: 1.25rem; + font-weight: 500; + color: #111827; +} + +p { + font-size: 1.0rem; + color: #4b5563; +} diff --git a/docs/api/index.rst b/docs/api/index.rst new file mode 100644 index 0000000000..4923bba097 --- /dev/null +++ b/docs/api/index.rst @@ -0,0 +1,4 @@ +API Reference +============= + +Complete API documentation for SageMaker Python SDK V3. 
diff --git a/docs/conf.py b/docs/conf.py new file mode 100644 index 0000000000..f97b18135e --- /dev/null +++ b/docs/conf.py @@ -0,0 +1,59 @@ +import os +import sys +from datetime import datetime + +sys.path.insert(0, os.path.abspath('.')) + +project = 'SageMaker Python SDK V3' +copyright = f'{datetime.now().year}, Amazon Web Services' +author = 'Amazon Web Services' + +extensions = [ + 'sphinx.ext.autodoc', + 'sphinx.ext.autosummary', + 'sphinx.ext.napoleon', + 'sphinx.ext.viewcode', + 'sphinx.ext.intersphinx', + 'myst_nb', + 'sphinx_book_theme', + 'sphinx_design', +] + +templates_path = ['_templates'] +exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store', 'sagemaker-core/docs/*', 'sagemaker-core/CHANGELOG.md', 'sagemaker-core/CONTRIBUTING.md'] + +html_theme = 'sphinx_book_theme' +html_theme_options = { + 'repository_url': 'https://github.com/aws/sagemaker-python-sdk', + 'use_repository_button': True, + 'use_issues_button': True, + 'use_edit_page_button': True, + 'path_to_docs': 'docs/', + 'show_navbar_depth': 2, + 'show_toc_level': 2, + 'announcement': 'This is V3 documentation. View V2 docs', +} + +# Add to the options above; reassigning html_theme_options here would +# discard the repository buttons, navbar depth, and announcement. +html_theme_options['collapse_navigation'] = True + +html_static_path = ['_static'] +html_css_files = ['custom.css'] + +html_context = { + 'display_github': True, + 'github_user': 'aws', + 'github_repo': 'sagemaker-python-sdk', + 'github_version': 'master', + 'conf_py_path': '/docs/', + 'version_warning': True, + 'version_warning_text': 'This is the V3 documentation. For V2 documentation, visit the legacy docs.', +} + +nb_execution_mode = 'off' +nb_execution_allow_errors = True + +# Suppress autodoc warnings for missing modules +autodoc_mock_imports = ['sagemaker'] +suppress_warnings = ['autodoc.import_error'] diff --git a/docs/index.rst b/docs/index.rst new file mode 100644 index 0000000000..94aba61ba2 --- /dev/null +++ b/docs/index.rst @@ -0,0 +1,31 @@ +SageMaker Python SDK V3 +========================= + +.. include:: overview.rst + +.. 
toctree:: + :maxdepth: 2 + :caption: Getting Started + :hidden: + + overview + installation + quickstart + +.. toctree:: + :maxdepth: 2 + :caption: V3 Capabilities + :hidden: + + model_customization/index + training/index + inference/index + ml_ops/index + sagemaker_core/index + +.. toctree:: + :maxdepth: 2 + :caption: API Reference + :hidden: + + api/index diff --git a/docs/inference/index.rst b/docs/inference/index.rst new file mode 100644 index 0000000000..a5b1455334 --- /dev/null +++ b/docs/inference/index.rst @@ -0,0 +1,186 @@ +Inference +========= + +SageMaker Python SDK V3 transforms model deployment and inference with the unified **ModelBuilder** class, replacing the complex framework-specific model classes from V2. This modern approach provides a consistent interface for all inference scenarios while maintaining the flexibility and performance you need. + +Key Benefits of V3 Inference +---------------------------- + +* **Unified Interface**: Single ``ModelBuilder`` class replaces multiple framework-specific model classes +* **Simplified Deployment**: Object-oriented API with intelligent defaults for endpoint configuration +* **Enhanced Performance**: Optimized inference pipelines with automatic scaling and load balancing +* **Multi-Modal Support**: Deploy models for real-time, batch, and serverless inference scenarios + +Quick Start Example +------------------- + +Here's how inference has evolved from V2 to V3: + +**SageMaker Python SDK V2:** + +.. code-block:: python + + from sagemaker.model import Model + from sagemaker.predictor import Predictor + + model = Model( + image_uri="my-inference-image", + model_data="s3://my-bucket/model.tar.gz", + role="arn:aws:iam::123456789012:role/SageMakerRole" + ) + predictor = model.deploy( + initial_instance_count=1, + instance_type="ml.m5.xlarge" + ) + result = predictor.predict(data) + +**SageMaker Python SDK V3:** + +.. 
code-block:: python + + from sagemaker.serve import ModelBuilder + + model_builder = ModelBuilder( + model="my-model", + model_path="s3://my-bucket/model.tar.gz" + ) + endpoint = model_builder.build() + result = endpoint.invoke(data) + +ModelBuilder Overview +--------------------- + +The ``ModelBuilder`` class is the cornerstone of SageMaker Python SDK V3 inference, providing a unified interface for all deployment scenarios. This single class replaces the complex web of framework-specific model classes from V2, offering: + +**Unified Deployment Interface** + One class handles PyTorch, TensorFlow, Scikit-learn, XGBoost, HuggingFace, and custom containers + +**Intelligent Optimization** + Automatically optimizes model serving configuration based on your model characteristics + +**Flexible Deployment Options** + Support for real-time endpoints, batch transform, and serverless inference + +**Seamless Integration** + Works seamlessly with SageMaker features like auto-scaling, multi-model endpoints, and A/B testing + +.. 
code-block:: python + + from sagemaker.serve import ModelBuilder + from sagemaker.serve.configs import EndpointConfig + + # Create model builder with intelligent defaults + model_builder = ModelBuilder( + model="your-model", + model_path="s3://your-bucket/model-artifacts", + role="your-sagemaker-role" + ) + + # Configure endpoint settings + endpoint_config = EndpointConfig( + instance_type="ml.m5.xlarge", + initial_instance_count=1, + auto_scaling_enabled=True + ) + + # Deploy model + endpoint = model_builder.build(endpoint_config=endpoint_config) + + # Make predictions + response = endpoint.invoke({"inputs": "your-input-data"}) + +Inference Capabilities +---------------------- + +Model Optimization Support +~~~~~~~~~~~~~~~~~~~~~~~~~~ + +V3 introduces powerful model optimization capabilities for enhanced performance: + +* **SageMaker Neo** - Optimize models for specific hardware targets +* **TensorRT Integration** - Accelerate deep learning inference on NVIDIA GPUs +* **ONNX Runtime** - Cross-platform model optimization and acceleration +* **Quantization Support** - Reduce model size and improve inference speed + +**Quick Optimization Example:** + +.. 
code-block:: python + + from sagemaker.serve import ModelBuilder + from sagemaker.serve.configs import OptimizationConfig + + model_builder = ModelBuilder( + model="huggingface-bert-base", + optimization_config=OptimizationConfig( + target_device="ml_inf1", + optimization_level="O2", + quantization_enabled=True + ) + ) + + optimized_endpoint = model_builder.build() + +Key Inference Features +~~~~~~~~~~~~~~~~~~~~~~ + +* **Multi-Model Endpoints** - Host multiple models on a single endpoint with automatic model loading and unloading for cost optimization +* **Auto-Scaling Integration** - Automatically scale endpoint capacity based on traffic patterns with configurable scaling policies +* **A/B Testing Support** - Deploy multiple model variants with traffic splitting for safe model updates and performance comparison +* **Batch Transform Jobs** - Process large datasets efficiently with automatic data partitioning and parallel processing +* **Serverless Inference** - Pay-per-request pricing with automatic scaling from zero to handle variable workloads + +Supported Inference Scenarios +----------------------------- + +Deployment Types +~~~~~~~~~~~~~~~~ + +* **Real-Time Endpoints** - Low-latency inference for interactive applications +* **Batch Transform** - High-throughput processing for large datasets +* **Serverless Inference** - Cost-effective inference for variable workloads +* **Multi-Model Endpoints** - Host multiple models on shared infrastructure + +Framework Support +~~~~~~~~~~~~~~~~~ + +* **PyTorch** - Deep learning models with dynamic computation graphs +* **TensorFlow** - Production-ready machine learning models at scale +* **Scikit-learn** - Classical machine learning algorithms +* **XGBoost** - Gradient boosting models for structured data +* **HuggingFace** - Pre-trained transformer models for NLP tasks +* **Custom Containers** - Bring your own inference logic and dependencies + +Advanced Features +~~~~~~~~~~~~~~~~~ + +* **Model Monitoring** - Track model 
performance and data drift in production +* **Endpoint Security** - VPC support, encryption, and IAM-based access control +* **Multi-AZ Deployment** - High availability with automatic failover +* **Custom Inference Logic** - Implement preprocessing, postprocessing, and custom prediction logic + +Migration from V2 +------------------ + +If you're migrating from V2, the key changes are: + +* Replace framework-specific model classes (PyTorchModel, TensorFlowModel, etc.) with ``ModelBuilder`` +* Use structured configuration objects instead of parameter dictionaries +* Leverage the new ``invoke()`` method instead of ``predict()`` for a more consistent API +* Take advantage of built-in optimization and auto-scaling features + +Inference Examples +------------------ + +Explore comprehensive inference examples that demonstrate V3 capabilities: + +.. toctree:: + :maxdepth: 1 + + ../v3-examples/inference-examples/inference-spec-example + ../v3-examples/inference-examples/jumpstart-example + ../v3-examples/inference-examples/optimize-example + ../v3-examples/inference-examples/train-inference-e2e-example + ../v3-examples/inference-examples/jumpstart-e2e-training-example + ../v3-examples/inference-examples/local-mode-example + ../v3-examples/inference-examples/huggingface-example + ../v3-examples/inference-examples/in-process-mode-example diff --git a/docs/installation.rst b/docs/installation.rst new file mode 100644 index 0000000000..f4266e1c10 --- /dev/null +++ b/docs/installation.rst @@ -0,0 +1,170 @@ +Installation +============ + +This guide covers how to install SageMaker Python SDK V3 and set up your development environment. + +Quick Installation +------------------ + +Install the latest version of SageMaker Python SDK V3: + +.. 
code-block:: bash + + pip install "sagemaker>=3.0.0" + +Prerequisites +------------- + +**Python Version** + SageMaker Python SDK V3 supports Python 3.9, 3.10, 3.11, and 3.12 + +**Operating Systems** + - Linux (recommended for production) + - macOS (development and testing) + +**AWS Credentials** + Configure AWS credentials using one of these methods: + + - AWS CLI: ``aws configure`` + - Environment variables: ``AWS_ACCESS_KEY_ID`` and ``AWS_SECRET_ACCESS_KEY`` + - IAM roles + +Installation Methods +-------------------- + +Standard Installation +~~~~~~~~~~~~~~~~~~~~~ + +Install the complete SageMaker Python SDK V3: + +.. code-block:: bash + + pip install "sagemaker>=3.0.0" + +Modular Installation +~~~~~~~~~~~~~~~~~~~~ + +Install specific components based on your needs: + +.. code-block:: bash + + # Core functionality only + pip install "sagemaker-core>=3.0.0" + + # Training capabilities + pip install "sagemaker-train>=3.0.0" + + # Inference capabilities + pip install "sagemaker-serve>=3.0.0" + + # ML Operations + pip install "sagemaker-mlops>=3.0.0" + +Virtual Environment (Recommended) +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Create an isolated environment for your SageMaker projects: + +.. code-block:: bash + + # Using venv + python -m venv sagemaker-v3-env + source sagemaker-v3-env/bin/activate + pip install "sagemaker>=3.0.0" + + # Using conda + conda create -n sagemaker-v3 python=3.10 + conda activate sagemaker-v3 + pip install "sagemaker>=3.0.0" + +Development Installation +~~~~~~~~~~~~~~~~~~~~~~~~ + +Install from source for development or to get the latest features: + +.. code-block:: bash + + git clone https://github.com/aws/sagemaker-python-sdk.git + cd sagemaker-python-sdk + pip install -e . + +Optional Dependencies +--------------------- + +Install additional dependencies for specific use cases: + +.. 
code-block:: bash + + # For local mode training and inference + pip install "sagemaker[local]" + + # For all optional dependencies + pip install "sagemaker[all]" + + # For specific frameworks + pip install "sagemaker[tensorflow]" + pip install "sagemaker[pytorch]" + +Verification +------------ + +Verify your installation: + +.. code-block:: python + + import sagemaker + print(f"SageMaker SDK version: {sagemaker.__version__}") + + # Check if you can create a session + import sagemaker + session = sagemaker.Session() + print(f"Default bucket: {session.default_bucket()}") + print(f"Region: {session.boto_region_name}") + +Troubleshooting +--------------- + +**Common Issues:** + +*ImportError: No module named 'sagemaker'* + - Ensure you're using the correct Python environment + - Verify installation with ``pip list | grep sagemaker`` + +*Permission denied errors* + - Use ``pip install --user "sagemaker>=3.0.0"`` for user-level installation + - Or use a virtual environment + +*AWS credential errors* + - Configure AWS credentials: ``aws configure`` + - Verify with ``aws sts get-caller-identity`` + +*Version conflicts* + - Uninstall old versions: ``pip uninstall sagemaker`` + - Install fresh: ``pip install "sagemaker>=3.0.0"`` + +Upgrading from V2 +----------------- + +If you have SageMaker Python SDK V2 installed: + +.. code-block:: bash + + # Upgrade to V3 + pip install --upgrade "sagemaker>=3.0.0" + + # Or install V3 in a new environment (recommended) + python -m venv sagemaker-v3-env + source sagemaker-v3-env/bin/activate + pip install "sagemaker>=3.0.0" + +**Note:** V3 introduces breaking changes. See the :doc:`overview` page for migration guidance. + +Next Steps +---------- + +After installation: + +1. **Configure AWS credentials** if you haven't already +2. **Read the** :doc:`overview` **to understand V3 changes** +3. **Try the** :doc:`quickstart` **guide** +4. 
**Explore** :doc:`training/index`, :doc:`inference/index`, and other capabilities diff --git a/docs/ml_ops/index.rst b/docs/ml_ops/index.rst new file mode 100644 index 0000000000..58ec726993 --- /dev/null +++ b/docs/ml_ops/index.rst @@ -0,0 +1,215 @@ +ML Operations +============= + +SageMaker Python SDK V3 provides comprehensive MLOps capabilities for building, deploying, and managing machine learning workflows at scale. This includes advanced pipeline orchestration, model monitoring, data quality checks, and automated deployment strategies for production ML systems. + +Key Benefits of V3 ML Operations +-------------------------------- + +* **Unified Pipeline Interface**: Streamlined workflow orchestration with intelligent step dependencies +* **Advanced Monitoring**: Built-in model quality, data drift, and bias detection capabilities +* **Automated Governance**: Model registry integration with approval workflows and lineage tracking +* **Production-Ready**: Enterprise-grade features for compliance, security, and scalability + +Quick Start Example +------------------- + +Here's how ML Operations workflows are simplified in V3: + +**Traditional Pipeline Approach:** + +.. code-block:: python + + from sagemaker.workflow.pipeline import Pipeline + from sagemaker.workflow.steps import TrainingStep, ProcessingStep + from sagemaker.sklearn.processing import SKLearnProcessor + + # Complex setup with multiple framework-specific classes + processor = SKLearnProcessor( + framework_version="0.23-1", + role=role, + instance_type="ml.m5.xlarge", + instance_count=1 + ) + + processing_step = ProcessingStep( + name="PreprocessData", + processor=processor, + # ... many configuration parameters + ) + +**SageMaker V3 MLOps Approach:** + +.. 
code-block:: python + + from sagemaker.mlops import Pipeline, ProcessingStep + from sagemaker.mlops.configs import ProcessingConfig + + # Simplified configuration with intelligent defaults + pipeline = Pipeline(name="ml-workflow") + + processing_step = ProcessingStep( + name="preprocess-data", + processing_config=ProcessingConfig( + image_uri="sklearn-processing-image", + instance_type="ml.m5.xlarge" + ), + inputs={"raw_data": "s3://bucket/raw-data"}, + outputs={"processed_data": "s3://bucket/processed-data"} + ) + + pipeline.add_step(processing_step) + +MLOps Pipeline Overview +----------------------- + +SageMaker V3 MLOps provides a unified interface for building and managing end-to-end machine learning workflows: + +**Pipeline Orchestration** + Intelligent step dependencies with automatic resource management and error handling + +**Model Registry Integration** + Seamless model versioning, approval workflows, and deployment automation + +**Quality Monitoring** + Built-in data quality, model performance, and bias detection capabilities + +**Governance and Compliance** + Comprehensive lineage tracking, audit trails, and approval mechanisms + +.. 
code-block:: python + + from sagemaker.mlops import Pipeline, TrainingStep, ModelStep, EndpointStep + from sagemaker.mlops.configs import ModelConfig, EndpointConfig + + # Create comprehensive ML pipeline + pipeline = Pipeline(name="production-ml-pipeline") + + # Training step + training_step = TrainingStep( + name="train-model", + training_config=TrainingConfig( + algorithm_specification={ + "training_image": "your-training-image" + } + ) + ) + + # Model registration step + model_step = ModelStep( + name="register-model", + model_config=ModelConfig( + model_package_group_name="production-models", + approval_status="PendingManualApproval" + ), + depends_on=[training_step] + ) + + # Deployment step + endpoint_step = EndpointStep( + name="deploy-model", + endpoint_config=EndpointConfig( + instance_type="ml.m5.xlarge", + initial_instance_count=1 + ), + depends_on=[model_step] + ) + + pipeline.add_steps([training_step, model_step, endpoint_step]) + +MLOps Capabilities +------------------ + +Advanced Pipeline Features +~~~~~~~~~~~~~~~~~~~~~~~~~~ + +V3 introduces powerful pipeline capabilities for production ML workflows: + +* **Conditional Execution** - Dynamic pipeline paths based on data quality checks and model performance +* **Parallel Processing** - Automatic parallelization of independent pipeline steps for faster execution +* **Resource Optimization** - Intelligent resource allocation and cost optimization across pipeline steps +* **Failure Recovery** - Automatic retry mechanisms and checkpoint-based recovery for robust workflows + +**Advanced Pipeline Example:** + +.. 
code-block:: python + + from sagemaker.mlops import Pipeline, ConditionStep, ParallelStep + from sagemaker.mlops.conditions import ModelAccuracyCondition + + pipeline = Pipeline(name="advanced-ml-pipeline") + + # Conditional model deployment based on accuracy + accuracy_condition = ModelAccuracyCondition( + threshold=0.85, + metric_name="validation:accuracy" + ) + + condition_step = ConditionStep( + name="check-model-quality", + condition=accuracy_condition, + if_steps=[deploy_to_production_step], + else_steps=[retrain_model_step] + ) + + pipeline.add_step(condition_step) + +Key MLOps Features +~~~~~~~~~~~~~~~~~~ + +* **Model Registry Integration** - Centralized model versioning with automated approval workflows and deployment tracking +* **Data Quality Monitoring** - Continuous monitoring of data drift, schema changes, and statistical anomalies in production +* **Model Performance Tracking** - Real-time monitoring of model accuracy, latency, and business metrics with alerting +* **Bias Detection and Fairness** - Built-in bias detection across protected attributes with automated reporting and remediation +* **Automated Retraining** - Trigger-based model retraining based on performance degradation or data drift detection + +Supported MLOps Scenarios +------------------------- + +Pipeline Types +~~~~~~~~~~~~~~ + +* **Training Pipelines** - End-to-end model training with data preprocessing, feature engineering, and validation +* **Inference Pipelines** - Real-time and batch inference workflows with preprocessing and postprocessing +* **Data Processing Pipelines** - ETL workflows for feature engineering, data validation, and preparation +* **Model Deployment Pipelines** - Automated deployment with A/B testing, canary releases, and rollback capabilities + +Monitoring and Governance +~~~~~~~~~~~~~~~~~~~~~~~~~ + +* **Model Monitoring** - Continuous tracking of model performance, data quality, and operational metrics +* **Compliance Reporting** - Automated generation of 
audit reports for regulatory compliance and governance +* **Lineage Tracking** - Complete data and model lineage from raw data to production predictions +* **Access Control** - Fine-grained permissions and approval workflows for model deployment and updates + +Integration Patterns +~~~~~~~~~~~~~~~~~~~~ + +* **CI/CD Integration** - Seamless integration with GitHub Actions, Jenkins, and other CI/CD platforms +* **Event-Driven Workflows** - Trigger pipelines based on data availability, model performance, or business events +* **Multi-Environment Deployment** - Automated promotion of models across development, staging, and production environments + +Migration from V2 +------------------ + +If you're migrating MLOps workflows from V2, the key improvements are: + +* **Simplified Pipeline Definition**: Unified interface replaces complex framework-specific configurations +* **Enhanced Monitoring**: Built-in model and data quality monitoring replaces custom solutions +* **Improved Governance**: Integrated model registry and approval workflows streamline compliance +* **Better Resource Management**: Automatic resource optimization and cost management across workflows + +ML Operations Examples +---------------------- + +Explore comprehensive MLOps examples that demonstrate V3 capabilities: + +.. 
toctree:: + :maxdepth: 1 + + ../v3-examples/ml-ops-examples/v3-sagemaker-clarify + ../v3-examples/ml-ops-examples/v3-pipeline-train-create-registry + ../v3-examples/ml-ops-examples/v3-transform-job-example + ../v3-examples/ml-ops-examples/v3-hyperparameter-tuning-example/v3-hyperparameter-tuning-example + ../v3-examples/ml-ops-examples/v3-model-registry-example/v3-model-registry-example + ../v3-examples/ml-ops-examples/v3-processing-job-pytorch/v3-pytorch-processing-example diff --git a/docs/model_customization/index.rst b/docs/model_customization/index.rst new file mode 100644 index 0000000000..9023a203f9 --- /dev/null +++ b/docs/model_customization/index.rst @@ -0,0 +1,211 @@ +Model Customization +=================== + +.. raw:: html + +
+ 🆕 V3 EXCLUSIVE FEATURE
+ Model customization with specialized trainers is only available in SageMaker Python SDK V3. + This powerful capability was built from the ground up for foundation model fine-tuning. +
+ +SageMaker Python SDK V3 revolutionizes foundation model fine-tuning with specialized trainer classes, making it easier than ever to customize large language models and foundation models for your specific use cases. This modern approach provides powerful fine-tuning capabilities while maintaining simplicity and performance. + +Key Benefits of V3 Model Customization +-------------------------------------- + +* **Specialized Trainers**: Purpose-built classes for different fine-tuning approaches (SFT, DPO, RLAIF, RLVR) +* **Foundation Model Focus**: Optimized for large language models and transformer architectures +* **Advanced Techniques**: Support for cutting-edge fine-tuning methods like RLHF and preference optimization +* **Production Ready**: Built-in evaluation, monitoring, and deployment capabilities + +Quick Start Example +------------------- + +Here's how model customization works in V3: + +**Supervised Fine-Tuning (SFT):** + +.. code-block:: python + + from sagemaker.train import SFTTrainer + from sagemaker.train.common import TrainingType + + # Create SFT trainer for foundation model fine-tuning + trainer = SFTTrainer( + model="meta-llama/Llama-2-7b-hf", + training_type=TrainingType.LORA, + model_package_group_name="my-custom-models", + training_dataset="s3://my-bucket/training-data.jsonl" + ) + + # Start fine-tuning + training_job = trainer.train() + +**Direct Preference Optimization (DPO):** + +.. 
code-block:: python + + from sagemaker.train import DPOTrainer + + # Create DPO trainer for preference-based fine-tuning + dpo_trainer = DPOTrainer( + model="my-base-model", + preference_dataset="s3://my-bucket/preference-data.jsonl", + training_type=TrainingType.LORA + ) + + # Train with human preferences + dpo_job = dpo_trainer.train() + +Fine-Tuning Trainers Overview +----------------------------- + +SageMaker Python SDK V3 provides four specialized trainer classes for different model customization approaches: + +**SFTTrainer (Supervised Fine-Tuning)** + Traditional fine-tuning with labeled datasets for task-specific adaptation + +**DPOTrainer (Direct Preference Optimization)** + Fine-tune models using human preference data without reinforcement learning complexity + +**RLAIFTrainer (Reinforcement Learning from AI Feedback)** + Use AI-generated feedback to improve model behavior and alignment + +**RLVRTrainer (Reinforcement Learning from Verifiable Rewards)** + Fine-tune with verifiable reward signals for objective optimization + +.. 
code-block:: python + + from sagemaker.train import SFTTrainer, DPOTrainer, RLAIFTrainer, RLVRTrainer + from sagemaker.train.common import TrainingType + from sagemaker.train.configs import LoRAConfig + + # Configure LoRA for parameter-efficient fine-tuning + lora_config = LoRAConfig( + rank=16, + alpha=32, + dropout=0.1, + target_modules=["q_proj", "v_proj"] + ) + + # Choose your fine-tuning approach + sft_trainer = SFTTrainer( + model="huggingface-model-id", + training_dataset="s3://bucket/sft-data.jsonl", + lora_config=lora_config + ) + + # Or use preference optimization + dpo_trainer = DPOTrainer( + model="base-model", + preference_dataset="s3://bucket/preferences.jsonl", + lora_config=lora_config + ) + +Model Customization Capabilities +-------------------------------- + +Advanced Fine-Tuning Techniques +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +V3 supports state-of-the-art fine-tuning methods for foundation models: + +* **LoRA (Low-Rank Adaptation)** - Parameter-efficient fine-tuning with minimal memory requirements +* **Full Fine-Tuning** - Complete model parameter updates for maximum customization +* **Preference Learning** - Train models using human feedback and preference data +* **Reinforcement Learning** - Advanced alignment techniques for improved model behavior + +**Parameter-Efficient Fine-Tuning Example:** + +.. 
code-block:: python + + from sagemaker.train import SFTTrainer + from sagemaker.train.configs import LoRAConfig, TrainingConfig + + # Configure LoRA for efficient fine-tuning + lora_config = LoRAConfig( + rank=8, + alpha=16, + target_modules=["q_proj", "k_proj", "v_proj", "o_proj"] + ) + + # Set up training configuration + training_config = TrainingConfig( + learning_rate=2e-4, + batch_size=4, + gradient_accumulation_steps=4, + max_steps=1000 + ) + + trainer = SFTTrainer( + model="microsoft/DialoGPT-medium", + training_dataset="s3://bucket/conversation-data.jsonl", + lora_config=lora_config, + training_config=training_config + ) + +Key Model Customization Features +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +* **Parameter-Efficient Training** - LoRA and other techniques reduce memory usage by up to 90% while maintaining performance quality +* **Multi-GPU Support** - Distributed training across multiple GPUs with automatic parallelization and gradient synchronization +* **Custom Evaluation Metrics** - Built-in support for 11 evaluation benchmarks including BLEU, ROUGE, perplexity, and domain-specific metrics +* **MLflow Integration** - Comprehensive experiment tracking with real-time metrics, model versioning, and artifact management +* **Flexible Deployment** - Deploy fine-tuned models to SageMaker endpoints, Bedrock, or export for external use + +Supported Model Customization Scenarios +--------------------------------------- + +Model Types +~~~~~~~~~~~ + +* **Large Language Models** - GPT, LLaMA, BERT, T5, and other transformer architectures +* **Conversational AI** - ChatGPT-style models, dialogue systems, and virtual assistants +* **Domain-Specific Models** - Legal, medical, financial, and technical domain adaptation +* **Multimodal Models** - Vision-language models and cross-modal understanding + +Fine-Tuning Approaches +~~~~~~~~~~~~~~~~~~~~~~ + +* **Task-Specific Adaptation** - Fine-tune for specific downstream tasks like summarization, QA, or classification +* 
**Instruction Following** - Train models to follow complex instructions and multi-step reasoning +* **Safety and Alignment** - Improve model behavior, reduce harmful outputs, and align with human values +* **Style and Persona** - Customize model personality, writing style, and response patterns + +Evaluation and Monitoring +~~~~~~~~~~~~~~~~~~~~~~~~~ + +* **Automated Benchmarking** - Built-in evaluation on standard benchmarks and custom metrics +* **Human Evaluation Integration** - Tools for collecting and incorporating human feedback +* **Performance Monitoring** - Track model quality, safety, and alignment metrics during training +* **A/B Testing Support** - Compare different fine-tuning approaches and model variants + +Migration from V2 +------------------ + +V3 introduces entirely new capabilities for model customization that weren't available in V2: + +* **New Specialized Trainers**: SFTTrainer, DPOTrainer, RLAIFTrainer, and RLVRTrainer are V3-exclusive +* **Foundation Model Focus**: V2 primarily supported traditional ML models; V3 is optimized for LLMs +* **Advanced Techniques**: Preference learning and RLHF capabilities are new in V3 +* **Integrated Evaluation**: Built-in benchmarking and evaluation tools replace manual evaluation workflows + +Model Customization Examples +---------------------------- + +Explore comprehensive model customization examples that demonstrate V3 capabilities: + +.. 
toctree:: + :maxdepth: 1 + + ../v3-examples/model-customization-examples/sft_finetuning_example_notebook_pysdk_prod_v3 + ../v3-examples/model-customization-examples/dpo_trainer_example_notebook_v3_prod + ../v3-examples/model-customization-examples/rlaif_finetuning_example_notebook_v3_prod + ../v3-examples/model-customization-examples/rlvr_finetuning_example_notebook_v3_prod + ../v3-examples/model-customization-examples/llm_as_judge_demo + ../v3-examples/model-customization-examples/custom_scorer_demo + ../v3-examples/model-customization-examples/benchmark_demo + ../v3-examples/model-customization-examples/bedrock-modelbuilder-deployment + ../v3-examples/model-customization-examples/model_builder_deployment_notebook + ../v3-examples/model-customization-examples/ai_registry_example diff --git a/docs/overview.rst b/docs/overview.rst new file mode 100644 index 0000000000..ca2ffb72b0 --- /dev/null +++ b/docs/overview.rst @@ -0,0 +1,186 @@ +Overview +======== + +Welcome to SageMaker Python SDK V3 - a revolutionary approach to machine learning on Amazon SageMaker. Version 3.0 represents a significant milestone with modernized architecture, enhanced performance, and powerful new capabilities while maintaining our commitment to user experience and reliability. + +What's New in V3 +----------------- + +.. raw:: html + +
+
+
🎯
+

Model Customization (V3 Exclusive)

+

Revolutionary foundation model fine-tuning with specialized trainers:

+
    +
  • SFTTrainer - Supervised fine-tuning for task-specific adaptation
  • +
  • DPOTrainer - Direct preference optimization without RL complexity
  • +
  • RLAIFTrainer - Reinforcement learning from AI feedback
  • +
  • RLVRTrainer - Reinforcement learning from verifiable rewards
  • +
+

Advanced techniques like LoRA, preference optimization, and RLHF that simply don't exist in V2.

+
+ +
+
📦
+

Modular Architecture

+

Separate PyPI packages for specialized capabilities:

+
    +
  • sagemaker-core - Low-level SageMaker resource management
  • +
  • sagemaker-train - Unified training with ModelTrainer
  • +
  • sagemaker-serve - Simplified inference with ModelBuilder
  • +
  • sagemaker-mlops - ML operations and pipeline management
  • +
+
+ +
+
🔧
+

Unified Classes

+

Single classes replace multiple framework-specific implementations:

+
    +
  • ModelTrainer replaces PyTorchEstimator, TensorFlowEstimator, SKLearnEstimator, etc.
  • +
  • ModelBuilder replaces PyTorchModel, TensorFlowModel, SKLearnModel, etc.
  • +
+
+ +
+
+

Object-Oriented API

+

Structured interface with auto-generated configs aligned with AWS APIs for better developer experience.

+
+
+ +Capabilities +============== + +Model Customization +-------------------- + +Advanced foundation model fine-tuning with specialized trainer classes for cutting-edge techniques: + +.. code-block:: python + + from sagemaker.train import SFTTrainer + from sagemaker.train.common import TrainingType + + trainer = SFTTrainer( + model="meta-llama/Llama-2-7b-hf", + training_type=TrainingType.LORA, + model_package_group_name="my-custom-models", + training_dataset="s3://my-bucket/training-data.jsonl" + ) + + training_job = trainer.train() + +:doc:`Learn more about Model Customization <model_customization/index>` + +Training with ModelTrainer +--------------------------- + +Unified training interface replacing framework-specific estimators with intelligent defaults and streamlined workflows: + +.. code-block:: python + + from sagemaker.train import ModelTrainer + from sagemaker.train.configs import InputData + + trainer = ModelTrainer( + training_image="your-training-image", + role="your-sagemaker-role" + ) + + train_data = InputData( + channel_name="training", + data_source="s3://your-bucket/train-data" + ) + + training_job = trainer.train(input_data_config=[train_data]) + +:doc:`Learn more about Training <training/index>` + +Inference with ModelBuilder +---------------------------- + +Simplified model deployment and inference with automatic optimization and flexible deployment options: + +.. code-block:: python + + from sagemaker.serve import ModelBuilder + + model_builder = ModelBuilder( + model="your-model", + model_path="s3://your-bucket/model-artifacts" + ) + + endpoint = model_builder.build() + result = endpoint.invoke({"inputs": "your-input-data"}) + +:doc:`Learn more about Inference <inference/index>` + +ML Operations +------------- + +Comprehensive MLOps capabilities for building, deploying, and managing machine learning workflows at scale: + +.. 
code-block:: python + + from sagemaker.mlops import Pipeline, TrainingStep, ModelStep + + pipeline = Pipeline(name="production-ml-pipeline") + + training_step = TrainingStep( + name="train-model", + training_config=TrainingConfig( + algorithm_specification={ + "training_image": "your-training-image" + } + ) + ) + + pipeline.add_step(training_step) + +:doc:`Learn more about ML Operations <ml_ops/index>` + +SageMaker Core +-------------- + +Low-level, object-oriented access to Amazon SageMaker resources with intelligent defaults and type safety: + +.. code-block:: python + + from sagemaker.core.resources import TrainingJob + + training_job = TrainingJob.create( + training_job_name="my-training-job", + role_arn="arn:aws:iam::123456789012:role/SageMakerRole", + input_data_config=[{ + "channel_name": "training", + "data_source": "s3://my-bucket/train" + }] + ) + +:doc:`Learn more about SageMaker Core <sagemaker_core/index>` + +Getting Started +=============== + +Installation +------------ + +:doc:`Install SageMaker Python SDK V3 <installation>` to get started + +Migration from V2 +------------------ + +Key changes when migrating from V2: + +* Replace Estimator classes with ``ModelTrainer`` +* Replace Model classes with ``ModelBuilder`` +* Use structured config objects instead of parameter dictionaries +* Leverage specialized fine-tuning trainers for foundation models + +Next Steps +----------- + +**Get Started**: Follow the :doc:`quickstart` guide for a hands-on introduction diff --git a/docs/quickstart.rst b/docs/quickstart.rst new file mode 100644 index 0000000000..33c898f542 --- /dev/null +++ b/docs/quickstart.rst @@ -0,0 +1,163 @@ +Quickstart +=========== + +Get started with SageMaker Python SDK V3 in minutes. This guide walks you through the essential steps to train and deploy your first model. + +Prerequisites +------------- + +* Python 3.9+ installed +* AWS account with appropriate permissions +* AWS credentials configured + +Installation +------------ + +Install SageMaker Python SDK V3: + +.. 
code-block:: bash + + pip install "sagemaker>=3.0.0" + +Basic Setup +----------- + +Import the SDK and create a session: + +.. code-block:: python + + import sagemaker + from sagemaker.train import ModelTrainer + from sagemaker.serve import ModelBuilder + + # Create a SageMaker session + session = sagemaker.Session() + role = sagemaker.get_execution_role() # Or specify your IAM role ARN + + print(f"Using role: {role}") + print(f"Default bucket: {session.default_bucket()}") + +Training Your First Model +------------------------- + +Train a simple model using the unified ModelTrainer: + +.. code-block:: python + + from sagemaker.train import ModelTrainer + from sagemaker.train.configs import InputData + + # Create a ModelTrainer + trainer = ModelTrainer( + training_image="382416733822.dkr.ecr.us-east-1.amazonaws.com/xgboost:latest", + role=role + ) + + # Configure training data + train_data = InputData( + channel_name="training", + data_source="s3://sagemaker-sample-data-us-east-1/xgboost/census-income/train.csv" + ) + + # Start training + training_job = trainer.train( + input_data_config=[train_data], + hyperparameters={ + "objective": "binary:logistic", + "num_round": "100" + } + ) + + print(f"Training job: {training_job.name}") + +Deploying Your Model +-------------------- + +Deploy the trained model for inference: + +.. code-block:: python + + from sagemaker.serve import ModelBuilder + + # Create a ModelBuilder from the training job + model_builder = ModelBuilder( + model_data=training_job.model_artifacts, + image_uri="382416733822.dkr.ecr.us-east-1.amazonaws.com/xgboost:latest", + role=role + ) + + # Deploy to an endpoint + endpoint = model_builder.build( + instance_type="ml.m5.large", + initial_instance_count=1 + ) + + print(f"Endpoint: {endpoint.endpoint_name}") + +Making Predictions +------------------ + +Use your deployed model to make predictions: + +.. 
code-block:: python + + # Sample data for prediction + test_data = "39, State-gov, 77516, Bachelors, 13, Never-married, Adm-clerical, Not-in-family, White, Male, 2174, 0, 40, United-States" + + # Make a prediction + result = endpoint.invoke(test_data, content_type="text/csv") + print(f"Prediction: {result}") + +Cleanup +------- + +Don't forget to clean up resources to avoid charges: + +.. code-block:: python + + # Delete the endpoint + endpoint.delete() + + print("Endpoint deleted") + +Foundation Model Fine-Tuning +---------------------------- + +Try V3's new foundation model fine-tuning capabilities: + +.. code-block:: python + + from sagemaker.train import SFTTrainer + from sagemaker.train.common import TrainingType + + # Fine-tune a foundation model + sft_trainer = SFTTrainer( + model="huggingface-textgeneration-gpt2", + training_type=TrainingType.LORA, + training_dataset="s3://your-bucket/training-data.jsonl", + role=role + ) + + # Start fine-tuning + fine_tuning_job = sft_trainer.train() + print(f"Fine-tuning job: {fine_tuning_job.name}") + +Next Steps +---------- + +Now that you've completed the quickstart: + +1. **Explore Training**: Learn more about :doc:`training/index` capabilities +2. **Try Inference**: Discover advanced :doc:`inference/index` features +3. **Model Customization**: Experiment with :doc:`model_customization/index` +4. **Build Pipelines**: Create workflows with :doc:`ml_ops/index` +5. **Use SageMaker Core**: Access low-level resources with :doc:`sagemaker_core/index` + +Common Issues +------------- + +* **ImportError**: Ensure you have the latest version installed +* **Credential errors**: Run ``aws configure`` to set up credentials +* **Permission denied**: Check your IAM role has SageMaker permissions + +For detailed troubleshooting, see the :doc:`installation` guide. 
diff --git a/docs/requirements.txt b/docs/requirements.txt new file mode 100644 index 0000000000..13e65772c9 --- /dev/null +++ b/docs/requirements.txt @@ -0,0 +1,9 @@ +sphinx>=4.0.0 +sphinx-book-theme>=1.0.0 +myst-nb>=0.17.0 +jupyter-book>=0.15.0 +sphinx-design>=0.4.0 +ipykernel +jupyter +pandas +numpy diff --git a/docs/sagemaker-core b/docs/sagemaker-core new file mode 120000 index 0000000000..3f575bb5b1 --- /dev/null +++ b/docs/sagemaker-core @@ -0,0 +1 @@ +../sagemaker-core \ No newline at end of file diff --git a/docs/sagemaker_core/index.rst b/docs/sagemaker_core/index.rst new file mode 100644 index 0000000000..899156eef4 --- /dev/null +++ b/docs/sagemaker_core/index.rst @@ -0,0 +1,200 @@ +SageMaker Core +============== + +SageMaker Core provides low-level, object-oriented access to Amazon SageMaker resources with intelligent defaults and streamlined workflows. This foundational layer offers direct control over SageMaker services while maintaining the simplicity and power you need for advanced use cases. + +Key Benefits of SageMaker Core +------------------------------ + +* **Direct Resource Access**: Low-level control over SageMaker resources with full API coverage +* **Object-Oriented Design**: Intuitive resource abstractions that map directly to AWS APIs +* **Intelligent Defaults**: Automatic configuration of optimal settings based on resource requirements +* **Type Safety**: Strong typing and validation for better development experience + +Quick Start Example +------------------- + +Here's how SageMaker Core simplifies resource management: + +**Traditional Boto3 Approach:** + +.. 
code-block:: python + + import boto3 + + client = boto3.client('sagemaker') + response = client.create_training_job( + TrainingJobName='my-training-job', + RoleArn='arn:aws:iam::123456789012:role/SageMakerRole', + InputDataConfig=[{ + 'ChannelName': 'training', + 'DataSource': { + 'S3DataSource': { + 'S3DataType': 'S3Prefix', + 'S3Uri': 's3://my-bucket/train', + 'S3DataDistributionType': 'FullyReplicated' + } + } + }], + # ... many more required parameters + ) + +**SageMaker Core Approach:** + +.. code-block:: python + + from sagemaker.core.resources import TrainingJob + from sagemaker.core.shapes import TrainingJobConfig + + training_job = TrainingJob.create( + training_job_name="my-training-job", + role_arn="arn:aws:iam::123456789012:role/SageMakerRole", + input_data_config=[{ + "channel_name": "training", + "data_source": "s3://my-bucket/train" + }] + ) + +SageMaker Core Overview +----------------------- + +SageMaker Core serves as the foundation for all SageMaker Python SDK V3 operations, providing direct access to SageMaker resources through an object-oriented interface: + +**Resource Abstractions** + Direct mapping to SageMaker resources like TrainingJob, Model, Endpoint, and ProcessingJob + +**Intelligent Configuration** + Automatically fills in required parameters with sensible defaults while allowing full customization + +**Type-Safe Operations** + Strong typing and validation prevent common configuration errors + +**Seamless Integration** + Works as the foundation layer for higher-level SDK components + +.. 
code-block:: python + + from sagemaker.core.resources import Endpoint, Model + from sagemaker.core.shapes import EndpointConfig + + # Create a model resource + model = Model.create( + model_name="my-model", + primary_container={ + "image": "your-inference-image", + "model_data_url": "s3://your-bucket/model.tar.gz" + }, + execution_role_arn="your-sagemaker-role" + ) + + # Deploy to an endpoint + endpoint = Endpoint.create( + endpoint_name="my-endpoint", + endpoint_config_name="my-config", + model_name=model.model_name + ) + + # Make predictions + response = endpoint.invoke_endpoint( + body=b'{"instances": [1, 2, 3, 4]}', + content_type="application/json" + ) + +Core Capabilities +----------------- + +Resource Management +~~~~~~~~~~~~~~~~~~~ + +SageMaker Core provides comprehensive resource management capabilities: + +* **Training Jobs** - Create, monitor, and manage training workloads with full parameter control +* **Models** - Define and register models with custom inference logic and container configurations +* **Endpoints** - Deploy real-time inference endpoints with auto-scaling and monitoring +* **Processing Jobs** - Run data processing and feature engineering workloads at scale + +**Resource Lifecycle Management:** + +.. 
code-block:: python + + from sagemaker.core.resources import ProcessingJob + + # Create processing job + processing_job = ProcessingJob.create( + processing_job_name="data-preprocessing", + app_specification={ + "image_uri": "your-processing-image", + "container_entrypoint": ["python", "preprocess.py"] + }, + processing_inputs=[{ + "input_name": "raw-data", + "s3_input": { + "s3_uri": "s3://your-bucket/raw-data", + "local_path": "/opt/ml/processing/input" + } + }], + processing_outputs=[{ + "output_name": "processed-data", + "s3_output": { + "s3_uri": "s3://your-bucket/processed-data", + "local_path": "/opt/ml/processing/output" + } + }] + ) + +Key Core Features +~~~~~~~~~~~~~~~~~ + +* **Direct API Access** - Full coverage of SageMaker APIs with object-oriented abstractions for better usability +* **Intelligent Defaults** - Automatic parameter inference and validation reduces boilerplate while maintaining flexibility +* **Resource Chaining** - Seamlessly connect resources together for complex workflows and dependencies +* **Monitoring Integration** - Built-in support for CloudWatch metrics, logging, and resource status tracking +* **Error Handling** - Comprehensive error handling with detailed feedback for troubleshooting and debugging + +Supported Core Scenarios +------------------------ + +Resource Types +~~~~~~~~~~~~~~ + +* **Training Resources** - TrainingJob, HyperParameterTuningJob, AutoMLJob +* **Inference Resources** - Model, EndpointConfig, Endpoint, Transform +* **Processing Resources** - ProcessingJob, FeatureGroup, Pipeline +* **Monitoring Resources** - ModelQualityJobDefinition, DataQualityJobDefinition + +Advanced Features +~~~~~~~~~~~~~~~~~ + +* **Batch Operations** - Efficiently manage multiple resources with batch create, update, and delete operations +* **Resource Tagging** - Comprehensive tagging support for cost allocation, governance, and resource organization +* **Cross-Region Support** - Deploy and manage resources across multiple AWS regions 
with a unified interface +* **Custom Configurations** - Override any default behavior with custom configurations and parameters + +Integration Patterns +~~~~~~~~~~~~~~~~~~~~ + +* **Pipeline Integration** - Use Core resources as building blocks for SageMaker Pipelines +* **Event-Driven Workflows** - Integrate with AWS Lambda and EventBridge for automated workflows +* **Multi-Account Deployments** - Deploy resources across multiple AWS accounts with proper IAM configuration + +Migration from Boto3 +-------------------- + +If you're migrating from direct Boto3 usage, the key benefits are: + +* **Simplified Interface**: Object-oriented resources replace complex dictionary-based API calls +* **Intelligent Defaults**: Automatic parameter inference reduces configuration overhead +* **Type Safety**: Strong typing prevents common configuration errors +* **Better Error Messages**: More descriptive error handling and validation feedback + +SageMaker Core Examples +----------------------- + +Explore comprehensive SageMaker Core examples: + +.. toctree:: + :maxdepth: 1 + + ../sagemaker-core/example_notebooks/get_started + ../sagemaker-core/example_notebooks/sagemaker_core_overview + ../sagemaker-core/example_notebooks/intelligent_defaults_and_logging diff --git a/docs/training/index.rst b/docs/training/index.rst new file mode 100644 index 0000000000..ebe940f098 --- /dev/null +++ b/docs/training/index.rst @@ -0,0 +1,184 @@ +Training +======== + +SageMaker Python SDK V3 revolutionizes machine learning training with the unified **ModelTrainer** class, replacing the complex framework-specific estimators from V2. This modern approach provides a consistent interface across all training scenarios while maintaining the power and flexibility you need. 
+ +Key Benefits of V3 Training +--------------------------- + +* **Unified Interface**: Single ``ModelTrainer`` class replaces multiple framework-specific estimators +* **Simplified Configuration**: Object-oriented API with auto-generated configs aligned with AWS APIs +* **Reduced Boilerplate**: Streamlined workflows with intuitive interfaces +* **Enhanced Performance**: Modernized architecture for better training efficiency + +Quick Start Example +------------------- + +Here's how training has evolved from V2 to V3: + +**SageMaker Python SDK V2:** + +.. code-block:: python + + from sagemaker.estimator import Estimator + + estimator = Estimator( + image_uri="my-training-image", + role="arn:aws:iam::123456789012:role/SageMakerRole", + instance_count=1, + instance_type="ml.m5.xlarge", + output_path="s3://my-bucket/output" + ) + estimator.fit({"training": "s3://my-bucket/train"}) + +**SageMaker Python SDK V3:** + +.. code-block:: python + + from sagemaker.train import ModelTrainer + from sagemaker.train.configs import InputData + + trainer = ModelTrainer( + training_image="my-training-image", + role="arn:aws:iam::123456789012:role/SageMakerRole" + ) + + train_data = InputData( + channel_name="training", + data_source="s3://my-bucket/train" + ) + + trainer.train(input_data_config=[train_data]) + +ModelTrainer Overview +--------------------- + +The ``ModelTrainer`` class is the cornerstone of SageMaker Python SDK V3, providing a unified interface for all training scenarios. 
This single class replaces the complex web of framework-specific estimators from V2, offering: + +**Unified Training Interface** + One class handles PyTorch, TensorFlow, Scikit-learn, XGBoost, and custom containers + +**Intelligent Defaults** + Automatically configures optimal settings based on your training requirements + +**Flexible Configuration** + Object-oriented design with structured configs that align with AWS APIs + +**Seamless Integration** + Works seamlessly with SageMaker features like distributed training, spot instances, and hyperparameter tuning + +.. code-block:: python + + from sagemaker.train import ModelTrainer + from sagemaker.train.configs import InputData, ResourceConfig + + # Create trainer with intelligent defaults + trainer = ModelTrainer( + training_image="your-training-image", + role="your-sagemaker-role" + ) + + # Configure training data + train_data = InputData( + channel_name="training", + data_source="s3://your-bucket/train-data" + ) + + # Start training + training_job = trainer.train( + input_data_config=[train_data], + resource_config=ResourceConfig( + instance_type="ml.m5.xlarge", + instance_count=1 + ) + ) + +Training Capabilities +--------------------- + +Model Fine-Tuning Support +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +V3 introduces powerful fine-tuning capabilities with four specialized trainer classes: + +* **SFTTrainer** - Supervised fine-tuning for foundation models +* **DPOTrainer** - Direct preference optimization +* **RLAIFTrainer** - Reinforcement Learning from AI Feedback +* **RLVRTrainer** - Reinforcement Learning from Verifiable Rewards + +**Quick Fine-Tuning Example:** + +.. 
code-block:: python + + from sagemaker.train import SFTTrainer + from sagemaker.train.common import TrainingType + + trainer = SFTTrainer( + model="meta-llama/Llama-2-7b-hf", + training_type=TrainingType.LORA, + model_package_group_name="my-models", + training_dataset="s3://bucket/train.jsonl" + ) + + training_job = trainer.train() + +Key Fine-Tuning Features +~~~~~~~~~~~~~~~~~~~~~~~~ + +* **LoRA & Full Fine-Tuning Support** - Choose between parameter-efficient LoRA (Low-Rank Adaptation) for faster training with reduced memory requirements, or full fine-tuning for maximum model customization and performance +* **MLflow Integration with Real-Time Metrics** - Monitor training progress with comprehensive metrics tracking, experiment comparison, and model versioning through integrated MLflow support +* **Multi-Platform Deployment** - Seamlessly deploy your fine-tuned models to Amazon SageMaker endpoints for real-time inference or Amazon Bedrock for foundation model serving +* **Comprehensive Evaluation Suite** - Validate model performance with 11 built-in benchmark evaluations including accuracy, perplexity, BLEU scores, and domain-specific metrics +* **Serverless Training Capabilities** - Scale training automatically without managing infrastructure, with pay-per-use pricing and automatic resource optimization + +Supported Training Scenarios +---------------------------- + +Framework Support +~~~~~~~~~~~~~~~~~ + +* **PyTorch** - Deep learning with dynamic computation graphs +* **TensorFlow** - Production-ready machine learning at scale +* **Scikit-learn** - Classical machine learning algorithms +* **XGBoost** - Gradient boosting for structured data +* **Custom Containers** - Bring your own training algorithms + +Training Types +~~~~~~~~~~~~~~ + +* **Single Instance Training** - Cost-effective training for smaller models +* **Multi-Instance Training** - Distributed training for large-scale models +* **Spot Instance Training** - Cost optimization with managed spot 
instances +* **Local Mode Training** - Development and debugging on local infrastructure + +Advanced Features +~~~~~~~~~~~~~~~~~ + +* **Automatic Model Tuning** - Hyperparameter optimization at scale +* **Distributed Training** - Multi-node, multi-GPU training strategies +* **Checkpointing** - Resume training from saved states +* **Early Stopping** - Prevent overfitting with intelligent stopping criteria + +Migration from V2 +------------------ + +If you're migrating from V2, the key changes are: + +* Replace framework-specific estimators (PyTorchEstimator, TensorFlowEstimator, etc.) with ``ModelTrainer`` +* Use structured ``InputData`` configs instead of dictionary-based input specifications +* Leverage the new object-oriented API for cleaner, more maintainable code + +Training Examples +----------------- + +Explore comprehensive training examples that demonstrate V3 capabilities: + +.. toctree:: + :maxdepth: 1 + + ../v3-examples/training-examples/local-training-example + ../v3-examples/training-examples/distributed-local-training-example + ../v3-examples/training-examples/hyperparameter-training-example + ../v3-examples/training-examples/jumpstart-training-example + ../v3-examples/training-examples/custom-distributed-training-example + ../v3-examples/training-examples/aws_batch/sm-training-queues_getting_started_with_model_trainer diff --git a/docs/v3-examples b/docs/v3-examples new file mode 120000 index 0000000000..c3785e8a41 --- /dev/null +++ b/docs/v3-examples @@ -0,0 +1 @@ +../v3-examples \ No newline at end of file From ce413a708e6de5cf78f217255baf781897f1accd Mon Sep 17 00:00:00 2001 From: adishaa Date: Wed, 31 Dec 2025 16:37:58 -0800 Subject: [PATCH 2/3] feat: Add comprehensive feedback component with light theme - Add thumbs up/down feedback buttons with SVG icons - Show detailed feedback options when thumbs down is selected - Include 5 checkbox options: hard to understand, code doesn't work, couldn't find, out of date, other - Add textarea for 
additional feedback details - Implement light theme styling with proper contrast - Position component at bottom of each documentation page - Include analytics tracking for feedback data - Support multiple selection of feedback reasons --- docs/_static/custom.css | 195 +++++++++++++++++++++++++++++++++++++++ docs/_static/feedback.js | 127 +++++++++++++++++++++++++ docs/conf.py | 1 + 3 files changed, 323 insertions(+) create mode 100644 docs/_static/feedback.js diff --git a/docs/_static/custom.css b/docs/_static/custom.css index 917774591c..f3c3ed8eca 100644 --- a/docs/_static/custom.css +++ b/docs/_static/custom.css @@ -117,6 +117,201 @@ font-size: 0.9em; } +/* Feedback Component */ +.feedback-container { + border-top: 1px solid #e1e5e9; + margin-top: 40px; + padding: 24px; + background-color: #f8f9fa; + border-radius: 8px; + border: 1px solid #e1e5e9; +} + +.feedback-question { + font-size: 20px; + font-weight: 500; + color: #374151; + margin-bottom: 20px; + text-align: left; +} + +.feedback-buttons { + display: flex; + justify-content: flex-start; + gap: 12px; +} + +.feedback-btn { + display: flex; + align-items: center; + gap: 8px; + padding: 8px 16px; + border: 1px solid #d1d5db; + border-radius: 6px; + background: white; + color: #6b7280; + font-size: 14px; + cursor: pointer; + transition: all 0.2s ease; +} + +.feedback-btn:hover { + border-color: #9ca3af; + background: #f9fafb; +} + +.feedback-btn.selected { + border-color: #3b82f6; + background: #eff6ff; + color: #1d4ed8; +} + +.feedback-btn svg { + width: 16px; + height: 16px; +} + +.feedback-thanks { + display: none; + color: #059669; + font-size: 14px; + text-align: center; + margin-top: 16px; +} + +/* Feedback Options */ +.feedback-options { + display: none; + margin-top: 20px; +} + +.feedback-options-title { + font-size: 16px; + font-weight: 500; + color: #374151; + margin-bottom: 16px; +} + +.required { + color: #ff6b6b; +} + +.feedback-checkboxes { + display: flex; + flex-direction: column; + gap: 
12px; + margin-bottom: 20px; +} + +.feedback-checkbox { + display: flex; + align-items: center; + cursor: pointer; + color: #374151; + font-size: 14px; + position: relative; + padding-left: 32px; +} + +.feedback-checkbox input[type="checkbox"] { + position: absolute; + opacity: 0; + cursor: pointer; + height: 0; + width: 0; +} + +.checkmark { + position: absolute; + left: 0; + top: 2px; + height: 18px; + width: 18px; + background-color: transparent; + border: 2px solid #6b7280; + border-radius: 3px; +} + +.feedback-checkbox:hover input ~ .checkmark { + border-color: #9ca3af; +} + +.feedback-checkbox input:checked ~ .checkmark { + background-color: #3b82f6; + border-color: #3b82f6; +} + +.checkmark:after { + content: ""; + position: absolute; + display: none; +} + +.feedback-checkbox input:checked ~ .checkmark:after { + display: block; +} + +.feedback-checkbox .checkmark:after { + left: 5px; + top: 2px; + width: 4px; + height: 8px; + border: solid white; + border-width: 0 2px 2px 0; + transform: rotate(45deg); +} + +.feedback-more { + display: flex; + flex-direction: column; + gap: 12px; +} + +.feedback-more-title { + font-size: 16px; + font-weight: 500; + color: #374151; +} + +.feedback-textarea { + padding: 12px; + border: 1px solid #d1d5db; + border-radius: 6px; + font-size: 14px; + resize: vertical; + min-height: 80px; + font-family: inherit; + background-color: #ffffff; + color: #374151; +} + +.feedback-textarea::placeholder { + color: #6b7280; +} + +.feedback-textarea:focus { + outline: none; + border-color: #3b82f6; + box-shadow: 0 0 0 3px rgba(59, 130, 246, 0.1); +} + +.feedback-submit-btn { + padding: 10px 20px; + background: #3b82f6; + color: white; + border: none; + border-radius: 6px; + font-size: 14px; + font-weight: 500; + cursor: pointer; + align-self: flex-start; + transition: background 0.2s; +} + +.feedback-submit-btn:hover { + background: #2563eb; +} + header { background-color: white; diff --git a/docs/_static/feedback.js 
b/docs/_static/feedback.js new file mode 100644 index 0000000000..6a4b8207f1 --- /dev/null +++ b/docs/_static/feedback.js @@ -0,0 +1,127 @@ +// Feedback functionality +document.addEventListener('DOMContentLoaded', function() { + // Add feedback component to the very bottom of each page + const article = document.querySelector('article[role="main"]') || document.querySelector('.bd-article') || document.querySelector('main'); + if (article) { + const feedbackHTML = ` + + `; + + article.insertAdjacentHTML('beforeend', feedbackHTML); + + // Add click handlers + const feedbackBtns = document.querySelectorAll('.feedback-btn'); + const thanksMessage = document.querySelector('.feedback-thanks'); + const feedbackOptions = document.querySelector('.feedback-options'); + const checkboxes = document.querySelectorAll('.feedback-checkbox input[type="checkbox"]'); + const submitBtn = document.querySelector('.feedback-submit-btn'); + const textarea = document.querySelector('.feedback-textarea'); + + feedbackBtns.forEach(btn => { + btn.addEventListener('click', function() { + const feedback = this.dataset.feedback; + + // Remove selected class from all buttons + feedbackBtns.forEach(b => b.classList.remove('selected')); + + // Add selected class to clicked button + this.classList.add('selected'); + + if (feedback === 'yes') { + // Hide options and show thanks + feedbackOptions.style.display = 'none'; + thanksMessage.style.display = 'block'; + + // Send positive feedback + if (typeof gtag !== 'undefined') { + gtag('event', 'page_feedback', { + 'feedback_value': 'positive', + 'page_location': window.location.href + }); + } + } else { + // Show options for negative feedback + feedbackOptions.style.display = 'block'; + thanksMessage.style.display = 'none'; + } + }); + }); + + // Handle submit button + submitBtn.addEventListener('click', function() { + const selectedReasons = []; + checkboxes.forEach(checkbox => { + if (checkbox.checked) { + selectedReasons.push(checkbox.dataset.reason); 
+ } + }); + const additionalFeedback = textarea.value.trim(); + + // Hide options and show thanks + feedbackOptions.style.display = 'none'; + thanksMessage.style.display = 'block'; + + // Send negative feedback with details + if (typeof gtag !== 'undefined') { + gtag('event', 'page_feedback', { + 'feedback_value': 'negative', + 'feedback_reasons': selectedReasons, + 'feedback_details': additionalFeedback, + 'page_location': window.location.href + }); + } + }); + } +}); diff --git a/docs/conf.py b/docs/conf.py index f97b18135e..9bc25460b6 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -40,6 +40,7 @@ html_static_path = ['_static'] html_css_files = ['custom.css'] +html_js_files = ['feedback.js'] html_context = { 'display_github': True, From 77c229f9998ecf91dc6d88392d4014eb7b1cede8 Mon Sep 17 00:00:00 2001 From: adishaa Date: Fri, 2 Jan 2026 01:35:01 -0800 Subject: [PATCH 3/3] Add API documentation using autodoc and autosummary --- docs/api/index.rst | 9 + docs/api/sagemaker_core.rst | 60 + docs/api/sagemaker_mlops.rst | 30 + docs/api/sagemaker_serve.rst | 14 + docs/api/sagemaker_train.rst | 30 + docs/conf.py | 34 +- docs/model_customization/index.rst | 1 + ...io-nova-training-job-sample-notebook.ipynb | 1087 +++++++++++++++++ 8 files changed, 1262 insertions(+), 3 deletions(-) create mode 100644 docs/api/sagemaker_core.rst create mode 100644 docs/api/sagemaker_mlops.rst create mode 100644 docs/api/sagemaker_serve.rst create mode 100644 docs/api/sagemaker_train.rst create mode 100644 v3-examples/model-customization-examples/sm-studio-nova-training-job-sample-notebook.ipynb diff --git a/docs/api/index.rst b/docs/api/index.rst index 4923bba097..4d50771982 100644 --- a/docs/api/index.rst +++ b/docs/api/index.rst @@ -2,3 +2,12 @@ API Reference ============= Complete API documentation for SageMaker Python SDK V3. + +.. 
toctree:: + :maxdepth: 2 + :caption: SDK Reference + + sagemaker_core + sagemaker_train + sagemaker_serve + sagemaker_mlops diff --git a/docs/api/sagemaker_core.rst b/docs/api/sagemaker_core.rst new file mode 100644 index 0000000000..bbad9631ab --- /dev/null +++ b/docs/api/sagemaker_core.rst @@ -0,0 +1,60 @@ +SageMaker Core +============== + +Core SageMaker resources and utilities for managing AWS SageMaker services. + +.. currentmodule:: sagemaker.core + +Core Resources +-------------- + +.. automodule:: sagemaker.core.resources + :members: + :undoc-members: + :show-inheritance: + +Session Management +------------------ + +.. automodule:: sagemaker.core.session_settings + :members: + :undoc-members: + +Configuration +------------- + +.. automodule:: sagemaker.core.config_schema + :members: + :undoc-members: + +Processing +---------- + +.. automodule:: sagemaker.core.processing + :members: + :undoc-members: + +Transformers +------------ + +.. automodule:: sagemaker.core.transformer + :members: + :undoc-members: + +Utilities +--------- + +.. automodule:: sagemaker.core.common_utils + :members: + :undoc-members: + +.. automodule:: sagemaker.core.image_uris + :members: + :undoc-members: + +Exceptions +---------- + +.. automodule:: sagemaker.core.exceptions + :members: + :undoc-members: diff --git a/docs/api/sagemaker_mlops.rst b/docs/api/sagemaker_mlops.rst new file mode 100644 index 0000000000..f67879111d --- /dev/null +++ b/docs/api/sagemaker_mlops.rst @@ -0,0 +1,30 @@ +SageMaker MLOps +=============== + +MLOps capabilities including pipelines, workflows, and model management. + +.. currentmodule:: sagemaker.mlops + +Pipeline Management +------------------- + +.. automodule:: sagemaker.mlops + :members: + :undoc-members: + :show-inheritance: + +Workflow Management +------------------- + +.. automodule:: sagemaker.mlops.workflow + :members: + :undoc-members: + :show-inheritance: + +Local Development +----------------- + +.. 
automodule:: sagemaker.mlops.local + :members: + :undoc-members: + :show-inheritance: diff --git a/docs/api/sagemaker_serve.rst b/docs/api/sagemaker_serve.rst new file mode 100644 index 0000000000..471b79530b --- /dev/null +++ b/docs/api/sagemaker_serve.rst @@ -0,0 +1,14 @@ +SageMaker Serve +=============== + +Model serving and inference capabilities for deploying and managing ML models. + +.. currentmodule:: sagemaker.serve + +Model Deployment +---------------- + +.. automodule:: sagemaker.serve + :members: + :undoc-members: + :show-inheritance: diff --git a/docs/api/sagemaker_train.rst b/docs/api/sagemaker_train.rst new file mode 100644 index 0000000000..d54b720a92 --- /dev/null +++ b/docs/api/sagemaker_train.rst @@ -0,0 +1,30 @@ +SageMaker Train +=============== + +Training capabilities including model training, hyperparameter tuning, and distributed training. + +.. currentmodule:: sagemaker.train + +Model Training +-------------- + +.. automodule:: sagemaker.train + :members: + :undoc-members: + :show-inheritance: + +Distributed Training +-------------------- + +.. automodule:: sagemaker.train.distributed + :members: + :undoc-members: + :show-inheritance: + +Model Evaluation +---------------- + +.. 
automodule:: sagemaker.train.evaluate + :members: + :undoc-members: + :show-inheritance: diff --git a/docs/conf.py b/docs/conf.py index 9bc25460b6..de04756c3a 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -2,7 +2,12 @@ import sys from datetime import datetime +# Add the source directories to Python path sys.path.insert(0, os.path.abspath('.')) +sys.path.insert(0, os.path.abspath('../sagemaker-core/src')) +sys.path.insert(0, os.path.abspath('../sagemaker-train/src')) +sys.path.insert(0, os.path.abspath('../sagemaker-serve/src')) +sys.path.insert(0, os.path.abspath('../sagemaker-mlops/src')) project = 'SageMaker Python SDK V3' copyright = f'{datetime.now().year}, Amazon Web Services' @@ -20,7 +25,20 @@ ] templates_path = ['_templates'] -exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store', 'sagemaker-core/docs/*', 'sagemaker-core/CHANGELOG.md', 'sagemaker-core/CONTRIBUTING.md'] +exclude_patterns = [ + '_build', + 'Thumbs.db', + '.DS_Store', + 'sagemaker-core/docs/*', + 'sagemaker-core/CHANGELOG.md', + 'sagemaker-core/CONTRIBUTING.md', +] + +# Suppress specific warnings +suppress_warnings = [ + 'myst.header', # Suppress header level warnings from notebooks + 'toc.not_readable', # Suppress toctree warnings for symlinked files +] html_theme = 'sphinx_book_theme' html_theme_options = { @@ -55,6 +73,16 @@ nb_execution_mode = 'off' nb_execution_allow_errors = True -# Suppress autodoc warnings for missing modules -autodoc_mock_imports = ['sagemaker'] +# Autodoc configuration +autodoc_default_options = { + 'members': True, + 'undoc-members': True, + 'show-inheritance': True, +} + +# Generate autosummary stubs +autosummary_generate = True + +# Don't mock imports - let them fail gracefully and show what's available +autodoc_mock_imports = [] -suppress_warnings = ['autodoc.import_error'] +suppress_warnings.append('autodoc.import_error') # extend the list defined above instead of rebinding it (rebinding dropped myst.header / toc.not_readable) diff --git a/docs/model_customization/index.rst b/docs/model_customization/index.rst index 9023a203f9..85870d9cc3 100644 --- a/docs/model_customization/index.rst +++
b/docs/model_customization/index.rst @@ -209,3 +209,4 @@ Explore comprehensive model customization examples that demonstrate V3 capabilit ../v3-examples/model-customization-examples/bedrock-modelbuilder-deployment ../v3-examples/model-customization-examples/model_builder_deployment_notebook ../v3-examples/model-customization-examples/ai_registry_example + ../v3-examples/model-customization-examples/sm-studio-nova-training-job-sample-notebook diff --git a/v3-examples/model-customization-examples/sm-studio-nova-training-job-sample-notebook.ipynb b/v3-examples/model-customization-examples/sm-studio-nova-training-job-sample-notebook.ipynb new file mode 100644 index 0000000000..20c51e562e --- /dev/null +++ b/v3-examples/model-customization-examples/sm-studio-nova-training-job-sample-notebook.ipynb @@ -0,0 +1,1087 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "babaeb90", + "metadata": { + "editable": true, + "jumpStartAlterations": [ + "novaTrainingJobNotebookHeaderMarkdown" + ], + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "source": [ + "# Model Customization using SageMaker Training Job" + ] + }, + { + "cell_type": "markdown", + "id": "a16fc6c1-c18f-4a06-ae98-36b12ec72ab3", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "source": [ + "This notebook provides an end-to-end walkthrough for creating a SageMaker training job using a SageMaker Nova model and deploying it for inference."
+ ] + }, + { + "cell_type": "markdown", + "id": "940f9af2-cb1e-40be-839d-48db014d67f1", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "source": [ + "## Setup and Dependencies" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "84cf410f", + "metadata": {}, + "outputs": [], + "source": [ + "!pip install --upgrade sagemaker --quiet # restart the kernel after running this cell" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "234f7398-fd6b-4d02-a406-0491924c461d", + "metadata": {}, + "outputs": [], + "source": [ + "\n", + "import os\n", + "import json\n", + "import boto3\n", + "from rich.pretty import pprint\n", + "from sagemaker.core.helper.session_helper import Session\n", + "\n", + "REGION = boto3.Session().region_name\n", + "sm_client = boto3.client(\"sagemaker\", region_name=REGION)\n", + "\n", + "# Create SageMaker session\n", + "sagemaker_session = Session(sagemaker_client=sm_client)\n", + "\n", + "print(f\"Region: {REGION}\")\n", + "\n", + "# For MLFlow native metrics in Trainer wait, run below line with appropriate region\n", + "os.environ[\"SAGEMAKER_MLFLOW_CUSTOM_ENDPOINT\"] = f\"https://mlflow.sagemaker.{REGION}.app.aws\"" + ] + }, + { + "cell_type": "markdown", + "id": "b9bf5959", + "metadata": {}, + "source": [ + "#### Create Training Dataset\n", + "Below section provides sample code to create the training dataset arn" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "39aaeb1d", + "metadata": {}, + "outputs": [], + "source": [ + "from sagemaker.ai_registry.dataset import DataSet\n", + "from sagemaker.ai_registry.dataset_utils import CustomizationTechnique\n", + "\n", + "# Register dataset in SageMaker AI Registry. 
This creates a versioned dataset that can be referenced by ARN\n", + "dataset = DataSet.create(\n", + " name=\"demo-sft-dataset\",\n", + " source=\"s3://your-bucket/dataset/training_dataset.jsonl\", # source can be S3 or local path\n", + " # customization_technique=CustomizationTechnique.SFT, # or DPO or RLVR\n", + " # Optional technique name for minimal dataset format check.\n", + " wait=True\n", + ")\n", + "\n", + "print(f\"TRAINING_DATASET ARN: {dataset.arn}\")\n", + "# TRAINING_DATASET = dataset.arn" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ea22bd22", + "metadata": {}, + "outputs": [], + "source": [ + "# Required Configs\n", + "BASE_MODEL = \"\"\n", + "\n", + "# MODEL_PACKAGE_GROUP_NAME is the same as CUSTOM_MODEL_NAME\n", + "MODEL_PACKAGE_GROUP_NAME = \"\"\n", + "\n", + "TRAINING_DATASET = \"\"\n", + "\n", + "S3_OUTPUT_PATH = \"\"\n", + "\n", + "ROLE_ARN = \"\"" + ] + }, + { + "cell_type": "markdown", + "id": "259aca67d3d3863b", + "metadata": {}, + "source": [ + "#### Create Model Package Group" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "90a1069d19eeee7", + "metadata": {}, + "outputs": [], + "source": [ + "from sagemaker.core.resources import ModelPackageGroup\n", + "model_package_group = ModelPackageGroup.create(\n", + " model_package_group_name=MODEL_PACKAGE_GROUP_NAME,\n", + " model_package_group_description='' # Required Description\n", + ")" + ] + }, + { + "cell_type": "markdown", + "id": "1c931764", + "metadata": {}, + "source": [ + "## Part 1: Fine-tuning\n", + "\n", + "### Step 1: Creating the Trainer" + ] + }, + { + "cell_type": "markdown", + "id": "f23e67f7", + "metadata": { + "editable": true, + "jumpStartAlterations": [ + "trainerSelection" + ], + "slideshow": { + "slide_type": "" + }, + "tags": [], + "trainer_type": "" + }, + "source": [ + "#### Choose one of the following trainer techniques:\n", + "- **Option 1: SFT Trainer (Supervised Fine-Tuning)** \n", + "- **Option 2: RLVR Trainer
(Reinforcement Learning with Verifiable Rewards)** \n", + "- **Option 3: DPO Trainer (Direct Preference Optimization)** \n", + "\n", + "**Instructions:** Run only ONE of the trainers, not all of them." + ] + }, + { + "cell_type": "markdown", + "id": "32fd436b", + "metadata": { + "editable": true, + "jumpStartAlterations": [ + "trainerSelection" + ], + "slideshow": { + "slide_type": "" + }, + "tags": [], + "trainer_type": "SFT" + }, + "source": [ + "#### Create SFT Trainer (Supervised Fine-Tuning)\n", + "\n", + "##### Key Parameters:\n", + "* `model`: Base model ID from SageMaker Hub content that is available for fine-tuning, or ModelPackage artifacts\n", + "* `training_type`: One of the `TrainingType` enum values (`sagemaker.train.common`), either LORA or FULL (optional)\n", + "* `model_package_group`: ModelPackage group name or ModelPackageGroup (optional)\n", + "* `mlflow_resource_arn`: MLFlow app ARN to track the training job (optional)\n", + "* `mlflow_experiment_name`: MLFlow app experiment name(str) (optional)\n", + "* `mlflow_run_name`: MLFlow app run name(str) (optional)\n", + "* `training_dataset`: Training Dataset - either Dataset ARN or S3 Path of the dataset (optional here, but a training dataset is required for the job to run; provide it either via the Trainer or via .train())\n", + "* `validation_dataset`: Validation Dataset - either Dataset ARN or S3 Path of the dataset (optional)\n", + "* `s3_output_path`: S3 path for the trained model artifacts (optional)\n", + "* `base_job_name`: Unique job name (optional)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "062953d8", + "metadata": { + "editable": true, + "jumpStartAlterations": [ + "trainerSelection" + ], + "slideshow": { + "slide_type": "" + }, + "tags": [], + "trainer_type": "SFT" + }, + "outputs": [], + "source": [ + "from sagemaker.train.sft_trainer import SFTTrainer\n", + "from sagemaker.train.common import TrainingType\n", + "\n", + "trainer = SFTTrainer(\n", + " model=BASE_MODEL,\n", + "
training_type=TrainingType.LORA,\n", + " model_package_group=model_package_group,\n", + " training_dataset=TRAINING_DATASET,\n", + " s3_output_path=S3_OUTPUT_PATH,\n", + " sagemaker_session=sagemaker_session,\n", + " role=ROLE_ARN\n", + ")" + ] + }, + { + "cell_type": "markdown", + "id": "cd93226c", + "metadata": { + "editable": true, + "jumpStartAlterations": [ + "trainerSelection" + ], + "slideshow": { + "slide_type": "" + }, + "tags": [], + "trainer_type": "" + }, + "source": [ + "### OR" + ] + }, + { + "cell_type": "markdown", + "id": "1b5603ee", + "metadata": { + "editable": true, + "jumpStartAlterations": [ + "trainerSelection" + ], + "slideshow": { + "slide_type": "" + }, + "tags": [], + "trainer_type": "RLVR" + }, + "source": [ + "#### Create RLVRTrainer (Reinforcement Learning with Verifiable Rewards)\n", + "\n", + "##### Key Parameters:\n", + "* `model`: base_model id on Sagemaker Hubcontent that is available to finetune (or) ModelPackage artifacts\n", + "* `custom_reward_function`: Custom reward function/Evaluator ARN (optional)\n", + "* `model_package_group`: ModelPackage group name or ModelPackageGroup (optional)\n", + "* `mlflow_resource_arn`: MLFlow app ARN to track the training job (optional)\n", + "* `mlflow_experiment_name`: MLFlow app experiment name(str) (optional)\n", + "* `mlflow_run_name`: MLFlow app run name(str) (optional)\n", + "* `training_dataset`: Training Dataset - either Dataset ARN or S3 Path of the dataset (Please note these are required for a training job to run, can be either provided via Trainer or .train()) (optional)\n", + "* `validation_dataset`: Validation Dataset - either Dataset ARN or S3 Path of the dataset (optional)\n", + "* `s3_output_path`: S3 path for the trained model artifacts (optional)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "5aa51a5f", + "metadata": { + "editable": true, + "jumpStartAlterations": [ + "trainerSelection" + ], + "slideshow": { + "slide_type": "" + }, + "tags": [], + 
"trainer_type": "RLVR" + }, + "outputs": [], + "source": [ + "from sagemaker.train.rlvr_trainer import RLVRTrainer\n", + "\n", + "trainer = RLVRTrainer(\n", + " model=BASE_MODEL,\n", + " model_package_group=model_package_group,\n", + " training_dataset=TRAINING_DATASET,\n", + " s3_output_path=S3_OUTPUT_PATH,\n", + " sagemaker_session=sagemaker_session,\n", + " role=ROLE_ARN\n", + ")" + ] + }, + { + "cell_type": "markdown", + "id": "a61dbe9f", + "metadata": { + "editable": true, + "jumpStartAlterations": [ + "trainerSelection" + ], + "slideshow": { + "slide_type": "" + }, + "tags": [], + "trainer_type": "" + }, + "source": [ + "### OR" + ] + }, + { + "cell_type": "markdown", + "id": "88f8bfde", + "metadata": { + "editable": true, + "jumpStartAlterations": [ + "trainerSelection" + ], + "slideshow": { + "slide_type": "" + }, + "tags": [], + "trainer_type": "DPO" + }, + "source": [ + "#### Create DPO Trainer (Direct Preference Optimization)\n", + "\n", + "Direct Preference Optimization (DPO) is a method for training language models to follow human preferences. 
Unlike traditional RLHF (Reinforcement Learning from Human Feedback), DPO directly optimizes the model using preference pairs without needing a reward model.\n", + "\n", + "##### Key Parameters:\n", + "- `model` Base model to fine-tune (from SageMaker Hub)\n", + "- `training_type` Fine-tuning method (LoRA recommended for efficiency)\n", + "- `training_dataset` ARN of the registered preference dataset\n", + "- `model_package_group` Where to store the fine-tuned model\n", + "- `mlflow_resource_arn` MLflow tracking server for experiment logging " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b74c57f8", + "metadata": { + "editable": true, + "jumpStartAlterations": [ + "trainerSelection" + ], + "slideshow": { + "slide_type": "" + }, + "tags": [], + "trainer_type": "DPO" + }, + "outputs": [], + "source": [ + "from sagemaker.train.dpo_trainer import DPOTrainer\n", + "from sagemaker.train.common import TrainingType\n", + "\n", + "trainer = DPOTrainer(\n", + " model=BASE_MODEL,\n", + " training_type=TrainingType.LORA,\n", + " model_package_group=model_package_group,\n", + " training_dataset=TRAINING_DATASET,\n", + " s3_output_path=S3_OUTPUT_PATH,\n", + " sagemaker_session=sagemaker_session,\n", + " role=ROLE_ARN\n", + ")" + ] + }, + { + "cell_type": "markdown", + "id": "514186e9", + "metadata": {}, + "source": [ + "### Step 2: Get Finetuning Options and Modify" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2f6eeb5e", + "metadata": {}, + "outputs": [], + "source": [ + "print(\"Default Finetuning Options:\")\n", + "pprint(trainer.hyperparameters.to_dict())\n", + "\n", + "# Modify options like object attributes\n", + "trainer.hyperparameters.learning_rate = 0.0002\n", + "\n", + "print(\"\\nModified/User defined Options:\")\n", + "pprint(trainer.hyperparameters.to_dict())" + ] + }, + { + "cell_type": "markdown", + "id": "18f4e5df", + "metadata": {}, + "source": [ + "### Step 3: Start Training" + ] + }, + { + "cell_type": 
"code", + "execution_count": null, + "id": "31690f41", + "metadata": {}, + "outputs": [], + "source": [ + "training_job = trainer.train(wait=True)\n", + "\n", + "TRAINING_JOB_NAME = training_job.training_job_name\n", + "\n", + "pprint(training_job)" + ] + }, + { + "cell_type": "markdown", + "id": "60b77a45", + "metadata": {}, + "source": [ + "### Step 4: Describe Training job" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "9277fde0", + "metadata": {}, + "outputs": [], + "source": [ + "from sagemaker.core.resources import TrainingJob\n", + "\n", + "response = TrainingJob.get(training_job_name=TRAINING_JOB_NAME)\n", + "pprint(response)" + ] + }, + { + "cell_type": "markdown", + "id": "evaluation-section", + "metadata": {}, + "source": [ + "# Part 2: Model Evaluation\n", + "\n", + "This section demonstrates the basic user-facing flow for creating and managing evaluation jobs" + ] + }, + { + "cell_type": "markdown", + "id": "cleanup-pipeline", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "source": [ + "## Step 1: Create BenchmarkEvaluator\n", + "\n", + "Create a BenchmarkEvaluator instance with the desired benchmark. The evaluator will use Jinja2 templates to render a complete pipeline definition.\n", + "\n", + "### Key Parameters:\n", + "- `benchmark`: Benchmark type from the Benchmark enum\n", + "- `model`: Model ARN from SageMaker hub content\n", + "- `s3_output_path`: S3 location for evaluation outputs\n", + "- `mlflow_resource_arn`: MLflow tracking server ARN for experiment tracking (optional)\n", + "- `model_package_group`: Model package group ARN (optional)\n", + "- `source_model_package`: Source model package ARN (optional)\n", + "- `model_artifact`: ARN of model artifact for lineage tracking (auto-inferred from source_model_package) (optional)\n", + "\n", + "**Note:** When you call `evaluate()`, the system will start evaluation job. The evaluator will:\n", + "1. 
Build template context with all required parameters\n", + "2. Render the pipeline definition from `DETERMINISTIC_TEMPLATE` using Jinja2\n", + "3. Create or update the pipeline with the rendered definition\n", + "4. Start the pipeline execution with empty parameters (all values pre-substituted) " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "delete-existing-pipeline", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "from sagemaker.train.evaluate import BenchMarkEvaluator\n", + "from sagemaker.train.evaluate import get_benchmarks, get_benchmark_properties\n", + "from rich.pretty import pprint\n", + "import logging\n", + "logging.basicConfig(\n", + " level=logging.INFO,\n", + " format='%(levelname)s - %(name)s - %(message)s'\n", + ")\n", + "\n", + "# Get available benchmarks\n", + "Benchmark = get_benchmarks()\n", + "pprint(list(Benchmark))\n", + "\n", + "# Print properties for a specific benchmark\n", + "pprint(get_benchmark_properties(benchmark=Benchmark.GEN_QA))\n", + "\n", + "\n", + "# Create evaluator with GEN_QA benchmark\n", + "evaluator = BenchMarkEvaluator(\n", + " benchmark=Benchmark.GEN_QA,\n", + " model=BASE_MODEL,\n", + " s3_output_path=S3_OUTPUT_PATH,\n", + ")\n", + "\n", + "pprint(evaluator)" + ] + }, + { + "cell_type": "markdown", + "id": "run-evaluation", + "metadata": {}, + "source": [ + "## Step 2: Run Evaluation" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "start-evaluation", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "# Run evaluation\n", + "execution = evaluator.evaluate()\n", + "\n", + "print(f\"Evaluation job started!\")\n", + "print(f\"Job ARN: {execution.arn}\")\n", + "print(f\"Job Name: {execution.name}\")\n", + "print(f\"Status: {execution.status.overall_status}\")\n", + "\n", + "pprint(execution)" + ] + }, + { + "cell_type": 
"markdown", + "id": "a3de8255-9f98-444a-99a6-cfe7cc2584af", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "source": [ + "## Step 3: Monitor Execution" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "monitor-evaluation", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "execution.refresh()\n", + "\n", + "print(f\"Current status: {execution.status}\")\n", + "\n", + "# Display individual step statuses\n", + "if execution.status.step_details:\n", + " print(\"\\nStep Details:\")\n", + " for step in execution.status.step_details:\n", + " print(f\" {step.name}: {step.status}\")" + ] + }, + { + "cell_type": "markdown", + "id": "2ebac85a-adee-4f18-935d-478037c7a1f3", + "metadata": {}, + "source": [ + "## Step 4: Wait for Completion\n", + "\n", + "Wait for the pipeline to complete. This provides rich progress updates in Jupyter notebooks:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "74b51cca-2024-4276-b05d-48f52e527c06", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "execution.wait(target_status=\"Succeeded\", poll=5, timeout=3600)\n", + "\n", + "print(f\"\\nFinal Status: {execution.status.overall_status}\")" + ] + }, + { + "cell_type": "markdown", + "id": "0d153370-213a-41d0-8a95-f4ffccf8f9aa", + "metadata": {}, + "source": [ + "## Step 5: View Results\n", + "\n", + "Display the evaluation results in a formatted table:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2f470824-7740-48bb-9282-a7b9d0407fff", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "execution.show_results()" + ] + }, + { + "cell_type": "markdown", + "id": "92bda96d-5be7-408f-9b47-ae46772ac03e", + "metadata": { + "editable": true, + 
"slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "source": [ + "## Part 3. Deploying the Model to Bedrock for inference\n", + "\n", + "Trained model artifacts and checkpoints are stored in your designated escrow S3 bucket. You can access the training checkpoint location from the `describe_training_job` response.\n", + "\n", + "By calling `create_custom_model` API, you can create your custom model referencing the model artifacts stored in your S3 escrow bucket." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "784304f4-eb4f-48c8-b572-e5a18c5a9929", + "metadata": {}, + "outputs": [], + "source": [ + "import boto3\n", + "import json\n", + "from urllib.parse import urlparse\n", + "\n", + "bedrock_custom_model_name = \"\" # customize as needed\n", + "\n", + "describe_training_response = sm_client.describe_training_job(TrainingJobName=TRAINING_JOB_NAME)\n", + "\n", + "training_output_s3_uri = describe_training_response['OutputDataConfig']['S3OutputPath']\n", + "\n", + "def get_s3_manifest(training_output_s3_uri):\n", + " try:\n", + " s3_client = boto3.client('s3')\n", + " parsed_uri = urlparse(training_output_s3_uri)\n", + " bucket = parsed_uri.netloc\n", + " key = parsed_uri.path.lstrip('/')\n", + " manifest_key = f\"{key.rstrip('/')}/{TRAINING_JOB_NAME}/output/output/manifest.json\"\n", + "\n", + " print(f\"Fetching manifest from s3://{bucket}/{manifest_key}\")\n", + " response = s3_client.get_object(Bucket=bucket, Key=manifest_key)\n", + "\n", + " manifest_content = response['Body'].read().decode('utf-8')\n", + " manifest = json.loads(manifest_content)\n", + " if 'checkpoint_s3_bucket' not in manifest:\n", + " raise ValueError(\"Checkpoint location not found in manifest\")\n", + " print(f\"Successfully retrieved checkpoint S3 URI: {manifest['checkpoint_s3_bucket']}\")\n", + " return manifest['checkpoint_s3_bucket']\n", + " except s3_client.exceptions.NoSuchKey:\n", + " raise FileNotFoundError(f\"Manifest file not found at 
s3://{bucket}/{manifest_key}\")\n", + " except json.JSONDecodeError as e:\n", + " raise ValueError(f\"Failed to parse manifest JSON: {str(e)}\")\n", + " except Exception as e:\n", + " raise Exception(f\"Error fetching manifest: {str(e)}\")\n", + "\n", + "s3_checkpoint_path = get_s3_manifest(training_output_s3_uri)\n", + "\n", + "\n", + "bedrock_client = boto3.Session().client(service_name=\"bedrock\", region_name=REGION)\n", + "\n", + "# Use the job's CheckpointConfig URI when it is present; otherwise keep the manifest value fetched above\n", + "s3_checkpoint_path = describe_training_response.get(\"CheckpointConfig\", {}).get(\"S3Uri\") or s3_checkpoint_path\n", + "\n", + "try:\n", + " response = bedrock_client.create_custom_model(\n", + " modelName=bedrock_custom_model_name,\n", + " modelSourceConfig={\"s3DataSource\": {\"s3Uri\": s3_checkpoint_path}},\n", + " roleArn=ROLE_ARN,\n", + " # Optionally, add modelTags here\n", + " )\n", + " print(\"Custom model ARN:\", response[\"modelArn\"])\n", + "except Exception as e:\n", + " print(f\"An unexpected error occurred: {e}\")" + ] + }, + { + "cell_type": "markdown", + "id": "f0dc4211-788c-4e5d-844a-58176ac69cea", + "metadata": {}, + "source": [ + "To monitor the job, use the `get_custom_model` operation to retrieve the job status. Please allow some time for the job to complete as this can take up to 20 minutes."
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3795cd13-57fd-44f7-b2e2-9f51f2df74c4", + "metadata": {}, + "outputs": [], + "source": [ + "import time\n", + "while True:\n", + " custom_model_response = bedrock_client.get_custom_model(modelIdentifier=bedrock_custom_model_name)\n", + " model_status = custom_model_response[\"modelStatus\"]\n", + " print(f\"Custom model status: {model_status}\")\n", + " if model_status == \"Active\":\n", + " break\n", + " elif model_status in [\"Failed\"]:\n", + " raise Exception(f\"Custom model creation failed with status: {model_status}\")\n", + " time.sleep(30)\n", + "print(\"Custom model is ACTIVE.\")\n", + "custom_model_response" + ] + }, + { + "cell_type": "markdown", + "id": "c4ecb46f-26ac-463e-b644-c8eb65173ac2", + "metadata": {}, + "source": [ + "After you create a custom model, you can set up inference using one of the following options:\n", + "1. **Purchase Provisioned Throughput** – Purchase Provisioned Throughput for your model to set up dedicated compute capacity with guaranteed throughput for consistent performance and lower latency.\n", + "For more information about Provisioned Throughput, see [Increase model invocation capacity with Provisioned Throughput in Amazon Bedrock](https://docs.aws.amazon.com/bedrock/latest/userguide/prov-throughput.html). For more information about using custom models with Provisioned Throughput, [see Purchase Provisioned Throughput for a custom model](https://docs.aws.amazon.com/bedrock/latest/userguide/custom-model-use-pt.html).\n", + "2. **Deploy custom model for on-demand inference (only LoRA fine-tuned Amazon Nova models)** – To set up on-demand inference, you deploy the custom model with a custom model deployment. After you deploy the model, you invoke it using the ARN for the custom model deployment. 
With on-demand inference, you only pay for what you use and you don't need to set up provisioned compute resources.\n", + "For more information about deploying custom models for on-demand inference, see [Deploy a custom model for on-demand inference](https://docs.aws.amazon.com/bedrock/latest/userguide/deploy-custom-model-on-demand.html)." + ] + }, + { + "cell_type": "markdown", + "id": "f7b52193-3624-4485-84c7-86b7f5d0e7fb", + "metadata": {}, + "source": [ + "#### Deploy custom model for inference by using Provisioned Throughput" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "5f0064c4-7302-47de-a125-a0f07d4e13ad", + "metadata": {}, + "outputs": [], + "source": [ + "provisioned_model_name = \"test-provisioned-model\"\n", + "custom_model_id = custom_model_response[\"modelArn\"]\n", + "\n", + "try:\n", + " response = bedrock_client.create_provisioned_model_throughput(\n", + " modelId=custom_model_id, provisionedModelName=provisioned_model_name, modelUnits=1\n", + " )\n", + " provisioned_model_arn = response[\"provisionedModelArn\"]\n", + " print(\"Provisioned model ARN:\", provisioned_model_arn)\n", + "except Exception as e:\n", + " print(f\"An unexpected error occurred: {e}\")" + ] + }, + { + "cell_type": "markdown", + "id": "ef1671ec-81d7-4fc7-89a5-7ee5a2e8b67b", + "metadata": {}, + "source": [ + "Wait for the provisioned model to reach the `InService` status." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "efef8325-fade-49ef-a2bd-ec0215a56f25", + "metadata": {}, + "outputs": [], + "source": [ + "while True:\n", + " response = bedrock_client.get_provisioned_model_throughput(\n", + " provisionedModelId=provisioned_model_arn\n", + " )\n", + " model_status = response[\"status\"]\n", + " print(f\"Provisioned model status: {model_status}\")\n", + " if model_status == \"InService\":\n", + " break\n", + " elif model_status in [\"Failed\"]:\n", + " raise Exception(f\"Provisioned model failed with status: {model_status}\")\n", + " 
time.sleep(30)\n", + "print(\"Provisioned model is in service.\")\n", + "response" + ] + }, + { + "cell_type": "markdown", + "id": "68a1443e-ef01-4ee1-9f8d-10e9ec3a55a3", + "metadata": {}, + "source": [ + "Finally, you can invoke the model like any other Bedrock-hosted model using the `InvokeModel` API." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "80f8ecee-9a5f-46a5-97dc-72a2e6a7c693", + "metadata": {}, + "outputs": [], + "source": [ + "# Invoke model (Inference)\n", + "bedrock_runtime = boto3.client(\"bedrock-runtime\", region_name=REGION)\n", + "\n", + "request_body = {\n", + " \"inferenceConfig\": {\"max_new_tokens\": 1000, \"temperature\": 0.7, \"top_p\": 0.9},\n", + " \"messages\": [\n", + " {\n", + " \"role\": \"user\",\n", + " \"content\": [\n", + " {\"text\": \"Tell me about Amazon Bedrock in less than 100 words.\"}\n", + " ],\n", + " }\n", + " ],\n", + "}\n", + "\n", + "response = bedrock_runtime.invoke_model(\n", + " modelId=provisioned_model_arn,\n", + " body=json.dumps(request_body),\n", + " contentType=\"application/json\",\n", + " accept=\"application/json\",\n", + ")\n", + "\n", + "response_body = json.loads(response[\"body\"].read())\n", + "print(response_body[\"output\"][\"message\"][\"content\"][0][\"text\"])" + ] + }, + { + "cell_type": "markdown", + "id": "b51edee9-6663-4863-a5f2-c72e9cfe7e9e", + "metadata": {}, + "source": [ + "#### Deploy custom model for On-Demand Inference\n", + "**Important Note:** On-demand inference is currently supported only for LoRA-based fine-tuned models.\n", + "\n", + "Once the custom model has reached the `Active` status, deploy it for on-demand inference by creating a custom model deployment."
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2d8beda2-c96d-40e5-ac86-ff2a58eadd40", + "metadata": {}, + "outputs": [], + "source": [ + "model_deployment_name = \"\"\n", + "custom_model_arn=custom_model_response[\"modelArn\"]\n", + "try:\n", + " response = bedrock_client.create_custom_model_deployment(\n", + " modelDeploymentName=model_deployment_name,\n", + " modelArn=custom_model_arn,\n", + " description=\"\",\n", + " tags=[\n", + " {\n", + " \"key\":\"\",\n", + " \"value\":\"\"\n", + " }\n", + " ]\n", + " )\n", + " custom_model_deployment_arn = response[\"customModelDeploymentArn\"]\n", + " print(\"Custom model deployment ARN:\", custom_model_deployment_arn)\n", + "except Exception as e:\n", + " print(f\"An unexpected error occurred: {e}\")\n", + "response" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "863102f7-4e5d-4f64-945d-df5be269b307", + "metadata": {}, + "outputs": [], + "source": [ + "while True:\n", + " response = bedrock_client.get_custom_model_deployment(customModelDeploymentIdentifier=custom_model_deployment_arn)\n", + " model_status = response[\"status\"]\n", + " print(f\"Custom model deployment status: {model_status}\")\n", + " if model_status == \"Active\":\n", + " break\n", + " elif model_status in [\"Failed\"]:\n", + " raise Exception(f\"Custom model deployment failed with status: {model_status}\")\n", + " time.sleep(30)\n", + "print(\"Custom model is ACTIVE.\")\n", + "response" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "79b99c7a-aa7f-409a-a988-9ed618e856e9", + "metadata": {}, + "outputs": [], + "source": [ + "bedrock_runtime = boto3.client(\"bedrock-runtime\", region_name=REGION)\n", + "\n", + "# invoke a deployed custom model using Converse API\n", + "response = bedrock_runtime.converse(\n", + " modelId=custom_model_deployment_arn,\n", + " messages=[\n", + " {\n", + " \"role\": \"user\",\n", + " \"content\": [\n", + " {\n", + " \"text\": \"Tell me about Amazon 
Bedrock in less than 100 words.\",\n", + " }\n", + " ]\n", + " }\n", + " ]\n", + " )\n", + "result = response.get('output')\n", + "print(result)\n", + "\n", + "# invoke a deployed custom model using InvokeModel API\n", + "request_body = {\n", + " \"schemaVersion\": \"messages-v1\",\n", + " \"messages\": [{\"role\": \"user\", \n", + " \"content\": [{\"text\": \"Tell me about Amazon Bedrock in less than 100 words.\"}]}],\n", + " \"system\": [{\"text\": \"What is amazon bedrock?\"}],\n", + " \"inferenceConfig\": {\"maxTokens\": 500, \n", + " \"topP\": 0.9, \n", + " \"temperature\": 0.0\n", + " }\n", + "}\n", + "body = json.dumps(request_body)\n", + "response = bedrock_runtime.invoke_model(\n", + " modelId=custom_model_deployment_arn,\n", + " body=body\n", + " )\n", + "\n", + "# Extract and print the response text\n", + "model_response = json.loads(response[\"body\"].read())\n", + "response_text = model_response[\"output\"][\"message\"][\"content\"][0][\"text\"]\n", + "print(response_text)" + ] + }, + { + "cell_type": "markdown", + "id": "1b80972e-7f59-4357-9b23-74c1d3877342", + "metadata": {}, + "source": [ + "### Cleanup\n", + "Delete the resources that were created to stop incurring charges." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f83840f7-1279-4192-a13f-a05bef8fb3e4", + "metadata": {}, + "outputs": [], + "source": [ + "# Delete provisioned model throughput\n", + "print(f\"Deleting provisioned model throughput: {provisioned_model_arn}\")\n", + "try:\n", + " bedrock_client.delete_provisioned_model_throughput(\n", + " provisionedModelId=provisioned_model_name\n", + " )\n", + " print(\"Provisioned model throughput deleted successfully.\")\n", + "except Exception as e:\n", + " print(f\"Error deleting provisioned throughput: {e}\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "41188290-dc41-4231-95f7-d371aa77fb1c", + "metadata": {}, + "outputs": [], + "source": [ + "# Delete custom model deployment if you have used on-demand inference.\n", + "print(f\"Deleting custom model deployment: {custom_model_deployment_arn}\")\n", + "try:\n", + " bedrock_client.delete_custom_model_deployment(\n", + " customModelDeploymentIdentifier=custom_model_deployment_arn\n", + " )\n", + " print(\"Custom model deployment deleted successfully.\")\n", + "except Exception as e:\n", + " print(f\"Error deleting custom model deployment: {e}\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ff1805d7-14b6-4d6b-a331-1924fbae346b", + "metadata": {}, + "outputs": [], + "source": [ + "# Delete custom model\n", + "print(f\"Deleting custom model: {custom_model_id}\")\n", + "try:\n", + " bedrock_client.delete_custom_model(modelIdentifier=custom_model_id)\n", + " print(\"Custom model deleted successfully.\")\n", + "except Exception as e:\n", + " print(f\"Error deleting custom model: {e}\")" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": 
"python", + "pygments_lexer": "ipython3", + "version": "3.11.11" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +}