From 403bdafa99c0d74a9433c83d02ebd0265c2594bd Mon Sep 17 00:00:00 2001
From: Nathan Park
Date: Thu, 1 May 2025 15:49:15 -0700
Subject: [PATCH 1/5] Improve error logging and documentation for issue 4007

---
 doc/frameworks/pytorch/using_pytorch.rst | 46 ++++++++++++++++++++++++
 src/sagemaker/utils.py                   | 35 ++++++++++++------
 2 files changed, 71 insertions(+), 10 deletions(-)

diff --git a/doc/frameworks/pytorch/using_pytorch.rst b/doc/frameworks/pytorch/using_pytorch.rst
index 4141dd84db..4c5d7c9cae 100644
--- a/doc/frameworks/pytorch/using_pytorch.rst
+++ b/doc/frameworks/pytorch/using_pytorch.rst
@@ -1048,6 +1048,43 @@ see `For versions 1.1 and lower <#for-versions-1.1-and-lower>`_.
 
 Where ``requirements.txt`` is an optional file that specifies dependencies on third-party libraries.
 
+Important Packaging Instructions
+------------------------------
+
+When creating your model artifact (``model.tar.gz``), follow these steps to avoid common deployment issues:
+
+1. Navigate to the directory containing your model files:
+
+   .. code:: bash
+
+       cd my_model
+
+2. Create the tar archive from within this directory:
+
+   .. code:: bash
+
+       tar czvf ../model.tar.gz *
+
+**Common Mistakes to Avoid:**
+
+* Do NOT create the archive from the parent directory using ``tar czvf model.tar.gz my_model/``. 
+  This creates an extra directory level that will cause deployment errors.
+* Ensure ``inference.py`` is directly under the ``code/`` directory in your archive.
+* Verify your archive structure using:
+
+  .. code:: bash
+
+      tar tvf model.tar.gz
+
+  You should see output similar to:
+
+  ::
+
+      model.pth
+      code/
+      code/inference.py
+      code/requirements.txt
+
 Create a ``PyTorchModel`` object
 --------------------------------
 
@@ -1066,6 +1103,15 @@ Now call the :class:`sagemaker.pytorch.model.PyTorchModel` constructor to create
 
 Now you can call the ``predict()`` method to get predictions from your deployed model.
 
+Troubleshooting
+--------------
+
+If you encounter a ``FileNotFoundError`` for ``inference.py``, check:
+
+1. That your model artifact is packaged correctly following the instructions above
+2. The structure of your ``model.tar.gz`` file matches the expected layout
+3. You're creating the archive from within the model directory, not from its parent
+
 ***********************************************
 Attach an estimator to an existing training job
 ***********************************************
diff --git a/src/sagemaker/utils.py b/src/sagemaker/utils.py
index 1a75a3a5cc..617f8459d7 100644
--- a/src/sagemaker/utils.py
+++ b/src/sagemaker/utils.py
@@ -13,10 +13,12 @@
 """Placeholder docstring"""
 from __future__ import absolute_import
 
+import abc
 import contextlib
 import copy
 import errno
 import inspect
+import json
 import logging
 import os
 import random
@@ -25,31 +27,30 @@
 import tarfile
 import tempfile
 import time
-from functools import lru_cache
-from typing import Union, Any, List, Optional, Dict
-import json
-import abc
 import uuid
 from datetime import datetime
-from os.path import abspath, realpath, dirname, normpath, join as joinpath
-
+from functools import lru_cache
 from importlib import import_module
+from os.path import abspath, dirname
+from os.path import join as joinpath
+from os.path import normpath, realpath
+from typing import Any, Dict, List, Optional, Union
 
 import boto3
 import botocore
 from botocore.utils import merge_dicts
-from six.moves.urllib import parse
 from six import viewitems
+from six.moves.urllib import parse
 
 from sagemaker import deprecations
 from sagemaker.config import validate_sagemaker_config
 from sagemaker.config.config_utils import (
-    _log_sagemaker_config_single_substitution,
     _log_sagemaker_config_merge,
+    _log_sagemaker_config_single_substitution,
 )
 from sagemaker.enums import RoutingStrategy
 from sagemaker.session_settings import SessionSettings
-from sagemaker.workflow import is_pipeline_variable, is_pipeline_parameter_string
+from sagemaker.workflow import is_pipeline_parameter_string, is_pipeline_variable
 from sagemaker.workflow.entities import PipelineVariable
 
 ALTERNATE_DOMAINS = {
@@ -624,7 +625,21 @@ def _create_or_update_code_dir(
         if os.path.exists(os.path.join(code_dir, inference_script)):
             pass
         else:
-            raise
+            raise FileNotFoundError(
+                f"Could not find '{inference_script}'. Common solutions:\n"
+                "1. Make sure inference.py exists in the code/ directory\n"
+                "2. Package your model correctly:\n"
+                "   - ✅ DO: Navigate to the directory containing model files and run:\n"
+                "        cd /path/to/model_files\n"
+                "        tar czvf ../model.tar.gz *\n"
+                "   - ❌ DON'T: Create from parent directory:\n"
+                "        tar czvf model.tar.gz model/\n"
+                "\nExpected structure in model.tar.gz:\n"
+                "  ├── model.pth (or your model file)\n"
+                "  └── code/\n"
+                "      ├── inference.py\n"
+                "      └── requirements.txt"
+            )
 
     for dependency in dependencies:
         lib_dir = os.path.join(code_dir, "lib")

From a25ce1fcfd77888d28c1322efab49823177fab67 Mon Sep 17 00:00:00 2001
From: Nathan Park
Date: Fri, 2 May 2025 11:15:00 -0700
Subject: [PATCH 2/5] Fix a whitespace

---
 doc/frameworks/pytorch/using_pytorch.rst | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/doc/frameworks/pytorch/using_pytorch.rst b/doc/frameworks/pytorch/using_pytorch.rst
index 4c5d7c9cae..9bd48ef984 100644
--- a/doc/frameworks/pytorch/using_pytorch.rst
+++ b/doc/frameworks/pytorch/using_pytorch.rst
@@ -1049,7 +1049,7 @@ see `For versions 1.1 and lower <#for-versions-1.1-and-lower>`_.
 Where ``requirements.txt`` is an optional file that specifies dependencies on third-party libraries.
 
 Important Packaging Instructions
-------------------------------
+--------------------------------
 
 When creating your model artifact (``model.tar.gz``), follow these steps to avoid common deployment issues:
 
@@ -1067,7 +1067,7 @@ When creating your model artifact (``model.tar.gz``), follow these steps to avoi
 
 **Common Mistakes to Avoid:**
 
-* Do NOT create the archive from the parent directory using ``tar czvf model.tar.gz my_model/``. 
+* Do NOT create the archive from the parent directory using ``tar czvf model.tar.gz my_model/``.
   This creates an extra directory level that will cause deployment errors.
 * Ensure ``inference.py`` is directly under the ``code/`` directory in your archive.
 * Verify your archive structure using:
@@ -1104,7 +1104,7 @@ Now call the :class:`sagemaker.pytorch.model.PyTorchModel` constructor to create
 Now you can call the ``predict()`` method to get predictions from your deployed model.
 
 Troubleshooting
---------------
+---------------
 
 If you encounter a ``FileNotFoundError`` for ``inference.py``, check:
 

From fd20c3f241a4140cf596d77756a4db9263c4df0d Mon Sep 17 00:00:00 2001
From: Nathan Park
Date: Sun, 4 May 2025 20:16:47 -0700
Subject: [PATCH 3/5] Add hyperlink to RTDs

---
 src/sagemaker/utils.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/src/sagemaker/utils.py b/src/sagemaker/utils.py
index 617f8459d7..c3896e32a3 100644
--- a/src/sagemaker/utils.py
+++ b/src/sagemaker/utils.py
@@ -639,6 +639,8 @@ def _create_or_update_code_dir(
             "  └── code/\n"
             "      ├── inference.py\n"
             "      └── requirements.txt"
+            "\nFor more details, see the documentation:\n"
+            "https://sagemaker.readthedocs.io/en/stable/frameworks/pytorch/using_pytorch.html#bring-your-own-model"
         )
 
     for dependency in dependencies:

From 72aacf8329265349afce6516fcad520fbd353a5a Mon Sep 17 00:00:00 2001
From: Nathan Park
Date: Sun, 4 May 2025 20:27:45 -0700
Subject: [PATCH 4/5] Condense line to < 120

---
 src/sagemaker/utils.py | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/src/sagemaker/utils.py b/src/sagemaker/utils.py
index c3896e32a3..18e3ae75a0 100644
--- a/src/sagemaker/utils.py
+++ b/src/sagemaker/utils.py
@@ -625,6 +625,10 @@ def _create_or_update_code_dir(
         if os.path.exists(os.path.join(code_dir, inference_script)):
             pass
         else:
+            docs_url = (
+                "https://sagemaker.readthedocs.io/en/stable/"
+                "frameworks/pytorch/using_pytorch.html#bring-your-own-model"
+            )
             raise FileNotFoundError(
                 f"Could not find '{inference_script}'. Common solutions:\n"
                 "1. Make sure inference.py exists in the code/ directory\n"
@@ -640,7 +644,7 @@ def _create_or_update_code_dir(
             "      ├── inference.py\n"
            "      └── requirements.txt"
             "\nFor more details, see the documentation:\n"
-            "https://sagemaker.readthedocs.io/en/stable/frameworks/pytorch/using_pytorch.html#bring-your-own-model"
+            f"{docs_url}"
         )
 
     for dependency in dependencies:

From 4f9c9a5e40c3ee77b94b1e3dfc2df2f37aaa3b32 Mon Sep 17 00:00:00 2001
From: Nathan Park
Date: Mon, 5 May 2025 08:32:00 -0700
Subject: [PATCH 5/5] Better doc link

---
 src/sagemaker/utils.py | 9 +++------
 1 file changed, 3 insertions(+), 6 deletions(-)

diff --git a/src/sagemaker/utils.py b/src/sagemaker/utils.py
index 18e3ae75a0..d4faa5ad9f 100644
--- a/src/sagemaker/utils.py
+++ b/src/sagemaker/utils.py
@@ -625,10 +625,6 @@ def _create_or_update_code_dir(
         if os.path.exists(os.path.join(code_dir, inference_script)):
             pass
         else:
-            docs_url = (
-                "https://sagemaker.readthedocs.io/en/stable/"
-                "frameworks/pytorch/using_pytorch.html#bring-your-own-model"
-            )
             raise FileNotFoundError(
                 f"Could not find '{inference_script}'. Common solutions:\n"
                 "1. Make sure inference.py exists in the code/ directory\n"
@@ -642,9 +638,10 @@ def _create_or_update_code_dir(
             "  ├── model.pth (or your model file)\n"
             "  └── code/\n"
             "      ├── inference.py\n"
-            "      └── requirements.txt"
+            "      └── requirements.txt\n"
             "\nFor more details, see the documentation:\n"
-            f"{docs_url}"
+            + "https://sagemaker.readthedocs.io/en/stable/"
+            + "frameworks/pytorch/using_pytorch.html#bring-your-own-model"
         )
 
     for dependency in dependencies:
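
The packaging workflow that the new documentation and error message describe can be exercised with nothing but the Python standard library. The sketch below is illustrative only and is not part of the patches above; the ``my_model/`` directory and its file names are assumptions taken from the documented example layout. It mirrors ``cd my_model && tar czvf ../model.tar.gz *`` and then checks that ``code/inference.py`` sits at the top level of the archive, which is the condition the improved ``FileNotFoundError`` in ``_create_or_update_code_dir`` reports when it is violated.

    import tarfile
    from pathlib import Path

    # Assumed local layout (matches the documented example):
    #   my_model/model.pth
    #   my_model/code/inference.py
    #   my_model/code/requirements.txt
    model_dir = Path("my_model")
    archive = Path("model.tar.gz")

    # Equivalent to `cd my_model && tar czvf ../model.tar.gz *`: each entry is added
    # at the archive root, so no extra "my_model/" directory level is introduced.
    with tarfile.open(archive, "w:gz") as tar:
        for entry in sorted(model_dir.iterdir()):
            tar.add(entry, arcname=entry.name)

    # Equivalent to `tar tvf model.tar.gz`: list the members and confirm the layout
    # the error message expects, i.e. inference.py directly under code/.
    with tarfile.open(archive, "r:gz") as tar:
        members = tar.getnames()
        print("\n".join(members))
        if not any(name.lstrip("./") == "code/inference.py" for name in members):
            raise SystemExit("code/inference.py is not at the top level of the archive")

Running ``tar tvf model.tar.gz`` on the result should list ``model.pth``, ``code/``, ``code/inference.py``, and ``code/requirements.txt`` with no leading ``my_model/`` prefix, matching the expected structure shown in both the documentation and the error message.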