diff --git a/.github/workflows/e2e-nvidia-l4-x1.yml b/.github/workflows/e2e-nvidia-l4-x1.yml
index 5ff9de55..ef4d9afb 100644
--- a/.github/workflows/e2e-nvidia-l4-x1.yml
+++ b/.github/workflows/e2e-nvidia-l4-x1.yml
@@ -15,10 +15,11 @@ on:
       - release-*
     paths:
       # note this should match the merging criteria in 'mergify.yml'
-      - '**.py'
-      - 'pyproject.toml'
-      - 'requirements**.txt'
-      - '.github/workflows/e2e-nvidia-l4-x1.yml' # This workflow
+      - "**.py"
+      - "pyproject.toml"
+      - "requirements**.txt"
+      - ".github/workflows/e2e-nvidia-l4-x1.yml" # This workflow
+      - "!tests/**" # we don't need to run e2e if we're just changing the tests.
 
 concurrency:
   group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
@@ -72,7 +73,7 @@ jobs:
               {"Key": "GitHubRef", "Value": "${{ github.ref }}"},
               {"Key": "GitHubPR", "Value": "${{ github.event.number }}"}
             ]
-  
+
   e2e-medium-test:
     needs:
       - start-medium-ec2-runner
@@ -153,7 +154,7 @@ jobs:
           . venv/bin/activate
           # set preserve to true so we can retain the logs
           ./scripts/e2e-ci.sh -mp
-          
+
           # HACK(osilkin): The above test runs the medium workflow test which does not actually test the training library.
           #                Therefore we must disable the upload of the training logs, as they will not exist in the same location.
           # we know that the file will be named something like f"/training_params_and_metrics_global{os.environ['RANK']}.jsonl" in python
@@ -200,7 +201,7 @@ jobs:
           github-token: ${{ secrets.GH_PERSONAL_ACCESS_TOKEN }}
           label: ${{ needs.start-medium-ec2-runner.outputs.label }}
           ec2-instance-id: ${{ needs.start-medium-ec2-runner.outputs.ec2-instance-id }}
-      
+
       # - name: Download loss data
       #   id: download-logs
       #   uses: actions/download-artifact@v4
@@ -211,12 +212,12 @@ jobs:
       # - name: Install dependencies
       #   run: |
       #     pip install -r requirements-dev.txt
-      
+
       # - name: Try to upload to s3
       #   id: upload-s3
       #   continue-on-error: true
       #   run: |
-      #     output_file='./test.md' 
+      #     output_file='./test.md'
       #     python scripts/create-loss-graph.py  \
       #       --log-file "${{ steps.download-logs.outputs.download-path }}/training-log.jsonl" \
       #       --output-file "${output_file}" \
diff --git a/.github/workflows/smoke.yaml b/.github/workflows/smoke.yaml
new file mode 100644
index 00000000..a8d39210
--- /dev/null
+++ b/.github/workflows/smoke.yaml
@@ -0,0 +1,146 @@
+# SPDX-License-Identifier: Apache-2.0
+
+name: "Run smoke tests via Tox::pytest"
+# These tests will be long running and require accelerated hardware.
+
+on:
+  workflow_dispatch:
+    inputs:
+      branch:
+        type: string
+        default: main
+  # using this rather than pull_request because this workflow
+  # needs to run in the context of the base branch (main) and
+  # access the repo's secrets to start the AWS instances.
+  pull_request_target:
+    branches:
+      - main
+      - release-*
+
+permissions:
+  contents: read
+
+defaults:
+  run:
+    shell: bash
+
+env:
+  ec2_runner_variant: "g6e.12xlarge" # 4x L40s
+
+jobs:
+  start-ec2-runner:
+    runs-on: ubuntu-latest
+    outputs:
+      label: ${{ steps.start-ec2-runner.outputs.label }}
+      ec2-instance-id: ${{ steps.start-ec2-runner.outputs.ec2-instance-id}}
+
+    steps:
+      - name: "Harden runner"
+        uses: step-security/harden-runner@cb605e52c26070c328afc4562f0b4ada7618a84e # v2.10.1
+        with:
+          egress-policy: audit
+
+      - name: "Configure AWS credentials"
+        uses: "aws-actions/configure-aws-credentials@e3dd6a429d7300a6a4c196c26e071d42e0343502" # v4.0.2
+        with:
+          aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }}
+          aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
+          aws-region: ${{ vars.AWS_REGION }}
+
+      - name: "Start EC2 runner"
+        id: start-ec2-runner
+        uses: machulav/ec2-github-runner@28fbe1c4d7d9ba74134ca5ebc559d5b0a989a856 # v2.3.8
+        with:
+          mode: start
+          github-token: ${{ secrets.GH_PERSONAL_ACCESS_TOKEN }}
+          ec2-image-id: ${{ vars.AWS_EC2_AMI }}
+          ec2-instance-type: ${{ env.ec2_runner_variant }}
+          subnet-id: subnet-024298cefa3bedd61
+          security-group-id: sg-06300447c4a5fbef3
+          iam-role-name: instructlab-ci-runner
+          aws-resource-tags: > # must be a strict JSON array (no trailing commas)
+            [
+            {"Key": "Name", "Value": "instructlab-ci-github-smoketest-runner"},
+            {"Key": "GitHubRepository", "Value": "${{ github.repository }}"},
+            {"Key": "GitHubRef", "Value": "${{ github.ref }}"}
+            ]
+
+  run-smoke-tests:
+    needs:
+      - start-ec2-runner
+    runs-on: ${{needs.start-ec2-runner.outputs.label}}
+    # It is important that this job has no write permissions and has
+    # no access to any secrets. This part is where we are running
+    # untrusted code from PRs.
+    permissions: {}
+    steps:
+      - name: "Harden runner"
+        uses: step-security/harden-runner@cb605e52c26070c328afc4562f0b4ada7618a84e # v2.10.1
+        with:
+          egress-policy: audit
+
+      - name: "Install packages"
+        run: |
+          cat /etc/os-release
+          sudo dnf install -y gcc gcc-c++ make git-core python3.11 python3.11-devel
+
+      - name: "Verify cuda environment is setup"
+        run: |
+          export CUDA_HOME="/usr/local/cuda"
+          export LD_LIBRARY_PATH="${LD_LIBRARY_PATH}:${CUDA_HOME}/lib64:${CUDA_HOME}/extras/CUPTI/lib64"
+          export PATH="${PATH}:${CUDA_HOME}/bin"
+          nvidia-smi
+
+      - name: "Checkout code"
+        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
+        with:
+          fetch-depth: 0
+          ref: ${{ github.event.pull_request.head.sha || inputs.branch }} # PR head on pull_request_target; input branch on workflow_dispatch
+
+      # installs in $GITHUB_WORKSPACE/venv.
+      # only has to install Tox because Tox will do the other virtual environment management.
+      - name: "Setup Python virtual environment"
+        run: |
+          python3.11 -m venv --upgrade-deps venv
+          . venv/bin/activate
+          pip install tox
+
+      - name: "Show disk utilization BEFORE tests"
+        run: |
+          df -h
+
+      - name: "Run smoke tests with Tox and Pytest"
+        run: |
+          source venv/bin/activate
+          tox -e py3-smoke
+
+      - name: "Show disk utilization AFTER tests"
+        run: |
+          df -h
+
+  stop-ec2-runner:
+    needs:
+      - start-ec2-runner
+      - run-smoke-tests
+    runs-on: ubuntu-latest
+    if: ${{ always() }}
+    steps:
+      - name: "Harden runner"
+        uses: step-security/harden-runner@cb605e52c26070c328afc4562f0b4ada7618a84e # v2.10.1
+        with:
+          egress-policy: audit
+
+      - name: "Configure AWS credentials"
+        uses: "aws-actions/configure-aws-credentials@e3dd6a429d7300a6a4c196c26e071d42e0343502" # v4.0.2
+        with:
+          aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }}
+          aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
+          aws-region: ${{ vars.AWS_REGION }}
+
+      - name: "Stop EC2 runner"
+        uses: machulav/ec2-github-runner@28fbe1c4d7d9ba74134ca5ebc559d5b0a989a856 # v2.3.8
+        with:
+          mode: stop
+          github-token: ${{ secrets.GH_PERSONAL_ACCESS_TOKEN }}
+          label: ${{ needs.start-ec2-runner.outputs.label }}
+          ec2-instance-id: ${{ needs.start-ec2-runner.outputs.ec2-instance-id }}
diff --git a/.github/workflows/unit-tests.yaml b/.github/workflows/unit.yaml
similarity index 100%
rename from .github/workflows/unit-tests.yaml
rename to .github/workflows/unit.yaml
diff --git a/pyproject.toml b/pyproject.toml
index ca053385..f56cfc64 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -107,3 +107,8 @@ exclude = [
 ]
 # honor excludes by not following there through imports
 follow_imports = "silent"
+
+[tool.pytest.ini_options]
+markers = [
+  "slow: marks tests as slow (deselect with '-m \"not slow\"')",
+]
diff --git a/requirements-dev.txt b/requirements-dev.txt
index f77c807f..fcb76fbb 100644
--- a/requirements-dev.txt
+++ b/requirements-dev.txt
@@ -13,3 +13,4 @@ ipython
 ipykernel
 jupyter
 
+huggingface_hub
diff --git a/tests/smoke/test_train.py b/tests/smoke/test_train.py
new file mode 100644
index 00000000..d7e81cc9
--- /dev/null
+++ b/tests/smoke/test_train.py
@@ -0,0 +1,245 @@
+# Standard
+from typing import Generator
+import os
+import pathlib
+import shutil
+import sys
+import tempfile
+
+# Third Party
+from transformers import AutoModelForCausalLM
+import huggingface_hub
+import pytest
+
+# First Party
+from instructlab.training import data_process
+from instructlab.training.config import (
+    DataProcessArgs,
+    DistributedBackend,
+    TorchrunArgs,
+    TrainingArgs,
+)
+from instructlab.training.main_ds import run_training
+
+MINIMAL_TRAINING_ARGS = {
+    "max_seq_len": 140,  # this config fits nicely on 4xL40s and may need modification for other setups
+    "max_batch_len": 15000,
+    "num_epochs": 1,
+    "effective_batch_size": 3840,
+    "save_samples": 0,
+    "learning_rate": 1e-4,
+    "warmup_steps": 1,
+    "random_seed": 43,
+    "use_dolomite": False,
+    "is_padding_free": False,
+    "checkpoint_at_epoch": True,
+    "accelerate_full_state_at_epoch": True,
+    "process_data": False,  # expect that incoming data has already been prepared and cached.
+    "disable_flash_attn": False,
+}
+
+DEFAULT_TORCHRUN_ARGS = {
+    "nproc_per_node": 4,  # TODO: this is runner-specific. Should parametrize from environment.
+    "nnodes": 1,
+    "node_rank": 0,
+    "rdzv_id": 123,
+    "rdzv_endpoint": "127.0.0.1:12345",
+}
+
+REFERENCE_TEST_MODEL = "instructlab/granite-7b-lab"
+RUNNER_CPUS_EXPECTED = 4
+
+
+@pytest.fixture(scope="module")
+def custom_tmp_dir() -> Generator[pathlib.Path, None, None]:
+    """A custom fixture for a temporary directory.
+    By default, the builtin `tmp_path` fixture is function-scoped,
+    but we can reuse the same cached storage between many tests.
+
+    Yields:
+        Generator[pathlib.Path, None, None]: path to root directory of temp storage.
+    """
+    temp_dir = tempfile.mkdtemp()
+
+    temp_path = pathlib.Path(temp_dir)
+
+    yield temp_path
+
+    shutil.rmtree(temp_path)  # teardown: delete the directory and everything under it
+
+
+@pytest.fixture(scope="function")
+def checkpoint_dir(
+    custom_tmp_dir: pathlib.Path,
+) -> Generator[pathlib.Path, None, None]:
+    """
+    Creates a 'checkpoints' directory.
+    This directory must be function-scoped because each test
+    will create its own checkpoints.
+    """
+    ckpt_dir = custom_tmp_dir / "checkpoints"
+    ckpt_dir.mkdir()
+
+    yield ckpt_dir
+
+    shutil.rmtree(ckpt_dir)
+
+
+@pytest.fixture(scope="module")
+def prepared_data_dir(custom_tmp_dir: pathlib.Path) -> pathlib.Path:
+    """Sets up module-scoped temporary dir for storage of preprocessed data.
+
+    Args:
+        custom_tmp_dir (pathlib.Path): root dir of temporary storage
+
+    Returns:
+        pathlib.Path: path to directory where preprocessed data can be cached
+    """
+    data_file_dir = custom_tmp_dir / "prepared_data"
+    data_file_dir.mkdir()
+
+    return data_file_dir
+
+
+@pytest.fixture(scope="module")
+def cached_model_dir(custom_tmp_dir: pathlib.Path) -> pathlib.Path:
+    """Sets up module-scoped temporary dir for storage of model checkpoint
+
+    Args:
+        custom_tmp_dir (pathlib.Path): root dir of temporary storage
+
+    Returns:
+        pathlib.Path: path to directory where model checkpoint can be cached
+    """
+    model_dir = custom_tmp_dir / "model"
+    model_dir.mkdir()
+    return model_dir
+
+
+@pytest.fixture(scope="module")
+def cached_test_model(cached_model_dir: pathlib.Path) -> pathlib.Path:
+    """
+    Downloads test model artifacts to temporary cache from HF repo.
+    Assumes that the artifacts for the tokenizer are in the same repo.
+
+    Some interesting behavior:
+    (1) if model is already cached in $HF_HOME/hub/<model> the parameter blobs
+        will be copied into the specified `local_dir`. If some remote
+        files (like paper.pdf or tokenizer.config) aren't in the HF_HOME
+        cache, they'll be pulled and stored in the `local_dir` cache.
+    (2) if model is NOT already cached in $HF_HOME/hub/<model>, a reference will
+        still be created to it but the downloaded artifacts will not be copied
+        back to the HF_HOME cache from the `local_dir`.
+    """
+
+    huggingface_hub.snapshot_download(
+        repo_id=REFERENCE_TEST_MODEL,
+        local_dir=cached_model_dir,
+    )
+
+    return cached_model_dir
+
+
+def this_file_path() -> pathlib.Path:
+    """returns the fully qualified path to this file."""
+    return pathlib.Path(__file__).resolve()
+
+
+def repo_root_dir() -> pathlib.Path:
+    """returns the fully qualified path to the root of the repo."""
+    current_file_path = this_file_path()
+    return current_file_path.parents[2]
+
+
+def data_in_repo_path() -> pathlib.Path:
+    """The data that we'll use in these tests is stored in the repo as an artifact.
+    This returns a path to `sample-data/train_all_pruned_SDG.jsonl`, resolved
+    relative to this file's location in the repo.
+
+    Returns:
+        pathlib.Path: Path to a `.jsonl` file for tests
+    """
+    repo_root = repo_root_dir()
+    data_in_repo_path = repo_root / "sample-data" / "train_all_pruned_SDG.jsonl"
+    return data_in_repo_path
+
+
+def chat_template_in_repo_path() -> pathlib.Path:
+    """The chat template that we'll use in these tests is stored in the repo as an artifact.
+    This returns a path to the `ibm_generic_tmpl.py` file based on this file's location
+    in the repo.
+
+    Returns:
+        pathlib.Path: Path to the `ibm_generic_tmpl.py` chat template file for tests
+    """
+    repo_root = repo_root_dir()
+    chat_template_path = (
+        repo_root
+        / "src"
+        / "instructlab"
+        / "training"
+        / "chat_templates"
+        / "ibm_generic_tmpl.py"
+    )
+    return chat_template_path
+
+
+# TODO: This uses our data preprocessing utility which is not, itself, well tested.
+# need to write tests for this as well.
+@pytest.fixture(scope="module")
+def cached_training_data(
+    prepared_data_dir: pathlib.Path, cached_test_model: pathlib.Path
+) -> pathlib.Path:
+    """Renders test data in model template, tokenizes, and saves to fs"""
+
+    data_in_repo = data_in_repo_path()
+    chat_template = chat_template_in_repo_path()
+
+    data_process_args = DataProcessArgs(
+        data_output_path=str(prepared_data_dir),
+        data_path=str(data_in_repo),
+        max_seq_len=MINIMAL_TRAINING_ARGS["max_seq_len"],
+        model_path=str(cached_test_model),
+        chat_tmpl_path=str(chat_template),
+        num_cpu_procs=RUNNER_CPUS_EXPECTED,
+    )
+
+    data_process.main(data_process_args)
+
+    return prepared_data_dir / "data.jsonl"
+
+
+@pytest.mark.slow
+@pytest.mark.parametrize(
+    "dist_backend", [DistributedBackend.FSDP, DistributedBackend.DEEPSPEED]
+)
+@pytest.mark.parametrize("cpu_offload", [True, False])
+def test_training_feature_matrix(
+    cached_test_model: pathlib.Path,
+    cached_training_data: pathlib.Path,
+    checkpoint_dir: pathlib.Path,
+    prepared_data_dir: pathlib.Path,
+    cpu_offload: bool,
+    dist_backend: DistributedBackend,
+) -> None:  # one case per (dist_backend, cpu_offload) combination
+    train_args = TrainingArgs(
+        model_path=str(cached_test_model),
+        data_path=str(cached_training_data),
+        data_output_dir=str(prepared_data_dir),
+        ckpt_output_dir=str(checkpoint_dir),
+        **MINIMAL_TRAINING_ARGS,
+    )
+
+    train_args.distributed_backend = dist_backend
+
+    if dist_backend == DistributedBackend.FSDP:
+        train_args.fsdp_options.cpu_offload_params = cpu_offload
+    else:
+        pytest.xfail("DeepSpeed not currently functional. OOMs during backprop.")
+        if cpu_offload:  # NOTE: unreachable while the xfail above is unconditional
+            pytest.xfail("DeepSpeed CPU Adam isn't currently building correctly")
+        train_args.deepspeed_options.cpu_offload_optimizer = cpu_offload
+
+    torch_args = TorchrunArgs(**DEFAULT_TORCHRUN_ARGS)
+
+    run_training(torch_args=torch_args, train_args=train_args)
diff --git a/tests/test_init.py b/tests/unit/test_init.py
similarity index 77%
rename from tests/test_init.py
rename to tests/unit/test_init.py
index b361b9ea..3212c37e 100644
--- a/tests/test_init.py
+++ b/tests/unit/test_init.py
@@ -2,6 +2,5 @@
 import pytest
 
 
-@pytest.mark.fast
 def test_fake():
     assert True
diff --git a/tox.ini b/tox.ini
index 86dca1ce..97b98be0 100644
--- a/tox.ini
+++ b/tox.ini
@@ -19,12 +19,31 @@ basepython = python3.11
 
 [testenv:py3-unit]
 description = run unit tests with pytest
-commands = {envpython} -m pytest tests {posargs}
+passenv =
+    HF_HOME
+deps =
+    pytest
+    pytest-asyncio
+    pytest-cov
+    pytest-html
+    -r requirements-dev.txt
+commands = {envpython} -m pytest tests/unit {posargs}
 # NOTE: {posargs} is a placeholder for input positional arguments
 # such as `tox -e py3-unit -- --pdb` if we wanted to run pytest with pdb enabled.
 # `--` delimits flags that are meant for tox vs. those that are positional arguments for
 # the command that's being run in the environment.
 
+[testenv:py3-smoke]
+description = run accelerated smoke tests with pytest
+passenv =
+    HF_HOME
+deps =
+    pytest
+    pytest-asyncio
+    -r requirements-dev.txt
+    -r requirements-cuda.txt
+commands = {envpython} -m pytest tests/smoke {posargs}
+
 # format, check, and linting targets don't build and install the project to
 # speed up testing.
 [testenv:lint]