diff --git a/Dockerfile.tmpl b/Dockerfile.tmpl index 4f7f05b1..848bd41a 100644 --- a/Dockerfile.tmpl +++ b/Dockerfile.tmpl @@ -1,7 +1,52 @@ ARG BASE_IMAGE \ BASE_IMAGE_TAG -FROM ${BASE_IMAGE}:${BASE_IMAGE_TAG} +# ============================================================================= +# Stage 1: Apply --force-reinstall operations to base image +# These replace packages from the base, causing layer bloat. We squash this +# stage to eliminate the duplicate package data. +# ============================================================================= +FROM ${BASE_IMAGE}:${BASE_IMAGE_TAG} AS base-reinstalls + +# Combine all --force-reinstall operations into one layer before squashing. +# b/408281617: Torch is adamant that it cannot install cudnn 9.3.x, only 9.1.x, but Tensorflow can only support 9.3.x. +# This conflict causes a number of package downgrades, which are handled in this command. +# b/394382016: sigstore (dependency of kagglehub) requires prerelease packages; installing separately. +# b/385145217: Intel MKL numpy removed - Intel's channel only has numpy 1.26.4, but base image has +# numpy 2.0.2. Downgrading would break packages built against numpy 2.x ABI. +# b/404590350: Ray and torchtune have conflicting tune cli, we will prioritize torchtune. 
+# b/415358158: Gensim removed from Colab image to upgrade scipy +# b/456239669: remove huggingface-hub pin when pytorch-lightning and transformers are compatible +# b/315753846: Unpin translate package, currently conflicts with adk 1.17.0 +# b/468379293: Unpin Pandas once cuml/cudf are compatible, version 3.0 causes issues +# b/468383498: numpy will auto-upgrade to 2.4.x, which causes issues with numerous packages +# b/468367647: Unpin protobuf, version greater than v5.29.5 causes issues with numerous packages +# b/408298750: We reinstall nltk because older versions have: `AttributeError: module 'inspect' has no attribute 'formatargspec'` +RUN uv pip install --no-cache \ + --index-url https://pypi.nvidia.com --extra-index-url https://pypi.org/simple/ --index-strategy unsafe-first-match \ + --system --force-reinstall "cuml-cu12==25.2.1" \ + "nvidia-cudnn-cu12==9.3.0.75" "nvidia-cublas-cu12==12.5.3.2" "nvidia-cusolver-cu12==11.6.3.83" \ + "nvidia-cuda-cupti-cu12==12.5.82" "nvidia-cuda-nvrtc-cu12==12.5.82" "nvidia-cuda-runtime-cu12==12.5.82" \ + "nvidia-cufft-cu12==11.2.3.61" "nvidia-curand-cu12==10.3.6.82" "nvidia-cusparse-cu12==12.5.1.3" \ + "nvidia-nvjitlink-cu12==12.5.82" \ + && uv pip install --no-cache --system --force-reinstall "pynvjitlink-cu12==0.5.2" \ + && uv pip install --no-cache --system --force-reinstall --prerelease=allow "kagglehub[pandas-datasets,hf-datasets,signing]>=0.3.12" \ + && uv pip install --no-cache --system --force-reinstall --no-deps torchtune gensim "scipy<=1.15.3" "huggingface-hub==0.36.0" "google-cloud-translate==3.12.1" "numpy==2.0.2" "pandas==2.2.2" \ + && uv pip install --no-cache --system --force-reinstall "protobuf==5.29.5" \ + && uv pip install --no-cache --system --force-reinstall "nltk>=3.9.1" \ + && rm -rf /root/.cache/uv /root/.cache/pip + +# ============================================================================= +# Stage 2: Squash the base + reinstalls to eliminate layer bloat (NOTE(review): COPY from a scratch stage keeps only the filesystem; base image ENV/ENTRYPOINT/CMD metadata is dropped - confirm required env vars like PATH and LD_LIBRARY_PATH are re-declared downstream) +# 
============================================================================= +FROM scratch AS clean-base +COPY --from=base-reinstalls / / + +# ============================================================================= +# Stage 3: Continue with cacheable operations +# These layers will be cached normally on subsequent builds +# ============================================================================= +FROM clean-base ADD kaggle_requirements.txt /kaggle_requirements.txt @@ -12,32 +57,22 @@ RUN pip freeze | grep -E 'tensorflow|keras|torch|jax' > /colab_requirements.txt RUN cat /colab_requirements.txt >> /requirements.txt RUN cat /kaggle_requirements.txt >> /requirements.txt -# Install Kaggle packages -RUN uv pip install --system -r /requirements.txt +# TODO: GPU requirements.txt +# TODO: merge them better (override matching ones). + +# Install Kaggle packages (--no-cache prevents cache buildup) +RUN uv pip install --no-cache --system -r /requirements.txt # Install manual packages: # b/183041606#comment5: the Kaggle data proxy doesn't support these APIs. If the library is missing, it falls back to using a regular BigQuery query to fetch data. RUN uv pip uninstall --system google-cloud-bigquery-storage -# b/394382016: sigstore (dependency of kagglehub) requires a prerelease packages, installing separate. -RUN uv pip install --system --force-reinstall --prerelease=allow "kagglehub[pandas-datasets,hf-datasets,signing]>=0.3.12" - # uv cannot install this in requirements.txt without --no-build-isolation # to avoid affecting the larger build, we'll post-install it. 
-RUN uv pip install --no-build-isolation --system "git+https://github.com/Kaggle/learntools" +RUN uv pip install --no-cache --no-build-isolation --system "git+https://github.com/Kaggle/learntools" # newer daal4py requires tbb>=2022, but libpysal is downgrading it for some reason -RUN uv pip install --system "tbb>=2022" "libpysal==4.9.2" - -# b/404590350: Ray and torchtune have conflicting tune cli, we will prioritize torchtune. -# b/415358158: Gensim removed from Colab image to upgrade scipy -# b/456239669: remove huggingface-hub pin when pytorch-lighting and transformer are compatible -# b/315753846: Unpin translate package, currently conflicts with adk 1.17.0 -# b/468379293: Unpin Pandas once cuml/cudf are compatible, version 3.0 causes issues -# b/468383498: numpy will auto-upgrade to 2.4.x, which causes issues with numerous packages -# b/468367647: Unpin protobuf, version greater than v5.29.5 causes issues with numerous packages -RUN uv pip install --system --force-reinstall --no-deps torchtune gensim "scipy<=1.15.3" "huggingface-hub==0.36.0" "google-cloud-translate==3.12.1" "numpy==2.0.2" "pandas==2.2.2" -RUN uv pip install --system --force-reinstall "protobuf==5.29.5" +RUN uv pip install --no-cache --system "tbb>=2022" "libpysal==4.9.2" # Adding non-package dependencies: ADD clean-layer.sh /tmp/clean-layer.sh @@ -48,7 +83,7 @@ ARG PACKAGE_PATH=/usr/local/lib/python3.12/dist-packages # Install GPU-specific non-pip packages. {{ if eq .Accelerator "gpu" }} -RUN uv pip install --system "pycuda" +RUN uv pip install --no-cache --system "pycuda" {{ end }} @@ -72,9 +107,7 @@ RUN apt-get install -y libfreetype6-dev && \ apt-get install -y libglib2.0-0 libxext6 libsm6 libxrender1 libfontconfig1 --fix-missing # NLTK Project datasets -# b/408298750: We currently reinstall the package, because we get the following error: -# `AttributeError: module 'inspect' has no attribute 'formatargspec'. 
Did you mean: 'formatargvalues'?` -RUN uv pip install --system --force-reinstall "nltk>=3.9.1" +# Note: nltk is reinstalled in stage 1 to fix b/408298750 (formatargspec error) RUN mkdir -p /usr/share/nltk_data && \ # NLTK Downloader no longer continues smoothly after an error, so we explicitly list # the corpuses that work @@ -168,6 +201,9 @@ ENV GIT_COMMIT=${GIT_COMMIT} \ # Correlate current release with the git hash inside the kernel editor by running `!cat /etc/git_commit`. RUN echo "$GIT_COMMIT" > /etc/git_commit && echo "$BUILD_DATE" > /etc/build_date +# Final cleanup +RUN rm -rf /root/.cache/uv /root/.cache/pip /tmp/clean-layer.sh + {{ if eq .Accelerator "gpu" }} # Add the CUDA home. ENV CUDA_HOME=/usr/local/cuda diff --git a/clean-layer.sh b/clean-layer.sh index 467e1cac..303ce32d 100755 --- a/clean-layer.sh +++ b/clean-layer.sh @@ -10,8 +10,8 @@ set -e set -x -# Delete files that pip caches when installing a package. -rm -rf /root/.cache/pip/* +# Delete files that pip and uv cache when installing packages. +rm -rf /root/.cache/pip/* /root/.cache/uv/* # Delete old downloaded archive files apt-get autoremove -y # Delete downloaded archive files diff --git a/kaggle_requirements.txt b/kaggle_requirements.txt index 43a170ce..b585f3df 100644 --- a/kaggle_requirements.txt +++ b/kaggle_requirements.txt @@ -91,7 +91,7 @@ path path.py pdf2image plotly-express -preprocessing +# Removed: preprocessing (unmaintained since 2017, requires nltk==3.2.4 incompatible with Python 3.11) pudb pyLDAvis pycryptodome @@ -109,7 +109,9 @@ qtconsole ray rgf-python s3fs -scikit-learn +# b/302136621: Fix eli5 import for learntools +# Note: scikit-learn 1.2.2 is incompatible with numpy 2.x ABI - using 1.5.2 which supports numpy 2.x +scikit-learn==1.5.2 # Scikit-learn accelerated library for x86 scikit-learn-intelex>=2023.0.1 scikit-multilearn