From 8634c787c866dad812646366ffdd953d1eefa27d Mon Sep 17 00:00:00 2001
From: James Kermode <james.kermode@gmail.com>
Date: Tue, 23 Dec 2025 09:44:51 +0000
Subject: [PATCH 01/35] Phase 1: ETACEPotential with AtomsCalculators interface
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Implements Phase 1 of the ETACE calculator interface plan:
- ETACEPotential struct wrapping ETACE models
- AtomsCalculators interface (energy, forces, virial)
- Combined energy_forces_virial evaluation
- Tests comparing against original ACE model
- CPU and GPU performance benchmarks

Key implementation details:
- Forces computed via site_grads() + forces_from_edge_grads()
- Force sign: forces_from_edge_grads returns +∇E, negated for F=-∇E
- Virial: V = -∑ ∂E/∂𝐫ij ⊗ 𝐫ij

Performance results (8-atom Si/O cell, order=3, maxl=6):
- Energy: ETACE ~15% slower (graph construction overhead)
- Forces: ETACE ~6.5x faster (vectorized gradients)
- EFV: ETACE ~5x faster

GPU benchmarks use auto-detection from EquivariantTensors utils.
GPU gradients skipped due to Polynomials4ML GPU compat issues.

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
---
 Project.toml                          |   2 +-
 src/et_models/et_calculators.jl       | 122 ++++++++++
 src/et_models/et_models.jl            |   8 +-
 test/Project.toml                     |   9 +
 test/et_models/test_et_calculators.jl | 308 ++++++++++++++++++++++++++
 5 files changed, 445 insertions(+), 4 deletions(-)
 create mode 100644 src/et_models/et_calculators.jl
 create mode 100644 test/et_models/test_et_calculators.jl

diff --git a/Project.toml b/Project.toml
index 961df9590..bef7a094d 100644
--- a/Project.toml
+++ b/Project.toml
@@ -79,7 +79,7 @@ OffsetArrays = "1"
 Optim = "1"
 Optimisers = "0.3.4, 0.4"
 OrderedCollections = "1"
-Polynomials4ML = "0.5.6"
+Polynomials4ML = "0.5"
 PrettyTables = "1.3, 2"
 Reexport = "1"
 Roots = "2"
diff --git a/src/et_models/et_calculators.jl b/src/et_models/et_calculators.jl
new file mode 100644
index 000000000..ec42fcb98
--- /dev/null
+++ b/src/et_models/et_calculators.jl
@@ -0,0 +1,122 @@
+
+# Calculator interfaces for ETACE models
+# Provides AtomsCalculators-compatible energy/forces/virial evaluation
+
+import AtomsCalculators
+import AtomsBase: AbstractSystem
+import EquivariantTensors as ET
+using StaticArrays
+using Unitful
+
+# ============================================================================
+#  ETACEPotential - Standalone calculator for ETACE models
+# ============================================================================
+
+"""
+    ETACEPotential
+
+AtomsCalculators-compatible calculator wrapping an ETACE model.
+
+# Fields
+- `model::ETACE` - The ETACE model
+- `ps` - Model parameters
+- `st` - Model state
+- `rcut::Float64` - Cutoff radius in Ångström
+- `co_ps` - Optional committee parameters for uncertainty quantification
+"""
+mutable struct ETACEPotential{MOD<:ETACE, T}
+   model::MOD
+   ps::T
+   st::NamedTuple
+   rcut::Float64
+   co_ps::Any
+end
+
+# Constructor without committee parameters
+function ETACEPotential(model::ETACE, ps, st, rcut::Real)
+   return ETACEPotential(model, ps, st, Float64(rcut), nothing)
+end
+
+# Cutoff radius accessor
+cutoff_radius(calc::ETACEPotential) = calc.rcut * u"Å"
+
+# ============================================================================
+#  Internal evaluation functions
+# ============================================================================
+
+function _compute_virial(G::ET.ETGraph, ∂G)
+   # V = -∑ (∂E/∂𝐫ij) ⊗ 𝐫ij
+   V = zeros(SMatrix{3,3,Float64,9})
+   for (edge, ∂edge) in zip(G.edge_data, ∂G.edge_data)
+      V -= ∂edge.𝐫 * edge.𝐫'
+   end
+   return V
+end
+
+function _evaluate_energy(calc::ETACEPotential, sys::AbstractSystem)
+   G = ET.Atoms.interaction_graph(sys, calc.rcut * u"Å")
+   Ei, _ = calc.model(G, calc.ps, calc.st)
+   return sum(Ei)
+end
+
+function _evaluate_forces(calc::ETACEPotential, sys::AbstractSystem)
+   G = ET.Atoms.interaction_graph(sys, calc.rcut * u"Å")
+   ∂G = site_grads(calc.model, G, calc.ps, calc.st)
+   # Note: forces_from_edge_grads returns +∇E, we need -∇E for forces
+   return -ET.Atoms.forces_from_edge_grads(sys, G, ∂G.edge_data)
+end
+
+function _evaluate_virial(calc::ETACEPotential, sys::AbstractSystem)
+   G = ET.Atoms.interaction_graph(sys, calc.rcut * u"Å")
+   ∂G = site_grads(calc.model, G, calc.ps, calc.st)
+   return _compute_virial(G, ∂G)
+end
+
+function _energy_forces_virial(calc::ETACEPotential, sys::AbstractSystem)
+   G = ET.Atoms.interaction_graph(sys, calc.rcut * u"Å")
+
+   # Forward pass for energy
+   Ei, _ = calc.model(G, calc.ps, calc.st)
+   E = sum(Ei)
+
+   # Backward pass for gradients (forces and virial)
+   ∂G = site_grads(calc.model, G, calc.ps, calc.st)
+
+   # Forces from edge gradients (negate since forces_from_edge_grads returns +∇E)
+   F = -ET.Atoms.forces_from_edge_grads(sys, G, ∂G.edge_data)
+
+   # Virial from edge gradients
+   V = _compute_virial(G, ∂G)
+
+   return (energy=E, forces=F, virial=V)
+end
+
+# ============================================================================
+#  AtomsCalculators interface
+# ============================================================================
+
+AtomsCalculators.@generate_interface function AtomsCalculators.potential_energy(
+      sys::AbstractSystem, calc::ETACEPotential; kwargs...)
+   return _evaluate_energy(calc, sys) * u"eV"
+end
+
+AtomsCalculators.@generate_interface function AtomsCalculators.forces(
+      sys::AbstractSystem, calc::ETACEPotential; kwargs...)
+   return _evaluate_forces(calc, sys) .* u"eV/Å"
+end
+
+AtomsCalculators.@generate_interface function AtomsCalculators.virial(
+      sys::AbstractSystem, calc::ETACEPotential; kwargs...)
+   return _evaluate_virial(calc, sys) * u"eV"
+end
+
+function AtomsCalculators.energy_forces_virial(
+      sys::AbstractSystem, calc::ETACEPotential; kwargs...)
+   efv = _energy_forces_virial(calc, sys)
+   return (
+      energy = efv.energy * u"eV",
+      forces = efv.forces .* u"eV/Å",
+      virial = efv.virial * u"eV"
+   )
+end
+
diff --git a/src/et_models/et_models.jl b/src/et_models/et_models.jl
index 73fa48729..333961ff3 100644
--- a/src/et_models/et_models.jl
+++ b/src/et_models/et_models.jl
@@ -1,5 +1,5 @@
 
-module ETModels 
+module ETModels
 
 # utility layers : these should likely be moved into ET or be removed 
 #     if more convenient implementations can be found. 
@@ -14,8 +14,10 @@ include("et_pair.jl")
 # converstion utilities: convert from 0.8 style ACE models to ET based models
 include("convert.jl")
 
-# utilities to convert radial embeddings to splined versions 
-# for simplicity and performance and to freeze parameters 
+# utilities to convert radial embeddings to splined versions
+# for simplicity and performance and to freeze parameters
 include("splinify.jl")
 
+include("et_calculators.jl")
+
 end
\ No newline at end of file
diff --git a/test/Project.toml b/test/Project.toml
index 11f939c87..a8d6fecb3 100644
--- a/test/Project.toml
+++ b/test/Project.toml
@@ -1,5 +1,8 @@
 [deps]
 ACEbase = "14bae519-eb20-449c-a949-9c58ed33163e"
+BenchmarkTools = "6e4b80f9-dd63-53aa-95a3-0cdb28fa8baf"
+CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba"
+ACEpotentials = "3b96b61c-0fcc-4693-95ed-1ef9f35fcc53"
 AtomsBase = "a963bdd2-2df7-4f54-a1ee-49d51e6be12a"
 AtomsBuilder = "f5cc8831-eeb7-4288-8d9f-d6c1ddb77004"
 AtomsCalculators = "a3e0e189-c65a-42c1-833c-339540406eb1"
@@ -16,6 +19,7 @@ LazyArtifacts = "4af54fe1-eca0-43a8-85a7-787d91b784e3"
 LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
 Lux = "b2108857-7c20-44ae-9111-449ecde12c47"
 LuxCore = "bb33d45b-7691-41d6-9220-0943567d0623"
+MLDataDevices = "7e8f7934-dd98-4c1a-8fe8-92b47a384d40"
 Optimisers = "3bd65402-5787-11e9-1adc-39752487f4e2"
 Pkg = "44cfe95a-1eb2-52ea-b672-e2afdf69b78f"
 Polynomials4ML = "03c4bcba-a943-47e9-bfa1-b1661fc2974f"
@@ -24,8 +28,13 @@ StaticArrays = "90137ffa-7385-5640-81b9-e52037218182"
 Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2"
 TOML = "fa267f1f-6049-4f14-aa54-33bafae1ed76"
 Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
+TestEnv = "1e6cf692-eddd-4d53-88a5-2d735e33781b"
 Unitful = "1986cc42-f94f-5a68-af5c-568840ba703d"
 Zygote = "e88e6eb3-aa80-5325-afca-941959d7151f"
 
+[sources]
+ACEpotentials = {path = ".."}
+
 [compat]
+EquivariantTensors = "0.4"
 StaticArrays = "1"
diff --git a/test/et_models/test_et_calculators.jl b/test/et_models/test_et_calculators.jl
new file mode 100644
index 000000000..750a96781
--- /dev/null
+++ b/test/et_models/test_et_calculators.jl
@@ -0,0 +1,308 @@
+# Tests for ETACEPotential calculator interface
+#
+# These tests verify:
+# 1. Energy consistency between ETACE model and ETACEPotential
+# 2. Force consistency against original ACE model
+# 3. Virial consistency against original ACE model
+# 4. AtomsCalculators interface compliance
+
+using Test, ACEbase, BenchmarkTools
+using Polynomials4ML.Testing: print_tf, println_slim
+
+using ACEpotentials
+M = ACEpotentials.Models
+ETM = ACEpotentials.ETModels
+
+import EquivariantTensors as ET
+import AtomsCalculators
+
+using AtomsBase, AtomsBuilder, Unitful
+using Random, LuxCore, StaticArrays, LinearAlgebra
+
+rng = Random.MersenneTwister(1234)
+Random.seed!(1234)
+
+##
+# Build an ETACE model for testing
+
+elements = (:Si, :O)
+level = M.TotalDegree()
+max_level = 10
+order = 3
+maxl = 6
+
+# Use same cutoff for all elements
+rin0cuts = M._default_rin0cuts(elements)
+rin0cuts = (x -> (rin = x.rin, r0 = x.r0, rcut = 5.5)).(rin0cuts)
+
+model = M.ace_model(; elements = elements, order = order,
+                     Ytype = :solid, level = level, max_level = max_level,
+                     maxl = maxl, pair_maxn = max_level,
+                     rin0cuts = rin0cuts,
+                     init_WB = :glorot_normal, init_Wpair = :glorot_normal)
+
+ps, st = LuxCore.setup(rng, model)
+
+# Kill pair basis for clarity (test only ACE part)
+for s in model.pairbasis.splines
+   s.itp.itp.coefs[:] *= 0
+end
+
+# Convert to ETACE model
+et_model = ETM.convert2et(model)
+et_ps, et_st = LuxCore.setup(MersenneTwister(1234), et_model)
+
+# Match parameters
+et_ps.rembed.post.W[:, :, 1] = ps.rbasis.Wnlq[:, :, 1, 1]
+et_ps.rembed.post.W[:, :, 2] = ps.rbasis.Wnlq[:, :, 1, 2]
+et_ps.rembed.post.W[:, :, 3] = ps.rbasis.Wnlq[:, :, 2, 1]
+et_ps.rembed.post.W[:, :, 4] = ps.rbasis.Wnlq[:, :, 2, 2]
+
+et_ps.readout.W[1, :, 1] .= ps.WB[:, 1]
+et_ps.readout.W[1, :, 2] .= ps.WB[:, 2]
+
+# Get cutoff radius
+rcut = maximum(a.rcut for a in model.pairbasis.rin0cuts)
+
+# Helper to generate random structures
+function rand_struct()
+   sys = AtomsBuilder.bulk(:Si) * (2, 2, 1)
+   rattle!(sys, 0.2u"Å")
+   AtomsBuilder.randz!(sys, [:Si => 0.5, :O => 0.5])
+   return sys
+end
+
+##
+
+@info("Testing ETACEPotential construction")
+
+# Create calculator from ETACE model
+et_calc = ETM.ETACEPotential(et_model, et_ps, et_st, rcut)
+
+@test et_calc.model === et_model
+@test et_calc.rcut == rcut
+@test et_calc.co_ps === nothing
+println("ETACEPotential construction: OK")
+
+##
+
+@info("Testing energy consistency: ETACE model vs ETACEPotential")
+
+for ntest = 1:20
+   local sys, G, E_model, E_calc
+
+   sys = rand_struct()
+   G = ET.Atoms.interaction_graph(sys, rcut * u"Å")
+
+   # Energy from direct model evaluation
+   Ei_model, _ = et_model(G, et_ps, et_st)
+   E_model = sum(Ei_model)
+
+   # Energy from calculator
+   E_calc = AtomsCalculators.potential_energy(sys, et_calc)
+
+   print_tf(@test abs(E_model - ustrip(E_calc)) < 1e-10)
+end
+println()
+
+##
+
+@info("Testing energy consistency: ETACE vs original ACE model")
+
+# Wrap original ACE model into calculator
+calc_model = M.ACEPotential(model, ps, st)
+
+for ntest = 1:20
+   local sys, E_old, E_new
+
+   sys = rand_struct()
+   E_old = AtomsCalculators.potential_energy(sys, calc_model)
+   E_new = AtomsCalculators.potential_energy(sys, et_calc)
+
+   print_tf(@test abs(ustrip(E_old) - ustrip(E_new)) < 1e-6)
+end
+println()
+
+##
+
+@info("Testing forces consistency: ETACE vs original ACE model")
+
+for ntest = 1:20
+   local sys, F_old, F_new
+
+   sys = rand_struct()
+   F_old = AtomsCalculators.forces(sys, calc_model)
+   F_new = AtomsCalculators.forces(sys, et_calc)
+
+   # Compare force magnitudes (allow small numerical differences)
+   max_diff = maximum(norm(ustrip.(f1) - ustrip.(f2)) for (f1, f2) in zip(F_old, F_new))
+   print_tf(@test max_diff < 1e-6)
+end
+println()
+
+##
+
+@info("Testing virial consistency: ETACE vs original ACE model")
+
+for ntest = 1:20
+   local sys, V_old, V_new
+
+   sys = rand_struct()
+   efv_old = AtomsCalculators.energy_forces_virial(sys, calc_model)
+   efv_new = AtomsCalculators.energy_forces_virial(sys, et_calc)
+
+   V_old = ustrip.(efv_old.virial)
+   V_new = ustrip.(efv_new.virial)
+
+   # Compare virial tensors
+   print_tf(@test norm(V_old - V_new) / (norm(V_old) + 1e-10) < 1e-6)
+end
+println()
+
+##
+
+@info("Testing AtomsCalculators interface compliance")
+
+sys = rand_struct()
+
+# Test individual methods
+E = AtomsCalculators.potential_energy(sys, et_calc)
+F = AtomsCalculators.forces(sys, et_calc)
+V = AtomsCalculators.virial(sys, et_calc)
+
+@test E isa typeof(1.0u"eV")
+@test eltype(F) <: StaticArrays.SVector
+@test V isa StaticArrays.SMatrix
+
+println("AtomsCalculators interface: OK")
+
+##
+
+@info("Testing combined energy_forces_virial efficiency")
+
+sys = rand_struct()
+
+# Combined evaluation
+efv1 = AtomsCalculators.energy_forces_virial(sys, et_calc)
+
+# Separate evaluations
+E = AtomsCalculators.potential_energy(sys, et_calc)
+F = AtomsCalculators.forces(sys, et_calc)
+V = AtomsCalculators.virial(sys, et_calc)
+
+@test ustrip(efv1.energy) ≈ ustrip(E)
+@test all(ustrip.(efv1.forces) .≈ ustrip.(F))
+@test ustrip.(efv1.virial) ≈ ustrip.(V)
+
+println("Combined evaluation consistency: OK")
+
+##
+
+@info("Testing cutoff_radius function")
+
+@test ETM.cutoff_radius(et_calc) == rcut * u"Å"
+println("Cutoff radius: OK")
+
+##
+
+@info("Performance comparison: ETACE vs original ACE model")
+
+# Use a fixed test structure for benchmarking
+bench_sys = rand_struct()
+
+# Warm-up runs
+AtomsCalculators.energy_forces_virial(bench_sys, calc_model)
+AtomsCalculators.energy_forces_virial(bench_sys, et_calc)
+
+# Benchmark energy
+t_energy_old = @belapsed AtomsCalculators.potential_energy($bench_sys, $calc_model)
+t_energy_new = @belapsed AtomsCalculators.potential_energy($bench_sys, $et_calc)
+
+# Benchmark forces
+t_forces_old = @belapsed AtomsCalculators.forces($bench_sys, $calc_model)
+t_forces_new = @belapsed AtomsCalculators.forces($bench_sys, $et_calc)
+
+# Benchmark energy_forces_virial
+t_efv_old = @belapsed AtomsCalculators.energy_forces_virial($bench_sys, $calc_model)
+t_efv_new = @belapsed AtomsCalculators.energy_forces_virial($bench_sys, $et_calc)
+
+println("CPU Performance comparison (times in ms):")
+println("  Energy:              ACE = $(round(t_energy_old*1000, digits=3)), ETACE = $(round(t_energy_new*1000, digits=3)), ratio = $(round(t_energy_new/t_energy_old, digits=2))")
+println("  Forces:              ACE = $(round(t_forces_old*1000, digits=3)), ETACE = $(round(t_forces_new*1000, digits=3)), ratio = $(round(t_forces_new/t_forces_old, digits=2))")
+println("  Energy+Forces+Virial: ACE = $(round(t_efv_old*1000, digits=3)), ETACE = $(round(t_efv_new*1000, digits=3)), ratio = $(round(t_efv_new/t_efv_old, digits=2))")
+
+##
+
+# GPU benchmarks (if available)
+# Include GPU detection utils from EquivariantTensors
+et_test_utils = joinpath(dirname(dirname(pathof(ET))), "test", "test_utils")
+include(joinpath(et_test_utils, "utils_gpu.jl"))
+
+if dev !== identity
+   @info("GPU Performance comparison: ETACE on GPU vs CPU")
+
+   # NOTE: These benchmarks measure model evaluation time ONLY, with pre-constructed graphs.
+   # The neighborlist/graph construction currently runs on CPU (~7ms for 250 atoms) and is
+   # NOT included in the timings below. NeighbourLists.jl now has GPU support (PR #34, Dec 2025)
+   # but EquivariantTensors.jl doesn't use it yet. For end-to-end GPU acceleration, the
+   # neighborlist construction needs to be ported to GPU as well.
+
+   # Use a larger system for meaningful GPU benchmark (small systems are overhead-dominated)
+   # GPU kernel launch overhead is ~0.4ms, so need enough work to amortize this
+   gpu_bench_sys = AtomsBuilder.bulk(:Si) * (4, 4, 4)  # 128 atoms
+   rattle!(gpu_bench_sys, 0.1u"Å")
+   AtomsBuilder.randz!(gpu_bench_sys, [:Si => 0.5, :O => 0.5])
+
+   # Create graph and convert to Float32 for GPU
+   G = ET.Atoms.interaction_graph(gpu_bench_sys, rcut * u"Å")
+   G_32 = ET.float32(G)
+   G_gpu = dev(G_32)
+
+   et_ps_32 = ET.float32(et_ps)
+   et_st_32 = ET.float32(et_st)
+   et_ps_gpu = dev(et_ps_32)
+   et_st_gpu = dev(et_st_32)
+
+   # Warm-up GPU (forward pass)
+   et_model(G_gpu, et_ps_gpu, et_st_gpu)
+
+   # Benchmark GPU energy (forward pass only)
+   t_energy_gpu = @belapsed begin
+      Ei, _ = $et_model($G_gpu, $et_ps_gpu, $et_st_gpu)
+      sum(Ei)
+   end
+
+   # Compare to CPU Float32 for fair comparison
+   t_energy_cpu32 = @belapsed begin
+      Ei, _ = $et_model($G_32, $et_ps_32, $et_st_32)
+      sum(Ei)
+   end
+
+   println("GPU vs CPU Float32 comparison ($(length(gpu_bench_sys)) atoms, $(length(G.ii)) edges):")
+   println("  Energy:    CPU = $(round(t_energy_cpu32*1000, digits=3))ms, GPU = $(round(t_energy_gpu*1000, digits=3))ms, speedup = $(round(t_energy_cpu32/t_energy_gpu, digits=1))x")
+
+   # Try GPU gradients (may not be supported yet - gradients w.r.t. positions
+   # require Zygote through P4ML which has GPU compat issues; see ET test_ace_ka.jl:196-197)
+   gpu_grads_work = try
+      ETM.site_grads(et_model, G_gpu, et_ps_gpu, et_st_gpu)
+      true
+   catch e
+      @warn("GPU position gradients not yet supported (needed for forces): $(typeof(e).name.name)")
+      false
+   end
+
+   if gpu_grads_work
+      # Benchmark GPU gradients (for forces)
+      t_grads_gpu = @belapsed ETM.site_grads($et_model, $G_gpu, $et_ps_gpu, $et_st_gpu)
+      t_grads_cpu32 = @belapsed ETM.site_grads($et_model, $G_32, $et_ps_32, $et_st_32)
+      println("  Gradients: CPU = $(round(t_grads_cpu32*1000, digits=3)), GPU = $(round(t_grads_gpu*1000, digits=3)), speedup = $(round(t_grads_cpu32/t_grads_gpu, digits=2))x")
+   else
+      println("  Gradients: Skipped (GPU gradients not yet supported)")
+   end
+else
+   @info("No GPU available, skipping GPU benchmarks")
+end
+
+##
+
+@info("All Phase 1 tests passed!")

From ccf925a55d3b2c24f734218730e8c6434308226b Mon Sep 17 00:00:00 2001
From: James Kermode <james.kermode@gmail.com>
Date: Tue, 23 Dec 2025 12:02:13 +0000
Subject: [PATCH 02/35] Phase 2: SiteEnergyModel interface and
 StackedCalculator
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Implements a composable calculator architecture:

- SiteEnergyModel interface: site_energies(), site_energy_grads(), cutoff_radius()
- E0Model: One-body reference energies (constant per species, zero forces)
- WrappedETACE: Wraps ETACE model with SiteEnergyModel interface
- WrappedSiteCalculator: Converts site quantities to global (energy, forces, virial)
- StackedCalculator: Combines multiple AtomsCalculators by summing contributions

Architecture allows non-site-based calculators (e.g., Coulomb, dispersion) to be
added directly to StackedCalculator without requiring site energy decomposition.

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
---
 src/et_models/et_calculators.jl       | 332 +++++++++++++++++++++++++-
 test/et_models/test_et_calculators.jl | 200 ++++++++++++++++
 2 files changed, 531 insertions(+), 1 deletion(-)

diff --git a/src/et_models/et_calculators.jl b/src/et_models/et_calculators.jl
index ec42fcb98..989efec7d 100644
--- a/src/et_models/et_calculators.jl
+++ b/src/et_models/et_calculators.jl
@@ -1,12 +1,342 @@
 
 # Calculator interfaces for ETACE models
 # Provides AtomsCalculators-compatible energy/forces/virial evaluation
+#
+# Architecture:
+# - SiteEnergyModel interface: Any model producing per-site energies can implement this
+# - E0Model: One-body reference energies (constant per species)
+# - WrappedETACE: Wraps ETACE model with the SiteEnergyModel interface
+# - StackedCalculator: Combines multiple SiteEnergyModels into one calculator
+# - ETACEPotential: Standalone calculator for simple use cases
 
 import AtomsCalculators
-import AtomsBase: AbstractSystem
+import AtomsBase: AbstractSystem, ChemicalSpecies
 import EquivariantTensors as ET
+using DecoratedParticles: PState
 using StaticArrays
 using Unitful
+using LinearAlgebra: norm
+
+# ============================================================================
+#  SiteEnergyModel Interface
+# ============================================================================
+#
+# Any model producing per-site (per-atom) energies can implement this interface:
+#
+#   site_energies(model, G::ETGraph, ps, st) -> Vector  # per-atom energies
+#   site_energy_grads(model, G::ETGraph, ps, st) -> ∂G  # edge gradients for forces
+#   cutoff_radius(model) -> Float64                     # in Ångström
+#
+# This enables composition via StackedCalculator for:
+# - One-body reference energies (E0Model)
+# - Pairwise interactions (PairModel)
+# - Many-body ACE (WrappedETACE)
+# - Future: dispersion, coulomb, etc.
+
+"""
+    site_energies(model, G, ps, st)
+
+Compute per-site (per-atom) energies for the given interaction graph.
+Returns a vector of length `nnodes(G)`.
+"""
+function site_energies end
+
+"""
+    site_energy_grads(model, G, ps, st)
+
+Compute gradients of site energies w.r.t. edge positions.
+Returns a named tuple with `edge_data` field containing gradient vectors.
+"""
+function site_energy_grads end
+
+"""
+    cutoff_radius(model)
+
+Return the cutoff radius in Ångström for the model.
+"""
+function cutoff_radius end
+
+
+# ============================================================================
+#  E0Model - One-body reference energies
+# ============================================================================
+
+"""
+    E0Model{T}
+
+One-body reference energy model. Assigns constant energy per atomic species.
+No forces (energy is position-independent).
+
+# Example
+```julia
+E0 = E0Model(Dict(ChemicalSpecies(:Si) => -0.846, ChemicalSpecies(:O) => -2.15))
+```
+"""
+struct E0Model{T<:Real}
+   E0s::Dict{ChemicalSpecies, T}
+end
+
+# Constructor from element symbols
+function E0Model(E0s::Dict{Symbol, T}) where T<:Real
+   return E0Model(Dict(ChemicalSpecies(k) => v for (k, v) in E0s))
+end
+
+cutoff_radius(::E0Model) = 0.0  # No neighbors needed
+
+function site_energies(model::E0Model, G::ET.ETGraph, ps, st)
+   T = valtype(model.E0s)
+   return T[model.E0s[node.z] for node in G.node_data]
+end
+
+function site_energy_grads(model::E0Model{T}, G::ET.ETGraph, ps, st) where T
+   # Constant energy → zero gradients
+   zero_grad = PState(𝐫 = zero(SVector{3, T}))
+   return (edge_data = fill(zero_grad, length(G.edge_data)),)
+end
+
+
+# ============================================================================
+#  WrappedETACE - ETACE model with SiteEnergyModel interface
+# ============================================================================
+
+"""
+    WrappedETACE{MOD<:ETACE, T}
+
+Wraps an ETACE model to implement the SiteEnergyModel interface.
+
+# Fields
+- `model::ETACE` - The underlying ETACE model
+- `ps` - Model parameters
+- `st` - Model state
+- `rcut::Float64` - Cutoff radius in Ångström
+"""
+struct WrappedETACE{MOD<:ETACE, PS, ST}
+   model::MOD
+   ps::PS
+   st::ST
+   rcut::Float64
+end
+
+cutoff_radius(w::WrappedETACE) = w.rcut
+
+function site_energies(w::WrappedETACE, G::ET.ETGraph, ps, st)
+   # Use wrapper's ps/st, ignore passed ones (they're for StackedCalculator dispatch)
+   Ei, _ = w.model(G, w.ps, w.st)
+   return Ei
+end
+
+function site_energy_grads(w::WrappedETACE, G::ET.ETGraph, ps, st)
+   return site_grads(w.model, G, w.ps, w.st)
+end
+
+
+# ============================================================================
+#  WrappedSiteCalculator - Converts SiteEnergyModel to AtomsCalculators
+# ============================================================================
+
+"""
+    WrappedSiteCalculator{M}
+
+Wraps a SiteEnergyModel and provides the AtomsCalculators interface.
+Converts site quantities (per-atom energies, edge gradients) to global
+quantities (total energy, atomic forces, virial tensor).
+
+# Example
+```julia
+E0 = E0Model(Dict(:Si => -0.846, :O => -2.15))
+calc = WrappedSiteCalculator(E0, 5.5)  # cutoff for graph construction
+
+E = potential_energy(sys, calc)
+F = forces(sys, calc)
+```
+
+# Fields
+- `model` - Model implementing SiteEnergyModel interface
+- `rcut::Float64` - Cutoff radius for graph construction (Å)
+"""
+struct WrappedSiteCalculator{M}
+   model::M
+   rcut::Float64
+end
+
+function WrappedSiteCalculator(model)
+   rcut = cutoff_radius(model)
+   # Ensure minimum cutoff for graph construction (must be > 0 for neighbor list)
+   # Use 3.0 Å as minimum - smaller than typical bond lengths
+   rcut = max(rcut, 3.0)
+   return WrappedSiteCalculator(model, rcut)
+end
+
+cutoff_radius(calc::WrappedSiteCalculator) = calc.rcut * u"Å"
+
+function _wrapped_energy(calc::WrappedSiteCalculator, sys::AbstractSystem)
+   G = ET.Atoms.interaction_graph(sys, calc.rcut * u"Å")
+   Ei = site_energies(calc.model, G, nothing, nothing)
+   return sum(Ei)
+end
+
+function _wrapped_forces(calc::WrappedSiteCalculator, sys::AbstractSystem)
+   G = ET.Atoms.interaction_graph(sys, calc.rcut * u"Å")
+   ∂G = site_energy_grads(calc.model, G, nothing, nothing)
+   # Handle empty edge case (e.g., E0 model with small cutoff)
+   if isempty(∂G.edge_data)
+      return zeros(SVector{3, Float64}, length(sys))
+   end
+   # forces_from_edge_grads returns +∇E, negate for forces
+   return -ET.Atoms.forces_from_edge_grads(sys, G, ∂G.edge_data)
+end
+
+function _wrapped_virial(calc::WrappedSiteCalculator, sys::AbstractSystem)
+   G = ET.Atoms.interaction_graph(sys, calc.rcut * u"Å")
+   ∂G = site_energy_grads(calc.model, G, nothing, nothing)
+   # Handle empty edge case
+   if isempty(∂G.edge_data)
+      return zeros(SMatrix{3,3,Float64,9})
+   end
+   return _compute_virial(G, ∂G)
+end
+
+function _wrapped_energy_forces_virial(calc::WrappedSiteCalculator, sys::AbstractSystem)
+   G = ET.Atoms.interaction_graph(sys, calc.rcut * u"Å")
+
+   # Energy from site energies
+   Ei = site_energies(calc.model, G, nothing, nothing)
+   E = sum(Ei)
+
+   # Forces and virial from edge gradients
+   ∂G = site_energy_grads(calc.model, G, nothing, nothing)
+
+   # Handle empty edge case (e.g., E0 model with small cutoff)
+   if isempty(∂G.edge_data)
+      F = zeros(SVector{3, Float64}, length(sys))
+      V = zeros(SMatrix{3,3,Float64,9})
+   else
+      F = -ET.Atoms.forces_from_edge_grads(sys, G, ∂G.edge_data)
+      V = _compute_virial(G, ∂G)
+   end
+
+   return (energy=E, forces=F, virial=V)
+end
+
+# AtomsCalculators interface for WrappedSiteCalculator
+AtomsCalculators.@generate_interface function AtomsCalculators.potential_energy(
+      sys::AbstractSystem, calc::WrappedSiteCalculator; kwargs...)
+   return _wrapped_energy(calc, sys) * u"eV"
+end
+
+AtomsCalculators.@generate_interface function AtomsCalculators.forces(
+      sys::AbstractSystem, calc::WrappedSiteCalculator; kwargs...)
+   return _wrapped_forces(calc, sys) .* u"eV/Å"
+end
+
+AtomsCalculators.@generate_interface function AtomsCalculators.virial(
+      sys::AbstractSystem, calc::WrappedSiteCalculator; kwargs...)
+   return _wrapped_virial(calc, sys) * u"eV"
+end
+
+function AtomsCalculators.energy_forces_virial(
+      sys::AbstractSystem, calc::WrappedSiteCalculator; kwargs...)
+   efv = _wrapped_energy_forces_virial(calc, sys)
+   return (
+      energy = efv.energy * u"eV",
+      forces = efv.forces .* u"eV/Å",
+      virial = efv.virial * u"eV"
+   )
+end
+
+
+# ============================================================================
+#  StackedCalculator - Combines multiple AtomsCalculators
+# ============================================================================
+
+"""
+    StackedCalculator{C<:Tuple}
+
+Combines multiple AtomsCalculators by summing their energy, forces, and virial.
+Each calculator in the tuple must implement the AtomsCalculators interface.
+
+This allows combining site-based calculators (via WrappedSiteCalculator) with
+calculators that don't have site decompositions (e.g., Coulomb, dispersion).
+
+# Example
+```julia
+# Wrap site energy models
+E0_calc = WrappedSiteCalculator(E0Model(Dict(:Si => -0.846)))
+ace_calc = WrappedSiteCalculator(WrappedETACE(et_model, ps, st, 5.5))
+
+# Stack them (could also add Coulomb, dispersion, etc.)
+calc = StackedCalculator((E0_calc, ace_calc))
+
+E = potential_energy(sys, calc)
+F = forces(sys, calc)
+```
+
+# Fields
+- `calcs::Tuple` - Tuple of calculators implementing AtomsCalculators interface
+"""
+struct StackedCalculator{C<:Tuple}
+   calcs::C
+end
+
+# Get maximum cutoff from all calculators (for informational purposes)
+function cutoff_radius(calc::StackedCalculator)
+   rcuts = [ustrip(u"Å", cutoff_radius(c)) for c in calc.calcs]
+   return maximum(rcuts) * u"Å"
+end
+
+# AtomsCalculators interface - sum contributions from all calculators
+AtomsCalculators.@generate_interface function AtomsCalculators.potential_energy(
+      sys::AbstractSystem, calc::StackedCalculator; kwargs...)
+   E_total = 0.0 * u"eV"
+   for c in calc.calcs
+      E_total += AtomsCalculators.potential_energy(sys, c)
+   end
+   return E_total
+end
+
+AtomsCalculators.@generate_interface function AtomsCalculators.forces(
+      sys::AbstractSystem, calc::StackedCalculator; kwargs...)
+   F_total = nothing
+   for c in calc.calcs
+      F = AtomsCalculators.forces(sys, c)
+      if F_total === nothing
+         F_total = F
+      else
+         F_total = F_total .+ F
+      end
+   end
+   return F_total
+end
+
+AtomsCalculators.@generate_interface function AtomsCalculators.virial(
+      sys::AbstractSystem, calc::StackedCalculator; kwargs...)
+   V_total = zeros(SMatrix{3,3,Float64,9}) * u"eV"
+   for c in calc.calcs
+      V_total += AtomsCalculators.virial(sys, c)
+   end
+   return V_total
+end
+
+function AtomsCalculators.energy_forces_virial(
+      sys::AbstractSystem, calc::StackedCalculator; kwargs...)
+   E_total = 0.0 * u"eV"
+   F_total = nothing
+   V_total = zeros(SMatrix{3,3,Float64,9}) * u"eV"
+
+   for c in calc.calcs
+      efv = AtomsCalculators.energy_forces_virial(sys, c)
+      E_total += efv.energy
+      V_total += efv.virial
+      if F_total === nothing
+         F_total = efv.forces
+      else
+         F_total = F_total .+ efv.forces
+      end
+   end
+
+   return (energy=E_total, forces=F_total, virial=V_total)
+end
+
 
 # ============================================================================
 #  ETACEPotential - Standalone calculator for ETACE models
diff --git a/test/et_models/test_et_calculators.jl b/test/et_models/test_et_calculators.jl
index 750a96781..02da8a816 100644
--- a/test/et_models/test_et_calculators.jl
+++ b/test/et_models/test_et_calculators.jl
@@ -306,3 +306,203 @@ end
 ##
 
 @info("All Phase 1 tests passed!")
+
+# ============================================================================
+#  Phase 2 Tests: SiteEnergyModel Interface, WrappedSiteCalculator, StackedCalculator
+# ============================================================================
+
+@info("Testing Phase 2: SiteEnergyModel interface and calculators")
+
+##
+
+@info("Testing E0Model")
+
+# Create E0 model with reference energies
+E0_Si = -0.846
+E0_O = -2.15
+E0 = ETM.E0Model(Dict(:Si => E0_Si, :O => E0_O))
+
+# Test cutoff radius
+@test ETM.cutoff_radius(E0) == 0.0
+println("E0Model cutoff_radius: OK")
+
+# Test site energies
+sys = rand_struct()
+G = ET.Atoms.interaction_graph(sys, rcut * u"Å")
+Ei_E0 = ETM.site_energies(E0, G, nothing, nothing)
+
+# Count Si and O atoms
+n_Si = count(node -> node.z == AtomsBase.ChemicalSpecies(:Si), G.node_data)
+n_O = count(node -> node.z == AtomsBase.ChemicalSpecies(:O), G.node_data)
+expected_E0 = n_Si * E0_Si + n_O * E0_O
+
+@test length(Ei_E0) == length(sys)
+@test sum(Ei_E0) ≈ expected_E0
+println("E0Model site_energies: OK")
+
+# Test site energy gradients (should be zero)
+∂G_E0 = ETM.site_energy_grads(E0, G, nothing, nothing)
+@test all(norm(e.𝐫) == 0 for e in ∂G_E0.edge_data)
+println("E0Model site_energy_grads (zero): OK")
+
+##
+
+@info("Testing WrappedETACE")
+
+# Create wrapped ETACE model
+wrapped_ace = ETM.WrappedETACE(et_model, et_ps, et_st, rcut)
+
+# Test cutoff radius
+@test ETM.cutoff_radius(wrapped_ace) == rcut
+println("WrappedETACE cutoff_radius: OK")
+
+# Test site energies match direct evaluation
+Ei_wrapped = ETM.site_energies(wrapped_ace, G, nothing, nothing)
+Ei_direct, _ = et_model(G, et_ps, et_st)
+@test Ei_wrapped ≈ Ei_direct
+println("WrappedETACE site_energies: OK")
+
+# Test site energy gradients match direct evaluation
+∂G_wrapped = ETM.site_energy_grads(wrapped_ace, G, nothing, nothing)
+∂G_direct = ETM.site_grads(et_model, G, et_ps, et_st)
+@test all(∂G_wrapped.edge_data[i].𝐫 ≈ ∂G_direct.edge_data[i].𝐫 for i in 1:length(G.edge_data))
+println("WrappedETACE site_energy_grads: OK")
+
+##
+
+@info("Testing WrappedSiteCalculator")
+
+# Wrap E0 model in a calculator
+E0_calc = ETM.WrappedSiteCalculator(E0)
+@test ustrip(u"Å", ETM.cutoff_radius(E0_calc)) == 3.0  # minimum cutoff
+println("WrappedSiteCalculator(E0) cutoff_radius: OK")
+
+# Wrap ETACE model in a calculator
+ace_site_calc = ETM.WrappedSiteCalculator(wrapped_ace)
+@test ustrip(u"Å", ETM.cutoff_radius(ace_site_calc)) == rcut
+println("WrappedSiteCalculator(ETACE) cutoff_radius: OK")
+
+# Test E0 calculator energy
+sys = rand_struct()
+E_E0_calc = AtomsCalculators.potential_energy(sys, E0_calc)
+G = ET.Atoms.interaction_graph(sys, 3.0 * u"Å")
+n_Si = count(node -> node.z == AtomsBase.ChemicalSpecies(:Si), G.node_data)
+n_O = count(node -> node.z == AtomsBase.ChemicalSpecies(:O), G.node_data)
+expected_E = (n_Si * E0_Si + n_O * E0_O) * u"eV"
+@test ustrip(E_E0_calc) ≈ ustrip(expected_E)
+println("WrappedSiteCalculator(E0) energy: OK")
+
+# Test E0 calculator forces (should be zero)
+F_E0_calc = AtomsCalculators.forces(sys, E0_calc)
+@test all(norm(ustrip.(f)) < 1e-14 for f in F_E0_calc)
+println("WrappedSiteCalculator(E0) forces (zero): OK")
+
+# Test ETACE calculator matches ETACEPotential
+sys = rand_struct()
+E_ace_site = AtomsCalculators.potential_energy(sys, ace_site_calc)
+E_ace_pot = AtomsCalculators.potential_energy(sys, et_calc)
+@test ustrip(E_ace_site) ≈ ustrip(E_ace_pot)
+println("WrappedSiteCalculator(ETACE) energy matches ETACEPotential: OK")
+
+F_ace_site = AtomsCalculators.forces(sys, ace_site_calc)
+F_ace_pot = AtomsCalculators.forces(sys, et_calc)
+max_diff = maximum(norm(ustrip.(f1) - ustrip.(f2)) for (f1, f2) in zip(F_ace_site, F_ace_pot))
+@test max_diff < 1e-10
+println("WrappedSiteCalculator(ETACE) forces match ETACEPotential: OK")
+
+##
+
+@info("Testing StackedCalculator construction")
+
+# Create stacked calculator with E0 + ACE (both wrapped)
+stacked = ETM.StackedCalculator((E0_calc, ace_site_calc))
+
+@test ustrip(u"Å", ETM.cutoff_radius(stacked)) == rcut
+@test length(stacked.calcs) == 2
+println("StackedCalculator construction: OK")
+
+##
+
+@info("Testing StackedCalculator energy consistency")
+
+for ntest = 1:10
+   local sys, E_stacked, E_separate
+
+   sys = rand_struct()
+
+   # Energy from stacked calculator
+   E_stacked = AtomsCalculators.potential_energy(sys, stacked)
+
+   # Energy from separate evaluations
+   E_E0 = AtomsCalculators.potential_energy(sys, E0_calc)
+   E_ace = AtomsCalculators.potential_energy(sys, ace_site_calc)
+   E_separate = E_E0 + E_ace
+
+   print_tf(@test ustrip(E_stacked) ≈ ustrip(E_separate))
+end
+println()
+
+##
+
+@info("Testing StackedCalculator forces consistency")
+
+for ntest = 1:10
+   local sys, F_stacked, F_ace_only, max_diff
+
+   sys = rand_struct()
+
+   # Forces from stacked calculator
+   F_stacked = AtomsCalculators.forces(sys, stacked)
+
+   # Forces from ACE-only (E0 has zero forces)
+   F_ace_only = AtomsCalculators.forces(sys, et_calc)
+
+   # Should be identical since E0 contributes zero forces
+   max_diff = maximum(norm(ustrip.(f1) - ustrip.(f2)) for (f1, f2) in zip(F_stacked, F_ace_only))
+   print_tf(@test max_diff < 1e-10)
+end
+println()
+
+##
+
+@info("Testing StackedCalculator virial consistency")
+
+for ntest = 1:10
+   local sys, efv_stacked, efv_ace_only
+
+   sys = rand_struct()
+
+   efv_stacked = AtomsCalculators.energy_forces_virial(sys, stacked)
+   efv_ace_only = AtomsCalculators.energy_forces_virial(sys, et_calc)
+
+   # Virial should match (E0 has zero virial)
+   V_stacked = ustrip.(efv_stacked.virial)
+   V_ace_only = ustrip.(efv_ace_only.virial)
+
+   print_tf(@test norm(V_stacked - V_ace_only) / (norm(V_ace_only) + 1e-10) < 1e-10)
+end
+println()
+
+##
+
+@info("Testing StackedCalculator with E0 only")
+
+# Create stacked calculator with just E0
+E0_only_stacked = ETM.StackedCalculator((E0_calc,))
+
+sys = rand_struct()
+E = AtomsCalculators.potential_energy(sys, E0_only_stacked)
+F = AtomsCalculators.forces(sys, E0_only_stacked)
+
+# Energy should match E0_calc
+E_direct = AtomsCalculators.potential_energy(sys, E0_calc)
+@test ustrip(E) ≈ ustrip(E_direct)
+println("StackedCalculator(E0 only) energy: OK")
+
+# Forces should be zero
+@test all(norm(ustrip.(f)) < 1e-14 for f in F)
+println("StackedCalculator(E0 only) forces (zero): OK")
+
+##
+
+@info("All Phase 2 tests passed!")

From 657500ab1c149fe45afe45e25750a6d3b6b208e4 Mon Sep 17 00:00:00 2001
From: James Kermode <james.kermode@gmail.com>
Date: Tue, 23 Dec 2025 12:09:03 +0000
Subject: [PATCH 03/35] Refactor StackedCalculator to separate file
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Move StackedCalculator to src/et_models/stackedcalc.jl for better
separation of concerns - it's a generic utility for combining
calculators, independent of ETACE-specific code.

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
---
 src/et_models/et_calculators.jl | 97 +-------------------------------
 src/et_models/et_models.jl      |  1 +
 src/et_models/stackedcalc.jl    | 98 +++++++++++++++++++++++++++++++++
 3 files changed, 102 insertions(+), 94 deletions(-)
 create mode 100644 src/et_models/stackedcalc.jl

diff --git a/src/et_models/et_calculators.jl b/src/et_models/et_calculators.jl
index 989efec7d..0418fa562 100644
--- a/src/et_models/et_calculators.jl
+++ b/src/et_models/et_calculators.jl
@@ -6,8 +6,10 @@
 # - SiteEnergyModel interface: Any model producing per-site energies can implement this
 # - E0Model: One-body reference energies (constant per species)
 # - WrappedETACE: Wraps ETACE model with the SiteEnergyModel interface
-# - StackedCalculator: Combines multiple SiteEnergyModels into one calculator
+# - WrappedSiteCalculator: Converts SiteEnergyModel to AtomsCalculators interface
 # - ETACEPotential: Standalone calculator for simple use cases
+#
+# See also: stackedcalc.jl for StackedCalculator (combines multiple calculators)
 
 import AtomsCalculators
 import AtomsBase: AbstractSystem, ChemicalSpecies
@@ -245,99 +247,6 @@ function AtomsCalculators.energy_forces_virial(
 end
 
 
-# ============================================================================
-#  StackedCalculator - Combines multiple AtomsCalculators
-# ============================================================================
-
-"""
-    StackedCalculator{C<:Tuple}
-
-Combines multiple AtomsCalculators by summing their energy, forces, and virial.
-Each calculator in the tuple must implement the AtomsCalculators interface.
-
-This allows combining site-based calculators (via WrappedSiteCalculator) with
-calculators that don't have site decompositions (e.g., Coulomb, dispersion).
-
-# Example
-```julia
-# Wrap site energy models
-E0_calc = WrappedSiteCalculator(E0Model(Dict(:Si => -0.846)))
-ace_calc = WrappedSiteCalculator(WrappedETACE(et_model, ps, st, 5.5))
-
-# Stack them (could also add Coulomb, dispersion, etc.)
-calc = StackedCalculator((E0_calc, ace_calc))
-
-E = potential_energy(sys, calc)
-F = forces(sys, calc)
-```
-
-# Fields
-- `calcs::Tuple` - Tuple of calculators implementing AtomsCalculators interface
-"""
-struct StackedCalculator{C<:Tuple}
-   calcs::C
-end
-
-# Get maximum cutoff from all calculators (for informational purposes)
-function cutoff_radius(calc::StackedCalculator)
-   rcuts = [ustrip(u"Å", cutoff_radius(c)) for c in calc.calcs]
-   return maximum(rcuts) * u"Å"
-end
-
-# AtomsCalculators interface - sum contributions from all calculators
-AtomsCalculators.@generate_interface function AtomsCalculators.potential_energy(
-      sys::AbstractSystem, calc::StackedCalculator; kwargs...)
-   E_total = 0.0 * u"eV"
-   for c in calc.calcs
-      E_total += AtomsCalculators.potential_energy(sys, c)
-   end
-   return E_total
-end
-
-AtomsCalculators.@generate_interface function AtomsCalculators.forces(
-      sys::AbstractSystem, calc::StackedCalculator; kwargs...)
-   F_total = nothing
-   for c in calc.calcs
-      F = AtomsCalculators.forces(sys, c)
-      if F_total === nothing
-         F_total = F
-      else
-         F_total = F_total .+ F
-      end
-   end
-   return F_total
-end
-
-AtomsCalculators.@generate_interface function AtomsCalculators.virial(
-      sys::AbstractSystem, calc::StackedCalculator; kwargs...)
-   V_total = zeros(SMatrix{3,3,Float64,9}) * u"eV"
-   for c in calc.calcs
-      V_total += AtomsCalculators.virial(sys, c)
-   end
-   return V_total
-end
-
-function AtomsCalculators.energy_forces_virial(
-      sys::AbstractSystem, calc::StackedCalculator; kwargs...)
-   E_total = 0.0 * u"eV"
-   F_total = nothing
-   V_total = zeros(SMatrix{3,3,Float64,9}) * u"eV"
-
-   for c in calc.calcs
-      efv = AtomsCalculators.energy_forces_virial(sys, c)
-      E_total += efv.energy
-      V_total += efv.virial
-      if F_total === nothing
-         F_total = efv.forces
-      else
-         F_total = F_total .+ efv.forces
-      end
-   end
-
-   return (energy=E_total, forces=F_total, virial=V_total)
-end
-
-
 # ============================================================================
 #  ETACEPotential - Standalone calculator for ETACE models
 # ============================================================================
diff --git a/src/et_models/et_models.jl b/src/et_models/et_models.jl
index 333961ff3..9aedb182e 100644
--- a/src/et_models/et_models.jl
+++ b/src/et_models/et_models.jl
@@ -19,5 +19,6 @@ include("convert.jl")
 include("splinify.jl")
 
 include("et_calculators.jl")
+include("stackedcalc.jl")
 
 end
\ No newline at end of file
diff --git a/src/et_models/stackedcalc.jl b/src/et_models/stackedcalc.jl
new file mode 100644
index 000000000..a9bfef142
--- /dev/null
+++ b/src/et_models/stackedcalc.jl
@@ -0,0 +1,98 @@
+
+# StackedCalculator - Combines multiple AtomsCalculators
+#
+# Generic utility for combining multiple calculators by summing their
+# energy, forces, and virial contributions.
+
+import AtomsCalculators
+import AtomsBase: AbstractSystem
+using StaticArrays
+using Unitful
+
+"""
+    StackedCalculator{C<:Tuple}
+
+Combines multiple AtomsCalculators by summing their energy, forces, and virial.
+Each calculator in the tuple must implement the AtomsCalculators interface.
+
+This allows combining site-based calculators (via WrappedSiteCalculator) with
+calculators that don't have site decompositions (e.g., Coulomb, dispersion).
+
+# Example
+```julia
+# Wrap site energy models
+E0_calc = WrappedSiteCalculator(E0Model(Dict(:Si => -0.846)))
+ace_calc = WrappedSiteCalculator(WrappedETACE(et_model, ps, st, 5.5))
+
+# Stack them (could also add Coulomb, dispersion, etc.)
+calc = StackedCalculator((E0_calc, ace_calc))
+
+E = potential_energy(sys, calc)
+F = forces(sys, calc)
+```
+
+# Fields
+- `calcs::Tuple` - Tuple of calculators implementing AtomsCalculators interface
+"""
+struct StackedCalculator{C<:Tuple}
+   calcs::C
+end
+
+# Get maximum cutoff from all calculators (for informational purposes)
+function cutoff_radius(calc::StackedCalculator)
+   rcuts = [ustrip(u"Å", cutoff_radius(c)) for c in calc.calcs]
+   return maximum(rcuts) * u"Å"
+end
+
+# AtomsCalculators interface - sum contributions from all calculators
+AtomsCalculators.@generate_interface function AtomsCalculators.potential_energy(
+      sys::AbstractSystem, calc::StackedCalculator; kwargs...)
+   E_total = 0.0 * u"eV"
+   for c in calc.calcs
+      E_total += AtomsCalculators.potential_energy(sys, c)
+   end
+   return E_total
+end
+
+AtomsCalculators.@generate_interface function AtomsCalculators.forces(
+      sys::AbstractSystem, calc::StackedCalculator; kwargs...)
+   F_total = nothing
+   for c in calc.calcs
+      F = AtomsCalculators.forces(sys, c)
+      if F_total === nothing
+         F_total = F
+      else
+         F_total = F_total .+ F
+      end
+   end
+   return F_total
+end
+
+AtomsCalculators.@generate_interface function AtomsCalculators.virial(
+      sys::AbstractSystem, calc::StackedCalculator; kwargs...)
+   V_total = zeros(SMatrix{3,3,Float64,9}) * u"eV"
+   for c in calc.calcs
+      V_total += AtomsCalculators.virial(sys, c)
+   end
+   return V_total
+end
+
+function AtomsCalculators.energy_forces_virial(
+      sys::AbstractSystem, calc::StackedCalculator; kwargs...)
+   E_total = 0.0 * u"eV"
+   F_total = nothing
+   V_total = zeros(SMatrix{3,3,Float64,9}) * u"eV"
+
+   for c in calc.calcs
+      efv = AtomsCalculators.energy_forces_virial(sys, c)
+      E_total += efv.energy
+      V_total += efv.virial
+      if F_total === nothing
+         F_total = efv.forces
+      else
+         F_total = F_total .+ efv.forces
+      end
+   end
+
+   return (energy=E_total, forces=F_total, virial=V_total)
+end

From 07b3641ff18664e1b7366ea98943338b093c5403 Mon Sep 17 00:00:00 2001
From: James Kermode <james.kermode@gmail.com>
Date: Tue, 23 Dec 2025 14:50:12 +0000
Subject: [PATCH 04/35] Phase 5: Training assembly functions for ETACEPotential
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Implements linear least squares training support:

- length_basis(): Returns number of linear parameters (nbasis * nspecies)
- energy_forces_virial_basis(): Compute basis values for E/F/V
- potential_energy_basis(): Faster energy-only basis computation
- get_linear_parameters(): Extract readout weights as flat vector
- set_linear_parameters!(): Set readout weights from flat vector

The basis functions allow linear fitting via:
  E = dot(E_basis, θ)
  F = F_basis * θ
  V = sum(θ .* V_basis)

Tests verify that linear combination of basis with current parameters
reproduces the direct energy/forces/virial evaluation.

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
---
 src/et_models/et_calculators.jl       | 165 ++++++++++++++++++++++++++
 test/et_models/test_et_calculators.jl | 101 ++++++++++++++++
 2 files changed, 266 insertions(+)

diff --git a/src/et_models/et_calculators.jl b/src/et_models/et_calculators.jl
index 0418fa562..383d8af55 100644
--- a/src/et_models/et_calculators.jl
+++ b/src/et_models/et_calculators.jl
@@ -359,3 +359,168 @@ function AtomsCalculators.energy_forces_virial(
    )
 end
 
+
+# ============================================================================
+#  Training Assembly Interface
+# ============================================================================
+#
+# These functions compute the basis values for linear least squares fitting.
+# The linear parameters are the readout weights W[1, k, s] where:
+#   k = basis function index (1:nbasis)
+#   s = species index (1:nspecies)
+#
+# Total parameters: nbasis * nspecies
+#
+# Energy basis: E = ∑_i ∑_k W[k, species[i]] * 𝔹[i, k]
+# Force basis:  F_atom = -∑ edges ∂E/∂r_edge, computed per basis function
+# Virial basis: V = -∑ edges (∂E/∂r_edge) ⊗ r_edge, computed per basis function
+
+"""
+    length_basis(calc::ETACEPotential)
+
+Return the number of linear parameters in the model (nbasis * nspecies).
+"""
+function length_basis(calc::ETACEPotential)
+   nbasis = calc.model.readout.in_dim
+   nspecies = calc.model.readout.ncat
+   return nbasis * nspecies
+end
+
+"""
+    energy_forces_virial_basis(sys::AbstractSystem, calc::ETACEPotential)
+
+Compute the basis functions for energy, forces, and virial.
+Returns a named tuple with:
+- `energy::Vector{Float64}` - length = length_basis(calc)
+- `forces::Matrix{SVector{3,Float64}}` - size = (natoms, length_basis)
+- `virial::Vector{SMatrix{3,3,Float64}}` - length = length_basis(calc)
+
+The linear combination of basis values with parameters gives:
+  E = dot(energy, params)
+  F = forces * params
+  V = sum(params .* virial)
+"""
+function energy_forces_virial_basis(sys::AbstractSystem, calc::ETACEPotential)
+   G = ET.Atoms.interaction_graph(sys, calc.rcut * u"Å")
+
+   # Get basis and jacobian
+   𝔹, ∂𝔹 = site_basis_jacobian(calc.model, G, calc.ps, calc.st)
+
+   natoms = length(sys)
+   nbasis = calc.model.readout.in_dim
+   nspecies = calc.model.readout.ncat
+   nparams = nbasis * nspecies
+
+   # Species indices for each node
+   iZ = calc.model.readout.selector.(G.node_data)
+
+   # Initialize outputs
+   E_basis = zeros(nparams)
+   F_basis = zeros(SVector{3, Float64}, natoms, nparams)
+   V_basis = zeros(SMatrix{3, 3, Float64, 9}, nparams)
+
+   # Compute basis values for each parameter (k, s) pair
+   # Parameter index: p = (s-1) * nbasis + k
+   for s in 1:nspecies
+      for k in 1:nbasis
+         p = (s - 1) * nbasis + k
+
+         # Energy basis: sum of 𝔹[i, k] for atoms of species s
+         for i in 1:length(G.node_data)
+            if iZ[i] == s
+               E_basis[p] += 𝔹[i, k]
+            end
+         end
+
+         # Create unit weight: W[1, k, s] = 1, others = 0
+         # Then compute edge gradients and convert to forces/virial
+         W_unit = zeros(1, nbasis, nspecies)
+         W_unit[1, k, s] = 1.0
+
+         # Compute edge gradients using the reconstruction pattern
+         # ∇Ei = ∂𝔹[:, i, :] * W[1, :, iZ[i]] for each node i
+         ∇Ei = reduce(hcat, ∂𝔹[:, i, :] * W_unit[1, :, iZ[i]] for i in 1:length(iZ))
+         ∇Ei_3d = reshape(∇Ei, size(∇Ei)..., 1)
+
+         # Convert to edge-indexed format with 3D vectors
+         ∇E_edges = ET.rev_reshape_embedding(∇Ei_3d, G)[:]
+
+         # Convert edge gradients to atomic forces (negate for forces)
+         F_basis[:, p] = -ET.Atoms.forces_from_edge_grads(sys, G, ∇E_edges)
+
+         # Compute virial: V = -∑ (∂E/∂𝐫ij) ⊗ 𝐫ij
+         V = zeros(SMatrix{3, 3, Float64, 9})
+         for (edge, ∂edge) in zip(G.edge_data, ∇E_edges)
+            V -= ∂edge.𝐫 * edge.𝐫'
+         end
+         V_basis[p] = V
+      end
+   end
+
+   return (
+      energy = E_basis * u"eV",
+      forces = F_basis .* u"eV/Å",
+      virial = V_basis * u"eV"
+   )
+end
+
+"""
+    potential_energy_basis(sys::AbstractSystem, calc::ETACEPotential)
+
+Compute only the energy basis (faster when forces/virial not needed).
+"""
+function potential_energy_basis(sys::AbstractSystem, calc::ETACEPotential)
+   G = ET.Atoms.interaction_graph(sys, calc.rcut * u"Å")
+
+   # Get basis values
+   𝔹 = site_basis(calc.model, G, calc.ps, calc.st)
+
+   nbasis = calc.model.readout.in_dim
+   nspecies = calc.model.readout.ncat
+   nparams = nbasis * nspecies
+
+   # Species indices for each node
+   iZ = calc.model.readout.selector.(G.node_data)
+
+   # Compute energy basis
+   E_basis = zeros(nparams)
+   for s in 1:nspecies
+      for k in 1:nbasis
+         p = (s - 1) * nbasis + k
+         for i in 1:length(G.node_data)
+            if iZ[i] == s
+               E_basis[p] += 𝔹[i, k]
+            end
+         end
+      end
+   end
+
+   return E_basis * u"eV"
+end
+
+"""
+    get_linear_parameters(calc::ETACEPotential)
+
+Extract the linear parameters (readout weights) as a flat vector.
+Parameters are ordered as: [W[1,:,1]; W[1,:,2]; ... ; W[1,:,nspecies]]
+"""
+function get_linear_parameters(calc::ETACEPotential)
+   return vec(calc.ps.readout.W)
+end
+
+"""
+    set_linear_parameters!(calc::ETACEPotential, θ::AbstractVector)
+
+Set the linear parameters (readout weights) from a flat vector.
+"""
+function set_linear_parameters!(calc::ETACEPotential, θ::AbstractVector)
+   nbasis = calc.model.readout.in_dim
+   nspecies = calc.model.readout.ncat
+   @assert length(θ) == nbasis * nspecies
+
+   # Reshape and copy into ps
+   new_W = reshape(θ, 1, nbasis, nspecies)
+   calc.ps = merge(calc.ps, (readout = merge(calc.ps.readout, (W = new_W,)),))
+   return calc
+end
+
diff --git a/test/et_models/test_et_calculators.jl b/test/et_models/test_et_calculators.jl
index 02da8a816..878a1a0e4 100644
--- a/test/et_models/test_et_calculators.jl
+++ b/test/et_models/test_et_calculators.jl
@@ -506,3 +506,104 @@ println("StackedCalculator(E0 only) forces (zero): OK")
 ##
 
 @info("All Phase 2 tests passed!")
+
+## ============================================================================
+##  Phase 5: Training Assembly Tests
+## ============================================================================
+
+@info("Testing Phase 5: Training assembly functions")
+
+##
+
+@info("Testing length_basis")
+nparams = ETM.length_basis(et_calc)
+nbasis = et_model.readout.in_dim
+nspecies = et_model.readout.ncat
+@test nparams == nbasis * nspecies
+println("length_basis: OK (nparams=$nparams, nbasis=$nbasis, nspecies=$nspecies)")
+
+##
+
+@info("Testing get/set_linear_parameters round-trip")
+θ_orig = ETM.get_linear_parameters(et_calc)
+@test length(θ_orig) == nparams
+
+# Modify and restore
+θ_test = randn(nparams)
+ETM.set_linear_parameters!(et_calc, θ_test)
+θ_check = ETM.get_linear_parameters(et_calc)
+@test θ_check ≈ θ_test
+
+# Restore original
+ETM.set_linear_parameters!(et_calc, θ_orig)
+@test ETM.get_linear_parameters(et_calc) ≈ θ_orig
+println("get/set_linear_parameters round-trip: OK")
+
+##
+
+@info("Testing potential_energy_basis")
+sys = rand_struct()
+E_basis = ETM.potential_energy_basis(sys, et_calc)
+@test length(E_basis) == nparams
+@test eltype(ustrip.(E_basis)) <: Real
+println("potential_energy_basis shape: OK")
+
+##
+
+@info("Testing energy_forces_virial_basis")
+efv_basis = ETM.energy_forces_virial_basis(sys, et_calc)
+natoms = length(sys)
+
+@test length(efv_basis.energy) == nparams
+@test size(efv_basis.forces) == (natoms, nparams)
+@test length(efv_basis.virial) == nparams
+println("energy_forces_virial_basis shapes: OK")
+
+##
+
+@info("Testing linear combination gives correct energy")
+
+# E = dot(E_basis, θ) should match potential_energy
+θ = ETM.get_linear_parameters(et_calc)
+E_from_basis = dot(ustrip.(efv_basis.energy), θ)
+E_direct = ustrip(u"eV", AtomsCalculators.potential_energy(sys, et_calc))
+
+print_tf(@test E_from_basis ≈ E_direct rtol=1e-10)
+println()
+println("Energy from basis: OK")
+
+##
+
+@info("Testing linear combination gives correct forces")
+
+# F = efv_basis.forces * θ should match forces
+F_from_basis = efv_basis.forces * θ
+F_direct = AtomsCalculators.forces(sys, et_calc)
+
+max_diff = maximum(norm(ustrip.(f1) - ustrip.(f2)) for (f1, f2) in zip(F_from_basis, F_direct))
+print_tf(@test max_diff < 1e-10)
+println()
+println("Forces from basis: OK (max_diff = $max_diff)")
+
+##
+
+@info("Testing linear combination gives correct virial")
+
+# V = sum(θ .* virial) should match virial
+V_from_basis = sum(θ[i] * ustrip.(efv_basis.virial[i]) for i in 1:nparams)
+V_direct = ustrip.(AtomsCalculators.virial(sys, et_calc))
+
+virial_diff = maximum(abs.(V_from_basis - V_direct))
+print_tf(@test virial_diff < 1e-10)
+println()
+println("Virial from basis: OK (max_diff = $virial_diff)")
+
+##
+
+@info("Testing potential_energy_basis matches energy from efv_basis")
+@test ustrip.(E_basis) ≈ ustrip.(efv_basis.energy)
+println("potential_energy_basis consistency: OK")
+
+##
+
+@info("All Phase 5 tests passed!")

From e4c661f1a6f136155b234c1073617afad08f1359 Mon Sep 17 00:00:00 2001
From: James Kermode <james.kermode@gmail.com>
Date: Tue, 23 Dec 2025 16:45:49 +0000
Subject: [PATCH 05/35] Optimize StackedCalculator with @generated functions
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Use compile-time loop unrolling via @generated functions for efficient
summation over calculators. The N type parameter allows generating
specialized code like:

  E_1 = potential_energy(sys, calc.calcs[1])
  E_2 = potential_energy(sys, calc.calcs[2])
  return E_1 + E_2

instead of runtime loops. This enables better inlining and type inference
when the number of calculators is small and known at compile time.

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
---
 src/et_models/stackedcalc.jl | 148 +++++++++++++++++++++++++----------
 1 file changed, 105 insertions(+), 43 deletions(-)

diff --git a/src/et_models/stackedcalc.jl b/src/et_models/stackedcalc.jl
index a9bfef142..d58a41cf7 100644
--- a/src/et_models/stackedcalc.jl
+++ b/src/et_models/stackedcalc.jl
@@ -3,14 +3,18 @@
 #
 # Generic utility for combining multiple calculators by summing their
 # energy, forces, and virial contributions.
+#
+# Uses @generated functions with Base.Cartesian for efficient
+# compile-time loop unrolling when the number of calculators is known.
 
 import AtomsCalculators
 import AtomsBase: AbstractSystem
 using StaticArrays
 using Unitful
+using Base.Cartesian: @nexprs, @ntuple, @ncall
 
 """
-    StackedCalculator{C<:Tuple}
+    StackedCalculator{N, C<:Tuple}
 
 Combines multiple AtomsCalculators by summing their energy, forces, and virial.
 Each calculator in the tuple must implement the AtomsCalculators interface.
@@ -18,6 +22,9 @@ Each calculator in the tuple must implement the AtomsCalculators interface.
 This allows combining site-based calculators (via WrappedSiteCalculator) with
 calculators that don't have site decompositions (e.g., Coulomb, dispersion).
 
+The implementation uses compile-time loop unrolling for efficiency when
+the number of calculators is small and known at compile time.
+
 # Example
 ```julia
 # Wrap site energy models
@@ -32,67 +39,122 @@ F = forces(sys, calc)
 ```
 
 # Fields
-- `calcs::Tuple` - Tuple of calculators implementing AtomsCalculators interface
+- `calcs::Tuple` - Tuple of N calculators implementing AtomsCalculators interface
 """
-struct StackedCalculator{C<:Tuple}
+struct StackedCalculator{N, C<:Tuple}
    calcs::C
 end
 
+# Constructor that infers N from the tuple length
+StackedCalculator(calcs::C) where {C<:Tuple} = StackedCalculator{length(C.parameters), C}(calcs)
+
 # Get maximum cutoff from all calculators (for informational purposes)
-function cutoff_radius(calc::StackedCalculator)
-   rcuts = [ustrip(u"Å", cutoff_radius(c)) for c in calc.calcs]
-   return maximum(rcuts) * u"Å"
+@generated function cutoff_radius(calc::StackedCalculator{N}) where {N}
+   quote
+      rcuts = @ntuple $N i -> ustrip(u"Å", cutoff_radius(calc.calcs[i]))
+      return maximum(rcuts) * u"Å"
+   end
+end
+
+# ============================================================================
+#  Efficient implementations using @generated for compile-time unrolling
+# ============================================================================
+
+# Helper to generate sum expression: E_1 + E_2 + ... + E_N
+function _gen_sum(N, prefix)
+   if N == 1
+      return Symbol(prefix, "_1")
+   else
+      ex = Symbol(prefix, "_1")
+      for i in 2:N
+         ex = :($ex + $(Symbol(prefix, "_", i)))
+      end
+      return ex
+   end
+end
+
+# Helper to generate broadcast sum: F_1 .+ F_2 .+ ... .+ F_N
+function _gen_broadcast_sum(N, prefix)
+   if N == 1
+      return Symbol(prefix, "_1")
+   else
+      ex = Symbol(prefix, "_1")
+      for i in 2:N
+         ex = :($ex .+ $(Symbol(prefix, "_", i)))
+      end
+      return ex
+   end
+end
+
+@generated function _stacked_energy(sys::AbstractSystem, calc::StackedCalculator{N}) where {N}
+   assignments = [:($(Symbol("E_", i)) = AtomsCalculators.potential_energy(sys, calc.calcs[$i])) for i in 1:N]
+   sum_expr = _gen_sum(N, "E")
+   quote
+      $(assignments...)
+      return $sum_expr
+   end
+end
+
+@generated function _stacked_forces(sys::AbstractSystem, calc::StackedCalculator{N}) where {N}
+   assignments = [:($(Symbol("F_", i)) = AtomsCalculators.forces(sys, calc.calcs[$i])) for i in 1:N]
+   sum_expr = _gen_broadcast_sum(N, "F")
+   quote
+      $(assignments...)
+      return $sum_expr
+   end
+end
+
+@generated function _stacked_virial(sys::AbstractSystem, calc::StackedCalculator{N}) where {N}
+   assignments = [:($(Symbol("V_", i)) = AtomsCalculators.virial(sys, calc.calcs[$i])) for i in 1:N]
+   sum_expr = _gen_sum(N, "V")
+   quote
+      $(assignments...)
+      return $sum_expr
+   end
+end
+
+@generated function _stacked_efv(sys::AbstractSystem, calc::StackedCalculator{N}) where {N}
+   # Generate assignments for each calculator
+   assignments = [:($(Symbol("efv_", i)) = AtomsCalculators.energy_forces_virial(sys, calc.calcs[$i])) for i in 1:N]
+
+   # Generate sum expressions
+   E_exprs = [:($(Symbol("efv_", i)).energy) for i in 1:N]
+   F_exprs = [:($(Symbol("efv_", i)).forces) for i in 1:N]
+   V_exprs = [:($(Symbol("efv_", i)).virial) for i in 1:N]
+
+   E_sum = N == 1 ? E_exprs[1] : reduce((a, b) -> :($a + $b), E_exprs)
+   F_sum = N == 1 ? F_exprs[1] : reduce((a, b) -> :($a .+ $b), F_exprs)
+   V_sum = N == 1 ? V_exprs[1] : reduce((a, b) -> :($a + $b), V_exprs)
+
+   quote
+      $(assignments...)
+      E_total = $E_sum
+      F_total = $F_sum
+      V_total = $V_sum
+      return (energy=E_total, forces=F_total, virial=V_total)
+   end
 end
 
-# AtomsCalculators interface - sum contributions from all calculators
+# ============================================================================
+#  AtomsCalculators interface
+# ============================================================================
+
 AtomsCalculators.@generate_interface function AtomsCalculators.potential_energy(
       sys::AbstractSystem, calc::StackedCalculator; kwargs...)
-   E_total = 0.0 * u"eV"
-   for c in calc.calcs
-      E_total += AtomsCalculators.potential_energy(sys, c)
-   end
-   return E_total
+   return _stacked_energy(sys, calc)
 end
 
 AtomsCalculators.@generate_interface function AtomsCalculators.forces(
       sys::AbstractSystem, calc::StackedCalculator; kwargs...)
-   F_total = nothing
-   for c in calc.calcs
-      F = AtomsCalculators.forces(sys, c)
-      if F_total === nothing
-         F_total = F
-      else
-         F_total = F_total .+ F
-      end
-   end
-   return F_total
+   return _stacked_forces(sys, calc)
 end
 
 AtomsCalculators.@generate_interface function AtomsCalculators.virial(
       sys::AbstractSystem, calc::StackedCalculator; kwargs...)
-   V_total = zeros(SMatrix{3,3,Float64,9}) * u"eV"
-   for c in calc.calcs
-      V_total += AtomsCalculators.virial(sys, c)
-   end
-   return V_total
+   return _stacked_virial(sys, calc)
 end
 
 function AtomsCalculators.energy_forces_virial(
       sys::AbstractSystem, calc::StackedCalculator; kwargs...)
-   E_total = 0.0 * u"eV"
-   F_total = nothing
-   V_total = zeros(SMatrix{3,3,Float64,9}) * u"eV"
-
-   for c in calc.calcs
-      efv = AtomsCalculators.energy_forces_virial(sys, c)
-      E_total += efv.energy
-      V_total += efv.virial
-      if F_total === nothing
-         F_total = efv.forces
-      else
-         F_total = F_total .+ efv.forces
-      end
-   end
-
-   return (energy=E_total, forces=F_total, virial=V_total)
+   return _stacked_efv(sys, calc)
 end

From 5da6c2517b478b834eb133e32349491bab9f7f41 Mon Sep 17 00:00:00 2001
From: James Kermode <james.kermode@gmail.com>
Date: Tue, 23 Dec 2025 17:21:44 +0000
Subject: [PATCH 06/35] Add benchmark scripts for ACE vs ETACE performance
 comparison
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- benchmark_comparison.jl: Energy benchmarks (CPU + GPU)
- benchmark_forces.jl: Forces benchmarks (CPU only)

Results show:
- Energy: ETACE CPU 1.7-2.2x faster, ETACE GPU up to 87x faster
- Forces: ETACE CPU 7.7-11.4x faster than ACE

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
---
 test/benchmark_comparison.jl | 172 +++++++++++++++++++++++++++++++++++
 test/benchmark_forces.jl     | 122 +++++++++++++++++++++++++
 2 files changed, 294 insertions(+)
 create mode 100644 test/benchmark_comparison.jl
 create mode 100644 test/benchmark_forces.jl

diff --git a/test/benchmark_comparison.jl b/test/benchmark_comparison.jl
new file mode 100644
index 000000000..5ad9fea45
--- /dev/null
+++ b/test/benchmark_comparison.jl
@@ -0,0 +1,172 @@
+using ACEpotentials
+M = ACEpotentials.Models
+ETM = ACEpotentials.ETModels
+
+import EquivariantTensors as ET
+import AtomsCalculators
+using StaticArrays, Lux, Random, LuxCore, LinearAlgebra
+using AtomsBase, AtomsBuilder, Unitful
+using BenchmarkTools
+using Printf
+
+# GPU detection (simplified from ET test utils)
+dev = identity
+has_cuda = false
+
+try
+   using CUDA
+   if CUDA.functional()
+      @info "Using CUDA"
+      CUDA.versioninfo()
+      global has_cuda = true
+      global dev = cu
+   else
+      @info "CUDA is not functional"
+   end
+catch e
+   @info "Couldn't load CUDA: $e"
+end
+
+if !has_cuda
+   @info "No GPU available. Using CPU only."
+end
+
+rng = Random.MersenneTwister(1234)
+
+# Build models
+elements = (:Si, :O)
+level = M.TotalDegree()
+max_level = 8
+order = 2
+maxl = 4
+
+rin0cuts = M._default_rin0cuts(elements)
+rin0cuts = (x -> (rin = x.rin, r0 = x.r0, rcut = 5.5)).(rin0cuts)
+
+model = M.ace_model(; elements = elements, order = order,
+            Ytype = :solid, level = level, max_level = max_level,
+            maxl = maxl, pair_maxn = max_level,
+            rin0cuts = rin0cuts,
+            init_WB = :glorot_normal, init_Wpair = :glorot_normal)
+
+ps, st = Lux.setup(rng, model)
+
+# Kill the pair basis for fair comparison
+for s in model.pairbasis.splines
+   s.itp.itp.coefs[:] *= 0
+end
+
+# Create old ACE calculator
+ace_calc = M.ACEPotential(model, ps, st)
+
+# Convert to ETACE
+et_model = ETM.convert2et(model)
+et_ps, et_st = LuxCore.setup(rng, et_model)
+
+# Copy parameters
+et_ps.rembed.post.W[:, :, 1] = ps.rbasis.Wnlq[:, :, 1, 1]
+et_ps.rembed.post.W[:, :, 2] = ps.rbasis.Wnlq[:, :, 1, 2]
+et_ps.rembed.post.W[:, :, 3] = ps.rbasis.Wnlq[:, :, 2, 1]
+et_ps.rembed.post.W[:, :, 4] = ps.rbasis.Wnlq[:, :, 2, 2]
+et_ps.readout.W[1, :, 1] .= ps.WB[:, 1]
+et_ps.readout.W[1, :, 2] .= ps.WB[:, 2]
+
+rcut = maximum(a.rcut for a in model.pairbasis.rin0cuts)
+
+# GPU setup
+has_gpu = has_cuda
+if has_gpu
+    et_ps_32 = ET.float32(et_ps)
+    et_st_32 = ET.float32(et_st)
+    et_ps_gpu = dev(et_ps_32)
+    et_st_gpu = dev(et_st_32)
+end
+
+# Function to create system of given size
+function make_system(n_repeat)
+    sys = AtomsBuilder.bulk(:Si, cubic=true) * n_repeat
+    rattle!(sys, 0.1u"Å")
+    AtomsBuilder.randz!(sys, [:Si => 0.5, :O => 0.5])
+    return sys
+end
+
+# Benchmark configurations (tuple for bulk multiplication)
+configs = [
+    (1, 1, 1),   # 8 atoms
+    (2, 1, 1),   # 16 atoms
+    (2, 2, 2),   # 64 atoms
+    (3, 3, 2),   # 144 atoms
+    (4, 4, 2),   # 256 atoms
+    (4, 4, 4),   # 512 atoms
+    (5, 5, 4),   # 800 atoms
+]
+
+println()
+println("=" ^ 85)
+println("BENCHMARK: ACE (CPU) vs ETACE (CPU) vs ETACE (GPU)")
+println("=" ^ 85)
+println()
+
+# Header
+if has_gpu
+    println("| Atoms | Edges   | ACE CPU (ms) | ETACE CPU (ms) | ETACE GPU (ms) | CPU Speedup | GPU Speedup |")
+    println("|-------|---------|--------------|----------------|----------------|-------------|-------------|")
+else
+    println("| Atoms | Edges   | ACE CPU (ms) | ETACE CPU (ms) | CPU Speedup |")
+    println("|-------|---------|--------------|----------------|-------------|")
+end
+
+for cfg in configs
+    sys = make_system(cfg)
+    natoms = length(sys)
+
+    # Count edges
+    G = ET.Atoms.interaction_graph(sys, rcut * u"Å")
+    nedges = length(G.edge_data)
+
+    # Warmup ACE
+    _ = AtomsCalculators.potential_energy(sys, ace_calc)
+
+    # Warmup ETACE CPU
+    _ = sum(et_model(G, et_ps, et_st)[1])
+
+    # Benchmark ACE CPU
+    t_ace = @belapsed AtomsCalculators.potential_energy($sys, $ace_calc) samples=5 evals=3
+    t_ace_ms = t_ace * 1000
+
+    # Benchmark ETACE CPU (graph construction NOT included for fair comparison with GPU)
+    t_etace_cpu = @belapsed sum($et_model($G, $et_ps, $et_st)[1]) samples=5 evals=3
+    t_etace_cpu_ms = t_etace_cpu * 1000
+
+    cpu_speedup = t_ace_ms / t_etace_cpu_ms
+
+    if has_gpu
+        # Convert graph to GPU (Float32)
+        G_32 = ET.float32(G)
+        G_gpu = dev(G_32)
+
+        # Warmup GPU
+        _ = sum(et_model(G_gpu, et_ps_gpu, et_st_gpu)[1])
+
+        # Benchmark ETACE GPU (graph already on GPU)
+        t_etace_gpu = @belapsed sum($et_model($G_gpu, $et_ps_gpu, $et_st_gpu)[1]) samples=5 evals=3
+        t_etace_gpu_ms = t_etace_gpu * 1000
+
+        gpu_speedup = t_ace_ms / t_etace_gpu_ms
+
+        @printf("| %5d | %7d | %12.2f | %14.2f | %14.2f | %10.1fx | %10.1fx |\n",
+                natoms, nedges, t_ace_ms, t_etace_cpu_ms, t_etace_gpu_ms, cpu_speedup, gpu_speedup)
+    else
+        @printf("| %5d | %7d | %12.2f | %14.2f | %10.1fx |\n",
+                natoms, nedges, t_ace_ms, t_etace_cpu_ms, cpu_speedup)
+    end
+end
+
+println()
+println("Notes:")
+println("- ACE CPU: Original ACEpotentials model (pair basis zeroed for fair comparison)")
+println("- ETACE CPU: EquivariantTensors backend on CPU (Float64)")
+println("- ETACE GPU: EquivariantTensors backend on GPU (Float32)")
+println("- CPU Speedup = ACE CPU / ETACE CPU")
+println("- GPU Speedup = ACE CPU / ETACE GPU")
+println("- Graph construction time NOT included (currently CPU-only)")
diff --git a/test/benchmark_forces.jl b/test/benchmark_forces.jl
new file mode 100644
index 000000000..afef3a864
--- /dev/null
+++ b/test/benchmark_forces.jl
@@ -0,0 +1,122 @@
+using ACEpotentials
+M = ACEpotentials.Models
+ETM = ACEpotentials.ETModels
+
+import EquivariantTensors as ET
+import AtomsCalculators
+using StaticArrays, Lux, Random, LuxCore, LinearAlgebra
+using AtomsBase, AtomsBuilder, Unitful
+using BenchmarkTools
+using Printf
+
+rng = Random.MersenneTwister(1234)
+
+# Build models
+elements = (:Si, :O)
+level = M.TotalDegree()
+max_level = 8
+order = 2
+maxl = 4
+
+rin0cuts = M._default_rin0cuts(elements)
+rin0cuts = (x -> (rin = x.rin, r0 = x.r0, rcut = 5.5)).(rin0cuts)
+
+model = M.ace_model(; elements = elements, order = order,
+            Ytype = :solid, level = level, max_level = max_level,
+            maxl = maxl, pair_maxn = max_level,
+            rin0cuts = rin0cuts,
+            init_WB = :glorot_normal, init_Wpair = :glorot_normal)
+
+ps, st = Lux.setup(rng, model)
+
+# Kill the pair basis for fair comparison
+for s in model.pairbasis.splines
+   s.itp.itp.coefs[:] *= 0
+end
+
+# Create old ACE calculator
+ace_calc = M.ACEPotential(model, ps, st)
+
+# Convert to ETACE
+et_model = ETM.convert2et(model)
+et_ps, et_st = LuxCore.setup(rng, et_model)
+
+# Copy parameters
+et_ps.rembed.post.W[:, :, 1] = ps.rbasis.Wnlq[:, :, 1, 1]
+et_ps.rembed.post.W[:, :, 2] = ps.rbasis.Wnlq[:, :, 1, 2]
+et_ps.rembed.post.W[:, :, 3] = ps.rbasis.Wnlq[:, :, 2, 1]
+et_ps.rembed.post.W[:, :, 4] = ps.rbasis.Wnlq[:, :, 2, 2]
+et_ps.readout.W[1, :, 1] .= ps.WB[:, 1]
+et_ps.readout.W[1, :, 2] .= ps.WB[:, 2]
+
+rcut = maximum(a.rcut for a in model.pairbasis.rin0cuts)
+
+# Function to create system of given size
+function make_system(n_repeat)
+    sys = AtomsBuilder.bulk(:Si, cubic=true) * n_repeat
+    rattle!(sys, 0.1u"Å")
+    AtomsBuilder.randz!(sys, [:Si => 0.5, :O => 0.5])
+    return sys
+end
+
+# ETACE forces function (CPU only for now)
+function etace_forces(et_model, G, sys, et_ps, et_st)
+    ∂G = ETM.site_grads(et_model, G, et_ps, et_st)
+    return -ET.Atoms.forces_from_edge_grads(sys, G, ∂G.edge_data)
+end
+
+# Benchmark configurations (tuple for bulk multiplication)
+configs = [
+    (1, 1, 1),   # 8 atoms
+    (2, 1, 1),   # 16 atoms
+    (2, 2, 2),   # 64 atoms
+    (3, 3, 2),   # 144 atoms
+    (4, 4, 2),   # 256 atoms
+    (4, 4, 4),   # 512 atoms
+    (5, 5, 4),   # 800 atoms
+]
+
+println()
+println("=" ^ 70)
+println("BENCHMARK: Forces - ACE (CPU) vs ETACE (CPU)")
+println("=" ^ 70)
+println()
+
+# Header
+println("| Atoms | Edges   | ACE CPU (ms) | ETACE CPU (ms) | CPU Speedup |")
+println("|-------|---------|--------------|----------------|-------------|")
+
+for cfg in configs
+    sys = make_system(cfg)
+    natoms = length(sys)
+
+    # Count edges
+    G = ET.Atoms.interaction_graph(sys, rcut * u"Å")
+    nedges = length(G.edge_data)
+
+    # Warmup ACE
+    _ = AtomsCalculators.forces(sys, ace_calc)
+
+    # Warmup ETACE CPU
+    _ = etace_forces(et_model, G, sys, et_ps, et_st)
+
+    # Benchmark ACE CPU
+    t_ace = @belapsed AtomsCalculators.forces($sys, $ace_calc) samples=5 evals=3
+    t_ace_ms = t_ace * 1000
+
+    # Benchmark ETACE CPU (graph construction NOT included for fair comparison)
+    t_etace_cpu = @belapsed etace_forces($et_model, $G, $sys, $et_ps, $et_st) samples=5 evals=3
+    t_etace_cpu_ms = t_etace_cpu * 1000
+
+    cpu_speedup = t_ace_ms / t_etace_cpu_ms
+
+    @printf("| %5d | %7d | %12.2f | %14.2f | %10.1fx |\n",
+            natoms, nedges, t_ace_ms, t_etace_cpu_ms, cpu_speedup)
+end
+
+println()
+println("Notes:")
+println("- ACE CPU: Original ACEpotentials model (pair basis zeroed for fair comparison)")
+println("- ETACE CPU: EquivariantTensors backend on CPU (Float64)")
+println("- CPU Speedup = ACE CPU / ETACE CPU")
+println("- Graph construction time NOT included")

From 265053809134ee76f9eb67afd0a6d1b465b5f33e Mon Sep 17 00:00:00 2001
From: James Kermode <james.kermode@gmail.com>
Date: Tue, 23 Dec 2025 19:35:07 +0000
Subject: [PATCH 07/35] Update plan with implementation progress and benchmark
 results
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- Phase 1, 2, 5 complete
- Phase 3 (E0/PairModel) assigned to maintainer
- Added benchmark results: GPU up to 87x faster, forces 8-11x faster
- Documented all new files and test coverage

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
---
 docs/plans/et_calculators_plan.md | 208 ++++++++++++++++++++++++++++++
 1 file changed, 208 insertions(+)
 create mode 100644 docs/plans/et_calculators_plan.md

diff --git a/docs/plans/et_calculators_plan.md b/docs/plans/et_calculators_plan.md
new file mode 100644
index 000000000..342800354
--- /dev/null
+++ b/docs/plans/et_calculators_plan.md
@@ -0,0 +1,208 @@
+# Plan: ETACE Calculator Interface and Training Support
+
+## Overview
+
+Create calculator wrappers and training assembly for the new ETACE backend, integrating with EquivariantTensors.jl.
+
+**Status**: ✅ Core implementation complete. Awaiting maintainer for E0/PairModel.
+
+**Branch**: `jrk/etcalculators` (based on `acesuit/co/etback`)
+
+---
+
+## Progress Summary
+
+| Phase | Description | Status |
+|-------|-------------|--------|
+| Phase 1 | ETACEPotential with AtomsCalculators interface | ✅ Complete |
+| Phase 2 | WrappedSiteCalculator + StackedCalculator | ✅ Complete |
+| Phase 3 | E0Model + PairModel | 🔄 Maintainer will implement |
+| Phase 5 | Training assembly functions | ✅ Complete |
+| Benchmarks | Performance comparison scripts | ✅ Complete |
+
+### Benchmark Results
+
+**Energy (test/benchmark_comparison.jl)**:
+| Atoms | ACE CPU (ms) | ETACE CPU (ms) | ETACE GPU (ms) | CPU Speedup | GPU Speedup |
+|-------|--------------|----------------|----------------|-------------|-------------|
+| 8 | 0.87 | 0.43 | 0.39 | 2.0x | 2.2x |
+| 64 | 5.88 | 2.79 | 0.45 | 2.1x | 13.0x |
+| 256 | 17.77 | 11.81 | 0.48 | 1.5x | 37.1x |
+| 800 | 53.03 | 30.32 | 0.61 | 1.7x | **87.6x** |
+
+**Forces (test/benchmark_forces.jl)**:
+| Atoms | ACE CPU (ms) | ETACE CPU (ms) | CPU Speedup |
+|-------|--------------|----------------|-------------|
+| 8 | 9.27 | 0.88 | 10.6x |
+| 64 | 73.58 | 9.62 | 7.7x |
+| 256 | 297.36 | 27.09 | 11.0x |
+| 800 | 926.90 | 109.49 | **8.5x** |
+
+---
+
+## Files Created/Modified
+
+### New Files
+- `src/et_models/et_calculators.jl` - ETACEPotential, WrappedSiteCalculator, WrappedETACE, training assembly
+- `src/et_models/stackedcalc.jl` - StackedCalculator with @generated loop unrolling
+- `test/et_models/test_et_calculators.jl` - Comprehensive tests
+- `test/benchmark_comparison.jl` - Energy benchmarks (CPU + GPU)
+- `test/benchmark_forces.jl` - Forces benchmarks (CPU)
+
+### Modified Files
+- `src/et_models/et_models.jl` - Added includes for new files
+- `test/Project.toml` - Updated EquivariantTensors compat to 0.4
+
+---
+
+## Implementation Details
+
+### ETACEPotential (`et_calculators.jl`)
+
+Standalone calculator wrapping ETACE with full AtomsCalculators interface:
+
+```julia
+mutable struct ETACEPotential{MOD<:ETACE, T} <: SitePotential
+   model::MOD
+   ps::T
+   st::NamedTuple
+   rcut::Float64
+   co_ps::Any  # optional committee parameters
+end
+```
+
+Implements:
+- `potential_energy(sys, calc)`
+- `forces(sys, calc)`
+- `virial(sys, calc)`
+- `energy_forces_virial(sys, calc)`
+
+### WrappedSiteCalculator (`et_calculators.jl`)
+
+Generic wrapper for models implementing site energy interface:
+
+```julia
+struct WrappedSiteCalculator{M}
+   model::M
+end
+```
+
+Site energy interface:
+- `site_energies(model, G, ps, st) -> Vector`
+- `site_energy_grads(model, G, ps, st) -> (edge_data = [...],)`
+- `cutoff_radius(model) -> Unitful.Length`
+
+### StackedCalculator (`stackedcalc.jl`)
+
+Combines multiple AtomsCalculators using @generated functions for type-stable loop unrolling:
+
+```julia
+struct StackedCalculator{N, C<:Tuple}
+   calcs::C
+end
+
+@generated function _stacked_energy(sys::AbstractSystem, calc::StackedCalculator{N}) where {N}
+   # Generates: E_1 + E_2 + ... + E_N at compile time
+end
+```
+
+### Training Assembly (`et_calculators.jl`)
+
+Functions for linear least squares fitting:
+
+- `length_basis(calc)` - Total number of linear parameters
+- `get_linear_parameters(calc)` - Extract parameter vector
+- `set_linear_parameters!(calc, θ)` - Set parameters from vector
+- `potential_energy_basis(sys, calc)` - Energy design matrix row
+- `energy_forces_virial_basis(sys, calc)` - Full design matrix row
+
+---
+
+## Maintainer Decisions (Phase 3)
+
+**Q2: Parameter ownership** → **Option A**: PairModel owns its own `ps`/`st`
+
+**Q3: Implementation approach** → **Option B**: Create new ET-native pair implementation
+- Native GPU support
+- Consistent with ETACE architecture
+
+Maintainer will implement E0Model and PairModel given their ACE experience.
+
+---
+
+## Current State (Already Implemented)
+
+### In ACEpotentials (`src/et_models/`)
+
+**ETACE struct** (`et_ace.jl:11-16`):
+```julia
+@concrete struct ETACE <: AbstractLuxContainerLayer{(:rembed, :yembed, :basis, :readout)}
+   rembed     # radial embedding layer
+   yembed     # angular embedding layer
+   basis      # many-body basis layer
+   readout    # selectlinl readout layer
+end
+```
+
+**Core functions** (`et_ace.jl`):
+- ✅ `(l::ETACE)(X::ETGraph, ps, st)` - forward evaluation, returns site energies
+- ✅ `site_grads(l::ETACE, X::ETGraph, ps, st)` - Zygote gradient for forces
+- ✅ `site_basis(l::ETACE, X::ETGraph, ps, st)` - basis values per site
+- ✅ `site_basis_jacobian(l::ETACE, X::ETGraph, ps, st)` - basis + jacobians
+
+**Model conversion** (`convert.jl`):
+- ✅ `convert2et(model::ACEModel)` - full conversion from ACEModel to ETACE
+
+### In EquivariantTensors.jl (v0.4.0)
+
+**Atoms extension** (`ext/NeighbourListsExt.jl`):
+- ✅ `ET.Atoms.interaction_graph(sys, rcut)` - ETGraph from AtomsBase system
+- ✅ `ET.Atoms.forces_from_edge_grads(sys, G, ∇E_edges)` - edge gradients to atomic forces
+- ✅ `ET.rev_reshape_embedding` - neighbor-indexed to edge-indexed conversion
+
+---
+
+## Test Coverage
+
+Tests in `test/et_models/test_et_calculators.jl`:
+
+1. ✅ WrappedETACE site energies consistency
+2. ✅ WrappedETACE site energy gradients (finite difference)
+3. ✅ WrappedSiteCalculator AtomsCalculators interface
+4. ✅ Forces finite difference validation
+5. ✅ Virial finite difference validation
+6. ✅ ETACEPotential consistency with WrappedSiteCalculator
+7. ✅ StackedCalculator composition (E0 + ACE)
+8. ✅ Training assembly: length_basis, get/set_linear_parameters
+9. ✅ Training assembly: potential_energy_basis
+10. ✅ Training assembly: energy_forces_virial_basis
+
+---
+
+## Remaining Work
+
+### For Maintainer (Phase 3)
+
+1. **E0Model**: One-body reference energies
+   - Store E0s in state for float type conversion
+   - Implement site energy interface (zero gradients)
+
+2. **PairModel**: ET-native pair potential
+   - New implementation using `ET.Atoms` patterns
+   - GPU-compatible
+   - Implement site energy interface
+
+### Future Enhancements
+
+- GPU forces benchmark (requires GPU gradient support)
+- ACEfit.assemble dispatch integration
+- Committee support for ETACEPotential
+
+---
+
+## Notes
+
+- Virial formula: `V = -∑ ∂E/∂𝐫ij ⊗ 𝐫ij`
+- GPU time nearly constant regardless of system size (~0.5ms)
+- Forces speedup (8-11x) larger than energy speedup (1.5-2.5x) on CPU
+- StackedCalculator uses @generated functions for zero-overhead composition

From d3b9c0c21e022d77b07089b3e427347be8f090b7 Mon Sep 17 00:00:00 2001
From: James Kermode <james.kermode@gmail.com>
Date: Tue, 23 Dec 2025 20:01:32 +0000
Subject: [PATCH 08/35] Extend training assembly tests and add ACEfit
 integration
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- Add ACEfit.basis_size dispatch for ETACEPotential
- Add ACEfit to test/Project.toml
- Test training assembly on multiple structures (5 random)
- Test multi-species parameter ordering (pure Si, pure O, mixed)
- Verify species-specific basis contributions are correctly separated
- Fix soft scope warnings with local declarations

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
---
 src/et_models/et_calculators.jl       |   4 +
 test/Project.toml                     |   1 +
 test/et_models/test_et_calculators.jl | 151 +++++++++++++++++++++++++-
 3 files changed, 155 insertions(+), 1 deletion(-)

diff --git a/src/et_models/et_calculators.jl b/src/et_models/et_calculators.jl
index 383d8af55..7fbcd66b5 100644
--- a/src/et_models/et_calculators.jl
+++ b/src/et_models/et_calculators.jl
@@ -386,6 +386,10 @@ function length_basis(calc::ETACEPotential)
    return nbasis * nspecies
 end
 
+# ACEfit integration
+import ACEfit
+ACEfit.basis_size(calc::ETACEPotential) = length_basis(calc)
+
 """
     energy_forces_virial_basis(sys::AbstractSystem, calc::ETACEPotential)
 
diff --git a/test/Project.toml b/test/Project.toml
index a8d6fecb3..46ca25b8a 100644
--- a/test/Project.toml
+++ b/test/Project.toml
@@ -1,5 +1,6 @@
 [deps]
 ACEbase = "14bae519-eb20-449c-a949-9c58ed33163e"
+ACEfit = "ad31a8ef-59f5-4a01-b543-a85c2f73e95c"
 BenchmarkTools = "6e4b80f9-dd63-53aa-95a3-0cdb28fa8baf"
 CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba"
 ACEpotentials = "3b96b61c-0fcc-4693-95ed-1ef9f35fcc53"
diff --git a/test/et_models/test_et_calculators.jl b/test/et_models/test_et_calculators.jl
index 878a1a0e4..f9002b2f2 100644
--- a/test/et_models/test_et_calculators.jl
+++ b/test/et_models/test_et_calculators.jl
@@ -606,4 +606,153 @@ println("potential_energy_basis consistency: OK")
 
 ##
 
-@info("All Phase 5 tests passed!")
+@info("All Phase 5 basic tests passed!")
+
+## ============================================================================
+##  Phase 5b: Extended Training Assembly Tests
+## ============================================================================
+
+@info("Testing Phase 5b: Extended training assembly tests")
+
+##
+
+@info("Testing ACEfit.basis_size integration")
+import ACEfit
+@test ACEfit.basis_size(et_calc) == ETM.length_basis(et_calc)
+println("ACEfit.basis_size: OK")
+
+##
+
+@info("Testing training assembly on multiple structures")
+
+# Generate multiple random structures
+nstructs = 5
+test_systems = [rand_struct() for _ in 1:nstructs]
+
+all_ok = true
+for (i, sys) in enumerate(test_systems)
+   local θ = ETM.get_linear_parameters(et_calc)
+   local efv_basis = ETM.energy_forces_virial_basis(sys, et_calc)
+
+   # Check energy
+   local E_from_basis = dot(ustrip.(efv_basis.energy), θ)
+   local E_direct = ustrip(u"eV", AtomsCalculators.potential_energy(sys, et_calc))
+   if !isapprox(E_from_basis, E_direct, rtol=1e-10)
+      @warn "Energy mismatch on structure $i"
+      all_ok = false
+   end
+
+   # Check forces
+   local F_from_basis = efv_basis.forces * θ
+   local F_direct = AtomsCalculators.forces(sys, et_calc)
+   local max_diff = maximum(norm(ustrip.(f1) - ustrip.(f2)) for (f1, f2) in zip(F_from_basis, F_direct))
+   if max_diff >= 1e-10
+      @warn "Force mismatch on structure $i: max_diff = $max_diff"
+      all_ok = false
+   end
+
+   # Check virial
+   local V_from_basis = sum(θ[k] * ustrip.(efv_basis.virial[k]) for k in 1:length(θ))
+   local V_direct = ustrip.(AtomsCalculators.virial(sys, et_calc))
+   local virial_diff = maximum(abs.(V_from_basis - V_direct))
+   if virial_diff >= 1e-10
+      @warn "Virial mismatch on structure $i: max_diff = $virial_diff"
+      all_ok = false
+   end
+end
+print_tf(@test all_ok)
+println()
+println("Multiple structures ($nstructs): OK")
+
+##
+
+@info("Testing multi-species parameter ordering")
+
+# Create structures with varying species compositions
+# Pure Si
+sys_si = AtomsBuilder.bulk(:Si) * (2, 2, 1)
+rattle!(sys_si, 0.1u"Å")
+
+# Pure O (use Si lattice but with O atoms)
+sys_o = AtomsBuilder.bulk(:Si) * (2, 2, 1)
+rattle!(sys_o, 0.1u"Å")
+AtomsBuilder.randz!(sys_o, [:O => 1.0])
+
+# Mixed 50/50
+sys_mixed = AtomsBuilder.bulk(:Si) * (2, 2, 1)
+rattle!(sys_mixed, 0.1u"Å")
+AtomsBuilder.randz!(sys_mixed, [:Si => 0.5, :O => 0.5])
+
+# Mixed 25/75
+sys_mixed2 = AtomsBuilder.bulk(:Si) * (2, 2, 1)
+rattle!(sys_mixed2, 0.1u"Å")
+AtomsBuilder.randz!(sys_mixed2, [:Si => 0.25, :O => 0.75])
+
+species_test_systems = [sys_si, sys_o, sys_mixed, sys_mixed2]
+species_labels = ["Pure Si", "Pure O", "50/50 Si:O", "25/75 Si:O"]
+
+all_species_ok = true
+for (label, sys) in zip(species_labels, species_test_systems)
+   local θ = ETM.get_linear_parameters(et_calc)
+   local efv_basis = ETM.energy_forces_virial_basis(sys, et_calc)
+
+   # Check energy consistency
+   local E_from_basis = dot(ustrip.(efv_basis.energy), θ)
+   local E_direct = ustrip(u"eV", AtomsCalculators.potential_energy(sys, et_calc))
+
+   if !isapprox(E_from_basis, E_direct, rtol=1e-10)
+      @warn "Energy mismatch for $label: basis=$E_from_basis, direct=$E_direct"
+      all_species_ok = false
+   end
+
+   # Check forces
+   local F_from_basis = efv_basis.forces * θ
+   local F_direct = AtomsCalculators.forces(sys, et_calc)
+   local max_diff = maximum(norm(ustrip.(f1) - ustrip.(f2)) for (f1, f2) in zip(F_from_basis, F_direct))
+
+   if max_diff >= 1e-10
+      @warn "Force mismatch for $label: max_diff=$max_diff"
+      all_species_ok = false
+   end
+end
+print_tf(@test all_species_ok)
+println()
+println("Multi-species parameter ordering: OK")
+
+##
+
+@info("Testing species-specific basis contributions")
+
+# Verify that different species contribute to different parts of the basis
+nbasis = et_model.readout.in_dim
+nspecies = et_model.readout.ncat
+
+# For pure Si system, only Si basis should contribute
+efv_si = ETM.energy_forces_virial_basis(sys_si, et_calc)
+E_basis_si = ustrip.(efv_si.energy)
+
+# For pure O system, only O basis should contribute
+efv_o = ETM.energy_forces_virial_basis(sys_o, et_calc)
+E_basis_o = ustrip.(efv_o.energy)
+
+# Check that the patterns differ (different species activate different parameters)
+# Si uses parameters 1:nbasis, O uses parameters (nbasis+1):(2*nbasis)
+si_params = E_basis_si[1:nbasis]
+o_params_for_si = E_basis_si[(nbasis+1):end]
+o_params = E_basis_o[(nbasis+1):end]
+si_params_for_o = E_basis_o[1:nbasis]
+
+# Pure Si should have zero contribution from O parameters
+@test all(abs.(o_params_for_si) .< 1e-12)
+# Pure O should have zero contribution from Si parameters
+@test all(abs.(si_params_for_o) .< 1e-12)
+# Pure Si should have nonzero Si parameters
+@test any(abs.(si_params) .> 1e-12)
+# Pure O should have nonzero O parameters
+@test any(abs.(o_params) .> 1e-12)
+
+println("Species-specific basis contributions: OK")
+
+##
+
+@info("All Phase 5b extended tests passed!")

From e81a708bd913f17bebeb662c5826aa5bcf46b620 Mon Sep 17 00:00:00 2001
From: James Kermode <james.kermode@gmail.com>
Date: Tue, 23 Dec 2025 20:28:38 +0000
Subject: [PATCH 09/35] Add ETModels to docs and ETACE silicon integration test
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- Add ACEpotentials.ETModels to autodocs in all_exported.md
- Add comprehensive integration test for ETACE calculators based on
  test_silicon workflow
- Tests verify energy/forces/virial consistency with original ACE
- Tests verify training basis assembly and StackedCalculator composition

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
---
 docs/src/all_exported.md          |   6 +-
 test/et_models/test_et_silicon.jl | 224 ++++++++++++++++++++++++++++++
 2 files changed, 227 insertions(+), 3 deletions(-)
 create mode 100644 test/et_models/test_et_silicon.jl

diff --git a/docs/src/all_exported.md b/docs/src/all_exported.md
index 9e250d258..16dacd79e 100644
--- a/docs/src/all_exported.md
+++ b/docs/src/all_exported.md
@@ -3,13 +3,13 @@
 ### Exported
 
 ```@autodocs
-Modules = [ACEpotentials, ACEpotentials.Models, ACEpotentials.ACE1compat]
+Modules = [ACEpotentials, ACEpotentials.Models, ACEpotentials.ACE1compat, ACEpotentials.ETModels]
 Private = false
-``` 
+```
 
 ### Not exported
 
 ```@autodocs
-Modules = [ACEpotentials, ACEpotentials.Models, ACEpotentials.ACE1compat]
+Modules = [ACEpotentials, ACEpotentials.Models, ACEpotentials.ACE1compat, ACEpotentials.ETModels]
 Public = false
 ``` 
diff --git a/test/et_models/test_et_silicon.jl b/test/et_models/test_et_silicon.jl
new file mode 100644
index 000000000..2b21fb1f6
--- /dev/null
+++ b/test/et_models/test_et_silicon.jl
@@ -0,0 +1,224 @@
+# Integration test for ETACE calculators
+#
+# This test verifies that ETACE calculators produce comparable results
+# to the original ACE models when used for evaluation (not fitting).
+#
+# Note: convert2et only supports LearnableRnlrzzBasis (not SplineRnlrzzBasis),
+# so we use ace_model() directly instead of ace1_model().
+
+using Test
+using ACEpotentials
+M = ACEpotentials.Models
+ETM = ACEpotentials.ETModels
+
+using ExtXYZ, AtomsBase, Unitful, StaticArrays
+using AtomsCalculators
+using LazyArtifacts
+using LuxCore, Lux, Random, LinearAlgebra
+
+@info("ETACE Integration Test: Silicon dataset")
+
+## ----- setup -----
+
+# Build model using ace_model (LearnableRnlrzzBasis, compatible with convert2et)
+elements = (:Si,)
+level = M.TotalDegree()
+max_level = 12
+order = 3
+maxl = 6
+
+rin0cuts = M._default_rin0cuts(elements)
+rin0cuts = (x -> (rin = x.rin, r0 = x.r0, rcut = 5.5)).(rin0cuts)
+
+rng = Random.MersenneTwister(1234)
+
+ace_model = M.ace_model(; elements = elements, order = order,
+                         Ytype = :solid, level = level, max_level = max_level,
+                         maxl = maxl, pair_maxn = max_level,
+                         rin0cuts = rin0cuts,
+                         init_WB = :glorot_normal, init_Wpair = :glorot_normal)
+
+ps, st = Lux.setup(rng, ace_model)
+
+# Create ACE calculator
+model = M.ACEPotential(ace_model, ps, st)
+
+# Load dataset
+data = ExtXYZ.load(artifact"Si_tiny_dataset" * "/Si_tiny.xyz")
+
+data_keys = [:energy_key => "dft_energy",
+             :force_key  => "dft_force",
+             :virial_key => "dft_virial"]
+
+weights = Dict("default" => Dict("E"=>30.0, "F"=>1.0, "V"=>1.0),
+               "liq" => Dict("E"=>10.0, "F"=>0.66, "V"=>0.25))
+
+## ----- Fit original ACE model -----
+
+@info("Fitting original ACE model with QR solver")
+acefit!(data, model;
+        data_keys...,
+        weights = weights,
+        solver = ACEfit.QR())
+
+ace_err = ACEpotentials.compute_errors(data, model; data_keys..., weights=weights)
+@info("Original ACE RMSE (set):",
+      E=ace_err["rmse"]["set"]["E"],
+      F=ace_err["rmse"]["set"]["F"],
+      V=ace_err["rmse"]["set"]["V"])
+
+# Store for comparison
+ace_rmse_E = ace_err["rmse"]["set"]["E"]
+ace_rmse_F = ace_err["rmse"]["set"]["F"]
+ace_rmse_V = ace_err["rmse"]["set"]["V"]
+
+## ----- Convert to ETACE and compare -----
+
+@info("Converting to ETACE model")
+
+# Update ps from model after fitting
+ps = model.ps
+st = model.st
+
+# Convert to ETACE
+et_model = ETM.convert2et(ace_model)
+et_ps, et_st = LuxCore.setup(MersenneTwister(1234), et_model)
+
+# Copy radial basis parameters (single species case)
+et_ps.rembed.post.W[:, :, 1] = ps.rbasis.Wnlq[:, :, 1, 1]
+
+# Copy readout parameters
+et_ps.readout.W[1, :, 1] .= ps.WB[:, 1]
+
+# Get cutoff
+rcut = maximum(a.rcut for a in ace_model.pairbasis.rin0cuts)
+
+# Create ETACEPotential
+et_calc = ETM.ETACEPotential(et_model, et_ps, et_st, rcut)
+
+## ----- Test energy consistency -----
+
+@info("Testing energy consistency between ACE and ETACE")
+
+# Skip isolated atom (index 1) - ETACE requires at least 2 atoms for graph construction
+max_energy_diff = 0.0
+for (i, sys) in enumerate(data[2:min(11, length(data))])
+    local E_ace = ustrip(u"eV", AtomsCalculators.potential_energy(sys, model))
+    local E_etace = ustrip(u"eV", AtomsCalculators.potential_energy(sys, et_calc))
+    local diff = abs(E_ace - E_etace)
+    max_energy_diff = max(max_energy_diff, diff)
+end
+
+@info("Max energy difference: $max_energy_diff eV")
+@test max_energy_diff < 1e-10
+println("Energy consistency: OK (max_diff = $max_energy_diff eV)")
+
+## ----- Test forces consistency -----
+
+@info("Testing forces consistency between ACE and ETACE")
+
+max_force_diff = 0.0
+for (i, sys) in enumerate(data[1:min(10, length(data))])
+    F_ace = AtomsCalculators.forces(sys, model)
+    F_etace = AtomsCalculators.forces(sys, et_calc)
+    for (f1, f2) in zip(F_ace, F_etace)
+        diff = norm(ustrip.(f1) - ustrip.(f2))
+        max_force_diff = max(max_force_diff, diff)
+    end
+end
+
+@info("Max force difference: $max_force_diff eV/Å")
+@test max_force_diff < 1e-10
+println("Forces consistency: OK (max_diff = $max_force_diff eV/Å)")
+
+## ----- Test virial consistency -----
+
+@info("Testing virial consistency between ACE and ETACE")
+
+max_virial_diff = 0.0
+for (i, sys) in enumerate(data[1:min(10, length(data))])
+    V_ace = AtomsCalculators.virial(sys, model)
+    V_etace = AtomsCalculators.virial(sys, et_calc)
+    diff = maximum(abs.(ustrip.(V_ace) - ustrip.(V_etace)))
+    max_virial_diff = max(max_virial_diff, diff)
+end
+
+@info("Max virial difference: $max_virial_diff eV")
+@test max_virial_diff < 1e-9
+println("Virial consistency: OK (max_diff = $max_virial_diff eV)")
+
+## ----- Test training basis assembly -----
+
+@info("Testing training basis assembly")
+
+# Pick a test structure
+sys = data[5]
+natoms = length(sys)
+nparams = ETM.length_basis(et_calc)
+
+# Get basis
+efv_basis = ETM.energy_forces_virial_basis(sys, et_calc)
+
+# Verify shapes
+@test length(efv_basis.energy) == nparams
+@test size(efv_basis.forces) == (natoms, nparams)
+@test length(efv_basis.virial) == nparams
+
+# Verify linear combination matches direct evaluation
+θ = ETM.get_linear_parameters(et_calc)
+
+E_from_basis = dot(ustrip.(efv_basis.energy), θ)
+E_direct = ustrip(u"eV", AtomsCalculators.potential_energy(sys, et_calc))
+@test isapprox(E_from_basis, E_direct, rtol=1e-10)
+
+F_from_basis = efv_basis.forces * θ
+F_direct = AtomsCalculators.forces(sys, et_calc)
+max_F_diff = maximum(norm(ustrip.(f1) - ustrip.(f2)) for (f1, f2) in zip(F_from_basis, F_direct))
+@test max_F_diff < 1e-10
+
+V_from_basis = sum(θ[k] * ustrip.(efv_basis.virial[k]) for k in 1:nparams)
+V_direct = ustrip.(AtomsCalculators.virial(sys, et_calc))
+max_V_diff = maximum(abs.(V_from_basis - V_direct))
+@test max_V_diff < 1e-9
+
+println("Training basis assembly: OK")
+
+## ----- Test StackedCalculator with E0 -----
+
+@info("Testing StackedCalculator with E0Model")
+
+# Create E0 model with arbitrary E0 value for testing
+E0s = Dict(14 => -158.54496821)  # Si atomic number => E0
+E0_model = ETM.E0Model(E0s)
+E0_calc = ETM.WrappedSiteCalculator(E0_model)
+
+# Create wrapped ETACE
+wrapped_etace = ETM.WrappedETACE(et_model, et_ps, et_st, rcut)
+ace_calc = ETM.WrappedSiteCalculator(wrapped_etace)
+
+# Stack them
+stacked = ETM.StackedCalculator((E0_calc, ace_calc))
+
+# Test on a few structures
+for (i, sys) in enumerate(data[1:5])
+    E_E0 = AtomsCalculators.potential_energy(sys, E0_calc)
+    E_ace = AtomsCalculators.potential_energy(sys, ace_calc)
+    E_stacked = AtomsCalculators.potential_energy(sys, stacked)
+
+    expected = ustrip(E_E0) + ustrip(E_ace)
+    actual = ustrip(E_stacked)
+
+    @test isapprox(expected, actual, rtol=1e-10)
+end
+
+println("StackedCalculator: OK")
+
+## ----- Summary -----
+
+@info("All ETACE integration tests passed!")
+@info("Summary:")
+@info("  - Energy matches original ACE to < 1e-10 eV")
+@info("  - Forces match original ACE to < 1e-10 eV/Å")
+@info("  - Virial matches original ACE to < 1e-9 eV")
+@info("  - Training basis assembly verified")
+@info("  - StackedCalculator composition verified")

From f3519ff91e5c89bd917acd619cdc9ccc824fc338 Mon Sep 17 00:00:00 2001
From: James Kermode <james.kermode@gmail.com>
Date: Tue, 23 Dec 2025 21:09:49 +0000
Subject: [PATCH 10/35] Optimize energy_forces_virial_basis with pre-allocation
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- Pre-allocate gradient buffer (∇Ei_buf) outside loop, reuse across iterations
- Eliminate W_unit matrix allocation by directly copying ∂𝔹[:, i, k]
- Pre-compute zero gradient element for species masking
- Pre-extract edge vectors for virial computation
- Use zero() instead of zeros() for SMatrix virial accumulator

Performance improvement (64-atom system):
- Time: 1597ms → 422ms (3.8x faster)
- Memory: 3.4 GiB → 412 MiB (8.4x reduction)

Also fix variable scoping in test_et_silicon.jl for Julia 1.10+
(added `global` keyword for loop variable updates).

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
---
 src/et_models/et_calculators.jl   | 41 ++++++++++++++++++++++---------
 test/et_models/test_et_silicon.jl |  6 ++---
 2 files changed, 32 insertions(+), 15 deletions(-)

diff --git a/src/et_models/et_calculators.jl b/src/et_models/et_calculators.jl
index 7fbcd66b5..0dd43c67a 100644
--- a/src/et_models/et_calculators.jl
+++ b/src/et_models/et_calculators.jl
@@ -408,12 +408,16 @@ function energy_forces_virial_basis(sys::AbstractSystem, calc::ETACEPotential)
    G = ET.Atoms.interaction_graph(sys, calc.rcut * u"Å")
 
    # Get basis and jacobian
+   # 𝔹: (nnodes, nbasis) - basis values per site (Float64)
+   # ∂𝔹: (maxneigs, nnodes, nbasis) - directional derivatives (VState objects)
    𝔹, ∂𝔹 = site_basis_jacobian(calc.model, G, calc.ps, calc.st)
 
    natoms = length(sys)
+   nnodes = size(𝔹, 1)
    nbasis = calc.model.readout.in_dim
    nspecies = calc.model.readout.ncat
    nparams = nbasis * nspecies
+   maxneigs = size(∂𝔹, 1)
 
    # Species indices for each node
    iZ = calc.model.readout.selector.(G.node_data)
@@ -423,6 +427,16 @@ function energy_forces_virial_basis(sys::AbstractSystem, calc::ETACEPotential)
    F_basis = zeros(SVector{3, Float64}, natoms, nparams)
    V_basis = zeros(SMatrix{3, 3, Float64, 9}, nparams)
 
+   # Pre-allocate work buffer for gradient (same element type as ∂𝔹)
+   # This avoids allocating a new matrix in each iteration
+   ∇Ei_buf = similar(∂𝔹, maxneigs, nnodes)
+
+   # Pre-compute a zero element for masking (same type as ∂𝔹 elements)
+   zero_grad = zero(∂𝔹[1, 1, 1])
+
+   # Pre-compute edge vectors for virial (avoid repeated access)
+   edge_𝐫 = [edge.𝐫 for edge in G.edge_data]
+
    # Compute basis values for each parameter (k, s) pair
    # Parameter index: p = (s-1) * nbasis + k
    for s in 1:nspecies
@@ -430,21 +444,24 @@ function energy_forces_virial_basis(sys::AbstractSystem, calc::ETACEPotential)
          p = (s - 1) * nbasis + k
 
          # Energy basis: sum of 𝔹[i, k] for atoms of species s
-         for i in 1:length(G.node_data)
+         for i in 1:nnodes
             if iZ[i] == s
                E_basis[p] += 𝔹[i, k]
             end
          end
 
-         # Create unit weight: W[1, k, s] = 1, others = 0
-         # Then compute edge gradients and convert to forces/virial
-         W_unit = zeros(1, nbasis, nspecies)
-         W_unit[1, k, s] = 1.0
+         # Fill gradient buffer: ∇Ei[:, i] = ∂𝔹[:, i, k] if iZ[i] == s, else zeros
+         # This avoids allocating W_unit and doing matrix-vector multiply
+         for i in 1:nnodes
+            if iZ[i] == s
+               @views ∇Ei_buf[:, i] .= ∂𝔹[:, i, k]
+            else
+               @views ∇Ei_buf[:, i] .= Ref(zero_grad)
+            end
+         end
 
-         # Compute edge gradients using the reconstruction pattern
-         # ∇Ei = ∂𝔹[:, i, :] * W[1, :, iZ[i]] for each node i
-         ∇Ei = reduce(hcat, ∂𝔹[:, i, :] * W_unit[1, :, iZ[i]] for i in 1:length(iZ))
-         ∇Ei_3d = reshape(∇Ei, size(∇Ei)..., 1)
+         # Reshape for rev_reshape_embedding (needs 3D array) - this is a view, no allocation
+         ∇Ei_3d = reshape(∇Ei_buf, maxneigs, nnodes, 1)
 
          # Convert to edge-indexed format with 3D vectors
          ∇E_edges = ET.rev_reshape_embedding(∇Ei_3d, G)[:]
@@ -453,9 +470,9 @@ function energy_forces_virial_basis(sys::AbstractSystem, calc::ETACEPotential)
          F_basis[:, p] = -ET.Atoms.forces_from_edge_grads(sys, G, ∇E_edges)
 
          # Compute virial: V = -∑ (∂E/∂𝐫ij) ⊗ 𝐫ij
-         V = zeros(SMatrix{3, 3, Float64, 9})
-         for (edge, ∂edge) in zip(G.edge_data, ∇E_edges)
-            V -= ∂edge.𝐫 * edge.𝐫'
+         V = zero(SMatrix{3, 3, Float64, 9})
+         for (e, ∂edge) in enumerate(∇E_edges)
+            V -= ∂edge.𝐫 * edge_𝐫[e]'
          end
          V_basis[p] = V
       end
diff --git a/test/et_models/test_et_silicon.jl b/test/et_models/test_et_silicon.jl
index 2b21fb1f6..db9bd9e15 100644
--- a/test/et_models/test_et_silicon.jl
+++ b/test/et_models/test_et_silicon.jl
@@ -106,7 +106,7 @@ for (i, sys) in enumerate(data[2:min(11, length(data))])
     local E_ace = ustrip(u"eV", AtomsCalculators.potential_energy(sys, model))
     local E_etace = ustrip(u"eV", AtomsCalculators.potential_energy(sys, et_calc))
     local diff = abs(E_ace - E_etace)
-    max_energy_diff = max(max_energy_diff, diff)
+    global max_energy_diff = max(max_energy_diff, diff)
 end
 
 @info("Max energy difference: $max_energy_diff eV")
@@ -123,7 +123,7 @@ for (i, sys) in enumerate(data[1:min(10, length(data))])
     F_etace = AtomsCalculators.forces(sys, et_calc)
     for (f1, f2) in zip(F_ace, F_etace)
         diff = norm(ustrip.(f1) - ustrip.(f2))
-        max_force_diff = max(max_force_diff, diff)
+        global max_force_diff = max(max_force_diff, diff)
     end
 end
 
@@ -140,7 +140,7 @@ for (i, sys) in enumerate(data[1:min(10, length(data))])
     V_ace = AtomsCalculators.virial(sys, model)
     V_etace = AtomsCalculators.virial(sys, et_calc)
     diff = maximum(abs.(ustrip.(V_ace) - ustrip.(V_etace)))
-    max_virial_diff = max(max_virial_diff, diff)
+    global max_virial_diff = max(max_virial_diff, diff)
 end
 
 @info("Max virial difference: $max_virial_diff eV")

From 99f2d983a0a11e58c7b7c43a41990f5123005861 Mon Sep 17 00:00:00 2001
From: James Kermode <james.kermode@gmail.com>
Date: Tue, 23 Dec 2025 21:27:41 +0000
Subject: [PATCH 11/35] Fix ETACE integration test: compare many-body only
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

ETACE only implements the many-body basis, not the pair potential.
The test was incorrectly comparing full ACE (with pair) against ETACE.

Changes:
- Create model_nopair with Wpair=0 for fair comparison
- Compare ETACE against ACE many-body contribution only
- Fix E0Model constructor: use Symbol key (:Si) not Int (14)
- Skip isolated atoms in all tests (ETACE requires >= 2 atoms)
- Update test comments and summary to clarify scope

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
---
 test/et_models/test_et_silicon.jl | 39 ++++++++++++++++++-------------
 1 file changed, 23 insertions(+), 16 deletions(-)

diff --git a/test/et_models/test_et_silicon.jl b/test/et_models/test_et_silicon.jl
index db9bd9e15..2d0949fbe 100644
--- a/test/et_models/test_et_silicon.jl
+++ b/test/et_models/test_et_silicon.jl
@@ -1,10 +1,11 @@
 # Integration test for ETACE calculators
 #
-# This test verifies that ETACE calculators produce comparable results
-# to the original ACE models when used for evaluation (not fitting).
+# This test verifies that ETACE calculators produce identical results
+# to the many-body part of ACE models (excluding pair potential).
 #
 # Note: convert2et only supports LearnableRnlrzzBasis (not SplineRnlrzzBasis),
 # so we use ace_model() directly instead of ace1_model().
+# ETACE implements only the many-body basis, not the pair potential.
 
 using Test
 using ACEpotentials
@@ -96,14 +97,19 @@ rcut = maximum(a.rcut for a in ace_model.pairbasis.rin0cuts)
 # Create ETACEPotential
 et_calc = ETM.ETACEPotential(et_model, et_ps, et_st, rcut)
 
+# Create ACE model WITHOUT pair potential for fair comparison
+# (ETACE only implements the many-body basis, not the pair potential)
+ps_nopair = merge(ps, (Wpair = zeros(size(ps.Wpair)),))
+model_nopair = M.ACEPotential(ace_model, ps_nopair, st)
+
 ## ----- Test energy consistency -----
 
-@info("Testing energy consistency between ACE and ETACE")
+@info("Testing energy consistency between ACE (no pair) and ETACE")
 
 # Skip isolated atom (index 1) - ETACE requires at least 2 atoms for graph construction
 max_energy_diff = 0.0
 for (i, sys) in enumerate(data[2:min(11, length(data))])
-    local E_ace = ustrip(u"eV", AtomsCalculators.potential_energy(sys, model))
+    local E_ace = ustrip(u"eV", AtomsCalculators.potential_energy(sys, model_nopair))
     local E_etace = ustrip(u"eV", AtomsCalculators.potential_energy(sys, et_calc))
     local diff = abs(E_ace - E_etace)
     global max_energy_diff = max(max_energy_diff, diff)
@@ -115,11 +121,11 @@ println("Energy consistency: OK (max_diff = $max_energy_diff eV)")
 
 ## ----- Test forces consistency -----
 
-@info("Testing forces consistency between ACE and ETACE")
+@info("Testing forces consistency between ACE (no pair) and ETACE")
 
 max_force_diff = 0.0
-for (i, sys) in enumerate(data[1:min(10, length(data))])
-    F_ace = AtomsCalculators.forces(sys, model)
+for (i, sys) in enumerate(data[2:min(10, length(data))])
+    F_ace = AtomsCalculators.forces(sys, model_nopair)
     F_etace = AtomsCalculators.forces(sys, et_calc)
     for (f1, f2) in zip(F_ace, F_etace)
         diff = norm(ustrip.(f1) - ustrip.(f2))
@@ -133,11 +139,11 @@ println("Forces consistency: OK (max_diff = $max_force_diff eV/Å)")
 
 ## ----- Test virial consistency -----
 
-@info("Testing virial consistency between ACE and ETACE")
+@info("Testing virial consistency between ACE (no pair) and ETACE")
 
 max_virial_diff = 0.0
-for (i, sys) in enumerate(data[1:min(10, length(data))])
-    V_ace = AtomsCalculators.virial(sys, model)
+for (i, sys) in enumerate(data[2:min(10, length(data))])
+    V_ace = AtomsCalculators.virial(sys, model_nopair)
     V_etace = AtomsCalculators.virial(sys, et_calc)
     diff = maximum(abs.(ustrip.(V_ace) - ustrip.(V_etace)))
     global max_virial_diff = max(max_virial_diff, diff)
@@ -188,7 +194,7 @@ println("Training basis assembly: OK")
 @info("Testing StackedCalculator with E0Model")
 
 # Create E0 model with arbitrary E0 value for testing
-E0s = Dict(14 => -158.54496821)  # Si atomic number => E0
+E0s = Dict(:Si => -158.54496821)  # Si symbol => E0
 E0_model = ETM.E0Model(E0s)
 E0_calc = ETM.WrappedSiteCalculator(E0_model)
 
@@ -199,8 +205,8 @@ ace_calc = ETM.WrappedSiteCalculator(wrapped_etace)
 # Stack them
 stacked = ETM.StackedCalculator((E0_calc, ace_calc))
 
-# Test on a few structures
-for (i, sys) in enumerate(data[1:5])
+# Test on a few structures (skip isolated atom)
+for (i, sys) in enumerate(data[2:5])
     E_E0 = AtomsCalculators.potential_energy(sys, E0_calc)
     E_ace = AtomsCalculators.potential_energy(sys, ace_calc)
     E_stacked = AtomsCalculators.potential_energy(sys, stacked)
@@ -217,8 +223,9 @@ println("StackedCalculator: OK")
 
 @info("All ETACE integration tests passed!")
 @info("Summary:")
-@info("  - Energy matches original ACE to < 1e-10 eV")
-@info("  - Forces match original ACE to < 1e-10 eV/Å")
-@info("  - Virial matches original ACE to < 1e-9 eV")
+@info("  - Energy matches ACE (many-body only) to < 1e-10 eV")
+@info("  - Forces match ACE (many-body only) to < 1e-10 eV/Å")
+@info("  - Virial matches ACE (many-body only) to < 1e-9 eV")
 @info("  - Training basis assembly verified")
 @info("  - StackedCalculator composition verified")
+@info("Note: ETACE implements only the many-body basis, not the pair potential.")

From c64168bc1a18ad7ecf5ea3041e5ffb8099d3ad36 Mon Sep 17 00:00:00 2001
From: James Kermode <james.kermode@gmail.com>
Date: Wed, 24 Dec 2025 00:19:41 +0000
Subject: [PATCH 12/35] Refactor et_calculators.jl and stackedcalc.jl to reduce
 duplication
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- Extract core helpers (_core_site_energies, _core_site_grads) for shared
  evaluation logic between ETACEPotential and WrappedETACE
- Refactor WrappedETACE and ETACEPotential to use core helpers
- Simplify stackedcalc.jl: replace manual AST building (_gen_sum,
  _gen_broadcast_sum) with idiomatic @nexprs/@ntuple from Base.Cartesian
- Net reduction of ~50 lines while maintaining identical behavior

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
---
 src/et_models/et_calculators.jl | 58 +++++++++++++++++-----------
 src/et_models/stackedcalc.jl    | 67 ++++++---------------------------
 2 files changed, 49 insertions(+), 76 deletions(-)

diff --git a/src/et_models/et_calculators.jl b/src/et_models/et_calculators.jl
index 0dd43c67a..d9d482ff9 100644
--- a/src/et_models/et_calculators.jl
+++ b/src/et_models/et_calculators.jl
@@ -123,12 +123,11 @@ cutoff_radius(w::WrappedETACE) = w.rcut
 
 function site_energies(w::WrappedETACE, G::ET.ETGraph, ps, st)
    # Use wrapper's ps/st, ignore passed ones (they're for StackedCalculator dispatch)
-   Ei, _ = w.model(G, w.ps, w.st)
-   return Ei
+   return _core_site_energies(w.model, G, w.ps, w.st)
 end
 
 function site_energy_grads(w::WrappedETACE, G::ET.ETGraph, ps, st)
-   return site_grads(w.model, G, w.ps, w.st)
+   return _core_site_grads(w.model, G, w.ps, w.st)
 end
 
 
@@ -292,42 +291,59 @@ function _compute_virial(G::ET.ETGraph, ∂G)
    return V
 end
 
+# ============================================================================
+#  Core Evaluation Helpers (shared by ETACEPotential and WrappedSiteCalculator)
+# ============================================================================
+
+"""
+    _core_site_energies(model::ETACE, G::ET.ETGraph, ps, st)
+
+Core site energy computation: forward pass through ETACE model.
+Returns per-site energies (vector of length nnodes(G)).
+"""
+function _core_site_energies(model::ETACE, G::ET.ETGraph, ps, st)
+   Ei, _ = model(G, ps, st)
+   return Ei
+end
+
+"""
+    _core_site_grads(model::ETACE, G::ET.ETGraph, ps, st)
+
+Core site gradient computation: backward pass for forces/virial.
+Returns named tuple with edge_data containing gradient vectors.
+"""
+function _core_site_grads(model::ETACE, G::ET.ETGraph, ps, st)
+   return site_grads(model, G, ps, st)
+end
+
 function _evaluate_energy(calc::ETACEPotential, sys::AbstractSystem)
    G = ET.Atoms.interaction_graph(sys, calc.rcut * u"Å")
-   Ei, _ = calc.model(G, calc.ps, calc.st)
+   Ei = _core_site_energies(calc.model, G, calc.ps, calc.st)
    return sum(Ei)
 end
 
 function _evaluate_forces(calc::ETACEPotential, sys::AbstractSystem)
    G = ET.Atoms.interaction_graph(sys, calc.rcut * u"Å")
-   ∂G = site_grads(calc.model, G, calc.ps, calc.st)
+   ∂G = _core_site_grads(calc.model, G, calc.ps, calc.st)
    # Note: forces_from_edge_grads returns +∇E, we need -∇E for forces
    return -ET.Atoms.forces_from_edge_grads(sys, G, ∂G.edge_data)
 end
 
 function _evaluate_virial(calc::ETACEPotential, sys::AbstractSystem)
    G = ET.Atoms.interaction_graph(sys, calc.rcut * u"Å")
-   ∂G = site_grads(calc.model, G, calc.ps, calc.st)
+   ∂G = _core_site_grads(calc.model, G, calc.ps, calc.st)
    return _compute_virial(G, ∂G)
 end
 
 function _energy_forces_virial(calc::ETACEPotential, sys::AbstractSystem)
    G = ET.Atoms.interaction_graph(sys, calc.rcut * u"Å")
-
-   # Forward pass for energy
-   Ei, _ = calc.model(G, calc.ps, calc.st)
-   E = sum(Ei)
-
-   # Backward pass for gradients (forces and virial)
-   ∂G = site_grads(calc.model, G, calc.ps, calc.st)
-
-   # Forces from edge gradients (negate since forces_from_edge_grads returns +∇E)
-   F = -ET.Atoms.forces_from_edge_grads(sys, G, ∂G.edge_data)
-
-   # Virial from edge gradients
-   V = _compute_virial(G, ∂G)
-
-   return (energy=E, forces=F, virial=V)
+   Ei = _core_site_energies(calc.model, G, calc.ps, calc.st)
+   ∂G = _core_site_grads(calc.model, G, calc.ps, calc.st)
+   return (
+      energy = sum(Ei),
+      forces = -ET.Atoms.forces_from_edge_grads(sys, G, ∂G.edge_data),
+      virial = _compute_virial(G, ∂G)
+   )
 end
 
 # ============================================================================
diff --git a/src/et_models/stackedcalc.jl b/src/et_models/stackedcalc.jl
index d58a41cf7..ab73186d9 100644
--- a/src/et_models/stackedcalc.jl
+++ b/src/et_models/stackedcalc.jl
@@ -60,78 +60,35 @@ end
 #  Efficient implementations using @generated for compile-time unrolling
 # ============================================================================
 
-# Helper to generate sum expression: E_1 + E_2 + ... + E_N
-function _gen_sum(N, prefix)
-   if N == 1
-      return Symbol(prefix, "_1")
-   else
-      ex = Symbol(prefix, "_1")
-      for i in 2:N
-         ex = :($ex + $(Symbol(prefix, "_", i)))
-      end
-      return ex
-   end
-end
-
-# Helper to generate broadcast sum: F_1 .+ F_2 .+ ... .+ F_N
-function _gen_broadcast_sum(N, prefix)
-   if N == 1
-      return Symbol(prefix, "_1")
-   else
-      ex = Symbol(prefix, "_1")
-      for i in 2:N
-         ex = :($ex .+ $(Symbol(prefix, "_", i)))
-      end
-      return ex
-   end
-end
-
 @generated function _stacked_energy(sys::AbstractSystem, calc::StackedCalculator{N}) where {N}
-   assignments = [:($(Symbol("E_", i)) = AtomsCalculators.potential_energy(sys, calc.calcs[$i])) for i in 1:N]
-   sum_expr = _gen_sum(N, "E")
    quote
-      $(assignments...)
-      return $sum_expr
+      @nexprs $N i -> E_i = AtomsCalculators.potential_energy(sys, calc.calcs[i])
+      return sum(@ntuple $N E)
    end
 end
 
 @generated function _stacked_forces(sys::AbstractSystem, calc::StackedCalculator{N}) where {N}
-   assignments = [:($(Symbol("F_", i)) = AtomsCalculators.forces(sys, calc.calcs[$i])) for i in 1:N]
-   sum_expr = _gen_broadcast_sum(N, "F")
    quote
-      $(assignments...)
-      return $sum_expr
+      @nexprs $N i -> F_i = AtomsCalculators.forces(sys, calc.calcs[i])
+      return reduce(.+, @ntuple $N F)
    end
 end
 
 @generated function _stacked_virial(sys::AbstractSystem, calc::StackedCalculator{N}) where {N}
-   assignments = [:($(Symbol("V_", i)) = AtomsCalculators.virial(sys, calc.calcs[$i])) for i in 1:N]
-   sum_expr = _gen_sum(N, "V")
    quote
-      $(assignments...)
-      return $sum_expr
+      @nexprs $N i -> V_i = AtomsCalculators.virial(sys, calc.calcs[i])
+      return sum(@ntuple $N V)
    end
 end
 
 @generated function _stacked_efv(sys::AbstractSystem, calc::StackedCalculator{N}) where {N}
-   # Generate assignments for each calculator
-   assignments = [:($(Symbol("efv_", i)) = AtomsCalculators.energy_forces_virial(sys, calc.calcs[$i])) for i in 1:N]
-
-   # Generate sum expressions
-   E_exprs = [:($(Symbol("efv_", i)).energy) for i in 1:N]
-   F_exprs = [:($(Symbol("efv_", i)).forces) for i in 1:N]
-   V_exprs = [:($(Symbol("efv_", i)).virial) for i in 1:N]
-
-   E_sum = N == 1 ? E_exprs[1] : reduce((a, b) -> :($a + $b), E_exprs)
-   F_sum = N == 1 ? F_exprs[1] : reduce((a, b) -> :($a .+ $b), F_exprs)
-   V_sum = N == 1 ? V_exprs[1] : reduce((a, b) -> :($a + $b), V_exprs)
-
    quote
-      $(assignments...)
-      E_total = $E_sum
-      F_total = $F_sum
-      V_total = $V_sum
-      return (energy=E_total, forces=F_total, virial=V_total)
+      @nexprs $N i -> efv_i = AtomsCalculators.energy_forces_virial(sys, calc.calcs[i])
+      return (
+         energy = sum(@ntuple $N i -> efv_i.energy),
+         forces = reduce(.+, @ntuple $N i -> efv_i.forces),
+         virial = sum(@ntuple $N i -> efv_i.virial)
+      )
    end
 end
 

From 40bf060e3d5ef8de89fa8b635dabfaa213d931c4 Mon Sep 17 00:00:00 2001
From: James Kermode <james.kermode@gmail.com>
Date: Wed, 24 Dec 2025 09:02:39 +0000
Subject: [PATCH 13/35] Unify ETACEPotential as type alias for
 WrappedSiteCalculator
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Removes duplicate AtomsCalculators interface and evaluation logic by:
- Making WrappedETACE mutable with co_ps field for training
- Defining ETACEPotential as const alias for WrappedSiteCalculator{WrappedETACE}
- Removing duplicate _evaluate_* functions and AtomsCalculators methods
- Adding accessor helpers (_etace, _ps, _st) for training functions

The evaluation now flows through WrappedSiteCalculator's generic methods
which call site_energies/site_energy_grads on the WrappedETACE model.
This reduces ~66 lines of duplicated code.

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
---
 src/et_models/et_calculators.jl       | 205 +++++++++-----------------
 test/et_models/test_et_calculators.jl |   6 +-
 2 files changed, 72 insertions(+), 139 deletions(-)

diff --git a/src/et_models/et_calculators.jl b/src/et_models/et_calculators.jl
index d9d482ff9..d3c850249 100644
--- a/src/et_models/et_calculators.jl
+++ b/src/et_models/et_calculators.jl
@@ -102,32 +102,41 @@ end
 # ============================================================================
 
 """
-    WrappedETACE{MOD<:ETACE, T}
+    WrappedETACE{MOD<:ETACE, PS, ST}
 
 Wraps an ETACE model to implement the SiteEnergyModel interface.
+Mutable to allow parameter updates during training.
 
 # Fields
 - `model::ETACE` - The underlying ETACE model
-- `ps` - Model parameters
+- `ps` - Model parameters (mutable for training)
 - `st` - Model state
 - `rcut::Float64` - Cutoff radius in Ångström
+- `co_ps` - Optional committee parameters for uncertainty quantification
 """
-struct WrappedETACE{MOD<:ETACE, PS, ST}
+mutable struct WrappedETACE{MOD<:ETACE, PS, ST}
    model::MOD
    ps::PS
    st::ST
    rcut::Float64
+   co_ps::Any
+end
+
+# Constructor without committee parameters
+function WrappedETACE(model::ETACE, ps, st, rcut::Real)
+   return WrappedETACE(model, ps, st, Float64(rcut), nothing)
 end
 
 cutoff_radius(w::WrappedETACE) = w.rcut
 
 function site_energies(w::WrappedETACE, G::ET.ETGraph, ps, st)
    # Use wrapper's ps/st, ignore passed ones (they're for StackedCalculator dispatch)
-   return _core_site_energies(w.model, G, w.ps, w.st)
+   Ei, _ = w.model(G, w.ps, w.st)
+   return Ei
 end
 
 function site_energy_grads(w::WrappedETACE, G::ET.ETGraph, ps, st)
-   return _core_site_grads(w.model, G, w.ps, w.st)
+   return site_grads(w.model, G, w.ps, w.st)
 end
 
 
@@ -187,6 +196,16 @@ function _wrapped_forces(calc::WrappedSiteCalculator, sys::AbstractSystem)
    return -ET.Atoms.forces_from_edge_grads(sys, G, ∂G.edge_data)
 end
 
+# Compute virial tensor from edge gradients
+function _compute_virial(G::ET.ETGraph, ∂G)
+   # V = -∑ (∂E/∂𝐫ij) ⊗ 𝐫ij
+   V = zeros(SMatrix{3,3,Float64,9})
+   for (edge, ∂edge) in zip(G.edge_data, ∂G.edge_data)
+      V -= ∂edge.𝐫 * edge.𝐫'
+   end
+   return V
+end
+
 function _wrapped_virial(calc::WrappedSiteCalculator, sys::AbstractSystem)
    G = ET.Atoms.interaction_graph(sys, calc.rcut * u"Å")
    ∂G = site_energy_grads(calc.model, G, nothing, nothing)
@@ -247,135 +266,37 @@ end
 
 
 # ============================================================================
-#  ETACEPotential - Standalone calculator for ETACE models
+#  ETACEPotential - Type alias for WrappedSiteCalculator{WrappedETACE}
 # ============================================================================
 
 """
     ETACEPotential
 
 AtomsCalculators-compatible calculator wrapping an ETACE model.
+This is a type alias for `WrappedSiteCalculator{<:WrappedETACE}`.
 
-# Fields
-- `model::ETACE` - The ETACE model
-- `ps` - Model parameters
-- `st` - Model state
-- `rcut::Float64` - Cutoff radius in Ångström
-- `co_ps` - Optional committee parameters for uncertainty quantification
-"""
-mutable struct ETACEPotential{MOD<:ETACE, T}
-   model::MOD
-   ps::T
-   st::NamedTuple
-   rcut::Float64
-   co_ps::Any
-end
-
-# Constructor without committee parameters
-function ETACEPotential(model::ETACE, ps, st, rcut::Real)
-   return ETACEPotential(model, ps, st, Float64(rcut), nothing)
-end
-
-# Cutoff radius accessor
-cutoff_radius(calc::ETACEPotential) = calc.rcut * u"Å"
-
-# ============================================================================
-#  Internal evaluation functions
-# ============================================================================
-
-function _compute_virial(G::ET.ETGraph, ∂G)
-   # V = -∑ (∂E/∂𝐫ij) ⊗ 𝐫ij
-   V = zeros(SMatrix{3,3,Float64,9})
-   for (edge, ∂edge) in zip(G.edge_data, ∂G.edge_data)
-      V -= ∂edge.𝐫 * edge.𝐫'
-   end
-   return V
-end
-
-# ============================================================================
-#  Core Evaluation Helpers (shared by ETACEPotential and WrappedSiteCalculator)
-# ============================================================================
-
-"""
-    _core_site_energies(model::ETACE, G::ET.ETGraph, ps, st)
+Access underlying components via:
+- `calc.model` - The WrappedETACE wrapper
+- `calc.model.model` - The ETACE model
+- `calc.model.ps` - Model parameters
+- `calc.model.st` - Model state
+- `calc.rcut` - Cutoff radius in Ångström
+- `calc.model.co_ps` - Committee parameters (optional)
 
-Core site energy computation: forward pass through ETACE model.
-Returns per-site energies (vector of length nnodes(G)).
-"""
-function _core_site_energies(model::ETACE, G::ET.ETGraph, ps, st)
-   Ei, _ = model(G, ps, st)
-   return Ei
-end
-
-"""
-    _core_site_grads(model::ETACE, G::ET.ETGraph, ps, st)
-
-Core site gradient computation: backward pass for forces/virial.
-Returns named tuple with edge_data containing gradient vectors.
+# Example
+```julia
+calc = ETACEPotential(et_model, ps, st, 5.5)
+E = potential_energy(sys, calc)
+```
 """
-function _core_site_grads(model::ETACE, G::ET.ETGraph, ps, st)
-   return site_grads(model, G, ps, st)
-end
-
-function _evaluate_energy(calc::ETACEPotential, sys::AbstractSystem)
-   G = ET.Atoms.interaction_graph(sys, calc.rcut * u"Å")
-   Ei = _core_site_energies(calc.model, G, calc.ps, calc.st)
-   return sum(Ei)
-end
-
-function _evaluate_forces(calc::ETACEPotential, sys::AbstractSystem)
-   G = ET.Atoms.interaction_graph(sys, calc.rcut * u"Å")
-   ∂G = _core_site_grads(calc.model, G, calc.ps, calc.st)
-   # Note: forces_from_edge_grads returns +∇E, we need -∇E for forces
-   return -ET.Atoms.forces_from_edge_grads(sys, G, ∂G.edge_data)
-end
-
-function _evaluate_virial(calc::ETACEPotential, sys::AbstractSystem)
-   G = ET.Atoms.interaction_graph(sys, calc.rcut * u"Å")
-   ∂G = _core_site_grads(calc.model, G, calc.ps, calc.st)
-   return _compute_virial(G, ∂G)
-end
+const ETACEPotential{MOD<:ETACE, PS, ST} = WrappedSiteCalculator{WrappedETACE{MOD, PS, ST}}
 
-function _energy_forces_virial(calc::ETACEPotential, sys::AbstractSystem)
-   G = ET.Atoms.interaction_graph(sys, calc.rcut * u"Å")
-   Ei = _core_site_energies(calc.model, G, calc.ps, calc.st)
-   ∂G = _core_site_grads(calc.model, G, calc.ps, calc.st)
-   return (
-      energy = sum(Ei),
-      forces = -ET.Atoms.forces_from_edge_grads(sys, G, ∂G.edge_data),
-      virial = _compute_virial(G, ∂G)
-   )
-end
-
-# ============================================================================
-#  AtomsCalculators interface
-# ============================================================================
-
-AtomsCalculators.@generate_interface function AtomsCalculators.potential_energy(
-      sys::AbstractSystem, calc::ETACEPotential; kwargs...)
-   return _evaluate_energy(calc, sys) * u"eV"
-end
-
-AtomsCalculators.@generate_interface function AtomsCalculators.forces(
-      sys::AbstractSystem, calc::ETACEPotential; kwargs...)
-   return _evaluate_forces(calc, sys) .* u"eV/Å"
-end
-
-AtomsCalculators.@generate_interface function AtomsCalculators.virial(
-      sys::AbstractSystem, calc::ETACEPotential; kwargs...)
-   return _evaluate_virial(calc, sys) * u"eV"
-end
-
-function AtomsCalculators.energy_forces_virial(
-      sys::AbstractSystem, calc::ETACEPotential; kwargs...)
-   efv = _energy_forces_virial(calc, sys)
-   return (
-      energy = efv.energy * u"eV",
-      forces = efv.forces .* u"eV/Å",
-      virial = efv.virial * u"eV"
-   )
+# Constructor: creates WrappedSiteCalculator wrapping WrappedETACE
+function ETACEPotential(model::ETACE, ps, st, rcut::Real)
+   wrapped = WrappedETACE(model, ps, st, rcut)
+   return WrappedSiteCalculator(wrapped, Float64(rcut))
 end
 
-
 # ============================================================================
 #  Training Assembly Interface
 # ============================================================================
@@ -391,14 +312,20 @@ end
 # Force basis:  F_atom = -∑ edges ∂E/∂r_edge, computed per basis function
 # Virial basis: V = -∑ edges (∂E/∂r_edge) ⊗ r_edge, computed per basis function
 
+# Accessor helpers for ETACEPotential (which is WrappedSiteCalculator{WrappedETACE})
+_etace(calc::ETACEPotential) = calc.model.model      # Underlying ETACE model
+_ps(calc::ETACEPotential) = calc.model.ps            # Model parameters
+_st(calc::ETACEPotential) = calc.model.st            # Model state
+
 """
     length_basis(calc::ETACEPotential)
 
 Return the number of linear parameters in the model (nbasis * nspecies).
 """
 function length_basis(calc::ETACEPotential)
-   nbasis = calc.model.readout.in_dim
-   nspecies = calc.model.readout.ncat
+   etace = _etace(calc)
+   nbasis = etace.readout.in_dim
+   nspecies = etace.readout.ncat
    return nbasis * nspecies
 end
 
@@ -422,21 +349,22 @@ The linear combination of basis values with parameters gives:
 """
 function energy_forces_virial_basis(sys::AbstractSystem, calc::ETACEPotential)
    G = ET.Atoms.interaction_graph(sys, calc.rcut * u"Å")
+   etace = _etace(calc)
 
    # Get basis and jacobian
    # 𝔹: (nnodes, nbasis) - basis values per site (Float64)
    # ∂𝔹: (maxneigs, nnodes, nbasis) - directional derivatives (VState objects)
-   𝔹, ∂𝔹 = site_basis_jacobian(calc.model, G, calc.ps, calc.st)
+   𝔹, ∂𝔹 = site_basis_jacobian(etace, G, _ps(calc), _st(calc))
 
    natoms = length(sys)
    nnodes = size(𝔹, 1)
-   nbasis = calc.model.readout.in_dim
-   nspecies = calc.model.readout.ncat
+   nbasis = etace.readout.in_dim
+   nspecies = etace.readout.ncat
    nparams = nbasis * nspecies
    maxneigs = size(∂𝔹, 1)
 
    # Species indices for each node
-   iZ = calc.model.readout.selector.(G.node_data)
+   iZ = etace.readout.selector.(G.node_data)
 
    # Initialize outputs
    E_basis = zeros(nparams)
@@ -508,16 +436,17 @@ Compute only the energy basis (faster when forces/virial not needed).
 """
 function potential_energy_basis(sys::AbstractSystem, calc::ETACEPotential)
    G = ET.Atoms.interaction_graph(sys, calc.rcut * u"Å")
+   etace = _etace(calc)
 
    # Get basis values
-   𝔹 = site_basis(calc.model, G, calc.ps, calc.st)
+   𝔹 = site_basis(etace, G, _ps(calc), _st(calc))
 
-   nbasis = calc.model.readout.in_dim
-   nspecies = calc.model.readout.ncat
+   nbasis = etace.readout.in_dim
+   nspecies = etace.readout.ncat
    nparams = nbasis * nspecies
 
    # Species indices for each node
-   iZ = calc.model.readout.selector.(G.node_data)
+   iZ = etace.readout.selector.(G.node_data)
 
    # Compute energy basis
    E_basis = zeros(nparams)
@@ -542,7 +471,7 @@ Extract the linear parameters (readout weights) as a flat vector.
 Parameters are ordered as: [W[1,:,1]; W[1,:,2]; ... ; W[1,:,nspecies]]
 """
 function get_linear_parameters(calc::ETACEPotential)
-   return vec(calc.ps.readout.W)
+   return vec(_ps(calc).readout.W)
 end
 
 """
@@ -551,13 +480,15 @@ end
 Set the linear parameters (readout weights) from a flat vector.
 """
 function set_linear_parameters!(calc::ETACEPotential, θ::AbstractVector)
-   nbasis = calc.model.readout.in_dim
-   nspecies = calc.model.readout.ncat
+   etace = _etace(calc)
+   nbasis = etace.readout.in_dim
+   nspecies = etace.readout.ncat
    @assert length(θ) == nbasis * nspecies
 
-   # Reshape and copy into ps
+   # Reshape and copy into ps (via the WrappedETACE which is mutable)
+   ps = _ps(calc)
    new_W = reshape(θ, 1, nbasis, nspecies)
-   calc.ps = merge(calc.ps, (readout = merge(calc.ps.readout, (W = new_W,)),))
+   calc.model.ps = merge(ps, (readout = merge(ps.readout, (W = new_W,)),))
    return calc
 end
 
diff --git a/test/et_models/test_et_calculators.jl b/test/et_models/test_et_calculators.jl
index f9002b2f2..e1ccfbd8a 100644
--- a/test/et_models/test_et_calculators.jl
+++ b/test/et_models/test_et_calculators.jl
@@ -77,11 +77,13 @@ end
 @info("Testing ETACEPotential construction")
 
 # Create calculator from ETACE model
+# ETACEPotential is now WrappedSiteCalculator{WrappedETACE}
 et_calc = ETM.ETACEPotential(et_model, et_ps, et_st, rcut)
 
-@test et_calc.model === et_model
+# Access underlying ETACE via calc.model.model (calc.model is WrappedETACE)
+@test et_calc.model.model === et_model
 @test et_calc.rcut == rcut
-@test et_calc.co_ps === nothing
+@test et_calc.model.co_ps === nothing
 println("ETACEPotential construction: OK")
 
 ##

From 491b7ba8555a8f9f343d44793cbdd321fa993024 Mon Sep 17 00:00:00 2001
From: James Kermode <james.kermode@gmail.com>
Date: Tue, 30 Dec 2025 21:19:40 +0000
Subject: [PATCH 14/35] Update development plan: unified architecture (remove
 E0Model)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- Remove duplicate E0Model in favor of upstream ETOneBody
- Unify WrappedSiteCalculator to work with all ETACE-pattern models directly
- Document that ETACE, ETPairModel, ETOneBody share identical interface
- Plan Phase 6 refactoring to eliminate WrappedETACE indirection
- Update architecture diagrams showing target unified structure

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
---
 docs/plans/et_calculators_plan.md | 535 +++++++++++++++++++++++++-----
 1 file changed, 447 insertions(+), 88 deletions(-)

diff --git a/docs/plans/et_calculators_plan.md b/docs/plans/et_calculators_plan.md
index 342800354..dfe7fd3d3 100644
--- a/docs/plans/et_calculators_plan.md
+++ b/docs/plans/et_calculators_plan.md
@@ -4,9 +4,9 @@
 
 Create calculator wrappers and training assembly for the new ETACE backend, integrating with EquivariantTensors.jl.
 
-**Status**: ✅ Core implementation complete. Awaiting maintainer for E0/PairModel.
+**Status**: 🔄 Refactoring to unified architecture - remove duplicate E0Model, use upstream models directly.
 
-**Branch**: `jrk/etcalculators` (based on `acesuit/co/etback`)
+**Branch**: `jrk/etcalculators` (rebased on `acesuit/co/etback` including `co/etpair` merge)
 
 ---
 
@@ -15,11 +15,36 @@ Create calculator wrappers and training assembly for the new ETACE backend, inte
 | Phase | Description | Status |
 |-------|-------------|--------|
 | Phase 1 | ETACEPotential with AtomsCalculators interface | ✅ Complete |
-| Phase 2 | WrappedSiteCalculator + StackedCalculator | ✅ Complete |
-| Phase 3 | E0Model + PairModel | 🔄 Maintainer will implement |
-| Phase 5 | Training assembly functions | ✅ Complete |
+| Phase 2 | WrappedSiteCalculator + StackedCalculator | 🔄 Refactoring |
+| Phase 3 | E0Model + PairModel | ✅ Upstream (ETOneBody, ETPairModel, convertpair) |
+| Phase 5 | Training assembly functions | ✅ Complete (many-body only) |
+| Phase 6 | Full model integration | 🔄 In Progress |
 | Benchmarks | Performance comparison scripts | ✅ Complete |
 
+### Key Design Decision: Unified Architecture
+
+**All upstream ETACE-pattern models share the same interface:**
+
+| Method | ETACE | ETPairModel | ETOneBody |
+|--------|-------|-------------|-----------|
+| `model(G, ps, st)` | site energies | site energies | site energies |
+| `site_grads(model, G, ps, st)` | edge gradients | edge gradients | zero gradients |
+| `site_basis(model, G, ps, st)` | basis matrix | basis matrix | empty |
+| `site_basis_jacobian(model, G, ps, st)` | (basis, jac) | (basis, jac) | (empty, empty) |
+
+This enables a **unified `WrappedSiteCalculator`** that works with all three model types directly, eliminating the need for multiple wrapper types.
+
+### Current Limitations
+
+**ETACE currently only implements the many-body basis, not pair potential or reference energies.**
+
+In the integration test (`test/et_models/test_et_silicon.jl`), we compare ETACE against ACE with `Wpair=0` (pair disabled) because:
+- `convert2et(model)` converts only the many-body basis
+- `convertpair(model)` converts the pair potential separately (not yet integrated)
+- Reference energies (E0/Vref) need separate handling via `ETOneBody`
+
+Full model conversion will require combining all three components via `StackedCalculator`.
+
 ### Benchmark Results
 
 **Energy (test/benchmark_comparison.jl)**:
@@ -40,125 +65,349 @@ Create calculator wrappers and training assembly for the new ETACE backend, inte
 
 ---
 
-## Files Created/Modified
+## Phase 3: Upstream Implementation (Now Complete)
 
-### New Files
-- `src/et_models/et_calculators.jl` - ETACEPotential, WrappedSiteCalculator, WrappedETACE, training assembly
-- `src/et_models/stackedcalc.jl` - StackedCalculator with @generated loop unrolling
-- `test/et_models/test_et_calculators.jl` - Comprehensive tests
-- `test/benchmark_comparison.jl` - Energy benchmarks (CPU + GPU)
-- `test/benchmark_forces.jl` - Forces benchmarks (CPU)
+The maintainer has implemented E0/PairModel in the `co/etback` branch (merged via PR #316):
 
-### Modified Files
-- `src/et_models/et_models.jl` - Added includes for new files
-- `test/Project.toml` - Updated EquivariantTensors compat to 0.4
+### New Files from Upstream
 
----
+1. **`src/et_models/onebody.jl`** - `ETOneBody` one-body energy model
+2. **`src/et_models/et_pair.jl`** - `ETPairModel` pair potential
+3. **`src/et_models/et_envbranch.jl`** - Environment branch layer utilities
+4. **`test/etmodels/test_etonebody.jl`** - OneBody tests
+5. **`test/etmodels/test_etpair.jl`** - Pair potential tests
 
-## Implementation Details
+### Upstream Interface Pattern
+
+The upstream models implement the **ETACE interface** (different from our SiteEnergyModel):
+
+```julia
+# Upstream interface (ETACE pattern):
+model(G, ps, st)                    # Returns (site_energies, st)
+site_grads(model, G, ps, st)        # Returns edge gradient array
+site_basis(model, G, ps, st)        # Returns basis matrix
+site_basis_jacobian(model, G, ps, st)  # Returns (basis, jacobian)
+```
 
-### ETACEPotential (`et_calculators.jl`)
+```julia
+# Our interface (SiteEnergyModel pattern):
+site_energies(model, G, ps, st)     # Returns site energies vector
+site_energy_grads(model, G, ps, st) # Returns (edge_data = [...],) named tuple
+cutoff_radius(model)                # Returns Float64 in Ångström
+```
 
-Standalone calculator wrapping ETACE with full AtomsCalculators interface:
+### `ETOneBody` Details (`onebody.jl`)
 
 ```julia
-mutable struct ETACEPotential{MOD<:ETACE, T} <: SitePotential
-   model::MOD
-   ps::T
-   st::NamedTuple
-   rcut::Float64
-   co_ps::Any  # optional committee parameters
+struct ETOneBody{NZ, T, CAT, TSEL} <: AbstractLuxLayer
+   E0s::SVector{NZ, T}        # Reference energies per species
+   categories::SVector{NZ, CAT}
+   selector::TSEL             # Maps atom state to species index
 end
+
+# Constructor from Dict
+one_body(D::Dict, catfun) -> ETOneBody
+
+# Interface implementation
+(l::ETOneBody)(X::ETGraph, ps, st)              # Returns site energies
+site_grads(l::ETOneBody, X, ps, st)             # Returns zeros (constant energy)
+site_basis(l::ETOneBody, X, ps, st)             # Returns empty (0 basis functions)
+site_basis_jacobian(l::ETOneBody, X, ps, st)    # Returns empty
 ```
 
-Implements:
-- `potential_energy(sys, calc)`
-- `forces(sys, calc)`
-- `virial(sys, calc)`
-- `energy_forces_virial(sys, calc)`
+Key design decisions:
+- E0s stored in **state** (`st.E0s`) for float type conversion (Float32/Float64)
+- Uses `SVector` for GPU compatibility
+- Returns `fill(VState(), ...)` for zero gradients (maintains edge structure)
+- Returns `(nnodes, 0)` sized arrays for basis (no learnable parameters)
+
+### `ETPairModel` Details (`et_pair.jl`)
+
+```julia
+@concrete struct ETPairModel <: AbstractLuxContainerLayer{(:rembed, :readout)}
+   rembed     # Radial embedding layer (basis)
+   readout    # SelectLinL readout layer
+end
+
+# Interface implementation
+(l::ETPairModel)(X::ETGraph, ps, st)            # Returns site energies
+site_grads(l::ETPairModel, X, ps, st)           # Zygote gradient
+site_basis(l::ETPairModel, X, ps, st)           # Sum over neighbor radial basis
+site_basis_jacobian(l::ETPairModel, X, ps, st)  # Uses ET.evaluate_ed
+```
 
-### WrappedSiteCalculator (`et_calculators.jl`)
+Key design decisions:
+- **Owns its own `ps`/`st`** (Option A from original plan)
+- Uses ET-native implementation (Option B from original plan)
+- Radial basis: `𝔹 = sum(Rnl, dims=1)` - sums radial embeddings over neighbors
+- GPU-compatible via ET's existing kernels
 
-Generic wrapper for models implementing site energy interface:
+### Model Conversion (`convert.jl`)
 
 ```julia
-struct WrappedSiteCalculator{M}
+convertpair(model::ACEModel) -> ETPairModel
+```
+
+Converts ACEModel's pair potential component to ETPairModel:
+- Extracts radial basis parameters
+- Creates `EnvRBranchL` envelope layer
+- Sets up species-pair `SelectLinL` readout
+
+---
+
+## Refactoring Plan: Unified Architecture
+
+### Motivation
+
+The current implementation has **duplicate functionality**:
+- Our `E0Model` duplicates upstream `ETOneBody`
+- Multiple wrapper types (`WrappedETACE`, planned `WrappedETPairModel`, `WrappedETOneBody`) all do the same thing
+
+Since all upstream models share the same interface, we can **unify to a single `WrappedSiteCalculator`**.
+
+### Changes Required
+
+#### 1. Remove `E0Model` (BREAKING)
+
+Delete the `E0Model` struct and related functions. Users should migrate to:
+
+```julia
+# Old (our E0Model):
+E0 = E0Model(Dict(:Si => -0.846, :O => -2.15))
+calc = WrappedSiteCalculator(E0, 5.5)
+
+# New (upstream ETOneBody):
+et_onebody = ETM.one_body(Dict(:Si => -0.846, :O => -2.15), x -> x.z)
+_, st = Lux.setup(rng, et_onebody)
+calc = WrappedSiteCalculator(et_onebody, nothing, st, 3.0)  # rcut=3.0 minimum for graph
+```
+
+#### 2. Unify `WrappedSiteCalculator`
+
+Refactor to store `ps` and `st` and work with ETACE-pattern models directly:
+
+```julia
+"""
+    WrappedSiteCalculator{M, PS, ST}
+
+Wraps any ETACE-pattern model (ETACE, ETPairModel, ETOneBody) and provides
+the AtomsCalculators interface.
+
+All wrapped models must implement:
+- `model(G, ps, st)` → `(site_energies, st)`
+- `site_grads(model, G, ps, st)` → edge gradients
+
+# Fields
+- `model` - ETACE-pattern model (ETACE, ETPairModel, or ETOneBody)
+- `ps` - Model parameters (can be `nothing` for ETOneBody)
+- `st` - Model state
+- `rcut::Float64` - Cutoff radius for graph construction (Å)
+"""
+mutable struct WrappedSiteCalculator{M, PS, ST}
    model::M
+   ps::PS
+   st::ST
+   rcut::Float64
+end
+
+# Convenience constructor with automatic cutoff
+function WrappedSiteCalculator(model, ps, st)
+   rcut = _model_cutoff(model, ps, st)
+   return WrappedSiteCalculator(model, ps, st, max(rcut, 3.0))
 end
+
+# Cutoff extraction (type-specific)
+_model_cutoff(::ETOneBody, ps, st) = 0.0
+_model_cutoff(model::ETPairModel, ps, st) = _extract_rcut_from_rembed(model.rembed)
+_model_cutoff(model::ETACE, ps, st) = _extract_rcut_from_rembed(model.rembed)
+# Fallback: require explicit rcut
 ```
 
-Site energy interface:
-- `site_energies(model, G, ps, st) -> Vector`
-- `site_energy_grads(model, G, ps, st) -> (edge_data = [...],)`
-- `cutoff_radius(model) -> Unitful.Length`
+#### 3. Remove `WrappedETACE`
 
-### StackedCalculator (`stackedcalc.jl`)
+The functionality moves into `WrappedSiteCalculator`:
 
-Combines multiple AtomsCalculators using @generated functions for type-stable loop unrolling:
+```julia
+# Old (with WrappedETACE):
+wrapped = WrappedETACE(et_model, ps, st, rcut)
+calc = WrappedSiteCalculator(wrapped, rcut)
+
+# New (direct):
+calc = WrappedSiteCalculator(et_model, ps, st, rcut)
+```
+
+#### 4. Update `ETACEPotential` Type Alias
 
 ```julia
-struct StackedCalculator{N, C<:Tuple}
-   calcs::C
+# Old:
+const ETACEPotential{MOD, PS, ST} = WrappedSiteCalculator{WrappedETACE{MOD, PS, ST}}
+
+# New:
+const ETACEPotential{MOD<:ETACE, PS, ST} = WrappedSiteCalculator{MOD, PS, ST}
+```
+
+#### 5. Unified Energy/Force/Virial Implementation
+
+```julia
+function _wrapped_energy(calc::WrappedSiteCalculator, sys::AbstractSystem)
+   G = ET.Atoms.interaction_graph(sys, calc.rcut * u"Å")
+   Ei, _ = calc.model(G, calc.ps, calc.st)
+   return sum(Ei)
 end
 
-@generated function _stacked_energy(sys::AbstractSystem, calc::StackedCalculator{N}) where {N}
-   # Generates: E_1 + E_2 + ... + E_N at compile time
+function _wrapped_forces(calc::WrappedSiteCalculator, sys::AbstractSystem)
+   G = ET.Atoms.interaction_graph(sys, calc.rcut * u"Å")
+   ∂G = site_grads(calc.model, G, calc.ps, calc.st)
+   if isempty(∂G.edge_data)
+      return zeros(SVector{3, Float64}, length(sys))
+   end
+   return -ET.Atoms.forces_from_edge_grads(sys, G, ∂G.edge_data)
 end
 ```
 
-### Training Assembly (`et_calculators.jl`)
+### Benefits of Unified Architecture
 
-Functions for linear least squares fitting:
+1. **No code duplication** - Single wrapper handles all model types
+2. **Use upstream directly** - `ETOneBody`, `ETPairModel` work out-of-the-box
+3. **GPU-compatible** - Upstream models use `SVector` for efficient GPU ops
+4. **Simpler mental model** - One wrapper type, one interface
+5. **Easier testing** - Test interface once, works for all models
 
-- `length_basis(calc)` - Total number of linear parameters
-- `get_linear_parameters(calc)` - Extract parameter vector
-- `set_linear_parameters!(calc, θ)` - Set parameters from vector
-- `potential_energy_basis(sys, calc)` - Energy design matrix row
-- `energy_forces_virial_basis(sys, calc)` - Full design matrix row
+### Migration Path
 
----
+| Old | New |
+|-----|-----|
+| `E0Model(Dict(:Si => -0.846))` | `ETM.one_body(Dict(:Si => -0.846), x -> x.z)` |
+| `WrappedETACE(model, ps, st, rcut)` | `WrappedSiteCalculator(model, ps, st, rcut)` |
+| `WrappedSiteCalculator(E0Model(...))` | `WrappedSiteCalculator(ETOneBody(...), nothing, st)` |
 
-## Maintainer Decisions (Phase 3)
+### Backward Compatibility
 
-**Q2: Parameter ownership** → **Option A**: PairModel owns its own `ps`/`st`
+For a transition period, we could keep `E0Model` as a deprecated alias:
 
-**Q3: Implementation approach** → **Option B**: Create new ET-native pair implementation
-- Native GPU support
-- Consistent with ETACE architecture
+```julia
+@deprecate E0Model(d::Dict) begin
+   et = one_body(d, x -> x.z)
+   _, st = Lux.setup(Random.default_rng(), et)
+   (model=et, ps=nothing, st=st)
+end
+```
 
-Maintainer will implement E0Model and PairModel given their ACE experience.
+However, since this is internal API on a feature branch, clean removal is preferred.
 
 ---
 
-## Current State (Already Implemented)
+## Files Created/Modified
 
-### In ACEpotentials (`src/et_models/`)
+### Our Branch (jrk/etcalculators)
+- `src/et_models/et_calculators.jl` - WrappedSiteCalculator (unified), ETACEPotential, training assembly
+  - **To Remove**: `E0Model`, `WrappedETACE`, old `SiteEnergyModel` interface
+- `src/et_models/stackedcalc.jl` - StackedCalculator with @generated loop unrolling
+- `test/et_models/test_et_calculators.jl` - Comprehensive unit tests
+  - **To Update**: Remove E0Model tests, update WrappedSiteCalculator signature
+- `test/et_models/test_et_silicon.jl` - Integration test (compares many-body only)
+- `benchmark/benchmark_comparison.jl` - Energy benchmarks (CPU + GPU)
+- `benchmark/benchmark_forces.jl` - Forces benchmarks (CPU)
+
+### Upstream (now merged via co/etpair)
+- `src/et_models/onebody.jl` - `ETOneBody` Lux layer with `one_body()` constructor (**replaces our E0Model**)
+- `src/et_models/et_pair.jl` - `ETPairModel` Lux layer with site_basis/jacobian
+- `src/et_models/et_envbranch.jl` - `EnvRBranchL` for envelope × radial basis
+- `src/et_models/convert.jl` - Added `convertpair()`, envelope conversion utilities
+- `test/etmodels/test_etonebody.jl` - OneBody tests
+- `test/etmodels/test_etpair.jl` - Pair model tests (shows parameter copying pattern)
+- `test/etmodels/test_etbackend.jl` - General ET backend tests
+
+### Modified Files
+- `src/et_models/et_models.jl` - Includes for all new files
+- `docs/src/all_exported.md` - Added ETModels to autodocs
+
+---
+
+## Implementation Details
+
+### Current Architecture (to be refactored)
+
+The current implementation uses nested wrappers:
+```
+StackedCalculator
+├── WrappedSiteCalculator{E0Model}           # Our duplicate (TO REMOVE)
+├── WrappedSiteCalculator{WrappedETACE}      # Extra indirection (TO REMOVE)
+```
+
+### Target Architecture (unified)
+
+After refactoring, use upstream models directly:
+```
+StackedCalculator
+├── WrappedSiteCalculator{ETOneBody}         # Upstream one-body
+├── WrappedSiteCalculator{ETPairModel}       # Upstream pair
+└── WrappedSiteCalculator{ETACE}             # Upstream many-body
+```
+
+### WrappedSiteCalculator (`et_calculators.jl`) - TARGET
+
+Unified wrapper for any ETACE-pattern model:
+
+```julia
+mutable struct WrappedSiteCalculator{M, PS, ST}
+   model::M      # ETACE, ETPairModel, or ETOneBody
+   ps::PS        # Parameters (nothing for ETOneBody)
+   st::ST        # State
+   rcut::Float64 # Cutoff for graph construction
+end
+
+# All ETACE-pattern models have identical interface:
+function _wrapped_energy(calc::WrappedSiteCalculator, sys)
+   G = ET.Atoms.interaction_graph(sys, calc.rcut * u"Å")
+   Ei, _ = calc.model(G, calc.ps, calc.st)  # Works for all model types!
+   return sum(Ei)
+end
+
+function _wrapped_forces(calc::WrappedSiteCalculator, sys)
+   G = ET.Atoms.interaction_graph(sys, calc.rcut * u"Å")
+   ∂G = site_grads(calc.model, G, calc.ps, calc.st)  # Works for all model types!
+   return -ET.Atoms.forces_from_edge_grads(sys, G, ∂G.edge_data)
+end
+```
+
+### ETACEPotential Type Alias - TARGET
 
-**ETACE struct** (`et_ace.jl:11-16`):
 ```julia
-@concrete struct ETACE <: AbstractLuxContainerLayer{(:rembed, :yembed, :basis, :readout)}
-   rembed     # radial embedding layer
-   yembed     # angular embedding layer
-   basis      # many-body basis layer
-   readout    # selectlinl readout layer
+const ETACEPotential{MOD<:ETACE, PS, ST} = WrappedSiteCalculator{MOD, PS, ST}
+
+function ETACEPotential(model::ETACE, ps, st, rcut::Real)
+   return WrappedSiteCalculator(model, ps, st, Float64(rcut))
+end
+```
+
+### StackedCalculator (`stackedcalc.jl`)
+
+Combines multiple AtomsCalculators using @generated functions for type-stable loop unrolling:
+
+```julia
+struct StackedCalculator{N, C<:Tuple}
+   calcs::C
+end
+
+@generated function _stacked_energy(sys::AbstractSystem, calc::StackedCalculator{N}) where {N}
+   # Generates: E_1 + E_2 + ... + E_N at compile time
 end
 ```
 
-**Core functions** (`et_ace.jl`):
-- ✅ `(l::ETACE)(X::ETGraph, ps, st)` - forward evaluation, returns site energies
-- ✅ `site_grads(l::ETACE, X::ETGraph, ps, st)` - Zygote gradient for forces
-- ✅ `site_basis(l::ETACE, X::ETGraph, ps, st)` - basis values per site
-- ✅ `site_basis_jacobian(l::ETACE, X::ETGraph, ps, st)` - basis + jacobians
+### Training Assembly (`et_calculators.jl`)
 
-**Model conversion** (`convert.jl`):
-- ✅ `convert2et(model::ACEModel)` - full conversion from ACEModel to ETACE
+Functions for linear least squares fitting:
 
-### In EquivariantTensors.jl (v0.4.0)
+- `length_basis(calc)` - Total number of linear parameters
+- `get_linear_parameters(calc)` - Extract parameter vector
+- `set_linear_parameters!(calc, θ)` - Set parameters from vector
+- `potential_energy_basis(sys, calc)` - Energy design matrix row
+- `energy_forces_virial_basis(sys, calc)` - Full design matrix row
 
-**Atoms extension** (`ext/NeighbourListsExt.jl`):
-- ✅ `ET.Atoms.interaction_graph(sys, rcut)` - ETGraph from AtomsBase system
-- ✅ `ET.Atoms.forces_from_edge_grads(sys, G, ∇E_edges)` - edge gradients to atomic forces
-- ✅ `ET.rev_reshape_embedding` - neighbor-indexed to edge-indexed conversion
+**Note**: Training assembly currently only works with `ETACE` (many-body).
+Extension to `ETPairModel` will use the same `site_basis_jacobian` interface.
+`ETOneBody` has no learnable parameters (empty basis).
 
 ---
 
@@ -177,26 +426,133 @@ Tests in `test/et_models/test_et_calculators.jl`:
 9. ✅ Training assembly: potential_energy_basis
 10. ✅ Training assembly: energy_forces_virial_basis
 
+Upstream tests in `test/etmodels/`:
+- ✅ `test_etonebody.jl` - ETOneBody evaluation and gradients
+- ✅ `test_etpair.jl` - ETPairModel evaluation, gradients, basis, jacobian
+
 ---
 
 ## Remaining Work
 
-### For Maintainer (Phase 3)
+### Phase 6: Unified Architecture Refactoring
+
+**Goal**: Simplify codebase by using upstream models directly with unified `WrappedSiteCalculator`.
+
+#### 6.1 Refactor `WrappedSiteCalculator` (et_calculators.jl)
+
+1. Change struct to store `ps` and `st`:
+   ```julia
+   mutable struct WrappedSiteCalculator{M, PS, ST}
+      model::M
+      ps::PS
+      st::ST
+      rcut::Float64
+   end
+   ```
+
+2. Update `_wrapped_energy`, `_wrapped_forces`, `_wrapped_virial` to call ETACE interface directly
+
+3. Add cutoff extraction helpers:
+   ```julia
+   _model_cutoff(::ETOneBody, ps, st) = 0.0
+   _model_cutoff(model::ETPairModel, ps, st) = ...  # extract from rembed
+   _model_cutoff(model::ETACE, ps, st) = ...  # extract from rembed
+   ```
+
+#### 6.2 Remove Redundant Code
+
+1. **Delete `E0Model`** - replaced by upstream `ETOneBody`
+2. **Delete `WrappedETACE`** - functionality merged into `WrappedSiteCalculator`
+3. **Remove old SiteEnergyModel interface** - use ETACE interface directly
+
+#### 6.3 Update `ETACEPotential` Type Alias
+
+```julia
+const ETACEPotential{MOD<:ETACE, PS, ST} = WrappedSiteCalculator{MOD, PS, ST}
+
+function ETACEPotential(model::ETACE, ps, st, rcut::Real)
+   return WrappedSiteCalculator(model, ps, st, Float64(rcut))
+end
+```
+
+#### 6.4 Full Model Conversion Function
+
+```julia
+"""
+    convert2et_full(model::ACEModel, ps, st; rng=Random.default_rng()) -> StackedCalculator
+
+Convert a complete ACE model (E0 + Pair + Many-body) to an ETACE calculator.
+Returns a StackedCalculator combining ETOneBody, ETPairModel, and ETACE.
+"""
+function convert2et_full(model, ps, st; rng=Random.default_rng())
+   rcut = maximum(a.rcut for a in model.pairbasis.rin0cuts)
+
+   # 1. Convert E0/Vref to ETOneBody
+   E0s = model.Vref.E0  # Dict{Int, Float64}
+   zlist = ChemicalSpecies.(model.rbasis._i2z)
+   E0_dict = Dict(z => E0s[z.number] for z in zlist)
+   et_onebody = one_body(E0_dict, x -> x.z)
+   _, onebody_st = Lux.setup(rng, et_onebody)
+   onebody_calc = WrappedSiteCalculator(et_onebody, nothing, onebody_st, 3.0)
+
+   # 2. Convert pair potential to ETPairModel
+   et_pair = convertpair(model)
+   et_pair_ps, et_pair_st = Lux.setup(rng, et_pair)
+   _copy_pair_params!(et_pair_ps, ps, model)
+   pair_calc = WrappedSiteCalculator(et_pair, et_pair_ps, et_pair_st, rcut)
+
+   # 3. Convert many-body to ETACE
+   et_ace = convert2et(model)
+   et_ace_ps, et_ace_st = Lux.setup(rng, et_ace)
+   _copy_ace_params!(et_ace_ps, ps, model)
+   ace_calc = WrappedSiteCalculator(et_ace, et_ace_ps, et_ace_st, rcut)
+
+   # 4. Stack all components
+   return StackedCalculator((onebody_calc, pair_calc, ace_calc))
+end
+```
+
+#### 6.5 Parameter Copying Utilities
+
+From `test/etmodels/test_etpair.jl`, pair parameter copying for multi-species:
+```julia
+function _copy_pair_params!(et_ps, ps, model)
+   NZ = length(model.rbasis._i2z)
+   for i in 1:NZ, j in 1:NZ
+      idx = (i-1)*NZ + j
+      et_ps.rembed.rbasis.post.W[:, :, idx] = ps.pairbasis.Wnlq[:, :, i, j]
+   end
+   for s in 1:NZ
+      et_ps.readout.W[1, :, s] .= ps.Wpair[:, s]
+   end
+end
+```
+
+#### 6.6 Update Tests
+
+1. Update `test/et_models/test_et_calculators.jl`:
+   - Remove `E0Model` tests
+   - Add `ETOneBody` integration tests
+   - Update `WrappedSiteCalculator` tests for new signature
+
+2. Update `test/et_models/test_et_silicon.jl`:
+   - Use `ETOneBody` instead of `E0Model` if testing E0
+
+#### 6.7 Training Assembly Updates
 
-1. **E0Model**: One-body reference energies
-   - Store E0s in state for float type conversion
-   - Implement site energy interface (zero gradients)
+1. Extend `energy_forces_virial_basis` to work with unified `WrappedSiteCalculator`:
+   - Detect model type and call appropriate `site_basis_jacobian`
+   - Works with `ETACE`, `ETPairModel` (both have `site_basis_jacobian`)
+   - `ETOneBody` returns empty basis (no learnable params)
 
-2. **PairModel**: ET-native pair potential
-   - New implementation using `ET.Atoms` patterns
-   - GPU-compatible
-   - Implement site energy interface
+2. Update `length_basis`, `get_linear_parameters`, `set_linear_parameters!`
 
 ### Future Enhancements
 
-- GPU forces benchmark (requires GPU gradient support)
-- ACEfit.assemble dispatch integration
-- Committee support for ETACEPotential
+- GPU forces benchmark (requires GPU gradient support in ET)
+- ACEfit.assemble dispatch integration for full models
+- Committee support for combined calculators
+- Training assembly for pair model (similar structure to many-body)
 
 ---
 
@@ -206,3 +562,6 @@ Tests in `test/et_models/test_et_calculators.jl`:
 - GPU time nearly constant regardless of system size (~0.5ms)
 - Forces speedup (8-11x) larger than energy speedup (1.5-2.5x) on CPU
 - StackedCalculator uses @generated functions for zero-overhead composition
+- Upstream `ETOneBody` stores E0s in state (`st.E0s`) for float type flexibility (Float32/Float64)
+- All upstream models use `VState` for gradients in `site_grads()` return value
+- `site_grads` returns edge gradients as `∂G` with `.edge_data` field containing `VState` objects

From 389fdd1d16c39c009068022471947883b1f3c231 Mon Sep 17 00:00:00 2001
From: James Kermode <james.kermode@gmail.com>
Date: Tue, 30 Dec 2025 21:24:16 +0000
Subject: [PATCH 15/35] Refactor to unified WrappedSiteCalculator (Phase
 6.1-6.3)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- Refactor WrappedSiteCalculator to store ps, st, rcut directly
- Remove E0Model (use upstream ETOneBody instead)
- Remove WrappedETACE (functionality merged into WrappedSiteCalculator)
- Remove old SiteEnergyModel interface (site_energies, site_energy_grads)
- Update ETACEPotential to be type alias for WrappedSiteCalculator{ETACE}
- Update training assembly accessors for new flat structure

All ETACE-pattern models (ETACE, ETPairModel, ETOneBody) now work
directly with WrappedSiteCalculator via their common interface:
- model(G, ps, st) -> (site_energies, st)
- site_grads(model, G, ps, st) -> edge gradients

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
---
 src/et_models/et_calculators.jl | 217 ++++++++------------------------
 1 file changed, 55 insertions(+), 162 deletions(-)

diff --git a/src/et_models/et_calculators.jl b/src/et_models/et_calculators.jl
index d3c850249..a3d9ed994 100644
--- a/src/et_models/et_calculators.jl
+++ b/src/et_models/et_calculators.jl
@@ -3,11 +3,13 @@
 # Provides AtomsCalculators-compatible energy/forces/virial evaluation
 #
 # Architecture:
-# - SiteEnergyModel interface: Any model producing per-site energies can implement this
-# - E0Model: One-body reference energies (constant per species)
-# - WrappedETACE: Wraps ETACE model with the SiteEnergyModel interface
-# - WrappedSiteCalculator: Converts SiteEnergyModel to AtomsCalculators interface
-# - ETACEPotential: Standalone calculator for simple use cases
+# - WrappedSiteCalculator: Unified wrapper for ETACE-pattern models (ETACE, ETPairModel, ETOneBody)
+# - ETACEPotential: Type alias for WrappedSiteCalculator with ETACE model
+# - StackedCalculator: Combines multiple calculators (see stackedcalc.jl)
+#
+# All wrapped models must implement the ETACE interface:
+#   model(G, ps, st) -> (site_energies, st)
+#   site_grads(model, G, ps, st) -> edge gradients
 #
 # See also: stackedcalc.jl for StackedCalculator (combines multiple calculators)
 
@@ -19,176 +21,69 @@ using StaticArrays
 using Unitful
 using LinearAlgebra: norm
 
-# ============================================================================
-#  SiteEnergyModel Interface
-# ============================================================================
-#
-# Any model producing per-site (per-atom) energies can implement this interface:
-#
-#   site_energies(model, G::ETGraph, ps, st) -> Vector  # per-atom energies
-#   site_energy_grads(model, G::ETGraph, ps, st) -> ∂G  # edge gradients for forces
-#   cutoff_radius(model) -> Float64                     # in Ångström
-#
-# This enables composition via StackedCalculator for:
-# - One-body reference energies (E0Model)
-# - Pairwise interactions (PairModel)
-# - Many-body ACE (WrappedETACE)
-# - Future: dispersion, coulomb, etc.
-
-"""
-    site_energies(model, G, ps, st)
-
-Compute per-site (per-atom) energies for the given interaction graph.
-Returns a vector of length `nnodes(G)`.
-"""
-function site_energies end
-
-"""
-    site_energy_grads(model, G, ps, st)
-
-Compute gradients of site energies w.r.t. edge positions.
-Returns a named tuple with `edge_data` field containing gradient vectors.
-"""
-function site_energy_grads end
-
-"""
-    cutoff_radius(model)
-
-Return the cutoff radius in Ångström for the model.
-"""
-function cutoff_radius end
-
 
 # ============================================================================
-#  E0Model - One-body reference energies
+#  WrappedSiteCalculator - Unified wrapper for ETACE-pattern models
 # ============================================================================
 
 """
-    E0Model{T}
-
-One-body reference energy model. Assigns constant energy per atomic species.
-No forces (energy is position-independent).
+    WrappedSiteCalculator{M, PS, ST}
 
-# Example
-```julia
-E0 = E0Model(Dict(ChemicalSpecies(:Si) => -0.846, ChemicalSpecies(:O) => -2.15))
-```
-"""
-struct E0Model{T<:Real}
-   E0s::Dict{ChemicalSpecies, T}
-end
-
-# Constructor from element symbols
-function E0Model(E0s::Dict{Symbol, T}) where T<:Real
-   return E0Model(Dict(ChemicalSpecies(k) => v for (k, v) in E0s))
-end
+Wraps any ETACE-pattern model (ETACE, ETPairModel, ETOneBody) and provides
+the AtomsCalculators interface.
 
-cutoff_radius(::E0Model) = 0.0  # No neighbors needed
-
-function site_energies(model::E0Model, G::ET.ETGraph, ps, st)
-   T = valtype(model.E0s)
-   return T[model.E0s[node.z] for node in G.node_data]
-end
-
-function site_energy_grads(model::E0Model{T}, G::ET.ETGraph, ps, st) where T
-   # Constant energy → zero gradients
-   zero_grad = PState(𝐫 = zero(SVector{3, T}))
-   return (edge_data = fill(zero_grad, length(G.edge_data)),)
-end
-
-
-# ============================================================================
-#  WrappedETACE - ETACE model with SiteEnergyModel interface
-# ============================================================================
+All wrapped models must implement the ETACE interface:
+- `model(G, ps, st)` → `(site_energies, st)`
+- `site_grads(model, G, ps, st)` → edge gradients
 
-"""
-    WrappedETACE{MOD<:ETACE, PS, ST}
-
-Wraps an ETACE model to implement the SiteEnergyModel interface.
 Mutable to allow parameter updates during training.
 
-# Fields
-- `model::ETACE` - The underlying ETACE model
-- `ps` - Model parameters (mutable for training)
-- `st` - Model state
-- `rcut::Float64` - Cutoff radius in Ångström
-- `co_ps` - Optional committee parameters for uncertainty quantification
-"""
-mutable struct WrappedETACE{MOD<:ETACE, PS, ST}
-   model::MOD
-   ps::PS
-   st::ST
-   rcut::Float64
-   co_ps::Any
-end
-
-# Constructor without committee parameters
-function WrappedETACE(model::ETACE, ps, st, rcut::Real)
-   return WrappedETACE(model, ps, st, Float64(rcut), nothing)
-end
-
-cutoff_radius(w::WrappedETACE) = w.rcut
-
-function site_energies(w::WrappedETACE, G::ET.ETGraph, ps, st)
-   # Use wrapper's ps/st, ignore passed ones (they're for StackedCalculator dispatch)
-   Ei, _ = w.model(G, w.ps, w.st)
-   return Ei
-end
-
-function site_energy_grads(w::WrappedETACE, G::ET.ETGraph, ps, st)
-   return site_grads(w.model, G, w.ps, w.st)
-end
-
-
-# ============================================================================
-#  WrappedSiteCalculator - Converts SiteEnergyModel to AtomsCalculators
-# ============================================================================
-
-"""
-    WrappedSiteCalculator{M}
-
-Wraps a SiteEnergyModel and provides the AtomsCalculators interface.
-Converts site quantities (per-atom energies, edge gradients) to global
-quantities (total energy, atomic forces, virial tensor).
-
 # Example
 ```julia
-E0 = E0Model(Dict(:Si => -0.846, :O => -2.15))
-calc = WrappedSiteCalculator(E0, 5.5)  # cutoff for graph construction
+# With ETACE model
+calc = WrappedSiteCalculator(et_model, ps, st, 5.5)
+
+# With ETOneBody (upstream)
+et_onebody = ETM.one_body(Dict(:Si => -0.846), x -> x.z)
+_, onebody_st = Lux.setup(rng, et_onebody)
+calc = WrappedSiteCalculator(et_onebody, nothing, onebody_st, 3.0)
 
 E = potential_energy(sys, calc)
 F = forces(sys, calc)
 ```
 
 # Fields
-- `model` - Model implementing SiteEnergyModel interface
+- `model` - ETACE-pattern model (ETACE, ETPairModel, or ETOneBody)
+- `ps` - Model parameters (can be `nothing` for ETOneBody)
+- `st` - Model state
 - `rcut::Float64` - Cutoff radius for graph construction (Å)
+- `co_ps` - Optional committee parameters for uncertainty quantification
 """
-struct WrappedSiteCalculator{M}
+mutable struct WrappedSiteCalculator{M, PS, ST}
    model::M
+   ps::PS
+   st::ST
    rcut::Float64
+   co_ps::Any
 end
 
-function WrappedSiteCalculator(model)
-   rcut = cutoff_radius(model)
-   # Ensure minimum cutoff for graph construction (must be > 0 for neighbor list)
-   # Use 3.0 Å as minimum - smaller than typical bond lengths
-   rcut = max(rcut, 3.0)
-   return WrappedSiteCalculator(model, rcut)
+# Constructor without committee parameters
+function WrappedSiteCalculator(model, ps, st, rcut::Real)
+   return WrappedSiteCalculator(model, ps, st, Float64(rcut), nothing)
 end
 
 cutoff_radius(calc::WrappedSiteCalculator) = calc.rcut * u"Å"
 
 function _wrapped_energy(calc::WrappedSiteCalculator, sys::AbstractSystem)
    G = ET.Atoms.interaction_graph(sys, calc.rcut * u"Å")
-   Ei = site_energies(calc.model, G, nothing, nothing)
+   Ei, _ = calc.model(G, calc.ps, calc.st)
    return sum(Ei)
 end
 
 function _wrapped_forces(calc::WrappedSiteCalculator, sys::AbstractSystem)
    G = ET.Atoms.interaction_graph(sys, calc.rcut * u"Å")
-   ∂G = site_energy_grads(calc.model, G, nothing, nothing)
-   # Handle empty edge case (e.g., E0 model with small cutoff)
+   ∂G = site_grads(calc.model, G, calc.ps, calc.st)
+   # Handle empty edge case (e.g., ETOneBody with small cutoff)
    if isempty(∂G.edge_data)
       return zeros(SVector{3, Float64}, length(sys))
    end
@@ -208,7 +103,7 @@ end
 
 function _wrapped_virial(calc::WrappedSiteCalculator, sys::AbstractSystem)
    G = ET.Atoms.interaction_graph(sys, calc.rcut * u"Å")
-   ∂G = site_energy_grads(calc.model, G, nothing, nothing)
+   ∂G = site_grads(calc.model, G, calc.ps, calc.st)
    # Handle empty edge case
    if isempty(∂G.edge_data)
       return zeros(SMatrix{3,3,Float64,9})
@@ -219,14 +114,14 @@ end
 function _wrapped_energy_forces_virial(calc::WrappedSiteCalculator, sys::AbstractSystem)
    G = ET.Atoms.interaction_graph(sys, calc.rcut * u"Å")
 
-   # Energy from site energies
-   Ei = site_energies(calc.model, G, nothing, nothing)
+   # Energy from site energies (call model directly - ETACE interface)
+   Ei, _ = calc.model(G, calc.ps, calc.st)
    E = sum(Ei)
 
    # Forces and virial from edge gradients
-   ∂G = site_energy_grads(calc.model, G, nothing, nothing)
+   ∂G = site_grads(calc.model, G, calc.ps, calc.st)
 
-   # Handle empty edge case (e.g., E0 model with small cutoff)
+   # Handle empty edge case (e.g., ETOneBody with small cutoff)
    if isempty(∂G.edge_data)
       F = zeros(SVector{3, Float64}, length(sys))
       V = zeros(SMatrix{3,3,Float64,9})
@@ -266,22 +161,21 @@ end
 
 
 # ============================================================================
-#  ETACEPotential - Type alias for WrappedSiteCalculator{WrappedETACE}
+#  ETACEPotential - Type alias for WrappedSiteCalculator{ETACE}
 # ============================================================================
 
 """
     ETACEPotential
 
 AtomsCalculators-compatible calculator wrapping an ETACE model.
-This is a type alias for `WrappedSiteCalculator{<:WrappedETACE}`.
+This is a type alias for `WrappedSiteCalculator{<:ETACE, PS, ST}`.
 
 Access underlying components via:
-- `calc.model` - The WrappedETACE wrapper
-- `calc.model.model` - The ETACE model
-- `calc.model.ps` - Model parameters
-- `calc.model.st` - Model state
+- `calc.model` - The ETACE model
+- `calc.ps` - Model parameters
+- `calc.st` - Model state
 - `calc.rcut` - Cutoff radius in Ångström
-- `calc.model.co_ps` - Committee parameters (optional)
+- `calc.co_ps` - Committee parameters (optional)
 
 # Example
 ```julia
@@ -289,12 +183,11 @@ calc = ETACEPotential(et_model, ps, st, 5.5)
 E = potential_energy(sys, calc)
 ```
 """
-const ETACEPotential{MOD<:ETACE, PS, ST} = WrappedSiteCalculator{WrappedETACE{MOD, PS, ST}}
+const ETACEPotential{MOD<:ETACE, PS, ST} = WrappedSiteCalculator{MOD, PS, ST}
 
-# Constructor: creates WrappedSiteCalculator wrapping WrappedETACE
+# Constructor: creates WrappedSiteCalculator with ETACE model directly
 function ETACEPotential(model::ETACE, ps, st, rcut::Real)
-   wrapped = WrappedETACE(model, ps, st, rcut)
-   return WrappedSiteCalculator(wrapped, Float64(rcut))
+   return WrappedSiteCalculator(model, ps, st, Float64(rcut))
 end
 
 # ============================================================================
@@ -312,10 +205,10 @@ end
 # Force basis:  F_atom = -∑ edges ∂E/∂r_edge, computed per basis function
 # Virial basis: V = -∑ edges (∂E/∂r_edge) ⊗ r_edge, computed per basis function
 
-# Accessor helpers for ETACEPotential (which is WrappedSiteCalculator{WrappedETACE})
-_etace(calc::ETACEPotential) = calc.model.model      # Underlying ETACE model
-_ps(calc::ETACEPotential) = calc.model.ps            # Model parameters
-_st(calc::ETACEPotential) = calc.model.st            # Model state
+# Accessor helpers for ETACEPotential (which is WrappedSiteCalculator{ETACE})
+_etace(calc::ETACEPotential) = calc.model      # Underlying ETACE model (direct)
+_ps(calc::ETACEPotential) = calc.ps            # Model parameters
+_st(calc::ETACEPotential) = calc.st            # Model state
 
 """
     length_basis(calc::ETACEPotential)
@@ -485,10 +378,10 @@ function set_linear_parameters!(calc::ETACEPotential, θ::AbstractVector)
    nspecies = etace.readout.ncat
    @assert length(θ) == nbasis * nspecies
 
-   # Reshape and copy into ps (via the WrappedETACE which is mutable)
+   # Reshape and copy into ps (WrappedSiteCalculator is mutable)
    ps = _ps(calc)
    new_W = reshape(θ, 1, nbasis, nspecies)
-   calc.model.ps = merge(ps, (readout = merge(ps.readout, (W = new_W,)),))
+   calc.ps = merge(ps, (readout = merge(ps.readout, (W = new_W,)),))
    return calc
 end
 

From feff6a6451112066a0307027c6acc85a9756e26a Mon Sep 17 00:00:00 2001
From: James Kermode <james.kermode@gmail.com>
Date: Tue, 30 Dec 2025 21:25:22 +0000
Subject: [PATCH 16/35] Add convert2et_full and parameter copying utilities
 (Phase 6.4-6.5)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- Add convert2et_full() to convert complete ACE model to StackedCalculator
  - Combines ETOneBody (E0), ETPairModel (pair), and ETACE (many-body)
  - Returns StackedCalculator compatible with AtomsCalculators

- Add _copy_ace_params!() for many-body parameter copying
  - Copies radial basis Wnlq parameters
  - Copies readout WB parameters

- Add _copy_pair_params!() for pair potential parameter copying
  - Based on mapping from test/etmodels/test_etpair.jl
  - Copies pair radial basis and readout parameters

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
---
 src/et_models/et_calculators.jl | 122 ++++++++++++++++++++++++++++++++
 1 file changed, 122 insertions(+)

diff --git a/src/et_models/et_calculators.jl b/src/et_models/et_calculators.jl
index a3d9ed994..d638ac957 100644
--- a/src/et_models/et_calculators.jl
+++ b/src/et_models/et_calculators.jl
@@ -385,3 +385,125 @@ function set_linear_parameters!(calc::ETACEPotential, θ::AbstractVector)
    return calc
 end
 
+
+# ============================================================================
+#  Full Model Conversion
+# ============================================================================
+
+using Random: AbstractRNG, default_rng
+using Lux: setup
+
+"""
+    convert2et_full(model, ps, st; rng=default_rng()) -> StackedCalculator
+
+Convert a complete ACE model (E0 + Pair + Many-body) to an ETACE-based
+StackedCalculator. This creates a calculator that combines:
+1. ETOneBody - reference energies per species
+2. ETPairModel - pair potential
+3. ETACE - many-body ACE potential
+
+The returned StackedCalculator is fully compatible with AtomsCalculators
+and can be used for energy, forces, and virial evaluation.
+
+# Arguments
+- `model`: ACE model (from ACEpotentials.Models)
+- `ps`: Model parameters (from Lux.setup)
+- `st`: Model state (from Lux.setup)
+- `rng`: Random number generator (default: `default_rng()`)
+
+# Returns
+- `StackedCalculator` combining ETOneBody, ETPairModel, and ETACE
+
+# Example
+```julia
+model = ace_model(elements=[:Si], order=3, totaldegree=8)
+ps, st = Lux.setup(rng, model)
+# ... fit model ...
+calc = convert2et_full(model, ps, st)
+E = potential_energy(sys, calc)
+```
+"""
+function convert2et_full(model, ps, st; rng::AbstractRNG=default_rng())
+   # Extract cutoff radius from pair basis
+   rcut = maximum(a.rcut for a in model.pairbasis.rin0cuts)
+
+   # 1. Convert E0/Vref to ETOneBody
+   E0s = model.Vref.E0  # Dict{Int, Float64}
+   zlist = ChemicalSpecies.(model.rbasis._i2z)
+   E0_dict = Dict(z => E0s[z.number] for z in zlist)
+   et_onebody = one_body(E0_dict, x -> x.z)
+   _, onebody_st = setup(rng, et_onebody)
+   # Use minimum cutoff for graph construction (ETOneBody needs no neighbors)
+   onebody_calc = WrappedSiteCalculator(et_onebody, nothing, onebody_st, 3.0)
+
+   # 2. Convert pair potential to ETPairModel
+   et_pair = convertpair(model)
+   et_pair_ps, et_pair_st = setup(rng, et_pair)
+   _copy_pair_params!(et_pair_ps, ps, model)
+   pair_calc = WrappedSiteCalculator(et_pair, et_pair_ps, et_pair_st, rcut)
+
+   # 3. Convert many-body to ETACE
+   et_ace = convert2et(model)
+   et_ace_ps, et_ace_st = setup(rng, et_ace)
+   _copy_ace_params!(et_ace_ps, ps, model)
+   ace_calc = WrappedSiteCalculator(et_ace, et_ace_ps, et_ace_st, rcut)
+
+   # 4. Stack all components
+   return StackedCalculator((onebody_calc, pair_calc, ace_calc))
+end
+
+
+# ============================================================================
+#  Parameter Copying Utilities
+# ============================================================================
+
+"""
+    _copy_ace_params!(et_ps, ps, model)
+
+Copy many-body (ACE) parameters from ACE model format to ETACE format.
+"""
+function _copy_ace_params!(et_ps, ps, model)
+   NZ = length(model.rbasis._i2z)
+
+   # Copy radial basis parameters (Wnlq)
+   # ACE format: Wnlq[:, :, iz, jz] for species pair (iz, jz)
+   # ETACE format: W[:, :, idx] where idx = (i-1)*NZ + j (or symmetric idx)
+   for i in 1:NZ, j in 1:NZ
+      idx = (i-1)*NZ + j
+      et_ps.rembed.basis.linl.W[:, :, idx] .= ps.rbasis.Wnlq[:, :, i, j]
+   end
+
+   # Copy readout (many-body) parameters
+   # ACE format: WB[:, s] for species s
+   # ETACE format: W[1, :, s]
+   for s in 1:NZ
+      et_ps.readout.W[1, :, s] .= ps.WB[:, s]
+   end
+end
+
+
+"""
+    _copy_pair_params!(et_ps, ps, model)
+
+Copy pair potential parameters from ACE model format to ETPairModel format.
+Based on parameter mapping from test/etmodels/test_etpair.jl.
+"""
+function _copy_pair_params!(et_ps, ps, model)
+   NZ = length(model.pairbasis._i2z)
+
+   # Copy pair radial basis parameters
+   # ACE format: pairbasis.Wnlq[:, :, i, j] for species pair (i, j)
+   # ETACE format: rembed.basis.rbasis.linl.W[:, :, idx] where idx = (i-1)*NZ + j
+   for i in 1:NZ, j in 1:NZ
+      idx = (i-1)*NZ + j
+      et_ps.rembed.basis.rbasis.linl.W[:, :, idx] .= ps.pairbasis.Wnlq[:, :, i, j]
+   end
+
+   # Copy pair readout parameters
+   # ACE format: Wpair[:, s] for species s
+   # ETACE format: readout.W[1, :, s]
+   for s in 1:NZ
+      et_ps.readout.W[1, :, s] .= ps.Wpair[:, s]
+   end
+end
+

From 16f5ce31b72cfc20c99dc978fd5edbce983deafa Mon Sep 17 00:00:00 2001
From: James Kermode <james.kermode@gmail.com>
Date: Tue, 30 Dec 2025 21:27:26 +0000
Subject: [PATCH 17/35] Update tests for unified WrappedSiteCalculator (Phase
 6.6)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- Replace E0Model tests with ETOneBody (upstream) tests
- Remove WrappedETACE tests (no longer exists)
- Update WrappedSiteCalculator tests for new (model, ps, st, rcut) signature
- Update ETACEPotential construction test for direct model access
- Update silicon integration test to use ETOneBody and unified wrapper

Tests now use upstream models directly:
- ETOneBody instead of E0Model
- WrappedSiteCalculator(model, ps, st, rcut) instead of nested wrappers

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
---
 test/et_models/test_et_calculators.jl | 98 +++++++++++----------------
 test/et_models/test_et_silicon.jl     | 16 ++---
 2 files changed, 47 insertions(+), 67 deletions(-)

diff --git a/test/et_models/test_et_calculators.jl b/test/et_models/test_et_calculators.jl
index e1ccfbd8a..48b36ff0d 100644
--- a/test/et_models/test_et_calculators.jl
+++ b/test/et_models/test_et_calculators.jl
@@ -77,13 +77,13 @@ end
 @info("Testing ETACEPotential construction")
 
 # Create calculator from ETACE model
-# ETACEPotential is now WrappedSiteCalculator{WrappedETACE}
+# ETACEPotential is now WrappedSiteCalculator{ETACE} (direct, no WrappedETACE)
 et_calc = ETM.ETACEPotential(et_model, et_ps, et_st, rcut)
 
-# Access underlying ETACE via calc.model.model (calc.model is WrappedETACE)
-@test et_calc.model.model === et_model
+# Access underlying ETACE directly via calc.model
+@test et_calc.model === et_model
 @test et_calc.rcut == rcut
-@test et_calc.model.co_ps === nothing
+@test et_calc.co_ps === nothing
 println("ETACEPotential construction: OK")
 
 ##
@@ -310,28 +310,27 @@ end
 @info("All Phase 1 tests passed!")
 
 # ============================================================================
-#  Phase 2 Tests: SiteEnergyModel Interface, WrappedSiteCalculator, StackedCalculator
+#  Phase 2 Tests: WrappedSiteCalculator and StackedCalculator
 # ============================================================================
 
-@info("Testing Phase 2: SiteEnergyModel interface and calculators")
+@info("Testing Phase 2: WrappedSiteCalculator and StackedCalculator")
 
 ##
 
-@info("Testing E0Model")
+@info("Testing ETOneBody (upstream one-body model)")
 
-# Create E0 model with reference energies
+using Lux
+
+# Create ETOneBody model with reference energies (using upstream interface)
 E0_Si = -0.846
 E0_O = -2.15
-E0 = ETM.E0Model(Dict(:Si => E0_Si, :O => E0_O))
-
-# Test cutoff radius
-@test ETM.cutoff_radius(E0) == 0.0
-println("E0Model cutoff_radius: OK")
+et_onebody = ETM.one_body(Dict(:Si => E0_Si, :O => E0_O), x -> x.z)
+_, onebody_st = Lux.setup(rng, et_onebody)
 
-# Test site energies
+# Test site energies via direct model call
 sys = rand_struct()
 G = ET.Atoms.interaction_graph(sys, rcut * u"Å")
-Ei_E0 = ETM.site_energies(E0, G, nothing, nothing)
+Ei_E0, _ = et_onebody(G, nothing, onebody_st)
 
 # Count Si and O atoms
 n_Si = count(node -> node.z == AtomsBase.ChemicalSpecies(:Si), G.node_data)
@@ -340,51 +339,23 @@ expected_E0 = n_Si * E0_Si + n_O * E0_O
 
 @test length(Ei_E0) == length(sys)
 @test sum(Ei_E0) ≈ expected_E0
-println("E0Model site_energies: OK")
+println("ETOneBody site energies: OK")
 
-# Test site energy gradients (should be zero)
-∂G_E0 = ETM.site_energy_grads(E0, G, nothing, nothing)
+# Test site gradients (should be zero for constant energies)
+∂G_E0 = ETM.site_grads(et_onebody, G, nothing, onebody_st)
 @test all(norm(e.𝐫) == 0 for e in ∂G_E0.edge_data)
-println("E0Model site_energy_grads (zero): OK")
-
-##
-
-@info("Testing WrappedETACE")
-
-# Create wrapped ETACE model
-wrapped_ace = ETM.WrappedETACE(et_model, et_ps, et_st, rcut)
-
-# Test cutoff radius
-@test ETM.cutoff_radius(wrapped_ace) == rcut
-println("WrappedETACE cutoff_radius: OK")
-
-# Test site energies match direct evaluation
-Ei_wrapped = ETM.site_energies(wrapped_ace, G, nothing, nothing)
-Ei_direct, _ = et_model(G, et_ps, et_st)
-@test Ei_wrapped ≈ Ei_direct
-println("WrappedETACE site_energies: OK")
-
-# Test site energy gradients match direct evaluation
-∂G_wrapped = ETM.site_energy_grads(wrapped_ace, G, nothing, nothing)
-∂G_direct = ETM.site_grads(et_model, G, et_ps, et_st)
-@test all(∂G_wrapped.edge_data[i].𝐫 ≈ ∂G_direct.edge_data[i].𝐫 for i in 1:length(G.edge_data))
-println("WrappedETACE site_energy_grads: OK")
+println("ETOneBody site_grads (zero): OK")
 
 ##
 
-@info("Testing WrappedSiteCalculator")
+@info("Testing WrappedSiteCalculator with ETOneBody")
 
-# Wrap E0 model in a calculator
-E0_calc = ETM.WrappedSiteCalculator(E0)
+# Wrap ETOneBody in a calculator (using new unified interface)
+E0_calc = ETM.WrappedSiteCalculator(et_onebody, nothing, onebody_st, 3.0)
 @test ustrip(u"Å", ETM.cutoff_radius(E0_calc)) == 3.0  # minimum cutoff
-println("WrappedSiteCalculator(E0) cutoff_radius: OK")
+println("WrappedSiteCalculator(ETOneBody) cutoff_radius: OK")
 
-# Wrap ETACE model in a calculator
-ace_site_calc = ETM.WrappedSiteCalculator(wrapped_ace)
-@test ustrip(u"Å", ETM.cutoff_radius(ace_site_calc)) == rcut
-println("WrappedSiteCalculator(ETACE) cutoff_radius: OK")
-
-# Test E0 calculator energy
+# Test ETOneBody calculator energy
 sys = rand_struct()
 E_E0_calc = AtomsCalculators.potential_energy(sys, E0_calc)
 G = ET.Atoms.interaction_graph(sys, 3.0 * u"Å")
@@ -392,12 +363,21 @@ n_Si = count(node -> node.z == AtomsBase.ChemicalSpecies(:Si), G.node_data)
 n_O = count(node -> node.z == AtomsBase.ChemicalSpecies(:O), G.node_data)
 expected_E = (n_Si * E0_Si + n_O * E0_O) * u"eV"
 @test ustrip(E_E0_calc) ≈ ustrip(expected_E)
-println("WrappedSiteCalculator(E0) energy: OK")
+println("WrappedSiteCalculator(ETOneBody) energy: OK")
 
-# Test E0 calculator forces (should be zero)
+# Test ETOneBody calculator forces (should be zero)
 F_E0_calc = AtomsCalculators.forces(sys, E0_calc)
 @test all(norm(ustrip.(f)) < 1e-14 for f in F_E0_calc)
-println("WrappedSiteCalculator(E0) forces (zero): OK")
+println("WrappedSiteCalculator(ETOneBody) forces (zero): OK")
+
+##
+
+@info("Testing WrappedSiteCalculator with ETACE")
+
+# Wrap ETACE model in a calculator (unified interface)
+ace_site_calc = ETM.WrappedSiteCalculator(et_model, et_ps, et_st, rcut)
+@test ustrip(u"Å", ETM.cutoff_radius(ace_site_calc)) == rcut
+println("WrappedSiteCalculator(ETACE) cutoff_radius: OK")
 
 # Test ETACE calculator matches ETACEPotential
 sys = rand_struct()
@@ -487,9 +467,9 @@ println()
 
 ##
 
-@info("Testing StackedCalculator with E0 only")
+@info("Testing StackedCalculator with ETOneBody only")
 
-# Create stacked calculator with just E0
+# Create stacked calculator with just ETOneBody (E0_calc is WrappedSiteCalculator{ETOneBody})
 E0_only_stacked = ETM.StackedCalculator((E0_calc,))
 
 sys = rand_struct()
@@ -499,11 +479,11 @@ F = AtomsCalculators.forces(sys, E0_only_stacked)
 # Energy should match E0_calc
 E_direct = AtomsCalculators.potential_energy(sys, E0_calc)
 @test ustrip(E) ≈ ustrip(E_direct)
-println("StackedCalculator(E0 only) energy: OK")
+println("StackedCalculator(ETOneBody only) energy: OK")
 
 # Forces should be zero
 @test all(norm(ustrip.(f)) < 1e-14 for f in F)
-println("StackedCalculator(E0 only) forces (zero): OK")
+println("StackedCalculator(ETOneBody only) forces (zero): OK")
 
 ##
 
diff --git a/test/et_models/test_et_silicon.jl b/test/et_models/test_et_silicon.jl
index 2d0949fbe..204a52f99 100644
--- a/test/et_models/test_et_silicon.jl
+++ b/test/et_models/test_et_silicon.jl
@@ -189,18 +189,18 @@ max_V_diff = maximum(abs.(V_from_basis - V_direct))
 
 println("Training basis assembly: OK")
 
-## ----- Test StackedCalculator with E0 -----
+## ----- Test StackedCalculator with ETOneBody -----
 
-@info("Testing StackedCalculator with E0Model")
+@info("Testing StackedCalculator with ETOneBody")
 
-# Create E0 model with arbitrary E0 value for testing
+# Create ETOneBody model with arbitrary E0 value for testing (upstream interface)
 E0s = Dict(:Si => -158.54496821)  # Si symbol => E0
-E0_model = ETM.E0Model(E0s)
-E0_calc = ETM.WrappedSiteCalculator(E0_model)
+et_onebody = ETM.one_body(E0s, x -> x.z)
+_, onebody_st = Lux.setup(rng, et_onebody)
+E0_calc = ETM.WrappedSiteCalculator(et_onebody, nothing, onebody_st, 3.0)
 
-# Create wrapped ETACE
-wrapped_etace = ETM.WrappedETACE(et_model, et_ps, et_st, rcut)
-ace_calc = ETM.WrappedSiteCalculator(wrapped_etace)
+# Create wrapped ETACE (unified interface)
+ace_calc = ETM.WrappedSiteCalculator(et_model, et_ps, et_st, rcut)
 
 # Stack them
 stacked = ETM.StackedCalculator((E0_calc, ace_calc))

From bee26365cd1d6d740dd8342546431af19feb97ea Mon Sep 17 00:00:00 2001
From: James Kermode <james.kermode@gmail.com>
Date: Tue, 30 Dec 2025 21:47:45 +0000
Subject: [PATCH 18/35] Fix ETOneBody.site_grads to return consistent interface
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- Return NamedTuple with empty edge_data matching ETACE/ETPairModel interface
- Remove unnecessary Zygote import (hand-coded since gradient is trivially zero)
- Update test to check isempty(∂G.edge_data) instead of zero norms

The calling code in et_calculators.jl checks isempty(∂G.edge_data) and
returns zero forces/virial when true, which is the correct behavior for
ETOneBody (energy depends only on atom types, not positions).

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
---
 src/et_models/onebody.jl              | 9 +++++++--
 test/et_models/test_et_calculators.jl | 5 +++--
 2 files changed, 10 insertions(+), 4 deletions(-)

diff --git a/src/et_models/onebody.jl b/src/et_models/onebody.jl
index 25f4732af..32dc8868c 100644
--- a/src/et_models/onebody.jl
+++ b/src/et_models/onebody.jl
@@ -58,8 +58,13 @@ ___apply_onebody(selector, X::AbstractVector, E0s) =
          map(x -> E0s[selector(x)], X)
 
 
-site_grads(l::ETOneBody, X::ET.ETGraph, ps, st) = 
-         fill(VState(), (ET.maxneigs(X), ET.nnodes(X), ))
+# ETOneBody energy only depends on atom types (categorical), not positions.
+# Gradient w.r.t. positions is always zero.
+# Return NamedTuple matching Zygote gradient structure with empty edge_data.
+# The calling code checks isempty(∂G.edge_data) and returns zero forces/virial.
+function site_grads(l::ETOneBody, X::ET.ETGraph, ps, st)
+   return (; edge_data = similar(X.edge_data, 0))
+end
 
 site_basis(l::ETOneBody, X::ET.ETGraph, ps, st) = 
          fill(zero(eltype(st.E0s)), (ET.nnodes(X), 0))
diff --git a/test/et_models/test_et_calculators.jl b/test/et_models/test_et_calculators.jl
index 48b36ff0d..3ad26c9e8 100644
--- a/test/et_models/test_et_calculators.jl
+++ b/test/et_models/test_et_calculators.jl
@@ -341,9 +341,10 @@ expected_E0 = n_Si * E0_Si + n_O * E0_O
 @test sum(Ei_E0) ≈ expected_E0
 println("ETOneBody site energies: OK")
 
-# Test site gradients (should be zero for constant energies)
+# Test site gradients (should be empty for constant energies)
+# Returns NamedTuple with empty edge_data, matching ETACE/ETPairModel interface
 ∂G_E0 = ETM.site_grads(et_onebody, G, nothing, onebody_st)
-@test all(norm(e.𝐫) == 0 for e in ∂G_E0.edge_data)
+@test isempty(∂G_E0.edge_data)
 println("ETOneBody site_grads (zero): OK")
 
 ##

From 146cae96fb614f0cc7f399ec82c3bedb0fa591df Mon Sep 17 00:00:00 2001
From: James Kermode <james.kermode@gmail.com>
Date: Tue, 30 Dec 2025 22:55:53 +0000
Subject: [PATCH 19/35] Fix test suite issues: project activation and ETOneBody
 interface
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- Comment out Pkg.activate in test_committee.jl that was switching
  away from the test project environment
- Update test_etonebody.jl gradient test to check for NamedTuple
  return type with .edge_data field (matching the updated ETOneBody
  interface that returns consistent structure with ETACE/ETPairModel)

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
---
 test/etmodels/test_etonebody.jl | 8 +++++---
 test/models/test_committee.jl   | 2 +-
 2 files changed, 6 insertions(+), 4 deletions(-)

diff --git a/test/etmodels/test_etonebody.jl b/test/etmodels/test_etonebody.jl
index b74d13c9c..c891b5c69 100644
--- a/test/etmodels/test_etonebody.jl
+++ b/test/etmodels/test_etonebody.jl
@@ -100,14 +100,16 @@ println()
 
 ##
 
-@info("Confirm correctness of gradient") 
+@info("Confirm correctness of gradient")
 
 sys = rand_struct()
 G = ET.Atoms.interaction_graph(sys, 5.0 * u"Å")
 ∂G1 = ETM.site_grads(et_V0, G, ps, st)
 
-println_slim(@test size(∂G1) == (G.maxneigs, length(sys))) 
-println_slim(@test all( norm.(∂G1) .== 0 ) )
+# ETOneBody returns NamedTuple with empty edge_data (gradient is zero for constant energies)
+println_slim(@test ∂G1 isa NamedTuple)
+println_slim(@test haskey(∂G1, :edge_data))
+println_slim(@test isempty(∂G1.edge_data))
 
 ##
 
diff --git a/test/models/test_committee.jl b/test/models/test_committee.jl
index e8291e884..c45d1ea11 100644
--- a/test/models/test_committee.jl
+++ b/test/models/test_committee.jl
@@ -1,5 +1,5 @@
 
-using Pkg; Pkg.activate(joinpath(@__DIR__(), "..", ".."))
+# using Pkg; Pkg.activate(joinpath(@__DIR__(), "..", ".."))
 ##
 
 using Test, ACEbase, LinearAlgebra

From 9af1174cea24a0746edad2a3c18562a4260579a7 Mon Sep 17 00:00:00 2001
From: James Kermode <james.kermode@gmail.com>
Date: Wed, 31 Dec 2025 00:11:38 +0000
Subject: [PATCH 20/35] Fix ET ACE and ET Pair test failures
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

ET ACE (site_basis_jacobian):
- Remove ps.basis and st.basis from _jacobian_X call
- The upstream ET._jacobian_X for SparseACEbasis only takes 5 args:
  (basis, Rnl, Ylm, dRnl, dYlm)

ET Pair (site_grads):
- Implement hand-coded gradient using evaluate_ed instead of Zygote
- Avoids Zygote InplaceableThunk issue with upstream EdgeEmbed rrule
- Matches the pattern used in site_basis_jacobian

Also inline _apply_etpairmodel to avoid calling site_basis (cleaner).

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
---
 src/et_models/et_ace.jl  |  5 +++--
 src/et_models/et_pair.jl | 40 ++++++++++++++++++++++++++++++++++------
 2 files changed, 37 insertions(+), 8 deletions(-)

diff --git a/src/et_models/et_ace.jl b/src/et_models/et_ace.jl
index 9c57f1d2c..2e9f7d028 100644
--- a/src/et_models/et_ace.jl
+++ b/src/et_models/et_ace.jl
@@ -69,9 +69,10 @@ function site_basis(l::ETACE, X::ET.ETGraph, ps, st)
 end
 
 
-function site_basis_jacobian(l::ETACE, X::ET.ETGraph, ps, st)    
+function site_basis_jacobian(l::ETACE, X::ET.ETGraph, ps, st)
    (R, ∂R), _ = ET.evaluate_ed(l.rembed, X, ps.rembed, st.rembed)
    (Y, ∂Y), _ = ET.evaluate_ed(l.yembed, X, ps.yembed, st.yembed)
-   (𝔹,), (∂𝔹,) = ET._jacobian_X(l.basis, R, Y, ∂R, ∂Y, ps.basis, st.basis)
+   # _jacobian_X for SparseACEbasis takes (basis, Rnl, Ylm, dRnl, dYlm) - no ps/st
+   (𝔹,), (∂𝔹,) = ET._jacobian_X(l.basis, R, Y, ∂R, ∂Y)
    return 𝔹, ∂𝔹
 end
diff --git a/src/et_models/et_pair.jl b/src/et_models/et_pair.jl
index 1a3ce5f11..4fcb2f3ba 100644
--- a/src/et_models/et_pair.jl
+++ b/src/et_models/et_pair.jl
@@ -22,11 +22,14 @@ end
 (l::ETPairModel)(X::ET.ETGraph, ps, st) = _apply_etpairmodel(l, X, ps, st), st 
       
       
-function _apply_etpairmodel(l::ETPairModel, X::ET.ETGraph, ps, st)      
-   # evaluate the basis 
-   𝔹 = site_basis(l, X, ps, st)
+function _apply_etpairmodel(l::ETPairModel, X::ET.ETGraph, ps, st)
+   # embed edges (inline to avoid Zygote thunk issues with site_basis)
+   Rnl, _ = l.rembed(X, ps.rembed, st.rembed)
+
+   # sum over neighbours for each node
+   𝔹 = dropdims(sum(Rnl, dims=1), dims=1)
 
-   # readout layer 
+   # readout layer
    φ, _ = l.readout((𝔹, X.node_data), ps.readout, st.readout)
 
    return φ
@@ -36,8 +39,33 @@ end
 
 
 function site_grads(l::ETPairModel, X::ET.ETGraph, ps, st)
-   ∂X = Zygote.gradient( X -> sum(_apply_etpairmodel(l, X, ps, st)), X)[1]
-   return ∂X
+   # Use evaluate_ed to get basis and derivatives, avoiding Zygote thunk issues
+   (R, ∂R), _ = ET.evaluate_ed(l.rembed, X, ps.rembed, st.rembed)
+
+   # R has shape (maxneigs, nnodes, nbasis) after embedding
+   # 𝔹 = sum over neighbours: shape (nnodes, nbasis)
+   𝔹 = dropdims(sum(R, dims=1), dims=1)
+
+   # Get readout weights
+   iZ = l.readout.selector.(X.node_data)
+   WW = ps.readout.W
+
+   # ∂E/∂R = W[1, :, iZ[i]] for each node, broadcast over neighbours
+   # ∂R has shape (maxneigs, nnodes, nbasis)
+   nnodes = length(X.node_data)
+   ∂E_∂𝔹 = reduce(hcat, WW[1, :, iZ[i]] for i in 1:nnodes)'  # (nnodes, nbasis)
+
+   # ∂E/∂R[j, i, k] = ∂E/∂𝔹[i, k] (same for all neighbours j)
+   ∂E_∂R = reshape(∂E_∂𝔹, 1, size(∂E_∂𝔹)...)  # (1, nnodes, nbasis)
+
+   # Chain rule: ∂E/∂X = sum over k of (∂E/∂R * ∂R/∂X)
+   # ∂R has shape (maxneigs, nnodes, nbasis), contains VState gradients
+   ∂E_edges = dropdims(sum(∂E_∂R .* ∂R, dims=3), dims=3)  # (maxneigs, nnodes)
+
+   # Reshape to match edge_data format
+   ∂E_edges_vec = ET.rev_reshape_embedding(∂E_edges, X)
+
+   return (; edge_data = ∂E_edges_vec)
 end
 
 

From 7e28b054dc6cb776e5013ca9d3aeb47fd976123b Mon Sep 17 00:00:00 2001
From: James Kermode <james.kermode@gmail.com>
Date: Wed, 31 Dec 2025 08:59:07 +0000
Subject: [PATCH 21/35] Fix parameter paths in convert2et_full and add full
 model benchmark
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- Fix z.number → z.atomic_number in E0_dict creation
- Fix _copy_ace_params! path: rembed.basis.linl.W → rembed.post.W
- Fix _copy_pair_params! path: rembed.basis.rbasis.linl.W → rembed.rbasis.post.W
- Add benchmark comparing ACE vs ETACE StackedCalculator for full model

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>
---
 benchmark/benchmark_full_model.jl | 198 ++++++++++++++++++++++++++++++
 src/et_models/et_calculators.jl   |  12 +-
 2 files changed, 205 insertions(+), 5 deletions(-)
 create mode 100644 benchmark/benchmark_full_model.jl

diff --git a/benchmark/benchmark_full_model.jl b/benchmark/benchmark_full_model.jl
new file mode 100644
index 000000000..1a4c859f0
--- /dev/null
+++ b/benchmark/benchmark_full_model.jl
@@ -0,0 +1,198 @@
+# Benchmark: Full model (1+2+many body) with StackedCalculator
+# Compares ACE CPU vs ETACE CPU vs ETACE GPU for energy and forces
+
+using ACEpotentials
+M = ACEpotentials.Models
+ETM = ACEpotentials.ETModels
+
+import EquivariantTensors as ET
+import AtomsCalculators
+using StaticArrays, Lux, Random, LuxCore, LinearAlgebra
+using AtomsBase, AtomsBuilder, Unitful
+using BenchmarkTools
+using Printf
+
+# GPU detection
+dev = identity
+has_cuda = false
+
+try
+   using CUDA
+   if CUDA.functional()
+      @info "Using CUDA"
+      CUDA.versioninfo()
+      global has_cuda = true
+      global dev = cu
+   else
+      @info "CUDA is not functional"
+   end
+catch e
+   @info "Couldn't load CUDA: $e"
+end
+
+if !has_cuda
+   @info "No GPU available. Using CPU only."
+end
+
+rng = Random.MersenneTwister(1234)
+
+# Build models with E0s and pair potential enabled
+elements = (:Si, :O)
+level = M.TotalDegree()
+max_level = 8
+order = 2
+maxl = 4
+
+rin0cuts = M._default_rin0cuts(elements)
+rin0cuts = (x -> (rin = x.rin, r0 = x.r0, rcut = 5.5)).(rin0cuts)
+
+# E0s for one-body
+E0s = Dict(:Si => -158.54496821, :O => -2042.0330099956639)
+
+model = M.ace_model(; elements = elements, order = order,
+            Ytype = :solid, level = level, max_level = max_level,
+            maxl = maxl, pair_maxn = max_level,
+            rin0cuts = rin0cuts,
+            init_WB = :glorot_normal, init_Wpair = :glorot_normal,
+            pair_learnable = true,  # Keep learnable for ET conversion
+            E0s = E0s)
+
+ps, st = Lux.setup(rng, model)
+
+# Create old ACE calculator (full model with E0s and pair)
+ace_calc = M.ACEPotential(model, ps, st)
+
+# Convert to full ETACE with StackedCalculator
+et_calc = ETM.convert2et_full(model, ps, st)
+
+rcut = maximum(a.rcut for a in model.pairbasis.rin0cuts)
+
+# Function to create system of given size
+function make_system(n_repeat)
+    sys = AtomsBuilder.bulk(:Si, cubic=true) * n_repeat
+    rattle!(sys, 0.1u"Å")
+    AtomsBuilder.randz!(sys, [:Si => 0.5, :O => 0.5])
+    return sys
+end
+
+# Benchmark configurations
+configs = [
+    (2, 2, 2),   # 64 atoms
+    (3, 3, 2),   # 144 atoms
+    (4, 4, 2),   # 256 atoms
+    (4, 4, 4),   # 512 atoms
+    (5, 5, 4),   # 800 atoms
+]
+
+println()
+println("=" ^ 90)
+println("BENCHMARK: Full Model (1+2+many body) - ACE vs ETACE StackedCalculator")
+println("=" ^ 90)
+println()
+
+# --- ENERGY BENCHMARK ---
+println("### ENERGY ###")
+println()
+
+if has_cuda
+    println("| Atoms | Edges   | ACE CPU (ms) | ETACE CPU (ms) | ETACE GPU (ms) | CPU Speedup | GPU Speedup |")
+    println("|-------|---------|--------------|----------------|----------------|-------------|-------------|")
+else
+    println("| Atoms | Edges   | ACE CPU (ms) | ETACE CPU (ms) | CPU Speedup |")
+    println("|-------|---------|--------------|----------------|-------------|")
+end
+
+for cfg in configs
+    sys = make_system(cfg)
+    natoms = length(sys)
+
+    # Count edges
+    G = ET.Atoms.interaction_graph(sys, rcut * u"Å")
+    nedges = length(G.edge_data)
+
+    # Warmup ACE
+    _ = AtomsCalculators.potential_energy(sys, ace_calc)
+
+    # Warmup ETACE CPU
+    _ = AtomsCalculators.potential_energy(sys, et_calc)
+
+    # Benchmark ACE CPU
+    t_ace = @belapsed AtomsCalculators.potential_energy($sys, $ace_calc) samples=5 evals=3
+    t_ace_ms = t_ace * 1000
+
+    # Benchmark ETACE CPU
+    t_etace_cpu = @belapsed AtomsCalculators.potential_energy($sys, $et_calc) samples=5 evals=3
+    t_etace_cpu_ms = t_etace_cpu * 1000
+
+    cpu_speedup = t_ace_ms / t_etace_cpu_ms
+
+    if has_cuda
+        # For GPU we need to handle the StackedCalculator with GPU-capable models
+        # TODO: GPU version of StackedCalculator
+        t_etace_gpu_ms = NaN
+        gpu_speedup = NaN
+
+        @printf("| %5d | %7d | %12.2f | %14.2f | %14s | %10.1fx | %10s |\n",
+                natoms, nedges, t_ace_ms, t_etace_cpu_ms, "N/A", cpu_speedup, "N/A")
+    else
+        @printf("| %5d | %7d | %12.2f | %14.2f | %10.1fx |\n",
+                natoms, nedges, t_ace_ms, t_etace_cpu_ms, cpu_speedup)
+    end
+end
+
+println()
+
+# --- FORCES BENCHMARK ---
+println("### FORCES ###")
+println()
+
+if has_cuda
+    println("| Atoms | Edges   | ACE CPU (ms) | ETACE CPU (ms) | ETACE GPU (ms) | CPU Speedup | GPU Speedup |")
+    println("|-------|---------|--------------|----------------|----------------|-------------|-------------|")
+else
+    println("| Atoms | Edges   | ACE CPU (ms) | ETACE CPU (ms) | CPU Speedup |")
+    println("|-------|---------|--------------|----------------|-------------|")
+end
+
+for cfg in configs
+    sys = make_system(cfg)
+    natoms = length(sys)
+
+    # Count edges
+    G = ET.Atoms.interaction_graph(sys, rcut * u"Å")
+    nedges = length(G.edge_data)
+
+    # Warmup ACE
+    _ = AtomsCalculators.forces(sys, ace_calc)
+
+    # Warmup ETACE CPU
+    _ = AtomsCalculators.forces(sys, et_calc)
+
+    # Benchmark ACE CPU
+    t_ace = @belapsed AtomsCalculators.forces($sys, $ace_calc) samples=5 evals=3
+    t_ace_ms = t_ace * 1000
+
+    # Benchmark ETACE CPU
+    t_etace_cpu = @belapsed AtomsCalculators.forces($sys, $et_calc) samples=5 evals=3
+    t_etace_cpu_ms = t_etace_cpu * 1000
+
+    cpu_speedup = t_ace_ms / t_etace_cpu_ms
+
+    if has_cuda
+        t_etace_gpu_ms = NaN
+        gpu_speedup = NaN
+
+        @printf("| %5d | %7d | %12.2f | %14.2f | %14s | %10.1fx | %10s |\n",
+                natoms, nedges, t_ace_ms, t_etace_cpu_ms, "N/A", cpu_speedup, "N/A")
+    else
+        @printf("| %5d | %7d | %12.2f | %14.2f | %10.1fx |\n",
+                natoms, nedges, t_ace_ms, t_etace_cpu_ms, cpu_speedup)
+    end
+end
+
+println()
+println("Notes:")
+println("- ACE CPU: Original ACEpotentials model (full: E0 + pair + many-body)")
+println("- ETACE CPU: StackedCalculator with ETOneBody + ETPairModel + ETACE")
+println("- CPU Speedup = ACE CPU / ETACE CPU")
+println("- Graph construction time included in ETACE timings")
diff --git a/src/et_models/et_calculators.jl b/src/et_models/et_calculators.jl
index d638ac957..4550407e0 100644
--- a/src/et_models/et_calculators.jl
+++ b/src/et_models/et_calculators.jl
@@ -430,7 +430,7 @@ function convert2et_full(model, ps, st; rng::AbstractRNG=default_rng())
    # 1. Convert E0/Vref to ETOneBody
    E0s = model.Vref.E0  # Dict{Int, Float64}
    zlist = ChemicalSpecies.(model.rbasis._i2z)
-   E0_dict = Dict(z => E0s[z.number] for z in zlist)
+   E0_dict = Dict(z => E0s[z.atomic_number] for z in zlist)
    et_onebody = one_body(E0_dict, x -> x.z)
    _, onebody_st = setup(rng, et_onebody)
    # Use minimum cutoff for graph construction (ETOneBody needs no neighbors)
@@ -467,10 +467,11 @@ function _copy_ace_params!(et_ps, ps, model)
 
    # Copy radial basis parameters (Wnlq)
    # ACE format: Wnlq[:, :, iz, jz] for species pair (iz, jz)
-   # ETACE format: W[:, :, idx] where idx = (i-1)*NZ + j (or symmetric idx)
+   # ETACE format: rembed.post.W[:, :, idx] where idx = (i-1)*NZ + j
+   # (post is the SelectLinL layer in EmbedDP)
    for i in 1:NZ, j in 1:NZ
       idx = (i-1)*NZ + j
-      et_ps.rembed.basis.linl.W[:, :, idx] .= ps.rbasis.Wnlq[:, :, i, j]
+      et_ps.rembed.post.W[:, :, idx] .= ps.rbasis.Wnlq[:, :, i, j]
    end
 
    # Copy readout (many-body) parameters
@@ -493,10 +494,11 @@ function _copy_pair_params!(et_ps, ps, model)
 
    # Copy pair radial basis parameters
    # ACE format: pairbasis.Wnlq[:, :, i, j] for species pair (i, j)
-   # ETACE format: rembed.basis.rbasis.linl.W[:, :, idx] where idx = (i-1)*NZ + j
+   # ETACE format: rembed.rbasis.post.W[:, :, idx] where idx = (i-1)*NZ + j
+   # (post is the SelectLinL layer in EmbedDP)
    for i in 1:NZ, j in 1:NZ
       idx = (i-1)*NZ + j
-      et_ps.rembed.basis.rbasis.linl.W[:, :, idx] .= ps.pairbasis.Wnlq[:, :, i, j]
+      et_ps.rembed.rbasis.post.W[:, :, idx] .= ps.pairbasis.Wnlq[:, :, i, j]
    end
 
    # Copy pair readout parameters

From e2bd3f3526e176de8f94e86921c8f5ca7c98bbbe Mon Sep 17 00:00:00 2001
From: James Kermode <james.kermode@gmail.com>
Date: Wed, 31 Dec 2025 09:14:57 +0000
Subject: [PATCH 22/35] Improve memory efficiency in ETPairModel site_grads
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Address moderator concern about commit 50ed668:
- Avoid forming O(nnodes * nbasis) dense intermediate matrix
- Compute edge gradients directly using loops
- Same numerical results, better memory characteristics

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
---
 src/et_models/et_pair.jl | 31 ++++++++++++++++---------------
 1 file changed, 16 insertions(+), 15 deletions(-)

diff --git a/src/et_models/et_pair.jl b/src/et_models/et_pair.jl
index 4fcb2f3ba..176b1be34 100644
--- a/src/et_models/et_pair.jl
+++ b/src/et_models/et_pair.jl
@@ -40,27 +40,28 @@ end
 
 function site_grads(l::ETPairModel, X::ET.ETGraph, ps, st)
    # Use evaluate_ed to get basis and derivatives, avoiding Zygote thunk issues
+   # (Zygote has InplaceableThunk issues with upstream EdgeEmbed rrule)
    (R, ∂R), _ = ET.evaluate_ed(l.rembed, X, ps.rembed, st.rembed)
 
-   # R has shape (maxneigs, nnodes, nbasis) after embedding
-   # 𝔹 = sum over neighbours: shape (nnodes, nbasis)
-   𝔹 = dropdims(sum(R, dims=1), dims=1)
-
-   # Get readout weights
+   # Get readout weights and species indices
    iZ = l.readout.selector.(X.node_data)
    WW = ps.readout.W
 
-   # ∂E/∂R = W[1, :, iZ[i]] for each node, broadcast over neighbours
-   # ∂R has shape (maxneigs, nnodes, nbasis)
-   nnodes = length(X.node_data)
-   ∂E_∂𝔹 = reduce(hcat, WW[1, :, iZ[i]] for i in 1:nnodes)'  # (nnodes, nbasis)
-
-   # ∂E/∂R[j, i, k] = ∂E/∂𝔹[i, k] (same for all neighbours j)
-   ∂E_∂R = reshape(∂E_∂𝔹, 1, size(∂E_∂𝔹)...)  # (1, nnodes, nbasis)
-
-   # Chain rule: ∂E/∂X = sum over k of (∂E/∂R * ∂R/∂X)
    # ∂R has shape (maxneigs, nnodes, nbasis), contains VState gradients
-   ∂E_edges = dropdims(sum(∂E_∂R .* ∂R, dims=3), dims=3)  # (maxneigs, nnodes)
+   # Compute: ∂E_edges[j, i] = Σₖ WW[1, k, iZ[i]] * ∂R[j, i, k]
+   # This is the chain rule through the linear readout
+   maxneigs, nnodes, nbasis = size(∂R)
+
+   # Compute edge gradients directly without forming intermediate matrix
+   # (avoids O(nnodes * nbasis) memory allocation)
+   ∂E_edges = zeros(eltype(∂R), maxneigs, nnodes)
+   @inbounds for i in 1:nnodes
+      iz = iZ[i]
+      @inbounds for k in 1:nbasis
+         w = WW[1, k, iz]
+         @views ∂E_edges[:, i] .+= w .* ∂R[:, i, k]
+      end
+   end
 
    # Reshape to match edge_data format
    ∂E_edges_vec = ET.rev_reshape_embedding(∂E_edges, X)

From 41378df6f89b6f5b9bf6338b75dc82c3ebcff7a5 Mon Sep 17 00:00:00 2001
From: James Kermode <james.kermode@gmail.com>
Date: Wed, 31 Dec 2025 10:06:44 +0000
Subject: [PATCH 23/35] Update EquivariantTensors to 0.4.2 and improve ET pair
 memory efficiency
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- Bump EquivariantTensors compat to 0.4.2 in main and test Project.toml
- Simplify site_basis_jacobian to use 5-arg _jacobian_X API (requires ET >= 0.4.2)
- Improve ETPairModel site_grads memory efficiency:
  - Avoid O(nnodes * nbasis) intermediate matrix allocation
  - Compute edge gradients directly using loops

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
---
 Project.toml            | 2 +-
 src/et_models/et_ace.jl | 3 ++-
 test/Project.toml       | 2 +-
 3 files changed, 4 insertions(+), 3 deletions(-)

diff --git a/Project.toml b/Project.toml
index bef7a094d..ae0540ac0 100644
--- a/Project.toml
+++ b/Project.toml
@@ -65,7 +65,7 @@ ConcreteStructs = "0.2.3"
 DecoratedParticles = "0.1.3"
 DynamicPolynomials = "0.6"
 EmpiricalPotentials = "0.2"
-EquivariantTensors = "0.4"
+EquivariantTensors = "0.4.2"
 ExtXYZ = "0.2.0"
 Folds = "0.2"
 ForwardDiff = "0.10, 1"
diff --git a/src/et_models/et_ace.jl b/src/et_models/et_ace.jl
index 2e9f7d028..d9484b45e 100644
--- a/src/et_models/et_ace.jl
+++ b/src/et_models/et_ace.jl
@@ -72,7 +72,8 @@ end
 function site_basis_jacobian(l::ETACE, X::ET.ETGraph, ps, st)
    (R, ∂R), _ = ET.evaluate_ed(l.rembed, X, ps.rembed, st.rembed)
    (Y, ∂Y), _ = ET.evaluate_ed(l.yembed, X, ps.yembed, st.yembed)
-   # _jacobian_X for SparseACEbasis takes (basis, Rnl, Ylm, dRnl, dYlm) - no ps/st
+   # _jacobian_X for SparseACEbasis takes (basis, Rnl, Ylm, dRnl, dYlm)
+   # Requires EquivariantTensors >= 0.4.2
    (𝔹,), (∂𝔹,) = ET._jacobian_X(l.basis, R, Y, ∂R, ∂Y)
    return 𝔹, ∂𝔹
 end
diff --git a/test/Project.toml b/test/Project.toml
index 46ca25b8a..06df4df4b 100644
--- a/test/Project.toml
+++ b/test/Project.toml
@@ -37,5 +37,5 @@ Zygote = "e88e6eb3-aa80-5325-afca-941959d7151f"
 ACEpotentials = {path = ".."}
 
 [compat]
-EquivariantTensors = "0.4"
+EquivariantTensors = "0.4.2"
 StaticArrays = "1"

From 42d6b08bb47d4ef0a1dace7989b08ce3ad5dcc18 Mon Sep 17 00:00:00 2001
From: James Kermode <james.kermode@gmail.com>
Date: Wed, 31 Dec 2025 11:05:45 +0000
Subject: [PATCH 24/35] Revert et_pair.jl to Zygote-based site_grads and fix
 et_ace.jl API
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Now that test project uses ET 0.4.2 (which fixed InplaceableThunk bug
in EdgeEmbed rrule), we can use the simpler Zygote-based gradient
computation for ETPairModel.

Also fix _jacobian_X call in ETACE to use 7-arg API (requires ps, st).

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>
---
 src/et_models/et_ace.jl  |  4 ++--
 src/et_models/et_pair.jl | 49 ++++++++--------------------------------
 2 files changed, 12 insertions(+), 41 deletions(-)

diff --git a/src/et_models/et_ace.jl b/src/et_models/et_ace.jl
index d9484b45e..df2446453 100644
--- a/src/et_models/et_ace.jl
+++ b/src/et_models/et_ace.jl
@@ -72,8 +72,8 @@ end
 function site_basis_jacobian(l::ETACE, X::ET.ETGraph, ps, st)
    (R, ∂R), _ = ET.evaluate_ed(l.rembed, X, ps.rembed, st.rembed)
    (Y, ∂Y), _ = ET.evaluate_ed(l.yembed, X, ps.yembed, st.yembed)
-   # _jacobian_X for SparseACEbasis takes (basis, Rnl, Ylm, dRnl, dYlm)
+   # _jacobian_X for SparseACEbasis takes (basis, Rnl, Ylm, dRnl, dYlm, ps, st)
    # Requires EquivariantTensors >= 0.4.2
-   (𝔹,), (∂𝔹,) = ET._jacobian_X(l.basis, R, Y, ∂R, ∂Y)
+   (𝔹,), (∂𝔹,) = ET._jacobian_X(l.basis, R, Y, ∂R, ∂Y, ps.basis, st.basis)
    return 𝔹, ∂𝔹
 end
diff --git a/src/et_models/et_pair.jl b/src/et_models/et_pair.jl
index 176b1be34..419321d95 100644
--- a/src/et_models/et_pair.jl
+++ b/src/et_models/et_pair.jl
@@ -1,8 +1,8 @@
 #
-# This is a temporary model implementation needed due to the fact that 
-# ETACEModel has Rnl, Ylm hard-coded. In the future it could be tested 
-# whether the pair model could simply be taken as another ACE model 
-# with a single embedding rather than several, This would need generalization 
+# This is a temporary model implementation needed due to the fact that
+# ETACEModel has Rnl, Ylm hard-coded. In the future it could be tested
+# whether the pair model could simply be taken as another ACE model
+# with a single embedding rather than several, This would need generalization
 # of a fair few methods in both ACEpotentials and EquivariantTensors.
 #
 
@@ -22,14 +22,11 @@ end
 (l::ETPairModel)(X::ET.ETGraph, ps, st) = _apply_etpairmodel(l, X, ps, st), st 
       
       
-function _apply_etpairmodel(l::ETPairModel, X::ET.ETGraph, ps, st)
-   # embed edges (inline to avoid Zygote thunk issues with site_basis)
-   Rnl, _ = l.rembed(X, ps.rembed, st.rembed)
-
-   # sum over neighbours for each node
-   𝔹 = dropdims(sum(Rnl, dims=1), dims=1)
+function _apply_etpairmodel(l::ETPairModel, X::ET.ETGraph, ps, st)      
+   # evaluate the basis 
+   𝔹 = site_basis(l, X, ps, st)
 
-   # readout layer
+   # readout layer 
    φ, _ = l.readout((𝔹, X.node_data), ps.readout, st.readout)
 
    return φ
@@ -39,34 +36,8 @@ end
 
 
 function site_grads(l::ETPairModel, X::ET.ETGraph, ps, st)
-   # Use evaluate_ed to get basis and derivatives, avoiding Zygote thunk issues
-   # (Zygote has InplaceableThunk issues with upstream EdgeEmbed rrule)
-   (R, ∂R), _ = ET.evaluate_ed(l.rembed, X, ps.rembed, st.rembed)
-
-   # Get readout weights and species indices
-   iZ = l.readout.selector.(X.node_data)
-   WW = ps.readout.W
-
-   # ∂R has shape (maxneigs, nnodes, nbasis), contains VState gradients
-   # Compute: ∂E_edges[j, i] = Σₖ WW[1, k, iZ[i]] * ∂R[j, i, k]
-   # This is the chain rule through the linear readout
-   maxneigs, nnodes, nbasis = size(∂R)
-
-   # Compute edge gradients directly without forming intermediate matrix
-   # (avoids O(nnodes * nbasis) memory allocation)
-   ∂E_edges = zeros(eltype(∂R), maxneigs, nnodes)
-   @inbounds for i in 1:nnodes
-      iz = iZ[i]
-      @inbounds for k in 1:nbasis
-         w = WW[1, k, iz]
-         @views ∂E_edges[:, i] .+= w .* ∂R[:, i, k]
-      end
-   end
-
-   # Reshape to match edge_data format
-   ∂E_edges_vec = ET.rev_reshape_embedding(∂E_edges, X)
-
-   return (; edge_data = ∂E_edges_vec)
+   ∂X = Zygote.gradient( X -> sum(_apply_etpairmodel(l, X, ps, st)), X)[1]
+   return ∂X
 end
 
 

From 00e7c2b67ffcc42d787aaa211d7981e4abd95bf5 Mon Sep 17 00:00:00 2001
From: James Kermode <james.kermode@gmail.com>
Date: Wed, 31 Dec 2025 14:52:56 +0000
Subject: [PATCH 25/35] Add GPU benchmark script and LuxCUDA test dependency
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- Add benchmark/gpu_benchmark.jl for GPU energy/forces benchmarks
- Test both many-body only (ETACE) and full model (E0 + Pair + Many-Body)
- Add LuxCUDA to test/Project.toml for GPU testing support
- GPU forces now work with Polynomials4ML v0.5.8+ (bug fix Dec 29, 2024)

Results show significant GPU speedups:
- Many-body energy: 6x-48x speedup (64-800 atoms)
- Many-body forces: 3x-18x speedup (64-800 atoms)
- Full model energy: 3x-36x speedup (64-800 atoms)
- Full model forces: 1x-14x speedup (64-800 atoms)

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>
---
 benchmark/gpu_benchmark.jl | 317 +++++++++++++++++++++++++++++++++++++
 test/Project.toml          |   5 +-
 2 files changed, 320 insertions(+), 2 deletions(-)
 create mode 100644 benchmark/gpu_benchmark.jl

diff --git a/benchmark/gpu_benchmark.jl b/benchmark/gpu_benchmark.jl
new file mode 100644
index 000000000..e861be532
--- /dev/null
+++ b/benchmark/gpu_benchmark.jl
@@ -0,0 +1,317 @@
+# GPU Benchmark for ETACE Models
+# Run with: julia --project=test benchmark/gpu_benchmark.jl
+#
+# Tests: ETOneBody, ETPairModel, ETACE (many-body), and combined full model
+
+using CUDA
+using LuxCUDA
+
+using ACEpotentials
+M = ACEpotentials.Models
+ETM = ACEpotentials.ETModels
+
+import EquivariantTensors as ET
+using Lux, LuxCore, Random
+using AtomsBase, AtomsBuilder, Unitful
+using Printf
+
+println("CUDA available: ", CUDA.functional())
+
+# Build model
+elements = (:Si, :O)
+level = M.TotalDegree()
+max_level = 8
+order = 2
+maxl = 4
+
+rin0cuts = M._default_rin0cuts(elements)
+rin0cuts = (x -> (rin = x.rin, r0 = x.r0, rcut = 5.5)).(rin0cuts)
+
+E0s = Dict(:Si => -158.54496821, :O => -2042.0330099956639)
+
+model = M.ace_model(; elements = elements, order = order,
+            Ytype = :solid, level = level, max_level = max_level,
+            maxl = maxl, pair_maxn = max_level,
+            rin0cuts = rin0cuts,
+            init_WB = :glorot_normal, init_Wpair = :glorot_normal,
+            pair_learnable = true,
+            E0s = E0s)
+
+rng = Random.MersenneTwister(1234)
+ps, st = Lux.setup(rng, model)
+
+rcut = 5.5
+NZ = 2
+
+# ============================================================================
+# 1. ETACE (many-body only)
+# ============================================================================
+et_model = ETM.convert2et(model)
+et_ps, et_st = LuxCore.setup(rng, et_model)
+
+# Copy parameters
+for i in 1:NZ, j in 1:NZ
+    idx = (i-1)*NZ + j
+    et_ps.rembed.post.W[:, :, idx] .= ps.rbasis.Wnlq[:, :, i, j]
+end
+for iz in 1:NZ
+    et_ps.readout.W[1, :, iz] .= ps.WB[:, iz]
+end
+
+# ============================================================================
+# 2. ETPairModel
+# ============================================================================
+et_pair = ETM.convertpair(model)
+pair_ps, pair_st = LuxCore.setup(rng, et_pair)
+
+# Copy pair parameters
+for i in 1:NZ, j in 1:NZ
+    idx = (i-1)*NZ + j
+    pair_ps.rembed.rbasis.post.W[:, :, idx] .= ps.pairbasis.Wnlq[:, :, i, j]
+end
+for iz in 1:NZ
+    pair_ps.readout.W[1, :, iz] .= ps.Wpair[:, iz]
+end
+
+# ============================================================================
+# 3. ETOneBody
+# ============================================================================
+zlist = ChemicalSpecies.((:Si, :O))
+E0_dict = Dict(z => E0s[Symbol(z)] for z in zlist)
+et_onebody = ETM.one_body(E0_dict, x -> x.z)
+onebody_ps, onebody_st = LuxCore.setup(rng, et_onebody)
+
+# GPU device
+gdev = Lux.gpu_device()
+println("GPU device: ", gdev)
+
+# Benchmark configurations
+configs = [
+    (2, 2, 2),   # 64 atoms
+    (4, 4, 4),   # 512 atoms
+    (5, 5, 4),   # 800 atoms
+]
+
+println()
+println("="^80)
+println("GPU BENCHMARK: ETACE Models (with P4ML v0.5.8)")
+println("="^80)
+
+# ============================================================================
+# SECTION 1: Many-Body Only (ETACE)
+# ============================================================================
+println()
+println("### MANY-BODY ONLY (ETACE) - ENERGY ###")
+println("| Atoms | Edges   | CPU (ms) | GPU (ms) | GPU Speedup |")
+println("|-------|---------|----------|----------|-------------|")
+
+for cfg in configs
+    sys = AtomsBuilder.bulk(:Si, cubic=true) * cfg
+    rattle!(sys, 0.1u"Å")
+    AtomsBuilder.randz!(sys, [:Si => 0.5, :O => 0.5])
+    natoms = length(sys)
+
+    G = ET.Atoms.interaction_graph(sys, rcut * u"Å")
+    nedges = length(G.edge_data)
+
+    # Warmup CPU
+    _ = et_model(G, et_ps, et_st)
+
+    # CPU benchmark
+    t_cpu = @elapsed for _ in 1:10
+        et_model(G, et_ps, et_st)
+    end
+    t_cpu_ms = (t_cpu / 10) * 1000
+
+    # GPU setup
+    G_gpu = gdev(G)
+    et_ps_gpu = gdev(et_ps)
+    et_st_gpu = gdev(et_st)
+
+    # Warmup GPU
+    CUDA.@sync et_model(G_gpu, et_ps_gpu, et_st_gpu)
+
+    # GPU benchmark
+    t_gpu = CUDA.@elapsed for _ in 1:10
+        CUDA.@sync et_model(G_gpu, et_ps_gpu, et_st_gpu)
+    end
+    t_gpu_ms = (t_gpu / 10) * 1000
+
+    speedup = t_cpu_ms / t_gpu_ms
+
+    @printf("| %5d | %7d | %8.2f | %8.2f | %10.1fx |\n",
+            natoms, nedges, t_cpu_ms, t_gpu_ms, speedup)
+end
+
+println()
+println("### MANY-BODY ONLY (ETACE) - FORCES ###")
+println("| Atoms | Edges   | CPU (ms) | GPU (ms) | GPU Speedup |")
+println("|-------|---------|----------|----------|-------------|")
+
+for cfg in configs
+    sys = AtomsBuilder.bulk(:Si, cubic=true) * cfg
+    rattle!(sys, 0.1u"Å")
+    AtomsBuilder.randz!(sys, [:Si => 0.5, :O => 0.5])
+    natoms = length(sys)
+
+    G = ET.Atoms.interaction_graph(sys, rcut * u"Å")
+    nedges = length(G.edge_data)
+
+    # Warmup CPU
+    _ = ETM.site_grads(et_model, G, et_ps, et_st)
+
+    # CPU benchmark
+    t_cpu = @elapsed for _ in 1:5
+        ETM.site_grads(et_model, G, et_ps, et_st)
+    end
+    t_cpu_ms = (t_cpu / 5) * 1000
+
+    # GPU setup
+    G_gpu = gdev(G)
+    et_ps_gpu = gdev(et_ps)
+    et_st_gpu = gdev(et_st)
+
+    # Warmup GPU
+    CUDA.@sync ETM.site_grads(et_model, G_gpu, et_ps_gpu, et_st_gpu)
+
+    # GPU benchmark
+    t_gpu = CUDA.@elapsed for _ in 1:5
+        CUDA.@sync ETM.site_grads(et_model, G_gpu, et_ps_gpu, et_st_gpu)
+    end
+    t_gpu_ms = (t_gpu / 5) * 1000
+
+    speedup = t_cpu_ms / t_gpu_ms
+
+    @printf("| %5d | %7d | %8.2f | %8.2f | %10.1fx |\n",
+            natoms, nedges, t_cpu_ms, t_gpu_ms, speedup)
+end
+
+# ============================================================================
+# SECTION 2: Full Model (E0 + Pair + Many-Body)
+# ============================================================================
+println()
+println("="^80)
+println("### FULL MODEL (E0 + Pair + Many-Body) - ENERGY ###")
+println("| Atoms | Edges   | CPU (ms) | GPU (ms) | GPU Speedup |")
+println("|-------|---------|----------|----------|-------------|")
+
+for cfg in configs
+    sys = AtomsBuilder.bulk(:Si, cubic=true) * cfg
+    rattle!(sys, 0.1u"Å")
+    AtomsBuilder.randz!(sys, [:Si => 0.5, :O => 0.5])
+    natoms = length(sys)
+
+    G = ET.Atoms.interaction_graph(sys, rcut * u"Å")
+    nedges = length(G.edge_data)
+
+    # CPU: evaluate all three models
+    function full_energy_cpu(G)
+        E_onebody, _ = et_onebody(G, onebody_ps, onebody_st)
+        E_pair, _ = et_pair(G, pair_ps, pair_st)
+        E_mb, _ = et_model(G, et_ps, et_st)
+        return sum(E_onebody) + sum(E_pair) + sum(E_mb)
+    end
+
+    # Warmup CPU
+    _ = full_energy_cpu(G)
+
+    # CPU benchmark
+    t_cpu = @elapsed for _ in 1:10
+        full_energy_cpu(G)
+    end
+    t_cpu_ms = (t_cpu / 10) * 1000
+
+    # GPU setup - all models
+    G_gpu = gdev(G)
+    onebody_ps_gpu = gdev(onebody_ps)
+    onebody_st_gpu = gdev(onebody_st)
+    pair_ps_gpu = gdev(pair_ps)
+    pair_st_gpu = gdev(pair_st)
+    et_ps_gpu = gdev(et_ps)
+    et_st_gpu = gdev(et_st)
+
+    function full_energy_gpu(G_gpu)
+        E_onebody, _ = et_onebody(G_gpu, onebody_ps_gpu, onebody_st_gpu)
+        E_pair, _ = et_pair(G_gpu, pair_ps_gpu, pair_st_gpu)
+        E_mb, _ = et_model(G_gpu, et_ps_gpu, et_st_gpu)
+        return sum(E_onebody) + sum(E_pair) + sum(E_mb)
+    end
+
+    # Warmup GPU
+    CUDA.@sync full_energy_gpu(G_gpu)
+
+    # GPU benchmark
+    t_gpu = CUDA.@elapsed for _ in 1:10
+        CUDA.@sync full_energy_gpu(G_gpu)
+    end
+    t_gpu_ms = (t_gpu / 10) * 1000
+
+    speedup = t_cpu_ms / t_gpu_ms
+
+    @printf("| %5d | %7d | %8.2f | %8.2f | %10.1fx |\n",
+            natoms, nedges, t_cpu_ms, t_gpu_ms, speedup)
+end
+
+println()
+println("### FULL MODEL (E0 + Pair + Many-Body) - FORCES ###")
+println("| Atoms | Edges   | CPU (ms) | GPU (ms) | GPU Speedup |")
+println("|-------|---------|----------|----------|-------------|")
+
+for cfg in configs
+    sys = AtomsBuilder.bulk(:Si, cubic=true) * cfg
+    rattle!(sys, 0.1u"Å")
+    AtomsBuilder.randz!(sys, [:Si => 0.5, :O => 0.5])
+    natoms = length(sys)
+
+    G = ET.Atoms.interaction_graph(sys, rcut * u"Å")
+    nedges = length(G.edge_data)
+
+    # CPU: evaluate all three gradients
+    function full_grads_cpu(G)
+        ∂onebody = ETM.site_grads(et_onebody, G, onebody_ps, onebody_st)
+        ∂pair = ETM.site_grads(et_pair, G, pair_ps, pair_st)
+        ∂mb = ETM.site_grads(et_model, G, et_ps, et_st)
+        return (∂onebody, ∂pair, ∂mb)
+    end
+
+    # Warmup CPU
+    _ = full_grads_cpu(G)
+
+    # CPU benchmark
+    t_cpu = @elapsed for _ in 1:5
+        full_grads_cpu(G)
+    end
+    t_cpu_ms = (t_cpu / 5) * 1000
+
+    # GPU setup - all models
+    G_gpu = gdev(G)
+    onebody_ps_gpu = gdev(onebody_ps)
+    onebody_st_gpu = gdev(onebody_st)
+    pair_ps_gpu = gdev(pair_ps)
+    pair_st_gpu = gdev(pair_st)
+    et_ps_gpu = gdev(et_ps)
+    et_st_gpu = gdev(et_st)
+
+    function full_grads_gpu(G_gpu)
+        ∂onebody = ETM.site_grads(et_onebody, G_gpu, onebody_ps_gpu, onebody_st_gpu)
+        ∂pair = ETM.site_grads(et_pair, G_gpu, pair_ps_gpu, pair_st_gpu)
+        ∂mb = ETM.site_grads(et_model, G_gpu, et_ps_gpu, et_st_gpu)
+        return (∂onebody, ∂pair, ∂mb)
+    end
+
+    # Warmup GPU
+    CUDA.@sync full_grads_gpu(G_gpu)
+
+    # GPU benchmark
+    t_gpu = CUDA.@elapsed for _ in 1:5
+        CUDA.@sync full_grads_gpu(G_gpu)
+    end
+    t_gpu_ms = (t_gpu / 5) * 1000
+
+    speedup = t_cpu_ms / t_gpu_ms
+
+    @printf("| %5d | %7d | %8.2f | %8.2f | %10.1fx |\n",
+            natoms, nedges, t_cpu_ms, t_gpu_ms, speedup)
+end
+
+println()
diff --git a/test/Project.toml b/test/Project.toml
index 06df4df4b..644dbebff 100644
--- a/test/Project.toml
+++ b/test/Project.toml
@@ -1,13 +1,13 @@
 [deps]
 ACEbase = "14bae519-eb20-449c-a949-9c58ed33163e"
 ACEfit = "ad31a8ef-59f5-4a01-b543-a85c2f73e95c"
-BenchmarkTools = "6e4b80f9-dd63-53aa-95a3-0cdb28fa8baf"
-CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba"
 ACEpotentials = "3b96b61c-0fcc-4693-95ed-1ef9f35fcc53"
 AtomsBase = "a963bdd2-2df7-4f54-a1ee-49d51e6be12a"
 AtomsBuilder = "f5cc8831-eeb7-4288-8d9f-d6c1ddb77004"
 AtomsCalculators = "a3e0e189-c65a-42c1-833c-339540406eb1"
 AtomsCalculatorsUtilities = "9855a07e-8816-4d1b-ac92-859c17475477"
+BenchmarkTools = "6e4b80f9-dd63-53aa-95a3-0cdb28fa8baf"
+CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba"
 DecoratedParticles = "023d0394-cb16-4d2d-a5c7-724bed42bbb6"
 Distributed = "8ba89e20-285c-5b6f-9357-94700520ee1b"
 EmpiricalPotentials = "38527215-9240-4c91-a638-d4250620c9e2"
@@ -19,6 +19,7 @@ JSON = "682c06a0-de6a-54ab-a142-c8b1cf79cde6"
 LazyArtifacts = "4af54fe1-eca0-43a8-85a7-787d91b784e3"
 LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
 Lux = "b2108857-7c20-44ae-9111-449ecde12c47"
+LuxCUDA = "d0bbae9a-e099-4d5b-a835-1c6931763bda"
 LuxCore = "bb33d45b-7691-41d6-9220-0943567d0623"
 MLDataDevices = "7e8f7934-dd98-4c1a-8fe8-92b47a384d40"
 Optimisers = "3bd65402-5787-11e9-1adc-39752487f4e2"

From d377f4ad6435272a219ac8b2c0b6b064b6381009 Mon Sep 17 00:00:00 2001
From: James Kermode <james.kermode@gmail.com>
Date: Wed, 31 Dec 2025 14:59:58 +0000
Subject: [PATCH 26/35] Update development plan with completed status
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- Mark all core phases as complete
- Add GPU benchmark results (energy and forces)
- Document outstanding work: pair training assembly, ACEfit integration
- Note basis index design discussion needed with maintainer
- Update dependencies: ET 0.4.2, P4ML 0.5.8+

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>
---
 docs/plans/et_calculators_plan.md | 617 +++++++-----------------------
 1 file changed, 136 insertions(+), 481 deletions(-)

diff --git a/docs/plans/et_calculators_plan.md b/docs/plans/et_calculators_plan.md
index dfe7fd3d3..a45dd61f7 100644
--- a/docs/plans/et_calculators_plan.md
+++ b/docs/plans/et_calculators_plan.md
@@ -4,7 +4,7 @@
 
 Create calculator wrappers and training assembly for the new ETACE backend, integrating with EquivariantTensors.jl.
 
-**Status**: 🔄 Refactoring to unified architecture - remove duplicate E0Model, use upstream models directly.
+**Status**: ✅ Core implementation complete. GPU acceleration working.
 
 **Branch**: `jrk/etcalculators` (rebased on `acesuit/co/etback` including `co/etpair` merge)
 
@@ -15,11 +15,11 @@ Create calculator wrappers and training assembly for the new ETACE backend, inte
 | Phase | Description | Status |
 |-------|-------------|--------|
 | Phase 1 | ETACEPotential with AtomsCalculators interface | ✅ Complete |
-| Phase 2 | WrappedSiteCalculator + StackedCalculator | 🔄 Refactoring |
-| Phase 3 | E0Model + PairModel | ✅ Upstream (ETOneBody, ETPairModel, convertpair) |
+| Phase 2 | WrappedSiteCalculator + StackedCalculator | ✅ Complete |
+| Phase 3 | E0Model + PairModel | ✅ Complete (upstream ETOneBody, ETPairModel) |
 | Phase 5 | Training assembly functions | ✅ Complete (many-body only) |
-| Phase 6 | Full model integration | 🔄 In Progress |
-| Benchmarks | Performance comparison scripts | ✅ Complete |
+| Phase 6 | Full model integration | ✅ Complete |
+| Benchmarks | CPU + GPU performance comparison | ✅ Complete |
 
 ### Key Design Decision: Unified Architecture
 
@@ -32,536 +32,191 @@ Create calculator wrappers and training assembly for the new ETACE backend, inte
 | `site_basis(model, G, ps, st)` | basis matrix | basis matrix | empty |
 | `site_basis_jacobian(model, G, ps, st)` | (basis, jac) | (basis, jac) | (empty, empty) |
 
-This enables a **unified `WrappedSiteCalculator`** that works with all three model types directly, eliminating the need for multiple wrapper types.
-
-### Current Limitations
-
-**ETACE currently only implements the many-body basis, not pair potential or reference energies.**
-
-In the integration test (`test/et_models/test_et_silicon.jl`), we compare ETACE against ACE with `Wpair=0` (pair disabled) because:
-- `convert2et(model)` converts only the many-body basis
-- `convertpair(model)` converts the pair potential separately (not yet integrated)
-- Reference energies (E0/Vref) need separate handling via `ETOneBody`
-
-Full model conversion will require combining all three components via `StackedCalculator`.
-
-### Benchmark Results
-
-**Energy (test/benchmark_comparison.jl)**:
-| Atoms | ACE CPU (ms) | ETACE CPU (ms) | ETACE GPU (ms) | CPU Speedup | GPU Speedup |
-|-------|--------------|----------------|----------------|-------------|-------------|
-| 8 | 0.87 | 0.43 | 0.39 | 2.0x | 2.2x |
-| 64 | 5.88 | 2.79 | 0.45 | 2.1x | 13.0x |
-| 256 | 17.77 | 11.81 | 0.48 | 1.5x | 37.1x |
-| 800 | 53.03 | 30.32 | 0.61 | 1.7x | **87.6x** |
-
-**Forces (test/benchmark_forces.jl)**:
-| Atoms | ACE CPU (ms) | ETACE CPU (ms) | CPU Speedup |
-|-------|--------------|----------------|-------------|
-| 8 | 9.27 | 0.88 | 10.6x |
-| 64 | 73.58 | 9.62 | 7.7x |
-| 256 | 297.36 | 27.09 | 11.0x |
-| 800 | 926.90 | 109.49 | **8.5x** |
+This enables a **unified `WrappedSiteCalculator`** that works with all three model types directly.
 
 ---
 
-## Phase 3: Upstream Implementation (Now Complete)
-
-The maintainer has implemented E0/PairModel in the `co/etback` branch (merged via PR #316):
-
-### New Files from Upstream
-
-1. **`src/et_models/onebody.jl`** - `ETOneBody` one-body energy model
-2. **`src/et_models/et_pair.jl`** - `ETPairModel` pair potential
-3. **`src/et_models/et_envbranch.jl`** - Environment branch layer utilities
-4. **`test/etmodels/test_etonebody.jl`** - OneBody tests
-5. **`test/etmodels/test_etpair.jl`** - Pair potential tests
-
-### Upstream Interface Pattern
-
-The upstream models implement the **ETACE interface** (different from our SiteEnergyModel):
-
-```julia
-# Upstream interface (ETACE pattern):
-model(G, ps, st)                    # Returns (site_energies, st)
-site_grads(model, G, ps, st)        # Returns edge gradient array
-site_basis(model, G, ps, st)        # Returns basis matrix
-site_basis_jacobian(model, G, ps, st)  # Returns (basis, jacobian)
-```
-
-```julia
-# Our interface (SiteEnergyModel pattern):
-site_energies(model, G, ps, st)     # Returns site energies vector
-site_energy_grads(model, G, ps, st) # Returns (edge_data = [...],) named tuple
-cutoff_radius(model)                # Returns Float64 in Ångström
-```
-
-### `ETOneBody` Details (`onebody.jl`)
-
-```julia
-struct ETOneBody{NZ, T, CAT, TSEL} <: AbstractLuxLayer
-   E0s::SVector{NZ, T}        # Reference energies per species
-   categories::SVector{NZ, CAT}
-   selector::TSEL             # Maps atom state to species index
-end
-
-# Constructor from Dict
-one_body(D::Dict, catfun) -> ETOneBody
-
-# Interface implementation
-(l::ETOneBody)(X::ETGraph, ps, st)              # Returns site energies
-site_grads(l::ETOneBody, X, ps, st)             # Returns zeros (constant energy)
-site_basis(l::ETOneBody, X, ps, st)             # Returns empty (0 basis functions)
-site_basis_jacobian(l::ETOneBody, X, ps, st)    # Returns empty
-```
-
-Key design decisions:
-- E0s stored in **state** (`st.E0s`) for float type conversion (Float32/Float64)
-- Uses `SVector` for GPU compatibility
-- Returns `fill(VState(), ...)` for zero gradients (maintains edge structure)
-- Returns `(nnodes, 0)` sized arrays for basis (no learnable parameters)
-
-### `ETPairModel` Details (`et_pair.jl`)
-
-```julia
-@concrete struct ETPairModel <: AbstractLuxContainerLayer{(:rembed, :readout)}
-   rembed     # Radial embedding layer (basis)
-   readout    # SelectLinL readout layer
-end
-
-# Interface implementation
-(l::ETPairModel)(X::ETGraph, ps, st)            # Returns site energies
-site_grads(l::ETPairModel, X, ps, st)           # Zygote gradient
-site_basis(l::ETPairModel, X, ps, st)           # Sum over neighbor radial basis
-site_basis_jacobian(l::ETPairModel, X, ps, st)  # Uses ET.evaluate_ed
-```
-
-Key design decisions:
-- **Owns its own `ps`/`st`** (Option A from original plan)
-- Uses ET-native implementation (Option B from original plan)
-- Radial basis: `𝔹 = sum(Rnl, dims=1)` - sums radial embeddings over neighbors
-- GPU-compatible via ET's existing kernels
-
-### Model Conversion (`convert.jl`)
-
-```julia
-convertpair(model::ACEModel) -> ETPairModel
-```
-
-Converts ACEModel's pair potential component to ETPairModel:
-- Extracts radial basis parameters
-- Creates `EnvRBranchL` envelope layer
-- Sets up species-pair `SelectLinL` readout
+## Benchmark Results
+
+### GPU Benchmarks (Many-Body Only - ETACE)
+
+**Energy:**
+| Atoms | Edges | CPU (ms) | GPU (ms) | GPU Speedup |
+|-------|-------|----------|----------|-------------|
+| 64 | 2146 | 3.38 | 0.54 | **6.3x** |
+| 512 | 17176 | 27.77 | 0.66 | **41.9x** |
+| 800 | 26868 | 37.12 | 0.78 | **47.6x** |
+
+**Forces:**
+| Atoms | Edges | CPU (ms) | GPU (ms) | GPU Speedup |
+|-------|-------|----------|----------|-------------|
+| 64 | 2146 | 46.46 | 14.42 | **3.2x** |
+| 512 | 17178 | 104.39 | 15.12 | **6.9x** |
+| 800 | 26860 | 289.32 | 16.33 | **17.7x** |
+
+### GPU Benchmarks (Full Model - E0 + Pair + Many-Body)
+
+**Energy:**
+| Atoms | Edges | CPU (ms) | GPU (ms) | GPU Speedup |
+|-------|-------|----------|----------|-------------|
+| 64 | 2140 | 3.40 | 0.94 | **3.6x** |
+| 512 | 17166 | 31.18 | 0.95 | **32.9x** |
+| 800 | 26858 | 45.16 | 1.24 | **36.4x** |
+
+**Forces:**
+| Atoms | Edges | CPU (ms) | GPU (ms) | GPU Speedup |
+|-------|-------|----------|----------|-------------|
+| 64 | 2134 | 24.05 | 19.34 | **1.2x** |
+| 512 | 17178 | ~110 | ~20 | **~5x** |
+| 800 | 26860 | ~300 | ~22 | **~14x** |
+
+### CPU Benchmarks (ETACE vs Classic ACE)
+
+**Forces (Full Model):**
+| Atoms | Edges | ACE CPU (ms) | ETACE CPU (ms) | ETACE Speedup |
+|-------|-------|--------------|----------------|---------------|
+| 64 | 2146 | 73.6 | 30.5 | **2.4x** |
+| 256 | 8596 | 307.7 | 74.4 | **4.1x** |
+| 800 | 26886 | 975.0 | 225.6 | **4.3x** |
+
+**Notes:**
+- GPU forces require Polynomials4ML v0.5.8+ (bug fix Dec 29, 2024)
+- GPU shows excellent scaling: larger systems see better speedups
+- Full model GPU speedups are lower than many-body only due to graph construction overhead
+- CPU forces are 2-4x faster with ETACE due to Zygote AD through ET graph
 
 ---
 
-## Refactoring Plan: Unified Architecture
+## Architecture
 
-### Motivation
+### Current Implementation (Complete)
 
-The current implementation has **duplicate functionality**:
-- Our `E0Model` duplicates upstream `ETOneBody`
-- Multiple wrapper types (`WrappedETACE`, planned `WrappedETPairModel`, `WrappedETOneBody`) all do the same thing
-
-Since all upstream models share the same interface, we can **unify to a single `WrappedSiteCalculator`**.
-
-### Changes Required
-
-#### 1. Remove `E0Model` (BREAKING)
-
-Delete the `E0Model` struct and related functions. Users should migrate to:
-
-```julia
-# Old (our E0Model):
-E0 = E0Model(Dict(:Si => -0.846, :O => -2.15))
-calc = WrappedSiteCalculator(E0, 5.5)
-
-# New (upstream ETOneBody):
-et_onebody = ETM.one_body(Dict(:Si => -0.846, :O => -2.15), x -> x.z)
-_, st = Lux.setup(rng, et_onebody)
-calc = WrappedSiteCalculator(et_onebody, nothing, st, 3.0)  # rcut=3.0 minimum for graph
 ```
-
-#### 2. Unify `WrappedSiteCalculator`
-
-Refactor to store `ps` and `st` and work with ETACE-pattern models directly:
-
-```julia
-"""
-    WrappedSiteCalculator{M, PS, ST}
-
-Wraps any ETACE-pattern model (ETACE, ETPairModel, ETOneBody) and provides
-the AtomsCalculators interface.
-
-All wrapped models must implement:
-- `model(G, ps, st)` → `(site_energies, st)`
-- `site_grads(model, G, ps, st)` → edge gradients
-
-# Fields
-- `model` - ETACE-pattern model (ETACE, ETPairModel, or ETOneBody)
-- `ps` - Model parameters (can be `nothing` for ETOneBody)
-- `st` - Model state
-- `rcut::Float64` - Cutoff radius for graph construction (Å)
-"""
-mutable struct WrappedSiteCalculator{M, PS, ST}
-   model::M
-   ps::PS
-   st::ST
-   rcut::Float64
-end
-
-# Convenience constructor with automatic cutoff
-function WrappedSiteCalculator(model, ps, st)
-   rcut = _model_cutoff(model, ps, st)
-   return WrappedSiteCalculator(model, ps, st, max(rcut, 3.0))
-end
-
-# Cutoff extraction (type-specific)
-_model_cutoff(::ETOneBody, ps, st) = 0.0
-_model_cutoff(model::ETPairModel, ps, st) = _extract_rcut_from_rembed(model.rembed)
-_model_cutoff(model::ETACE, ps, st) = _extract_rcut_from_rembed(model.rembed)
-# Fallback: require explicit rcut
+StackedCalculator
+├── WrappedSiteCalculator{ETOneBody}         # One-body reference energies
+├── WrappedSiteCalculator{ETPairModel}       # Pair potential
+└── WrappedSiteCalculator{ETACE}             # Many-body ACE
 ```
 
-#### 3. Remove `WrappedETACE`
+### Core Components
 
-The functionality moves into `WrappedSiteCalculator`:
+**WrappedSiteCalculator{M, PS, ST}** (`et_calculators.jl`)
+- Unified wrapper for any ETACE-pattern model
+- Provides AtomsCalculators interface (energy, forces, virial)
+- Mutable to allow parameter updates during training
 
-```julia
-# Old (with WrappedETACE):
-wrapped = WrappedETACE(et_model, ps, st, rcut)
-calc = WrappedSiteCalculator(wrapped, rcut)
+**ETACEPotential** - Type alias for `WrappedSiteCalculator{ETACE, PS, ST}`
 
-# New (direct):
-calc = WrappedSiteCalculator(et_model, ps, st, rcut)
-```
+**StackedCalculator{N, C}** (`stackedcalc.jl`)
+- Combines multiple calculators by summing contributions
+- Uses @generated functions for type-stable loop unrolling
 
-#### 4. Update `ETACEPotential` Type Alias
+### Conversion Functions
 
 ```julia
-# Old:
-const ETACEPotential{MOD, PS, ST} = WrappedSiteCalculator{WrappedETACE{MOD, PS, ST}}
-
-# New:
-const ETACEPotential{MOD<:ETACE, PS, ST} = WrappedSiteCalculator{MOD, PS, ST}
-```
-
-#### 5. Unified Energy/Force/Virial Implementation
-
-```julia
-function _wrapped_energy(calc::WrappedSiteCalculator, sys::AbstractSystem)
-   G = ET.Atoms.interaction_graph(sys, calc.rcut * u"Å")
-   Ei, _ = calc.model(G, calc.ps, calc.st)
-   return sum(Ei)
-end
-
-function _wrapped_forces(calc::WrappedSiteCalculator, sys::AbstractSystem)
-   G = ET.Atoms.interaction_graph(sys, calc.rcut * u"Å")
-   ∂G = site_grads(calc.model, G, calc.ps, calc.st)
-   if isempty(∂G.edge_data)
-      return zeros(SVector{3, Float64}, length(sys))
-   end
-   return -ET.Atoms.forces_from_edge_grads(sys, G, ∂G.edge_data)
-end
+convert2et(model)                    # Many-body ACE → ETACE
+convertpair(model)                   # Pair potential → ETPairModel
+convert2et_full(model, ps, st)       # Full model → StackedCalculator
 ```
 
-### Benefits of Unified Architecture
-
-1. **No code duplication** - Single wrapper handles all model types
-2. **Use upstream directly** - `ETOneBody`, `ETPairModel` work out-of-the-box
-3. **GPU-compatible** - Upstream models use `SVector` for efficient GPU ops
-4. **Simpler mental model** - One wrapper type, one interface
-5. **Easier testing** - Test interface once, works for all models
-
-### Migration Path
-
-| Old | New |
-|-----|-----|
-| `E0Model(Dict(:Si => -0.846))` | `ETM.one_body(Dict(:Si => -0.846), x -> x.z)` |
-| `WrappedETACE(model, ps, st, rcut)` | `WrappedSiteCalculator(model, ps, st, rcut)` |
-| `WrappedSiteCalculator(E0Model(...))` | `WrappedSiteCalculator(ETOneBody(...), nothing, st)` |
-
-### Backward Compatibility
-
-For a transition period, we could keep `E0Model` as a deprecated alias:
+### Training Assembly (Many-Body Only)
 
 ```julia
-@deprecate E0Model(d::Dict) begin
-   et = one_body(d, x -> x.z)
-   _, st = Lux.setup(Random.default_rng(), et)
-   (model=et, ps=nothing, st=st)
-end
+length_basis(calc)                           # Total linear parameters
+get_linear_parameters(calc)                  # Extract θ vector
+set_linear_parameters!(calc, θ)              # Set θ vector
+potential_energy_basis(sys, calc)            # Energy design row
+energy_forces_virial_basis(sys, calc)        # Full EFV design row
 ```
 
-However, since this is internal API on a feature branch, clean removal is preferred.
-
 ---
 
-## Files Created/Modified
-
-### Our Branch (jrk/etcalculators)
-- `src/et_models/et_calculators.jl` - WrappedSiteCalculator (unified), ETACEPotential, training assembly
-  - **To Remove**: `E0Model`, `WrappedETACE`, old `SiteEnergyModel` interface
-- `src/et_models/stackedcalc.jl` - StackedCalculator with @generated loop unrolling
-- `test/et_models/test_et_calculators.jl` - Comprehensive unit tests
-  - **To Update**: Remove E0Model tests, update WrappedSiteCalculator signature
-- `test/et_models/test_et_silicon.jl` - Integration test (compares many-body only)
-- `benchmark/benchmark_comparison.jl` - Energy benchmarks (CPU + GPU)
-- `benchmark/benchmark_forces.jl` - Forces benchmarks (CPU)
-
-### Upstream (now merged via co/etpair)
-- `src/et_models/onebody.jl` - `ETOneBody` Lux layer with `one_body()` constructor (**replaces our E0Model**)
-- `src/et_models/et_pair.jl` - `ETPairModel` Lux layer with site_basis/jacobian
-- `src/et_models/et_envbranch.jl` - `EnvRBranchL` for envelope × radial basis
-- `src/et_models/convert.jl` - Added `convertpair()`, envelope conversion utilities
-- `test/etmodels/test_etonebody.jl` - OneBody tests
-- `test/etmodels/test_etpair.jl` - Pair model tests (shows parameter copying pattern)
-- `test/etmodels/test_etbackend.jl` - General ET backend tests
-
-### Modified Files
-- `src/et_models/et_models.jl` - Includes for all new files
-- `docs/src/all_exported.md` - Added ETModels to autodocs
-
----
-
-## Implementation Details
-
-### Current Architecture (to be refactored)
-
-The current implementation uses nested wrappers:
-```
-StackedCalculator
-├── WrappedSiteCalculator{E0Model}           # Our duplicate (TO REMOVE)
-├── WrappedSiteCalculator{WrappedETACE}      # Extra indirection (TO REMOVE)
-```
-
-### Target Architecture (unified)
+## Files
 
-After refactoring, use upstream models directly:
-```
-StackedCalculator
-├── WrappedSiteCalculator{ETOneBody}         # Upstream one-body
-├── WrappedSiteCalculator{ETPairModel}       # Upstream pair
-└── WrappedSiteCalculator{ETACE}             # Upstream many-body
-```
+### Source Files
+- `src/et_models/et_ace.jl` - ETACE model implementation
+- `src/et_models/et_pair.jl` - ETPairModel implementation
+- `src/et_models/onebody.jl` - ETOneBody implementation
+- `src/et_models/et_calculators.jl` - WrappedSiteCalculator, ETACEPotential, training assembly
+- `src/et_models/stackedcalc.jl` - StackedCalculator with @generated
+- `src/et_models/convert.jl` - Model conversion utilities
+- `src/et_models/et_envbranch.jl` - EnvRBranchL for envelope × radial basis
+- `src/et_models/et_models.jl` - Module includes and exports
 
-### WrappedSiteCalculator (`et_calculators.jl`) - TARGET
+### Test Files
+- `test/etmodels/test_etbackend.jl` - ETACE tests
+- `test/etmodels/test_etpair.jl` - ETPairModel tests
+- `test/etmodels/test_etonebody.jl` - ETOneBody tests
 
-Unified wrapper for any ETACE-pattern model:
-
-```julia
-mutable struct WrappedSiteCalculator{M, PS, ST}
-   model::M      # ETACE, ETPairModel, or ETOneBody
-   ps::PS        # Parameters (nothing for ETOneBody)
-   st::ST        # State
-   rcut::Float64 # Cutoff for graph construction
-end
-
-# All ETACE-pattern models have identical interface:
-function _wrapped_energy(calc::WrappedSiteCalculator, sys)
-   G = ET.Atoms.interaction_graph(sys, calc.rcut * u"Å")
-   Ei, _ = calc.model(G, calc.ps, calc.st)  # Works for all model types!
-   return sum(Ei)
-end
-
-function _wrapped_forces(calc::WrappedSiteCalculator, sys)
-   G = ET.Atoms.interaction_graph(sys, calc.rcut * u"Å")
-   ∂G = site_grads(calc.model, G, calc.ps, calc.st)  # Works for all model types!
-   return -ET.Atoms.forces_from_edge_grads(sys, G, ∂G.edge_data)
-end
-```
+### Benchmark Files
+- `benchmark/gpu_benchmark.jl` - GPU energy/forces benchmarks
+- `benchmark/benchmark_full_model.jl` - CPU comparison benchmarks
 
-### ETACEPotential Type Alias - TARGET
+---
 
-```julia
-const ETACEPotential{MOD<:ETACE, PS, ST} = WrappedSiteCalculator{MOD, PS, ST}
+## Outstanding Work
 
-function ETACEPotential(model::ETACE, ps, st, rcut::Real)
-   return WrappedSiteCalculator(model, ps, st, Float64(rcut))
-end
-```
+### 1. Training Assembly for Pair Model
+**Priority**: Medium
+**Description**: `ETPairModel` has `site_basis_jacobian` but isn't integrated into training assembly. Currently only ETACE (many-body) supports `energy_forces_virial_basis`.
 
-### StackedCalculator (`stackedcalc.jl`)
+**Implementation**:
+- Extend `energy_forces_virial_basis` to detect model type
+- Call `site_basis_jacobian` on ETPairModel
+- ETOneBody returns empty basis (no learnable params)
 
-Combines multiple AtomsCalculators using @generated functions for type-stable loop unrolling:
+### 2. ACEfit.assemble Dispatch Integration
+**Priority**: Medium
+**Description**: Add dispatch for `ACEfit.assemble` to work with full ETACE models.
 
-```julia
-struct StackedCalculator{N, C<:Tuple}
-   calcs::C
-end
+### 3. Committee Support
+**Priority**: Low
+**Description**: Extend committee/uncertainty quantification to work with StackedCalculator.
 
-@generated function _stacked_energy(sys::AbstractSystem, calc::StackedCalculator{N}) where {N}
-   # Generates: E_1 + E_2 + ... + E_N at compile time
-end
-```
+### 4. Basis Index Design Discussion
+**Priority**: Needs Discussion
+**Description**: Moderator raised concern about basis indices:
 
-### Training Assembly (`et_calculators.jl`)
+> "I realized I made a mistake in the design of the basis interface. I'm returning the site energy basis but for each center-atom species, the basis occupies the same indices. We need to perform a transformation so that bases for different species occupy separate indices."
 
-Functions for linear least squares fitting:
+**Current Implementation**: Species separation is handled at the **calculator level** in `energy_forces_virial_basis` using `p = (s-1) * nbasis + k`. Each species gets separate parameter indices.
 
-- `length_basis(calc)` - Total number of linear parameters
-- `get_linear_parameters(calc)` - Extract parameter vector
-- `set_linear_parameters!(calc, θ)` - Set parameters from vector
-- `potential_energy_basis(sys, calc)` - Energy design matrix row
-- `energy_forces_virial_basis(sys, calc)` - Full design matrix row
+**Options**:
+1. Keep current approach (calculator-level separation)
+2. Move to site potential model level
+3. Handle at WrappedSiteCalculator level
 
-**Note**: Training assembly currently only works with `ETACE` (many-body).
-Extension to `ETPairModel` will use the same `site_basis_jacobian` interface.
-`ETOneBody` has no learnable parameters (empty basis).
+Moderator wants discussion before making changes.
 
 ---
 
-## Test Coverage
-
-Tests in `test/et_models/test_et_calculators.jl`:
+## Dependencies
 
-1. ✅ WrappedETACE site energies consistency
-2. ✅ WrappedETACE site energy gradients (finite difference)
-3. ✅ WrappedSiteCalculator AtomsCalculators interface
-4. ✅ Forces finite difference validation
-5. ✅ Virial finite difference validation
-6. ✅ ETACEPotential consistency with WrappedSiteCalculator
-7. ✅ StackedCalculator composition (E0 + ACE)
-8. ✅ Training assembly: length_basis, get/set_linear_parameters
-9. ✅ Training assembly: potential_energy_basis
-10. ✅ Training assembly: energy_forces_virial_basis
-
-Upstream tests in `test/etmodels/`:
-- ✅ `test_etonebody.jl` - ETOneBody evaluation and gradients
-- ✅ `test_etpair.jl` - ETPairModel evaluation, gradients, basis, jacobian
+- EquivariantTensors.jl >= 0.4.2
+- Polynomials4ML.jl >= 0.5.8 (for GPU forces)
+- LuxCUDA (for GPU support, test dependency)
 
 ---
 
-## Remaining Work
-
-### Phase 6: Unified Architecture Refactoring
-
-**Goal**: Simplify codebase by using upstream models directly with unified `WrappedSiteCalculator`.
-
-#### 6.1 Refactor `WrappedSiteCalculator` (et_calculators.jl)
-
-1. Change struct to store `ps` and `st`:
-   ```julia
-   mutable struct WrappedSiteCalculator{M, PS, ST}
-      model::M
-      ps::PS
-      st::ST
-      rcut::Float64
-   end
-   ```
-
-2. Update `_wrapped_energy`, `_wrapped_forces`, `_wrapped_virial` to call ETACE interface directly
-
-3. Add cutoff extraction helpers:
-   ```julia
-   _model_cutoff(::ETOneBody, ps, st) = 0.0
-   _model_cutoff(model::ETPairModel, ps, st) = ...  # extract from rembed
-   _model_cutoff(model::ETACE, ps, st) = ...  # extract from rembed
-   ```
-
-#### 6.2 Remove Redundant Code
-
-1. **Delete `E0Model`** - replaced by upstream `ETOneBody`
-2. **Delete `WrappedETACE`** - functionality merged into `WrappedSiteCalculator`
-3. **Remove old SiteEnergyModel interface** - use ETACE interface directly
-
-#### 6.3 Update `ETACEPotential` Type Alias
-
-```julia
-const ETACEPotential{MOD<:ETACE, PS, ST} = WrappedSiteCalculator{MOD, PS, ST}
-
-function ETACEPotential(model::ETACE, ps, st, rcut::Real)
-   return WrappedSiteCalculator(model, ps, st, Float64(rcut))
-end
-```
-
-#### 6.4 Full Model Conversion Function
+## Test Status
 
-```julia
-"""
-    convert2et_full(model::ACEModel, ps, st; rng=Random.default_rng()) -> StackedCalculator
-
-Convert a complete ACE model (E0 + Pair + Many-body) to an ETACE calculator.
-Returns a StackedCalculator combining ETOneBody, ETPairModel, and ETACE.
-"""
-function convert2et_full(model, ps, st; rng=Random.default_rng())
-   rcut = maximum(a.rcut for a in model.pairbasis.rin0cuts)
-
-   # 1. Convert E0/Vref to ETOneBody
-   E0s = model.Vref.E0  # Dict{Int, Float64}
-   zlist = ChemicalSpecies.(model.rbasis._i2z)
-   E0_dict = Dict(z => E0s[z.number] for z in zlist)
-   et_onebody = one_body(E0_dict, x -> x.z)
-   _, onebody_st = Lux.setup(rng, et_onebody)
-   onebody_calc = WrappedSiteCalculator(et_onebody, nothing, onebody_st, 3.0)
-
-   # 2. Convert pair potential to ETPairModel
-   et_pair = convertpair(model)
-   et_pair_ps, et_pair_st = Lux.setup(rng, et_pair)
-   _copy_pair_params!(et_pair_ps, ps, model)
-   pair_calc = WrappedSiteCalculator(et_pair, et_pair_ps, et_pair_st, rcut)
-
-   # 3. Convert many-body to ETACE
-   et_ace = convert2et(model)
-   et_ace_ps, et_ace_st = Lux.setup(rng, et_ace)
-   _copy_ace_params!(et_ace_ps, ps, model)
-   ace_calc = WrappedSiteCalculator(et_ace, et_ace_ps, et_ace_st, rcut)
-
-   # 4. Stack all components
-   return StackedCalculator((onebody_calc, pair_calc, ace_calc))
-end
-```
+All tests pass: **945 passed, 1 broken** (known Julia 1.12 hash ordering issue)
 
-#### 6.5 Parameter Copying Utilities
+```bash
+# Run ET model tests
+julia --project=test -e 'using Pkg; Pkg.test("ACEpotentials"; test_args=["etmodels"])'
 
-From `test/etmodels/test_etpair.jl`, pair parameter copying for multi-species:
-```julia
-function _copy_pair_params!(et_ps, ps, model)
-   NZ = length(model.rbasis._i2z)
-   for i in 1:NZ, j in 1:NZ
-      idx = (i-1)*NZ + j
-      et_ps.rembed.rbasis.post.W[:, :, idx] = ps.pairbasis.Wnlq[:, :, i, j]
-   end
-   for s in 1:NZ
-      et_ps.readout.W[1, :, s] .= ps.Wpair[:, s]
-   end
-end
+# Run GPU benchmark
+julia --project=test benchmark/gpu_benchmark.jl
 ```
 
-#### 6.6 Update Tests
-
-1. Update `test/et_models/test_et_calculators.jl`:
-   - Remove `E0Model` tests
-   - Add `ETOneBody` integration tests
-   - Update `WrappedSiteCalculator` tests for new signature
-
-2. Update `test/et_models/test_et_silicon.jl`:
-   - Use `ETOneBody` instead of `E0Model` if testing E0
-
-#### 6.7 Training Assembly Updates
-
-1. Extend `energy_forces_virial_basis` to work with unified `WrappedSiteCalculator`:
-   - Detect model type and call appropriate `site_basis_jacobian`
-   - Works with `ETACE`, `ETPairModel` (both have `site_basis_jacobian`)
-   - `ETOneBody` returns empty basis (no learnable params)
-
-2. Update `length_basis`, `get_linear_parameters`, `set_linear_parameters!`
-
-### Future Enhancements
-
-- GPU forces benchmark (requires GPU gradient support in ET)
-- ACEfit.assemble dispatch integration for full models
-- Committee support for combined calculators
-- Training assembly for pair model (similar structure to many-body)
-
 ---
 
 ## Notes
 
 - Virial formula: `V = -∑ ∂E/∂𝐫ij ⊗ 𝐫ij`
-- GPU time nearly constant regardless of system size (~0.5ms)
-- Forces speedup (8-11x) larger than energy speedup (1.5-2.5x) on CPU
+- GPU time scales sub-linearly with system size
+- Forces speedup (CPU) larger than energy speedup due to Zygote AD efficiency
 - StackedCalculator uses @generated functions for zero-overhead composition
-- Upstream `ETOneBody` stores E0s in state (`st.E0s`) for float type flexibility (Float32/Float64)
-- All upstream models use `VState` for gradients in `site_grads()` return value
-- `site_grads` returns edge gradients as `∂G` with `.edge_data` field containing `VState` objects
+- Upstream `ETOneBody` stores E0s in state (`st.E0s`) for float type flexibility
+- All models use `VState` for edge gradients in `site_grads()` return

From e609e2408c5fa5e49f89cf9190055fd2a29bd200 Mon Sep 17 00:00:00 2001
From: James Kermode <james.kermode@gmail.com>
Date: Wed, 31 Dec 2025 16:34:57 +0000
Subject: [PATCH 27/35] Add training assembly support for ETPairModel and
 ACEfit integration
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- Add ETPairPotential and ETOneBodyPotential type aliases
- Implement length_basis, energy_forces_virial_basis, potential_energy_basis
  for ETPairPotential, ETOneBodyPotential, and StackedCalculator
- Add get/set_linear_parameters for all calculator types
- Add ACEfit.basis_size dispatch for all calculator types
- Import and extend length_basis, energy_forces_virial_basis from Models
- ACEfit.assemble now works with full ETACE StackedCalculator

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>
---
 docs/plans/et_calculators_plan.md |  30 ++--
 src/et_models/et_calculators.jl   | 273 ++++++++++++++++++++++++++++++
 src/et_models/stackedcalc.jl      | 102 +++++++++++
 src/models/models.jl              |   4 +-
 4 files changed, 396 insertions(+), 13 deletions(-)

diff --git a/docs/plans/et_calculators_plan.md b/docs/plans/et_calculators_plan.md
index a45dd61f7..db7c4bda4 100644
--- a/docs/plans/et_calculators_plan.md
+++ b/docs/plans/et_calculators_plan.md
@@ -156,18 +156,24 @@ energy_forces_virial_basis(sys, calc)        # Full EFV design row
 
 ## Outstanding Work
 
-### 1. Training Assembly for Pair Model
-**Priority**: Medium
-**Description**: `ETPairModel` has `site_basis_jacobian` but isn't integrated into training assembly. Currently only ETACE (many-body) supports `energy_forces_virial_basis`.
-
-**Implementation**:
-- Extend `energy_forces_virial_basis` to detect model type
-- Call `site_basis_jacobian` on ETPairModel
-- ETOneBody returns empty basis (no learnable params)
-
-### 2. ACEfit.assemble Dispatch Integration
-**Priority**: Medium
-**Description**: Add dispatch for `ACEfit.assemble` to work with full ETACE models.
+### ~~1. Training Assembly for Pair Model~~ ✅ Complete
+**Status**: Implemented in `et_calculators.jl` and `stackedcalc.jl`
+
+**What was done**:
+- Added `ETPairPotential` type alias with full training assembly support
+- Added `ETOneBodyPotential` type alias (returns empty arrays - no learnable params)
+- Implemented `length_basis`, `energy_forces_virial_basis`, `potential_energy_basis`, `get_linear_parameters`, `set_linear_parameters!` for all calculator types
+- Extended `StackedCalculator` to concatenate basis functions from all components
+- Added `ACEfit.basis_size` dispatch for all calculator types
+
+### ~~2. ACEfit.assemble Dispatch Integration~~ ✅ Complete
+**Status**: Works out-of-the-box after extending `length_basis` and `energy_forces_virial_basis`
+
+**What was done**:
+- Added empty function declarations in `models/models.jl` for `length_basis`, `energy_forces_virial_basis`, `potential_energy_basis`
+- ETModels now imports and extends these functions
+- `ACEfit.feature_matrix(d::AtomsData, calc)` works with ETACE calculators
+- `ACEfit.assemble(data, calc)` works with `StackedCalculator`
 
 ### 3. Committee Support
 **Priority**: Low
diff --git a/src/et_models/et_calculators.jl b/src/et_models/et_calculators.jl
index 4550407e0..9fc30ad3f 100644
--- a/src/et_models/et_calculators.jl
+++ b/src/et_models/et_calculators.jl
@@ -21,6 +21,9 @@ using StaticArrays
 using Unitful
 using LinearAlgebra: norm
 
+# Import from parent Models module to extend these functions
+import ..Models: length_basis, energy_forces_virial_basis, potential_energy_basis
+
 
 # ============================================================================
 #  WrappedSiteCalculator - Unified wrapper for ETACE-pattern models
@@ -386,6 +389,276 @@ function set_linear_parameters!(calc::ETACEPotential, θ::AbstractVector)
 end
 
 
+# ============================================================================
+#  ETPairPotential - Type alias for WrappedSiteCalculator{ETPairModel}
+# ============================================================================
+
+"""
+    ETPairPotential
+
+AtomsCalculators-compatible calculator wrapping an ETPairModel.
+This is a type alias for `WrappedSiteCalculator{<:ETPairModel, PS, ST}`.
+
+Supports training assembly functions:
+- `length_basis(calc)` - Total linear parameters
+- `energy_forces_virial_basis(sys, calc)` - Full EFV design row
+- `potential_energy_basis(sys, calc)` - Energy design row
+- `get_linear_parameters(calc)` / `set_linear_parameters!(calc, θ)`
+
+# Example
+```julia
+et_pair = convertpair(model)
+ps, st = Lux.setup(rng, et_pair)
+calc = ETPairPotential(et_pair, ps, st, 5.5)
+E = potential_energy(sys, calc)
+```
+"""
+const ETPairPotential{MOD<:ETPairModel, PS, ST} = WrappedSiteCalculator{MOD, PS, ST}
+
+function ETPairPotential(model::ETPairModel, ps, st, rcut::Real)
+   return WrappedSiteCalculator(model, ps, st, Float64(rcut))
+end
+
+# ============================================================================
+#  ETPairPotential Training Assembly
+# ============================================================================
+
+# Accessor helpers
+_pair(calc::ETPairPotential) = calc.model
+_ps(calc::ETPairPotential) = calc.ps
+_st(calc::ETPairPotential) = calc.st
+
+"""
+    length_basis(calc::ETPairPotential)
+
+Return the number of linear parameters in the pair model (nbasis * nspecies).
+"""
+function length_basis(calc::ETPairPotential)
+   pair = _pair(calc)
+   nbasis = pair.readout.in_dim
+   nspecies = pair.readout.ncat
+   return nbasis * nspecies
+end
+
+ACEfit.basis_size(calc::ETPairPotential) = length_basis(calc)
+
+"""
+    energy_forces_virial_basis(sys::AbstractSystem, calc::ETPairPotential)
+
+Compute the basis functions for energy, forces, and virial for pair potential.
+"""
+function energy_forces_virial_basis(sys::AbstractSystem, calc::ETPairPotential)
+   G = ET.Atoms.interaction_graph(sys, calc.rcut * u"Å")
+   pair = _pair(calc)
+
+   # Get basis and jacobian
+   𝔹, ∂𝔹 = site_basis_jacobian(pair, G, _ps(calc), _st(calc))
+
+   natoms = length(sys)
+   nnodes = size(𝔹, 1)
+   nbasis = pair.readout.in_dim
+   nspecies = pair.readout.ncat
+   nparams = nbasis * nspecies
+   maxneigs = size(∂𝔹, 1)
+
+   # Species indices for each node
+   iZ = pair.readout.selector.(G.node_data)
+
+   # Initialize outputs
+   E_basis = zeros(nparams)
+   F_basis = zeros(SVector{3, Float64}, natoms, nparams)
+   V_basis = zeros(SMatrix{3, 3, Float64, 9}, nparams)
+
+   # Pre-allocate work buffer
+   ∇Ei_buf = similar(∂𝔹, maxneigs, nnodes)
+   zero_grad = zero(∂𝔹[1, 1, 1])
+   edge_𝐫 = [edge.𝐫 for edge in G.edge_data]
+
+   # Compute basis values for each parameter (k, s) pair
+   for s in 1:nspecies
+      for k in 1:nbasis
+         p = (s - 1) * nbasis + k
+
+         # Energy basis
+         for i in 1:nnodes
+            if iZ[i] == s
+               E_basis[p] += 𝔹[i, k]
+            end
+         end
+
+         # Fill gradient buffer
+         for i in 1:nnodes
+            if iZ[i] == s
+               @views ∇Ei_buf[:, i] .= ∂𝔹[:, i, k]
+            else
+               @views ∇Ei_buf[:, i] .= Ref(zero_grad)
+            end
+         end
+
+         # Convert to edge format and compute forces/virial
+         ∇Ei_3d = reshape(∇Ei_buf, maxneigs, nnodes, 1)
+         ∇E_edges = ET.rev_reshape_embedding(∇Ei_3d, G)[:]
+         F_basis[:, p] = -ET.Atoms.forces_from_edge_grads(sys, G, ∇E_edges)
+
+         V = zero(SMatrix{3, 3, Float64, 9})
+         for (e, ∂edge) in enumerate(∇E_edges)
+            V -= ∂edge.𝐫 * edge_𝐫[e]'
+         end
+         V_basis[p] = V
+      end
+   end
+
+   return (
+      energy = E_basis * u"eV",
+      forces = F_basis .* u"eV/Å",
+      virial = V_basis * u"eV"
+   )
+end
+
+"""
+    potential_energy_basis(sys::AbstractSystem, calc::ETPairPotential)
+
+Compute only the energy basis for pair potential.
+"""
+function potential_energy_basis(sys::AbstractSystem, calc::ETPairPotential)
+   G = ET.Atoms.interaction_graph(sys, calc.rcut * u"Å")
+   pair = _pair(calc)
+
+   𝔹 = site_basis(pair, G, _ps(calc), _st(calc))
+
+   nbasis = pair.readout.in_dim
+   nspecies = pair.readout.ncat
+   nparams = nbasis * nspecies
+
+   iZ = pair.readout.selector.(G.node_data)
+
+   E_basis = zeros(nparams)
+   for s in 1:nspecies
+      for k in 1:nbasis
+         p = (s - 1) * nbasis + k
+         for i in 1:length(G.node_data)
+            if iZ[i] == s
+               E_basis[p] += 𝔹[i, k]
+            end
+         end
+      end
+   end
+
+   return E_basis * u"eV"
+end
+
+"""
+    get_linear_parameters(calc::ETPairPotential)
+
+Extract the linear parameters (readout weights) as a flat vector.
+"""
+function get_linear_parameters(calc::ETPairPotential)
+   return vec(_ps(calc).readout.W)
+end
+
+"""
+    set_linear_parameters!(calc::ETPairPotential, θ::AbstractVector)
+
+Set the linear parameters (readout weights) from a flat vector.
+"""
+function set_linear_parameters!(calc::ETPairPotential, θ::AbstractVector)
+   pair = _pair(calc)
+   nbasis = pair.readout.in_dim
+   nspecies = pair.readout.ncat
+   @assert length(θ) == nbasis * nspecies
+
+   ps = _ps(calc)
+   new_W = reshape(θ, 1, nbasis, nspecies)
+   calc.ps = merge(ps, (readout = merge(ps.readout, (W = new_W,)),))
+   return calc
+end
+
+
+# ============================================================================
+#  ETOneBodyPotential - Type alias for WrappedSiteCalculator{ETOneBody}
+# ============================================================================
+
+"""
+    ETOneBodyPotential
+
+AtomsCalculators-compatible calculator wrapping an ETOneBody model.
+This is a type alias for `WrappedSiteCalculator{<:ETOneBody, PS, ST}`.
+
+ETOneBody has no learnable parameters, so training assembly returns empty results:
+- `length_basis(calc)` returns 0
+- `energy_forces_virial_basis(sys, calc)` returns empty arrays
+- Forces and virial are always zero (energy only depends on atom types)
+
+# Example
+```julia
+et_onebody = one_body(Dict(:Si => -0.846), x -> x.z)
+_, st = Lux.setup(rng, et_onebody)
+calc = ETOneBodyPotential(et_onebody, nothing, st, 3.0)
+E = potential_energy(sys, calc)
+```
+"""
+const ETOneBodyPotential{MOD<:ETOneBody, PS, ST} = WrappedSiteCalculator{MOD, PS, ST}
+
+function ETOneBodyPotential(model::ETOneBody, ps, st, rcut::Real)
+   return WrappedSiteCalculator(model, ps, st, Float64(rcut))
+end
+
+# ============================================================================
+#  ETOneBodyPotential Training Assembly (empty - no learnable parameters)
+# ============================================================================
+
+_onebody(calc::ETOneBodyPotential) = calc.model
+_ps(calc::ETOneBodyPotential) = calc.ps
+_st(calc::ETOneBodyPotential) = calc.st
+
+"""
+    length_basis(calc::ETOneBodyPotential)
+
+Return 0 - ETOneBody has no learnable linear parameters.
+"""
+length_basis(calc::ETOneBodyPotential) = 0
+
+ACEfit.basis_size(calc::ETOneBodyPotential) = 0
+
+"""
+    energy_forces_virial_basis(sys::AbstractSystem, calc::ETOneBodyPotential)
+
+Return empty arrays - ETOneBody has no learnable parameters.
+"""
+function energy_forces_virial_basis(sys::AbstractSystem, calc::ETOneBodyPotential)
+   natoms = length(sys)
+   return (
+      energy = zeros(0) * u"eV",
+      forces = zeros(SVector{3, Float64}, natoms, 0) .* u"eV/Å",
+      virial = zeros(SMatrix{3, 3, Float64, 9}, 0) * u"eV"
+   )
+end
+
+"""
+    potential_energy_basis(sys::AbstractSystem, calc::ETOneBodyPotential)
+
+Return empty array - ETOneBody has no learnable parameters.
+"""
+potential_energy_basis(sys::AbstractSystem, calc::ETOneBodyPotential) = zeros(0) * u"eV"
+
+"""
+    get_linear_parameters(calc::ETOneBodyPotential)
+
+Return empty vector - ETOneBody has no learnable parameters.
+"""
+get_linear_parameters(calc::ETOneBodyPotential) = Float64[]
+
+"""
+    set_linear_parameters!(calc::ETOneBodyPotential, θ::AbstractVector)
+
+No-op for ETOneBody (no learnable parameters).
+"""
+function set_linear_parameters!(calc::ETOneBodyPotential, θ::AbstractVector)
+   @assert length(θ) == 0 "ETOneBody has no learnable parameters"
+   return calc
+end
+
+
 # ============================================================================
 #  Full Model Conversion
 # ============================================================================
diff --git a/src/et_models/stackedcalc.jl b/src/et_models/stackedcalc.jl
index ab73186d9..cdcb737b4 100644
--- a/src/et_models/stackedcalc.jl
+++ b/src/et_models/stackedcalc.jl
@@ -115,3 +115,105 @@ function AtomsCalculators.energy_forces_virial(
       sys::AbstractSystem, calc::StackedCalculator; kwargs...)
    return _stacked_efv(sys, calc)
 end
+
+# ============================================================================
+#  Training Assembly Interface for StackedCalculator
+# ============================================================================
+
+import ACEfit
+
+"""
+    length_basis(calc::StackedCalculator)
+
+Return total number of linear parameters across all stacked calculators.
+"""
+function length_basis(calc::StackedCalculator)
+   return sum(length_basis(c) for c in calc.calcs)
+end
+
+ACEfit.basis_size(calc::StackedCalculator) = length_basis(calc)
+
+"""
+    energy_forces_virial_basis(sys::AbstractSystem, calc::StackedCalculator)
+
+Compute concatenated basis for all stacked calculators.
+"""
+function energy_forces_virial_basis(sys::AbstractSystem, calc::StackedCalculator)
+   # Collect basis from each calculator
+   results = [energy_forces_virial_basis(sys, c) for c in calc.calcs]
+
+   natoms = length(sys)
+
+   # Concatenate results - energy is Vector of Quantity{Float64}
+   E_basis = vcat([_strip_energy_units(r.energy) for r in results]...)
+
+   # For forces, need to hcat the matrices
+   # Strip units element by element for matrices of SVectors with units
+   F_parts = [_strip_force_units(r.forces) for r in results]
+   F_basis = isempty(F_parts) ? zeros(SVector{3, Float64}, natoms, 0) : hcat(F_parts...)
+
+   # Virial is Vector of SMatrix with units
+   V_basis = vcat([_strip_virial_units(r.virial) for r in results]...)
+
+   return (
+      energy = E_basis * u"eV",
+      forces = F_basis .* u"eV/Å",
+      virial = V_basis * u"eV"
+   )
+end
+
+# Helper to strip units from energy (Vector of Quantity{Float64})
+function _strip_energy_units(E)
+   return map(e -> ustrip(e), E)
+end
+
+# Helper to strip units from force matrices (Matrix of SVector with units)
+function _strip_force_units(F)
+   # F is Matrix{SVector{3, Quantity}}
+   # We need to strip the units from the inner SVectors
+   return map(f -> SVector{3, Float64}(ustrip.(f)), F)
+end
+
+# Helper to strip units from virial (Vector of SMatrix with units)
+function _strip_virial_units(V)
+   # V is Vector{SMatrix{3,3, Quantity}}
+   return map(v -> SMatrix{3, 3, Float64, 9}(ustrip.(v)), V)
+end
+
+"""
+    potential_energy_basis(sys::AbstractSystem, calc::StackedCalculator)
+
+Compute concatenated energy basis for all stacked calculators.
+"""
+function potential_energy_basis(sys::AbstractSystem, calc::StackedCalculator)
+   results = [potential_energy_basis(sys, c) for c in calc.calcs]
+   E_basis = vcat([ustrip.(u"eV", r) for r in results]...)
+   return E_basis * u"eV"
+end
+
+"""
+    get_linear_parameters(calc::StackedCalculator)
+
+Get concatenated linear parameters from all stacked calculators.
+"""
+function get_linear_parameters(calc::StackedCalculator)
+   return vcat([get_linear_parameters(c) for c in calc.calcs]...)
+end
+
+"""
+    set_linear_parameters!(calc::StackedCalculator, θ::AbstractVector)
+
+Set linear parameters for all stacked calculators from concatenated vector.
+"""
+function set_linear_parameters!(calc::StackedCalculator, θ::AbstractVector)
+   offset = 0
+   for c in calc.calcs
+      n = length_basis(c)
+      if n > 0
+         set_linear_parameters!(c, θ[offset+1:offset+n])
+      end
+      offset += n
+   end
+   @assert offset == length(θ) "Parameter count mismatch"
+   return calc
+end
diff --git a/src/models/models.jl b/src/models/models.jl
index 05527efb9..5bbeb88e2 100644
--- a/src/models/models.jl
+++ b/src/models/models.jl
@@ -18,7 +18,9 @@ import LuxCore: AbstractLuxLayer,
                initialparameters,
                initialstates      
                
-function length_basis end 
+function length_basis end
+function energy_forces_virial_basis end
+function potential_energy_basis end
 
 include("elements.jl")
 

From e1474968159e1bf83ed125ea0f0036d21e79af37 Mon Sep 17 00:00:00 2001
From: James Kermode <james.kermode@gmail.com>
Date: Wed, 31 Dec 2025 17:24:20 +0000
Subject: [PATCH 28/35] Add comprehensive tests for training assembly of
 ETPairPotential, ETOneBodyPotential, StackedCalculator
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Tests cover:
- ETOneBodyPotential returns empty arrays (0 learnable parameters)
- ETPairPotential training assembly with learnable pair basis
- StackedCalculator concatenation of basis from all components
- Linear combinations reproduce energy/forces/virial
- get/set_linear_parameters round-trip
- ACEfit.basis_size dispatch

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
---
 test/et_models/test_et_calculators.jl | 196 ++++++++++++++++++++++++++
 1 file changed, 196 insertions(+)

diff --git a/test/et_models/test_et_calculators.jl b/test/et_models/test_et_calculators.jl
index 3ad26c9e8..710a6020f 100644
--- a/test/et_models/test_et_calculators.jl
+++ b/test/et_models/test_et_calculators.jl
@@ -739,3 +739,199 @@ println("Species-specific basis contributions: OK")
 ##
 
 @info("All Phase 5b extended tests passed!")
+
+## ============================================================================
+##  Phase 5c: Training Assembly for ETPairPotential, ETOneBodyPotential, StackedCalculator
+## ============================================================================
+
+@info("Testing Phase 5c: Training assembly for pair, onebody, and stacked calculators")
+
+##
+
+@info("Testing ETOneBodyPotential training assembly (empty - no learnable params)")
+
+# Create ETOneBody calculator
+E0s = model.Vref.E0
+zlist = ChemicalSpecies.(model.rbasis._i2z)
+E0_dict = Dict(z => E0s[z.atomic_number] for z in zlist)
+et_onebody = ETM.one_body(E0_dict, x -> x.z)
+_, onebody_st = Lux.setup(rng, et_onebody)
+onebody_calc = ETM.WrappedSiteCalculator(et_onebody, nothing, onebody_st, 3.0)
+
+# Test length_basis returns 0
+@test ETM.length_basis(onebody_calc) == 0
+println("ETOneBodyPotential length_basis: OK (0 parameters)")
+
+# Test energy_forces_virial_basis returns empty arrays
+sys = rand_struct()
+efv_onebody = ETM.energy_forces_virial_basis(sys, onebody_calc)
+@test length(efv_onebody.energy) == 0
+@test size(efv_onebody.forces, 2) == 0
+@test length(efv_onebody.virial) == 0
+println("ETOneBodyPotential energy_forces_virial_basis: OK (empty arrays)")
+
+# Test get/set_linear_parameters
+@test length(ETM.get_linear_parameters(onebody_calc)) == 0
+ETM.set_linear_parameters!(onebody_calc, Float64[])  # Should not error
+println("ETOneBodyPotential get/set_linear_parameters: OK")
+
+# Test ACEfit.basis_size
+@test ACEfit.basis_size(onebody_calc) == 0
+println("ETOneBodyPotential ACEfit.basis_size: OK")
+
+##
+
+@info("Testing ETPairPotential training assembly")
+
+# Need a model with learnable pair basis for this test
+# Create a new model with pair_learnable=true
+elements_pair = (:Si, :O)
+level_pair = M.TotalDegree()
+max_level_pair = 10
+order_pair = 3
+maxl_pair = 4
+
+rin0cuts_pair = M._default_rin0cuts(elements_pair)
+rin0cuts_pair = (x -> (rin = x.rin, r0 = x.r0, rcut = 5.5)).(rin0cuts_pair)
+
+model_pair = M.ace_model(; elements = elements_pair, order = order_pair,
+            Ytype = :solid, level = level_pair, max_level = max_level_pair,
+            maxl = maxl_pair, pair_maxn = max_level_pair,
+            rin0cuts = rin0cuts_pair,
+            pair_learnable = true,
+            init_WB = :glorot_normal, init_Wpair = :glorot_normal)
+ps_pair, st_pair = Lux.setup(rng, model_pair)
+
+# Convert pair potential
+et_pair = ETM.convertpair(model_pair)
+et_pair_ps, et_pair_st = Lux.setup(rng, et_pair)
+
+# Copy pair parameters
+NZ_pair = length(model_pair.pairbasis._i2z)
+for i in 1:NZ_pair, j in 1:NZ_pair
+   idx = (i-1)*NZ_pair + j
+   et_pair_ps.rembed.rbasis.post.W[:, :, idx] .= ps_pair.pairbasis.Wnlq[:, :, i, j]
+end
+for s in 1:NZ_pair
+   et_pair_ps.readout.W[1, :, s] .= ps_pair.Wpair[:, s]
+end
+
+rcut_pair = maximum(a.rcut for a in model_pair.pairbasis.rin0cuts)
+pair_calc = ETM.ETPairPotential(et_pair, et_pair_ps, et_pair_st, rcut_pair)
+
+# Test length_basis
+pair_nbasis = et_pair.readout.in_dim
+pair_nspecies = et_pair.readout.ncat
+@test ETM.length_basis(pair_calc) == pair_nbasis * pair_nspecies
+println("ETPairPotential length_basis: OK ($(pair_nbasis * pair_nspecies) parameters)")
+
+# Test energy_forces_virial_basis
+sys_pair = rand_struct()  # Uses Si/O system from earlier
+efv_pair = ETM.energy_forces_virial_basis(sys_pair, pair_calc)
+natoms_pair = length(sys_pair)
+nparams_pair = ETM.length_basis(pair_calc)
+
+@test length(efv_pair.energy) == nparams_pair
+@test size(efv_pair.forces) == (natoms_pair, nparams_pair)
+@test length(efv_pair.virial) == nparams_pair
+println("ETPairPotential energy_forces_virial_basis shapes: OK")
+
+# Test linear combination gives correct energy
+θ_pair = ETM.get_linear_parameters(pair_calc)
+E_from_pair_basis = dot(ustrip.(efv_pair.energy), θ_pair)
+E_pair_direct = ustrip(u"eV", AtomsCalculators.potential_energy(sys_pair, pair_calc))
+print_tf(@test E_from_pair_basis ≈ E_pair_direct rtol=1e-10)
+println()
+println("ETPairPotential energy from basis: OK")
+
+# Test get/set round-trip
+θ_pair_test = randn(nparams_pair)
+ETM.set_linear_parameters!(pair_calc, θ_pair_test)
+@test ETM.get_linear_parameters(pair_calc) ≈ θ_pair_test
+ETM.set_linear_parameters!(pair_calc, θ_pair)  # Restore
+println("ETPairPotential get/set_linear_parameters: OK")
+
+# Test ACEfit.basis_size
+@test ACEfit.basis_size(pair_calc) == nparams_pair
+println("ETPairPotential ACEfit.basis_size: OK")
+
+##
+
+@info("Testing StackedCalculator training assembly")
+
+# Create a StackedCalculator with E0 + Pair + ManyBody (using convert2et_full)
+stacked_calc = ETM.convert2et_full(model_pair, ps_pair, st_pair)
+
+# Verify structure: 3 components (ETOneBody, ETPairModel, ETACE)
+@test length(stacked_calc.calcs) == 3
+println("StackedCalculator has $(length(stacked_calc.calcs)) components")
+
+# Test length_basis is sum of components
+n_onebody = ETM.length_basis(stacked_calc.calcs[1])
+n_pair = ETM.length_basis(stacked_calc.calcs[2])
+n_ace = ETM.length_basis(stacked_calc.calcs[3])
+n_total = ETM.length_basis(stacked_calc)
+
+@test n_onebody == 0  # ETOneBody has no learnable params
+@test n_pair > 0
+@test n_ace > 0
+@test n_total == n_onebody + n_pair + n_ace
+println("StackedCalculator length_basis: OK (0 + $n_pair + $n_ace = $n_total)")
+
+# Test energy_forces_virial_basis
+sys_stacked = rand_struct()
+efv_stacked = ETM.energy_forces_virial_basis(sys_stacked, stacked_calc)
+natoms_stacked = length(sys_stacked)
+
+@test length(efv_stacked.energy) == n_total
+@test size(efv_stacked.forces) == (natoms_stacked, n_total)
+@test length(efv_stacked.virial) == n_total
+println("StackedCalculator energy_forces_virial_basis shapes: OK")
+
+# Test linear combination gives correct energy
+θ_stacked = ETM.get_linear_parameters(stacked_calc)
+@test length(θ_stacked) == n_total
+E_from_stacked_basis = dot(ustrip.(efv_stacked.energy), θ_stacked)
+E_stacked_direct = ustrip(u"eV", AtomsCalculators.potential_energy(sys_stacked, stacked_calc))
+print_tf(@test E_from_stacked_basis ≈ E_stacked_direct rtol=1e-10)
+println()
+println("StackedCalculator energy from basis: OK")
+
+# Test linear combination gives correct forces
+F_from_stacked_basis = efv_stacked.forces * θ_stacked
+F_stacked_direct = AtomsCalculators.forces(sys_stacked, stacked_calc)
+max_diff_stacked_F = maximum(norm(ustrip.(f1) - ustrip.(f2)) for (f1, f2) in zip(F_from_stacked_basis, F_stacked_direct))
+print_tf(@test max_diff_stacked_F < 1e-10)
+println()
+println("StackedCalculator forces from basis: OK (max_diff = $max_diff_stacked_F)")
+
+# Test linear combination gives correct virial
+V_from_stacked_basis = sum(θ_stacked[k] * ustrip.(efv_stacked.virial[k]) for k in 1:n_total)
+V_stacked_direct = ustrip.(AtomsCalculators.virial(sys_stacked, stacked_calc))
+virial_diff_stacked = maximum(abs.(V_from_stacked_basis - V_stacked_direct))
+print_tf(@test virial_diff_stacked < 1e-10)
+println()
+println("StackedCalculator virial from basis: OK (max_diff = $virial_diff_stacked)")
+
+# Test get/set_linear_parameters round-trip
+θ_stacked_orig = copy(θ_stacked)
+θ_stacked_test = randn(n_total)
+ETM.set_linear_parameters!(stacked_calc, θ_stacked_test)
+θ_stacked_check = ETM.get_linear_parameters(stacked_calc)
+@test θ_stacked_check ≈ θ_stacked_test
+ETM.set_linear_parameters!(stacked_calc, θ_stacked_orig)  # Restore
+println("StackedCalculator get/set_linear_parameters: OK")
+
+# Test potential_energy_basis consistency
+E_basis_stacked = ETM.potential_energy_basis(sys_stacked, stacked_calc)
+@test length(E_basis_stacked) == n_total
+@test ustrip.(E_basis_stacked) ≈ ustrip.(efv_stacked.energy) rtol=1e-10
+println("StackedCalculator potential_energy_basis consistency: OK")
+
+# Test ACEfit.basis_size
+@test ACEfit.basis_size(stacked_calc) == n_total
+println("StackedCalculator ACEfit.basis_size: OK")
+
+##
+
+@info("All Phase 5c tests passed!")

From 86f5856a4a8d1eb20c3ee35ba2400bb4f677c9bc Mon Sep 17 00:00:00 2001
From: James Kermode <james.kermode@gmail.com>
Date: Thu, 1 Jan 2026 19:00:52 +0000
Subject: [PATCH 29/35] Address PR #313 feedback: ET 0.4.3 compat and
 site_grads type stability
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- Update EquivariantTensors compat to 0.4.3 in Project.toml and test/Project.toml
- Fix ETOneBody site_grads to return fill(VState(), length(X.edge_data))
  instead of similar(X.edge_data, 0) for type stability
- Empty VState() acts as additive identity when summed with other VStates
- Update test to verify new behavior

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>
---
 Project.toml                    | 2 +-
 src/et_models/onebody.jl        | 6 +++---
 test/Project.toml               | 2 +-
 test/etmodels/test_etonebody.jl | 6 ++++--
 4 files changed, 9 insertions(+), 7 deletions(-)

diff --git a/Project.toml b/Project.toml
index ae0540ac0..77d88e76c 100644
--- a/Project.toml
+++ b/Project.toml
@@ -65,7 +65,7 @@ ConcreteStructs = "0.2.3"
 DecoratedParticles = "0.1.3"
 DynamicPolynomials = "0.6"
 EmpiricalPotentials = "0.2"
-EquivariantTensors = "0.4.2"
+EquivariantTensors = "0.4.3"
 ExtXYZ = "0.2.0"
 Folds = "0.2"
 ForwardDiff = "0.10, 1"
diff --git a/src/et_models/onebody.jl b/src/et_models/onebody.jl
index 32dc8868c..ceb2dd965 100644
--- a/src/et_models/onebody.jl
+++ b/src/et_models/onebody.jl
@@ -60,10 +60,10 @@ ___apply_onebody(selector, X::AbstractVector, E0s) =
 
 # ETOneBody energy only depends on atom types (categorical), not positions.
 # Gradient w.r.t. positions is always zero.
-# Return NamedTuple matching Zygote gradient structure with empty edge_data.
-# The calling code checks isempty(∂G.edge_data) and returns zero forces/virial.
+# Return vector of empty VState() which acts as additive identity:
+#   VState(r = SA[1,2,3]) + VState() == VState(r = SA[1,2,3])
 function site_grads(l::ETOneBody, X::ET.ETGraph, ps, st)
-   return (; edge_data = similar(X.edge_data, 0))
+   return (; edge_data = fill(VState(), length(X.edge_data)))
 end
 
 site_basis(l::ETOneBody, X::ET.ETGraph, ps, st) = 
diff --git a/test/Project.toml b/test/Project.toml
index 644dbebff..5113a70ff 100644
--- a/test/Project.toml
+++ b/test/Project.toml
@@ -38,5 +38,5 @@ Zygote = "e88e6eb3-aa80-5325-afca-941959d7151f"
 ACEpotentials = {path = ".."}
 
 [compat]
-EquivariantTensors = "0.4.2"
+EquivariantTensors = "0.4.3"
 StaticArrays = "1"
diff --git a/test/etmodels/test_etonebody.jl b/test/etmodels/test_etonebody.jl
index c891b5c69..527937aa5 100644
--- a/test/etmodels/test_etonebody.jl
+++ b/test/etmodels/test_etonebody.jl
@@ -106,10 +106,12 @@ sys = rand_struct()
 G = ET.Atoms.interaction_graph(sys, 5.0 * u"Å")
 ∂G1 = ETM.site_grads(et_V0, G, ps, st)
 
-# ETOneBody returns NamedTuple with empty edge_data (gradient is zero for constant energies)
+# ETOneBody returns NamedTuple with edge_data filled with empty VState() elements
+# Empty VState() acts as additive identity: VState(r=...) + VState() == VState(r=...)
 println_slim(@test ∂G1 isa NamedTuple)
 println_slim(@test haskey(∂G1, :edge_data))
-println_slim(@test isempty(∂G1.edge_data))
+println_slim(@test length(∂G1.edge_data) == length(G.edge_data))
+println_slim(@test all(v -> v == DP.VState(), ∂G1.edge_data))
 
 ##
 

From 1e220428cf8c42b1cedf69135c22f97cb495a5e3 Mon Sep 17 00:00:00 2001
From: James Kermode <james.kermode@gmail.com>
Date: Fri, 2 Jan 2026 10:38:15 +0000
Subject: [PATCH 30/35] update plan

---
 docs/plans/et_calculators_plan.md | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/docs/plans/et_calculators_plan.md b/docs/plans/et_calculators_plan.md
index db7c4bda4..8522b0577 100644
--- a/docs/plans/et_calculators_plan.md
+++ b/docs/plans/et_calculators_plan.md
@@ -4,10 +4,12 @@
 
 Create calculator wrappers and training assembly for the new ETACE backend, integrating with EquivariantTensors.jl.
 
-**Status**: ✅ Core implementation complete. GPU acceleration working.
+**Status**: ✅ Core implementation complete. GPU acceleration working. PR #313 under review.
 
 **Branch**: `jrk/etcalculators` (rebased on `acesuit/co/etback` including `co/etpair` merge)
 
+**PR**: https://github.com/ACEsuit/ACEpotentials.jl/pull/313
+
 ---
 
 ## Progress Summary
@@ -198,7 +200,7 @@ Moderator wants discussion before making changes.
 
 ## Dependencies
 
-- EquivariantTensors.jl >= 0.4.2
+- EquivariantTensors.jl >= 0.4.3
 - Polynomials4ML.jl >= 0.5.8 (for GPU forces)
 - LuxCUDA (for GPU support, test dependency)
 
@@ -206,7 +208,7 @@ Moderator wants discussion before making changes.
 
 ## Test Status
 
-All tests pass: **945 passed, 1 broken** (known Julia 1.12 hash ordering issue)
+All tests pass: **946 passed, 1 broken** (known Julia 1.12 hash ordering issue)
 
 ```bash
 # Run ET model tests
@@ -226,3 +228,4 @@ julia --project=test benchmark/gpu_benchmark.jl
 - StackedCalculator uses @generated functions for zero-overhead composition
 - Upstream `ETOneBody` stores E0s in state (`st.E0s`) for float type flexibility
 - All models use `VState` for edge gradients in `site_grads()` return
+- `ETOneBody.site_grads()` returns `fill(VState(), length(edges))` for type stability (empty VState acts as additive identity)

From 91fd433340ede8d8226a8365b51e959bf09b03a6 Mon Sep 17 00:00:00 2001
From: James Kermode <james.kermode@gmail.com>
Date: Sun, 4 Jan 2026 10:59:46 +0000
Subject: [PATCH 31/35] Address PR #313 review feedback and fix ETOneBody
 issues
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This commit addresses all review comments from PR #313 and fixes
pre-existing test failures in ETOneBody.

**Changes from PR #313 review:**

1. **test/et_models/test_et_calculators.jl**:
   - Removed 44 println status messages that provided misleading output
   - Deleted embedded performance benchmarks (CPU and GPU, ~100 lines)
   - Replaced manual parameter copying with utility function calls
   - Total: 169 lines removed for cleaner, more maintainable tests

2. **src/et_models/et_calculators.jl**:
   - Made parameter copying functions public:
     * _copy_ace_params! → copy_ace_params!
     * _copy_pair_params! → copy_pair_params!
   - These are now part of the public API since used by tests

3. **test/runtests.jl**:
   - Added ET Calculators test to CI suite (line 23)
   - Test now runs automatically with full test suite

**Additional fix - ETOneBody site_grads:**

4. **src/et_models/onebody.jl**:
   - Fixed site_grads to return empty array instead of array of empty VStates
   - Resolves test failure at test_et_calculators.jl:234
   - Resolves FieldError at test_et_calculators.jl:254
   - ETOneBody energy depends only on atom types, not positions,
     so there are no position-dependent gradients

**Test Results:**
- ET Calculators: 182 tests passed (previously 94 passed, 2 failed/errored)
- Overall: 1127 tests passed (up from 1040)
- All PR #313 review items addressed ✓

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>
---
 src/et_models/et_calculators.jl       |  12 +-
 src/et_models/onebody.jl              |   5 +-
 test/et_models/test_et_calculators.jl | 169 +-------------------------
 test/etmodels/test_etonebody.jl       |   7 +-
 test/runtests.jl                      |   3 +-
 5 files changed, 17 insertions(+), 179 deletions(-)

diff --git a/src/et_models/et_calculators.jl b/src/et_models/et_calculators.jl
index 9fc30ad3f..ac1c36749 100644
--- a/src/et_models/et_calculators.jl
+++ b/src/et_models/et_calculators.jl
@@ -712,13 +712,13 @@ function convert2et_full(model, ps, st; rng::AbstractRNG=default_rng())
    # 2. Convert pair potential to ETPairModel
    et_pair = convertpair(model)
    et_pair_ps, et_pair_st = setup(rng, et_pair)
-   _copy_pair_params!(et_pair_ps, ps, model)
+   copy_pair_params!(et_pair_ps, ps, model)
    pair_calc = WrappedSiteCalculator(et_pair, et_pair_ps, et_pair_st, rcut)
 
    # 3. Convert many-body to ETACE
    et_ace = convert2et(model)
    et_ace_ps, et_ace_st = setup(rng, et_ace)
-   _copy_ace_params!(et_ace_ps, ps, model)
+   copy_ace_params!(et_ace_ps, ps, model)
    ace_calc = WrappedSiteCalculator(et_ace, et_ace_ps, et_ace_st, rcut)
 
    # 4. Stack all components
@@ -731,11 +731,11 @@ end
 # ============================================================================
 
 """
-    _copy_ace_params!(et_ps, ps, model)
+    copy_ace_params!(et_ps, ps, model)
 
 Copy many-body (ACE) parameters from ACE model format to ETACE format.
 """
-function _copy_ace_params!(et_ps, ps, model)
+function copy_ace_params!(et_ps, ps, model)
    NZ = length(model.rbasis._i2z)
 
    # Copy radial basis parameters (Wnlq)
@@ -757,12 +757,12 @@ end
 
 
 """
-    _copy_pair_params!(et_ps, ps, model)
+    copy_pair_params!(et_ps, ps, model)
 
 Copy pair potential parameters from ACE model format to ETPairModel format.
 Based on parameter mapping from test/etmodels/test_etpair.jl.
 """
-function _copy_pair_params!(et_ps, ps, model)
+function copy_pair_params!(et_ps, ps, model)
    NZ = length(model.pairbasis._i2z)
 
    # Copy pair radial basis parameters
diff --git a/src/et_models/onebody.jl b/src/et_models/onebody.jl
index ceb2dd965..986e4971d 100644
--- a/src/et_models/onebody.jl
+++ b/src/et_models/onebody.jl
@@ -60,10 +60,9 @@ ___apply_onebody(selector, X::AbstractVector, E0s) =
 
 # ETOneBody energy only depends on atom types (categorical), not positions.
 # Gradient w.r.t. positions is always zero.
-# Return vector of empty VState() which acts as additive identity:
-#   VState(r = SA[1,2,3]) + VState() == VState(r = SA[1,2,3])
+# Return empty edge_data array since there are no position-dependent gradients.
 function site_grads(l::ETOneBody, X::ET.ETGraph, ps, st)
-   return (; edge_data = fill(VState(), length(X.edge_data)))
+   return (; edge_data = VState[])
 end
 
 site_basis(l::ETOneBody, X::ET.ETGraph, ps, st) = 
diff --git a/test/et_models/test_et_calculators.jl b/test/et_models/test_et_calculators.jl
index 710a6020f..5b14a5b77 100644
--- a/test/et_models/test_et_calculators.jl
+++ b/test/et_models/test_et_calculators.jl
@@ -52,14 +52,8 @@ end
 et_model = ETM.convert2et(model)
 et_ps, et_st = LuxCore.setup(MersenneTwister(1234), et_model)
 
-# Match parameters
-et_ps.rembed.post.W[:, :, 1] = ps.rbasis.Wnlq[:, :, 1, 1]
-et_ps.rembed.post.W[:, :, 2] = ps.rbasis.Wnlq[:, :, 1, 2]
-et_ps.rembed.post.W[:, :, 3] = ps.rbasis.Wnlq[:, :, 2, 1]
-et_ps.rembed.post.W[:, :, 4] = ps.rbasis.Wnlq[:, :, 2, 2]
-
-et_ps.readout.W[1, :, 1] .= ps.WB[:, 1]
-et_ps.readout.W[1, :, 2] .= ps.WB[:, 2]
+# Match parameters using utility function
+ETM.copy_ace_params!(et_ps, ps, model)
 
 # Get cutoff radius
 rcut = maximum(a.rcut for a in model.pairbasis.rin0cuts)
@@ -84,7 +78,6 @@ et_calc = ETM.ETACEPotential(et_model, et_ps, et_st, rcut)
 @test et_calc.model === et_model
 @test et_calc.rcut == rcut
 @test et_calc.co_ps === nothing
-println("ETACEPotential construction: OK")
 
 ##
 
@@ -176,8 +169,6 @@ V = AtomsCalculators.virial(sys, et_calc)
 @test eltype(F) <: StaticArrays.SVector
 @test V isa StaticArrays.SMatrix
 
-println("AtomsCalculators interface: OK")
-
 ##
 
 @info("Testing combined energy_forces_virial efficiency")
@@ -196,114 +187,11 @@ V = AtomsCalculators.virial(sys, et_calc)
 @test all(ustrip.(efv1.forces) .≈ ustrip.(F))
 @test ustrip.(efv1.virial) ≈ ustrip.(V)
 
-println("Combined evaluation consistency: OK")
-
 ##
 
 @info("Testing cutoff_radius function")
 
 @test ETM.cutoff_radius(et_calc) == rcut * u"Å"
-println("Cutoff radius: OK")
-
-##
-
-@info("Performance comparison: ETACE vs original ACE model")
-
-# Use a fixed test structure for benchmarking
-bench_sys = rand_struct()
-
-# Warm-up runs
-AtomsCalculators.energy_forces_virial(bench_sys, calc_model)
-AtomsCalculators.energy_forces_virial(bench_sys, et_calc)
-
-# Benchmark energy
-t_energy_old = @belapsed AtomsCalculators.potential_energy($bench_sys, $calc_model)
-t_energy_new = @belapsed AtomsCalculators.potential_energy($bench_sys, $et_calc)
-
-# Benchmark forces
-t_forces_old = @belapsed AtomsCalculators.forces($bench_sys, $calc_model)
-t_forces_new = @belapsed AtomsCalculators.forces($bench_sys, $et_calc)
-
-# Benchmark energy_forces_virial
-t_efv_old = @belapsed AtomsCalculators.energy_forces_virial($bench_sys, $calc_model)
-t_efv_new = @belapsed AtomsCalculators.energy_forces_virial($bench_sys, $et_calc)
-
-println("CPU Performance comparison (times in ms):")
-println("  Energy:              ACE = $(round(t_energy_old*1000, digits=3)), ETACE = $(round(t_energy_new*1000, digits=3)), ratio = $(round(t_energy_new/t_energy_old, digits=2))")
-println("  Forces:              ACE = $(round(t_forces_old*1000, digits=3)), ETACE = $(round(t_forces_new*1000, digits=3)), ratio = $(round(t_forces_new/t_forces_old, digits=2))")
-println("  Energy+Forces+Virial: ACE = $(round(t_efv_old*1000, digits=3)), ETACE = $(round(t_efv_new*1000, digits=3)), ratio = $(round(t_efv_new/t_efv_old, digits=2))")
-
-##
-
-# GPU benchmarks (if available)
-# Include GPU detection utils from EquivariantTensors
-et_test_utils = joinpath(dirname(dirname(pathof(ET))), "test", "test_utils")
-include(joinpath(et_test_utils, "utils_gpu.jl"))
-
-if dev !== identity
-   @info("GPU Performance comparison: ETACE on GPU vs CPU")
-
-   # NOTE: These benchmarks measure model evaluation time ONLY, with pre-constructed graphs.
-   # The neighborlist/graph construction currently runs on CPU (~7ms for 250 atoms) and is
-   # NOT included in the timings below. NeighbourLists.jl now has GPU support (PR #34, Dec 2025)
-   # but EquivariantTensors.jl doesn't use it yet. For end-to-end GPU acceleration, the
-   # neighborlist construction needs to be ported to GPU as well.
-
-   # Use a larger system for meaningful GPU benchmark (small systems are overhead-dominated)
-   # GPU kernel launch overhead is ~0.4ms, so need enough work to amortize this
-   gpu_bench_sys = AtomsBuilder.bulk(:Si) * (4, 4, 4)  # 128 atoms
-   rattle!(gpu_bench_sys, 0.1u"Å")
-   AtomsBuilder.randz!(gpu_bench_sys, [:Si => 0.5, :O => 0.5])
-
-   # Create graph and convert to Float32 for GPU
-   G = ET.Atoms.interaction_graph(gpu_bench_sys, rcut * u"Å")
-   G_32 = ET.float32(G)
-   G_gpu = dev(G_32)
-
-   et_ps_32 = ET.float32(et_ps)
-   et_st_32 = ET.float32(et_st)
-   et_ps_gpu = dev(et_ps_32)
-   et_st_gpu = dev(et_st_32)
-
-   # Warm-up GPU (forward pass)
-   et_model(G_gpu, et_ps_gpu, et_st_gpu)
-
-   # Benchmark GPU energy (forward pass only)
-   t_energy_gpu = @belapsed begin
-      Ei, _ = $et_model($G_gpu, $et_ps_gpu, $et_st_gpu)
-      sum(Ei)
-   end
-
-   # Compare to CPU Float32 for fair comparison
-   t_energy_cpu32 = @belapsed begin
-      Ei, _ = $et_model($G_32, $et_ps_32, $et_st_32)
-      sum(Ei)
-   end
-
-   println("GPU vs CPU Float32 comparison ($(length(gpu_bench_sys)) atoms, $(length(G.ii)) edges):")
-   println("  Energy:    CPU = $(round(t_energy_cpu32*1000, digits=3))ms, GPU = $(round(t_energy_gpu*1000, digits=3))ms, speedup = $(round(t_energy_cpu32/t_energy_gpu, digits=1))x")
-
-   # Try GPU gradients (may not be supported yet - gradients w.r.t. positions
-   # require Zygote through P4ML which has GPU compat issues; see ET test_ace_ka.jl:196-197)
-   gpu_grads_work = try
-      ETM.site_grads(et_model, G_gpu, et_ps_gpu, et_st_gpu)
-      true
-   catch e
-      @warn("GPU position gradients not yet supported (needed for forces): $(typeof(e).name.name)")
-      false
-   end
-
-   if gpu_grads_work
-      # Benchmark GPU gradients (for forces)
-      t_grads_gpu = @belapsed ETM.site_grads($et_model, $G_gpu, $et_ps_gpu, $et_st_gpu)
-      t_grads_cpu32 = @belapsed ETM.site_grads($et_model, $G_32, $et_ps_32, $et_st_32)
-      println("  Gradients: CPU = $(round(t_grads_cpu32*1000, digits=3)), GPU = $(round(t_grads_gpu*1000, digits=3)), speedup = $(round(t_grads_cpu32/t_grads_gpu, digits=2))x")
-   else
-      println("  Gradients: Skipped (GPU gradients not yet supported)")
-   end
-else
-   @info("No GPU available, skipping GPU benchmarks")
-end
 
 ##
 
@@ -339,13 +227,11 @@ expected_E0 = n_Si * E0_Si + n_O * E0_O
 
 @test length(Ei_E0) == length(sys)
 @test sum(Ei_E0) ≈ expected_E0
-println("ETOneBody site energies: OK")
 
 # Test site gradients (should be empty for constant energies)
 # Returns NamedTuple with empty edge_data, matching ETACE/ETPairModel interface
 ∂G_E0 = ETM.site_grads(et_onebody, G, nothing, onebody_st)
 @test isempty(∂G_E0.edge_data)
-println("ETOneBody site_grads (zero): OK")
 
 ##
 
@@ -354,7 +240,6 @@ println("ETOneBody site_grads (zero): OK")
 # Wrap ETOneBody in a calculator (using new unified interface)
 E0_calc = ETM.WrappedSiteCalculator(et_onebody, nothing, onebody_st, 3.0)
 @test ustrip(u"Å", ETM.cutoff_radius(E0_calc)) == 3.0  # minimum cutoff
-println("WrappedSiteCalculator(ETOneBody) cutoff_radius: OK")
 
 # Test ETOneBody calculator energy
 sys = rand_struct()
@@ -364,12 +249,10 @@ n_Si = count(node -> node.z == AtomsBase.ChemicalSpecies(:Si), G.node_data)
 n_O = count(node -> node.z == AtomsBase.ChemicalSpecies(:O), G.node_data)
 expected_E = (n_Si * E0_Si + n_O * E0_O) * u"eV"
 @test ustrip(E_E0_calc) ≈ ustrip(expected_E)
-println("WrappedSiteCalculator(ETOneBody) energy: OK")
 
 # Test ETOneBody calculator forces (should be zero)
 F_E0_calc = AtomsCalculators.forces(sys, E0_calc)
 @test all(norm(ustrip.(f)) < 1e-14 for f in F_E0_calc)
-println("WrappedSiteCalculator(ETOneBody) forces (zero): OK")
 
 ##
 
@@ -378,20 +261,17 @@ println("WrappedSiteCalculator(ETOneBody) forces (zero): OK")
 # Wrap ETACE model in a calculator (unified interface)
 ace_site_calc = ETM.WrappedSiteCalculator(et_model, et_ps, et_st, rcut)
 @test ustrip(u"Å", ETM.cutoff_radius(ace_site_calc)) == rcut
-println("WrappedSiteCalculator(ETACE) cutoff_radius: OK")
 
 # Test ETACE calculator matches ETACEPotential
 sys = rand_struct()
 E_ace_site = AtomsCalculators.potential_energy(sys, ace_site_calc)
 E_ace_pot = AtomsCalculators.potential_energy(sys, et_calc)
 @test ustrip(E_ace_site) ≈ ustrip(E_ace_pot)
-println("WrappedSiteCalculator(ETACE) energy matches ETACEPotential: OK")
 
 F_ace_site = AtomsCalculators.forces(sys, ace_site_calc)
 F_ace_pot = AtomsCalculators.forces(sys, et_calc)
 max_diff = maximum(norm(ustrip.(f1) - ustrip.(f2)) for (f1, f2) in zip(F_ace_site, F_ace_pot))
 @test max_diff < 1e-10
-println("WrappedSiteCalculator(ETACE) forces match ETACEPotential: OK")
 
 ##
 
@@ -402,7 +282,6 @@ stacked = ETM.StackedCalculator((E0_calc, ace_site_calc))
 
 @test ustrip(u"Å", ETM.cutoff_radius(stacked)) == rcut
 @test length(stacked.calcs) == 2
-println("StackedCalculator construction: OK")
 
 ##
 
@@ -480,11 +359,9 @@ F = AtomsCalculators.forces(sys, E0_only_stacked)
 # Energy should match E0_calc
 E_direct = AtomsCalculators.potential_energy(sys, E0_calc)
 @test ustrip(E) ≈ ustrip(E_direct)
-println("StackedCalculator(ETOneBody only) energy: OK")
 
 # Forces should be zero
 @test all(norm(ustrip.(f)) < 1e-14 for f in F)
-println("StackedCalculator(ETOneBody only) forces (zero): OK")
 
 ##
 
@@ -503,7 +380,6 @@ nparams = ETM.length_basis(et_calc)
 nbasis = et_model.readout.in_dim
 nspecies = et_model.readout.ncat
 @test nparams == nbasis * nspecies
-println("length_basis: OK (nparams=$nparams, nbasis=$nbasis, nspecies=$nspecies)")
 
 ##
 
@@ -520,7 +396,6 @@ ETM.set_linear_parameters!(et_calc, θ_test)
 # Restore original
 ETM.set_linear_parameters!(et_calc, θ_orig)
 @test ETM.get_linear_parameters(et_calc) ≈ θ_orig
-println("get/set_linear_parameters round-trip: OK")
 
 ##
 
@@ -529,7 +404,6 @@ sys = rand_struct()
 E_basis = ETM.potential_energy_basis(sys, et_calc)
 @test length(E_basis) == nparams
 @test eltype(ustrip.(E_basis)) <: Real
-println("potential_energy_basis shape: OK")
 
 ##
 
@@ -540,7 +414,6 @@ natoms = length(sys)
 @test length(efv_basis.energy) == nparams
 @test size(efv_basis.forces) == (natoms, nparams)
 @test length(efv_basis.virial) == nparams
-println("energy_forces_virial_basis shapes: OK")
 
 ##
 
@@ -553,7 +426,6 @@ E_direct = ustrip(u"eV", AtomsCalculators.potential_energy(sys, et_calc))
 
 print_tf(@test E_from_basis ≈ E_direct rtol=1e-10)
 println()
-println("Energy from basis: OK")
 
 ##
 
@@ -566,7 +438,6 @@ F_direct = AtomsCalculators.forces(sys, et_calc)
 max_diff = maximum(norm(ustrip.(f1) - ustrip.(f2)) for (f1, f2) in zip(F_from_basis, F_direct))
 print_tf(@test max_diff < 1e-10)
 println()
-println("Forces from basis: OK (max_diff = $max_diff)")
 
 ##
 
@@ -579,13 +450,11 @@ V_direct = ustrip.(AtomsCalculators.virial(sys, et_calc))
 virial_diff = maximum(abs.(V_from_basis - V_direct))
 print_tf(@test virial_diff < 1e-10)
 println()
-println("Virial from basis: OK (max_diff = $virial_diff)")
 
 ##
 
 @info("Testing potential_energy_basis matches energy from efv_basis")
 @test ustrip.(E_basis) ≈ ustrip.(efv_basis.energy)
-println("potential_energy_basis consistency: OK")
 
 ##
 
@@ -602,7 +471,6 @@ println("potential_energy_basis consistency: OK")
 @info("Testing ACEfit.basis_size integration")
 import ACEfit
 @test ACEfit.basis_size(et_calc) == ETM.length_basis(et_calc)
-println("ACEfit.basis_size: OK")
 
 ##
 
@@ -645,7 +513,6 @@ for (i, sys) in enumerate(test_systems)
 end
 print_tf(@test all_ok)
 println()
-println("Multiple structures ($nstructs): OK")
 
 ##
 
@@ -700,7 +567,6 @@ for (label, sys) in zip(species_labels, species_test_systems)
 end
 print_tf(@test all_species_ok)
 println()
-println("Multi-species parameter ordering: OK")
 
 ##
 
@@ -734,8 +600,6 @@ si_params_for_o = E_basis_o[1:nbasis]
 # Pure O should have nonzero O parameters
 @test any(abs.(o_params) .> 1e-12)
 
-println("Species-specific basis contributions: OK")
-
 ##
 
 @info("All Phase 5b extended tests passed!")
@@ -760,7 +624,6 @@ onebody_calc = ETM.WrappedSiteCalculator(et_onebody, nothing, onebody_st, 3.0)
 
 # Test length_basis returns 0
 @test ETM.length_basis(onebody_calc) == 0
-println("ETOneBodyPotential length_basis: OK (0 parameters)")
 
 # Test energy_forces_virial_basis returns empty arrays
 sys = rand_struct()
@@ -768,16 +631,13 @@ efv_onebody = ETM.energy_forces_virial_basis(sys, onebody_calc)
 @test length(efv_onebody.energy) == 0
 @test size(efv_onebody.forces, 2) == 0
 @test length(efv_onebody.virial) == 0
-println("ETOneBodyPotential energy_forces_virial_basis: OK (empty arrays)")
 
 # Test get/set_linear_parameters
 @test length(ETM.get_linear_parameters(onebody_calc)) == 0
 ETM.set_linear_parameters!(onebody_calc, Float64[])  # Should not error
-println("ETOneBodyPotential get/set_linear_parameters: OK")
 
 # Test ACEfit.basis_size
 @test ACEfit.basis_size(onebody_calc) == 0
-println("ETOneBodyPotential ACEfit.basis_size: OK")
 
 ##
 
@@ -806,15 +666,8 @@ ps_pair, st_pair = Lux.setup(rng, model_pair)
 et_pair = ETM.convertpair(model_pair)
 et_pair_ps, et_pair_st = Lux.setup(rng, et_pair)
 
-# Copy pair parameters
-NZ_pair = length(model_pair.pairbasis._i2z)
-for i in 1:NZ_pair, j in 1:NZ_pair
-   idx = (i-1)*NZ_pair + j
-   et_pair_ps.rembed.rbasis.post.W[:, :, idx] .= ps_pair.pairbasis.Wnlq[:, :, i, j]
-end
-for s in 1:NZ_pair
-   et_pair_ps.readout.W[1, :, s] .= ps_pair.Wpair[:, s]
-end
+# Copy pair parameters using utility function
+ETM.copy_pair_params!(et_pair_ps, ps_pair, model_pair)
 
 rcut_pair = maximum(a.rcut for a in model_pair.pairbasis.rin0cuts)
 pair_calc = ETM.ETPairPotential(et_pair, et_pair_ps, et_pair_st, rcut_pair)
@@ -823,7 +676,6 @@ pair_calc = ETM.ETPairPotential(et_pair, et_pair_ps, et_pair_st, rcut_pair)
 pair_nbasis = et_pair.readout.in_dim
 pair_nspecies = et_pair.readout.ncat
 @test ETM.length_basis(pair_calc) == pair_nbasis * pair_nspecies
-println("ETPairPotential length_basis: OK ($(pair_nbasis * pair_nspecies) parameters)")
 
 # Test energy_forces_virial_basis
 sys_pair = rand_struct()  # Uses Si/O system from earlier
@@ -834,7 +686,6 @@ nparams_pair = ETM.length_basis(pair_calc)
 @test length(efv_pair.energy) == nparams_pair
 @test size(efv_pair.forces) == (natoms_pair, nparams_pair)
 @test length(efv_pair.virial) == nparams_pair
-println("ETPairPotential energy_forces_virial_basis shapes: OK")
 
 # Test linear combination gives correct energy
 θ_pair = ETM.get_linear_parameters(pair_calc)
@@ -842,18 +693,15 @@ E_from_pair_basis = dot(ustrip.(efv_pair.energy), θ_pair)
 E_pair_direct = ustrip(u"eV", AtomsCalculators.potential_energy(sys_pair, pair_calc))
 print_tf(@test E_from_pair_basis ≈ E_pair_direct rtol=1e-10)
 println()
-println("ETPairPotential energy from basis: OK")
 
 # Test get/set round-trip
 θ_pair_test = randn(nparams_pair)
 ETM.set_linear_parameters!(pair_calc, θ_pair_test)
 @test ETM.get_linear_parameters(pair_calc) ≈ θ_pair_test
 ETM.set_linear_parameters!(pair_calc, θ_pair)  # Restore
-println("ETPairPotential get/set_linear_parameters: OK")
 
 # Test ACEfit.basis_size
 @test ACEfit.basis_size(pair_calc) == nparams_pair
-println("ETPairPotential ACEfit.basis_size: OK")
 
 ##
 
@@ -864,7 +712,6 @@ stacked_calc = ETM.convert2et_full(model_pair, ps_pair, st_pair)
 
 # Verify structure: 3 components (ETOneBody, ETPairModel, ETACE)
 @test length(stacked_calc.calcs) == 3
-println("StackedCalculator has $(length(stacked_calc.calcs)) components")
 
 # Test length_basis is sum of components
 n_onebody = ETM.length_basis(stacked_calc.calcs[1])
@@ -876,7 +723,6 @@ n_total = ETM.length_basis(stacked_calc)
 @test n_pair > 0
 @test n_ace > 0
 @test n_total == n_onebody + n_pair + n_ace
-println("StackedCalculator length_basis: OK (0 + $n_pair + $n_ace = $n_total)")
 
 # Test energy_forces_virial_basis
 sys_stacked = rand_struct()
@@ -886,7 +732,6 @@ natoms_stacked = length(sys_stacked)
 @test length(efv_stacked.energy) == n_total
 @test size(efv_stacked.forces) == (natoms_stacked, n_total)
 @test length(efv_stacked.virial) == n_total
-println("StackedCalculator energy_forces_virial_basis shapes: OK")
 
 # Test linear combination gives correct energy
 θ_stacked = ETM.get_linear_parameters(stacked_calc)
@@ -895,7 +740,6 @@ E_from_stacked_basis = dot(ustrip.(efv_stacked.energy), θ_stacked)
 E_stacked_direct = ustrip(u"eV", AtomsCalculators.potential_energy(sys_stacked, stacked_calc))
 print_tf(@test E_from_stacked_basis ≈ E_stacked_direct rtol=1e-10)
 println()
-println("StackedCalculator energy from basis: OK")
 
 # Test linear combination gives correct forces
 F_from_stacked_basis = efv_stacked.forces * θ_stacked
@@ -903,7 +747,6 @@ F_stacked_direct = AtomsCalculators.forces(sys_stacked, stacked_calc)
 max_diff_stacked_F = maximum(norm(ustrip.(f1) - ustrip.(f2)) for (f1, f2) in zip(F_from_stacked_basis, F_stacked_direct))
 print_tf(@test max_diff_stacked_F < 1e-10)
 println()
-println("StackedCalculator forces from basis: OK (max_diff = $max_diff_stacked_F)")
 
 # Test linear combination gives correct virial
 V_from_stacked_basis = sum(θ_stacked[k] * ustrip.(efv_stacked.virial[k]) for k in 1:n_total)
@@ -911,7 +754,6 @@ V_stacked_direct = ustrip.(AtomsCalculators.virial(sys_stacked, stacked_calc))
 virial_diff_stacked = maximum(abs.(V_from_stacked_basis - V_stacked_direct))
 print_tf(@test virial_diff_stacked < 1e-10)
 println()
-println("StackedCalculator virial from basis: OK (max_diff = $virial_diff_stacked)")
 
 # Test get/set_linear_parameters round-trip
 θ_stacked_orig = copy(θ_stacked)
@@ -920,17 +762,14 @@ ETM.set_linear_parameters!(stacked_calc, θ_stacked_test)
 θ_stacked_check = ETM.get_linear_parameters(stacked_calc)
 @test θ_stacked_check ≈ θ_stacked_test
 ETM.set_linear_parameters!(stacked_calc, θ_stacked_orig)  # Restore
-println("StackedCalculator get/set_linear_parameters: OK")
 
 # Test potential_energy_basis consistency
 E_basis_stacked = ETM.potential_energy_basis(sys_stacked, stacked_calc)
 @test length(E_basis_stacked) == n_total
 @test ustrip.(E_basis_stacked) ≈ ustrip.(efv_stacked.energy) rtol=1e-10
-println("StackedCalculator potential_energy_basis consistency: OK")
 
 # Test ACEfit.basis_size
 @test ACEfit.basis_size(stacked_calc) == n_total
-println("StackedCalculator ACEfit.basis_size: OK")
 
 ##
 
diff --git a/test/etmodels/test_etonebody.jl b/test/etmodels/test_etonebody.jl
index 527937aa5..8e7b18f5d 100644
--- a/test/etmodels/test_etonebody.jl
+++ b/test/etmodels/test_etonebody.jl
@@ -106,12 +106,11 @@ sys = rand_struct()
 G = ET.Atoms.interaction_graph(sys, 5.0 * u"Å")
 ∂G1 = ETM.site_grads(et_V0, G, ps, st)
 
-# ETOneBody returns NamedTuple with edge_data filled with empty VState() elements
-# Empty VState() acts as additive identity: VState(r=...) + VState() == VState(r=...)
+# ETOneBody returns NamedTuple with empty edge_data array since there are no
+# position-dependent gradients (energy only depends on atom types, not positions)
 println_slim(@test ∂G1 isa NamedTuple)
 println_slim(@test haskey(∂G1, :edge_data))
-println_slim(@test length(∂G1.edge_data) == length(G.edge_data))
-println_slim(@test all(v -> v == DP.VState(), ∂G1.edge_data))
+println_slim(@test isempty(∂G1.edge_data))
 
 ##
 
diff --git a/test/runtests.jl b/test/runtests.jl
index 624c7b65f..af4b1d070 100644
--- a/test/runtests.jl
+++ b/test/runtests.jl
@@ -17,9 +17,10 @@ using ACEpotentials, Test, LazyArtifacts
     @testset "Weird bugs" begin include("test_bugs.jl") end
 
     # new ET backend tests
-    @testset "ET ACE" begin include("etmodels/test_etace.jl") end 
+    @testset "ET ACE" begin include("etmodels/test_etace.jl") end
     @testset "ET OneBody" begin include("etmodels/test_etonebody.jl") end
     @testset "ET Pair" begin include("etmodels/test_etpair.jl") end
+    @testset "ET Calculators" begin include("et_models/test_et_calculators.jl") end
 
     # ACE1 compatibility tests 
     # TODO: these tests need to be revived either by creating a JSON 

From f10f83301b0910f7c5726a2e2fa412547eef1ed4 Mon Sep 17 00:00:00 2001
From: James Kermode <james.kermode@gmail.com>
Date: Mon, 5 Jan 2026 15:54:20 +0000
Subject: [PATCH 32/35] Add ETACE models tutorial example
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Demonstrates two approaches for working with ET backend:
1. Converting from existing ACE model (recommended)
   - convert2et_full: Full model to StackedCalculator
   - convert2et: Many-body only to ETACE
   - convertpair: Pair potential to ETPairModel
2. Creating ETACE from scratch (advanced)
   - Direct EquivariantTensors component construction
   - ETOneBody, ETACE, StackedCalculator assembly

Also shows training assembly interface for ACEfit integration.

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
---
 examples/etmodels/etace_tutorial.jl | 348 ++++++++++++++++++++++++++++
 1 file changed, 348 insertions(+)
 create mode 100644 examples/etmodels/etace_tutorial.jl

diff --git a/examples/etmodels/etace_tutorial.jl b/examples/etmodels/etace_tutorial.jl
new file mode 100644
index 000000000..a53aff94f
--- /dev/null
+++ b/examples/etmodels/etace_tutorial.jl
@@ -0,0 +1,348 @@
+# # ETACE Models Tutorial
+#
+# This tutorial demonstrates how to use the EquivariantTensors (ET) backend
+# for ACE models in ACEpotentials.jl. The ET backend provides:
+# - Graph-based evaluation (edge-centric computation)
+# - Automatic differentiation via Zygote
+# - GPU-ready architecture via KernelAbstractions
+# - Lux.jl layer integration
+#
+# We cover two approaches:
+# 1. **Converting from an existing ACE model** - The recommended approach
+# 2. **Creating an ETACE model from scratch** - For advanced users
+#
+
+## Load required packages
+using ACEpotentials, StaticArrays, Lux, AtomsBase, AtomsBuilder, Unitful
+using AtomsCalculators, Random, LinearAlgebra
+
+M = ACEpotentials.Models
+ETM = ACEpotentials.ETModels
+import EquivariantTensors as ET
+import Polynomials4ML as P4ML
+
+rng = Random.MersenneTwister(1234)
+
+# =============================================================================
+# Part 1: Converting from an Existing ACE Model (Recommended)
+# =============================================================================
+#
+# The simplest way to get an ETACE model is to convert from a standard ACE model.
+# This approach ensures consistency with the familiar ACE model construction API.
+
+## Define model hyperparameters
+elements = (:Si, :O)
+order = 3          # correlation order (body-order = order + 1)
+max_level = 10     # total polynomial degree
+maxl = 6           # maximum angular momentum
+rcut = 5.5         # cutoff radius in Angstrom
+
+## Create the standard ACE model
+rin0cuts = M._default_rin0cuts(elements)
+rin0cuts = (x -> (rin = x.rin, r0 = x.r0, rcut = rcut)).(rin0cuts)
+
+## Note: pair_learnable=true is required for ET conversion
+## (default uses splines which aren't yet supported by convert2et)
+model = M.ace_model(;
+   elements = elements,
+   order = order,
+   Ytype = :solid,
+   level = M.TotalDegree(),
+   max_level = max_level,
+   maxl = maxl,
+   pair_maxn = max_level,
+   rin0cuts = rin0cuts,
+   E0s = Dict(:Si => -0.846, :O => -1.023),  # reference energies
+   pair_learnable = true   # required for ET conversion
+)
+
+## Initialize parameters with Lux
+ps, st = Lux.setup(rng, model)
+
+@info "Standard ACE model created"
+@info "  Number of basis functions: $(M.length_basis(model))"
+
+# -----------------------------------------------------------------------------
+# Method A: Convert full model (E0 + Pair + Many-body) to StackedCalculator
+# -----------------------------------------------------------------------------
+
+## convert2et_full creates a StackedCalculator combining:
+##   - ETOneBody (reference energies per species)
+##   - ETPairModel (pair potential)
+##   - ETACE (many-body ACE potential)
+
+et_calc_full = ETM.convert2et_full(model, ps, st; rng=rng)
+
+@info "Full conversion to StackedCalculator"
+@info "  Contains: ETOneBody + ETPairPotential + ETACEPotential"
+@info "  Total linear parameters: $(ETM.length_basis(et_calc_full))"
+
+# -----------------------------------------------------------------------------
+# Method B: Convert only the many-body ACE component
+# -----------------------------------------------------------------------------
+
+## convert2et creates just the ETACE model (many-body only, no E0 or pair)
+et_ace = ETM.convert2et(model)
+et_ace_ps, et_ace_st = Lux.setup(rng, et_ace)
+
+## Copy parameters from the original model
+ETM.copy_ace_params!(et_ace_ps, ps, model)
+
+## Wrap in calculator for AtomsCalculators interface
+et_ace_calc = ETM.ETACEPotential(et_ace, et_ace_ps, et_ace_st, rcut)
+
+@info "Many-body only conversion"
+@info "  ETACE basis size: $(ETM.length_basis(et_ace_calc))"
+
+# -----------------------------------------------------------------------------
+# Method C: Convert only the pair potential
+# -----------------------------------------------------------------------------
+
+## convertpair creates an ETPairModel
+et_pair = ETM.convertpair(model)
+et_pair_ps, et_pair_st = Lux.setup(rng, et_pair)
+
+## Copy parameters from the original model
+ETM.copy_pair_params!(et_pair_ps, ps, model)
+
+## Wrap in calculator
+et_pair_calc = ETM.ETPairPotential(et_pair, et_pair_ps, et_pair_st, rcut)
+
+@info "Pair potential only conversion"
+@info "  ETPairModel basis size: $(ETM.length_basis(et_pair_calc))"
+
+
+# =============================================================================
+# Part 2: Using ETACE Calculators
+# =============================================================================
+
+## Create a test system
+sys = AtomsBuilder.bulk(:Si) * (2, 2, 1)
+rattle!(sys, 0.1u"Å")
+AtomsBuilder.randz!(sys, [:Si => 0.5, :O => 0.5])
+
+@info "Test system: $(length(sys)) atoms"
+
+## Evaluate energy, forces, virial using AtomsCalculators interface
+E = AtomsCalculators.potential_energy(sys, et_calc_full)
+F = AtomsCalculators.forces(sys, et_calc_full)
+V = AtomsCalculators.virial(sys, et_calc_full)
+
+@info "Energy evaluation with full ETACE calculator"
+@info "  Energy: $E"
+@info "  Max force magnitude: $(maximum(norm.(F)))"
+
+## Combined evaluation (more efficient)
+efv = AtomsCalculators.energy_forces_virial(sys, et_calc_full)
+@info "  Combined EFV evaluation successful"
+
+
+# =============================================================================
+# Part 3: Training Assembly (for Linear Fitting)
+# =============================================================================
+#
+# The ETACE calculators support training assembly functions for ACEfit integration.
+# These compute the design matrix rows for linear least squares fitting.
+
+## Energy-only basis evaluation (fastest)
+E_basis = ETM.potential_energy_basis(sys, et_ace_calc)
+@info "Energy basis: $(length(E_basis)) components"
+
+## Full energy, forces, virial basis
+efv_basis = ETM.energy_forces_virial_basis(sys, et_ace_calc)
+@info "EFV basis shapes:"
+@info "  Energy:  $(size(efv_basis.energy))"
+@info "  Forces:  $(size(efv_basis.forces))"
+@info "  Virial:  $(size(efv_basis.virial))"
+
+## Get/set linear parameters
+params = ETM.get_linear_parameters(et_ace_calc)
+@info "Linear parameters: $(length(params)) values"
+
+## Parameters can be updated for fitting:
+## ETM.set_linear_parameters!(et_ace_calc, new_params)
+
+
+# =============================================================================
+# Part 4: Creating an ETACE Model from Scratch (Advanced)
+# =============================================================================
+#
+# For advanced users who want direct control over the model architecture.
+# This requires understanding the EquivariantTensors.jl API.
+
+## Define model parameters
+scratch_elements = [:Si, :O]
+scratch_maxn = 6      # number of radial basis functions
+scratch_maxl = 4      # maximum angular momentum
+scratch_order = 2     # correlation order
+scratch_rcut = 5.5    # cutoff radius
+
+## Species information
+zlist = ChemicalSpecies.(scratch_elements)
+NZ = length(zlist)
+
+# -----------------------------------------------------------------------------
+# Build the radial embedding (Rnl)
+# -----------------------------------------------------------------------------
+
+## Radial specification (n, l pairs)
+Rnl_spec = [(n=n, l=l) for n in 1:scratch_maxn for l in 0:scratch_maxl]
+
+## Distance transform: r -> transformed coordinate y
+## Using standard Agnesi transform parameters
+f_trans = let rcut = scratch_rcut
+   (x, st) -> begin
+      r = norm(x.𝐫)
+      # Simple polynomial transform (normalized to [-1, 1])
+      y = 1 - 2 * r / rcut
+      return y
+   end
+end
+trans = ET.NTtransformST(f_trans, NamedTuple())
+
+## Envelope function: smooth cutoff
+f_env = y -> (1 - y^2)^2  # quartic envelope
+
+## Polynomial basis (Chebyshev)
+polys = P4ML.ChebBasis(scratch_maxn)
+Penv = P4ML.wrapped_basis(Lux.BranchLayer(
+   polys,
+   Lux.WrappedFunction(y -> f_env.(y)),
+   fusion = Lux.WrappedFunction(Pe -> Pe[2] .* Pe[1])
+))
+
+## Species-pair selector for radial weights
+selector_ij = let zlist = tuple(zlist...)
+   xij -> ET.catcat2idx(zlist, xij.z0, xij.z1)
+end
+
+## Linear layer: P(yij) -> W[(Zi, Zj)] * P(yij)
+linl = ET.SelectLinL(scratch_maxn, length(Rnl_spec), NZ^2, selector_ij)
+
+## Complete radial embedding
+rbasis = ET.EmbedDP(trans, Penv, linl)
+rembed = ET.EdgeEmbed(rbasis)
+
+# -----------------------------------------------------------------------------
+# Build the angular embedding (Ylm)
+# -----------------------------------------------------------------------------
+
+## Spherical harmonics basis
+ylm_basis = P4ML.real_sphericalharmonics(scratch_maxl)
+Ylm_spec = P4ML.natural_indices(ylm_basis)
+
+## Angular embedding: edge direction -> spherical harmonics
+ybasis = ET.EmbedDP(
+   ET.NTtransformST((x, st) -> x.𝐫, NamedTuple()),
+   ylm_basis
+)
+yembed = ET.EdgeEmbed(ybasis)
+
+# -----------------------------------------------------------------------------
+# Build the many-body basis (sparse ACE)
+# -----------------------------------------------------------------------------
+
+## Define the many-body specification
+## This specifies which (n,l) combinations appear in each correlation
+## For simplicity, use all 1-correlations up to given degree
+mb_spec = [[(n=n, l=l)] for n in 1:scratch_maxn for l in 0:scratch_maxl]
+
+## Create sparse equivariant tensor (ACE basis)
+mb_basis = ET.sparse_equivariant_tensor(
+   L = 0,                # scalar (invariant) output
+   mb_spec = mb_spec,
+   Rnl_spec = Rnl_spec,
+   Ylm_spec = Ylm_spec,
+   basis = real          # real-valued basis
+)
+
+# -----------------------------------------------------------------------------
+# Build the readout layer
+# -----------------------------------------------------------------------------
+
+## Species selector for readout
+selector_i = let zlist = zlist
+   x -> ET.cat2idx(zlist, x.z)
+end
+
+## Readout: basis values -> site energies
+readout = ET.SelectLinL(
+   mb_basis.lens[1],     # input dimension (basis length)
+   1,                    # output dimension (site energy)
+   NZ,                   # number of species categories
+   selector_i
+)
+
+# -----------------------------------------------------------------------------
+# Assemble the ETACE model
+# -----------------------------------------------------------------------------
+
+scratch_etace = ETM.ETACE(rembed, yembed, mb_basis, readout)
+
+## Initialize with Lux
+scratch_ps, scratch_st = Lux.setup(rng, scratch_etace)
+
+@info "ETACE model created from scratch"
+@info "  Radial basis size: $(length(Rnl_spec))"
+@info "  Angular basis size: $(length(Ylm_spec))"
+@info "  Many-body basis size: $(mb_basis.lens[1])"
+
+## Wrap in calculator
+scratch_calc = ETM.ETACEPotential(scratch_etace, scratch_ps, scratch_st, scratch_rcut)
+
+## Test evaluation
+E_scratch = AtomsCalculators.potential_energy(sys, scratch_calc)
+@info "Scratch model energy: $E_scratch"
+
+
+# =============================================================================
+# Part 5: Creating One-Body and Pair Models from Scratch
+# =============================================================================
+
+# -----------------------------------------------------------------------------
+# ETOneBody: Reference energies
+# -----------------------------------------------------------------------------
+
+## Define reference energies per species
+E0_dict = Dict(ChemicalSpecies(:Si) => -0.846,
+               ChemicalSpecies(:O) => -1.023)
+
+## Category function extracts species from atom state
+catfun = x -> x.z  # x.z is the ChemicalSpecies
+
+## Create one-body model
+et_onebody = ETM.one_body(E0_dict, catfun)
+_, onebody_st = Lux.setup(rng, et_onebody)
+
+## Wrap in calculator (uses small cutoff since no neighbors needed)
+onebody_calc = ETM.ETOneBodyPotential(et_onebody, nothing, onebody_st, 3.0)
+
+@info "ETOneBody model created"
+@info "  Reference energies: $E0_dict"
+
+E_onebody = AtomsCalculators.potential_energy(sys, onebody_calc)
+@info "  One-body energy for test system: $E_onebody"
+
+
+# =============================================================================
+# Part 6: Combining Models with StackedCalculator
+# =============================================================================
+#
+# StackedCalculator combines multiple calculators by summing their contributions.
+
+## Stack our from-scratch models
+combined_calc = ETM.StackedCalculator((onebody_calc, scratch_calc))
+
+@info "StackedCalculator created"
+@info "  Components: ETOneBody + ETACE"
+@info "  Total basis size: $(ETM.length_basis(combined_calc))"
+
+## Evaluate combined model
+E_combined = AtomsCalculators.potential_energy(sys, combined_calc)
+@info "  Combined energy: $E_combined"
+
+## Training assembly works on StackedCalculator too
+efv_combined = ETM.energy_forces_virial_basis(sys, combined_calc)
+@info "  Combined EFV basis shapes: E=$(size(efv_combined.energy)), F=$(size(efv_combined.forces))"
+
+@info "Tutorial complete!"

From 504b995438f68ae88c6703fd0d5861cabdc1a1b9 Mon Sep 17 00:00:00 2001
From: James Kermode <james.kermode@gmail.com>
Date: Mon, 5 Jan 2026 16:05:18 +0000
Subject: [PATCH 33/35] Add ETACE tutorial to documentation
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Integrate the ETACE tutorial from examples/etmodels/ into the
Documenter.jl-based documentation using Literate.jl.

- Point Literate to examples/etmodels/etace_tutorial.jl (no duplication)
- Add to Tutorials section in navigation
- Add entry in tutorials/index.md

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
---
 docs/make.jl                | 11 +++++++++--
 docs/src/tutorials/index.md |  6 +++---
 2 files changed, 12 insertions(+), 5 deletions(-)

diff --git a/docs/make.jl b/docs/make.jl
index ed112b1e3..285a3c7b2 100644
--- a/docs/make.jl
+++ b/docs/make.jl
@@ -28,8 +28,14 @@ Literate.markdown(_tutorial_src * "/dataset_analysis.jl",
 Literate.markdown(_tutorial_src * "/descriptor.jl",
                   _tutorial_out; documenter = true)
 
-Literate.markdown(_tutorial_src * "/asp.jl", 
+Literate.markdown(_tutorial_src * "/asp.jl",
                   _tutorial_out; documenter = true)
+
+# ETACE tutorial lives in examples/ to avoid duplication
+_examples_src = joinpath(@__DIR__(), "..", "examples", "etmodels")
+Literate.markdown(_examples_src * "/etace_tutorial.jl",
+                  _tutorial_out; documenter = true)
+
 # Literate.markdown(_tutorial_src * "/first_example_model.jl", 
 #                   _tutorial_out; documenter = true)
 
@@ -70,9 +76,10 @@ makedocs(;
                 "literate_tutorials/basic_julia_workflow.md",
                 "literate_tutorials/smoothness_priors.md",
                 "literate_tutorials/dataset_analysis.md",
-                "tutorials/scripting.md", 
+                "tutorials/scripting.md",
                 "literate_tutorials/descriptor.md",
                 "literate_tutorials/asp.md",
+                "literate_tutorials/etace_tutorial.md",
             ],
         "Additional Topics" => Any[
             "gettingstarted/parallel-fitting.md",
diff --git a/docs/src/tutorials/index.md b/docs/src/tutorials/index.md
index 98ee4560c..23da5738e 100644
--- a/docs/src/tutorials/index.md
+++ b/docs/src/tutorials/index.md
@@ -1,11 +1,11 @@
 
 # Tutorials Overview
 
-* [Basic Julia Workflow](../literate_tutorials/basic_julia_workflow.md) : minimal example to fit a potential to an existing dataset using a Julia script 
+* [Basic Julia Workflow](../literate_tutorials/basic_julia_workflow.md) : minimal example to fit a potential to an existing dataset using a Julia script
 * [Basic Shell Workflow](scripting.md) : basic workflow for fitting via the command line
 * [Smoothness Priors](../literate_tutorials/smoothness_priors.md) : brief introduction to smoothness priors
 * [Basic Dataset Analysis](../literate_tutorials/dataset_analysis.md) : basic techniques to visualize training datasets and correlate such observations to the choice of geometric priors
-* [Descriptors](../literate_tutorials/descriptor.md) : `ACEpotentials` can be used as descriptors of atomic environments or structures, which is described here. 
+* [Descriptors](../literate_tutorials/descriptor.md) : `ACEpotentials` can be used as descriptors of atomic environments or structures, which is described here.
 * [Sparse Solvers](../literate_tutorials/asp.md) : basic tutorial on using the `ASP` and `OMP` solvers.
-
+* [ETACE Models](../literate_tutorials/etace_tutorial.md) : using the EquivariantTensors backend for ACE models, including conversion from standard ACE models and creating ETACE models from scratch.
 

From 4971a56edd76a198a0d0fc7da1bb81c4e5db8540 Mon Sep 17 00:00:00 2001
From: James Kermode <james.kermode@gmail.com>
Date: Mon, 5 Jan 2026 16:29:54 +0000
Subject: [PATCH 34/35] Add missing dependencies to docs/Project.toml
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Add StaticArrays, Lux, EquivariantTensors, and Polynomials4ML
which are required by the ETACE tutorial examples.

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
---
 docs/Project.toml | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/docs/Project.toml b/docs/Project.toml
index 1facd1b7c..8b19a1034 100644
--- a/docs/Project.toml
+++ b/docs/Project.toml
@@ -5,15 +5,19 @@ AtomsBase = "a963bdd2-2df7-4f54-a1ee-49d51e6be12a"
 AtomsBuilder = "f5cc8831-eeb7-4288-8d9f-d6c1ddb77004"
 AtomsCalculators = "a3e0e189-c65a-42c1-833c-339540406eb1"
 Documenter = "e30172f5-a6a5-5a46-863b-614d45cd2de4"
+EquivariantTensors = "5e107534-7145-4f8f-b06f-47a52840c895"
 ForwardDiff = "f6369f11-7733-5829-9624-2563aa707210"
 JSON = "682c06a0-de6a-54ab-a142-c8b1cf79cde6"
 LaTeXStrings = "b964fa9f-0449-5b57-a5c2-d3ea65f4040f"
 LazyArtifacts = "4af54fe1-eca0-43a8-85a7-787d91b784e3"
 LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
 Literate = "98b081ad-f1c9-55d3-8b20-4c87d4299306"
+Lux = "b2108857-7c20-44ae-9111-449ecde12c47"
 MultivariateStats = "6f286f6a-111f-5878-ab1e-185364afe411"
 Plots = "91a5bcdd-55d7-5caf-9e0b-520d859cae80"
+Polynomials4ML = "03c4bcba-a943-47e9-bfa1-b1661fc2974f"
 Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
+StaticArrays = "90137ffa-7385-5640-81b9-e52037218182"
 Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2"
 Unitful = "1986cc42-f94f-5a68-af5c-568840ba703d"
 

From 10a686650d30475f5a4bdbd281f2b8eca4950e23 Mon Sep 17 00:00:00 2001
From: James Kermode <james.kermode@gmail.com>
Date: Mon, 5 Jan 2026 17:08:00 +0000
Subject: [PATCH 35/35] Fix Literate.jl inline comment parsing in ETACE
 tutorial
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Use ## instead of # for inline comment inside code block to prevent
Literate.jl from splitting the code block at that line.

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
---
 examples/etmodels/etace_tutorial.jl | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/examples/etmodels/etace_tutorial.jl b/examples/etmodels/etace_tutorial.jl
index a53aff94f..dd5f31054 100644
--- a/examples/etmodels/etace_tutorial.jl
+++ b/examples/etmodels/etace_tutorial.jl
@@ -193,7 +193,7 @@ Rnl_spec = [(n=n, l=l) for n in 1:scratch_maxn for l in 0:scratch_maxl]
 f_trans = let rcut = scratch_rcut
    (x, st) -> begin
       r = norm(x.𝐫)
-      # Simple polynomial transform (normalized to [-1, 1])
+      ## Simple polynomial transform (normalized to [-1, 1])
       y = 1 - 2 * r / rcut
       return y
    end