From 934f6e43fc90971eda69854b25ec4fbb8e633337 Mon Sep 17 00:00:00 2001 From: Jimy Dudhia Date: Fri, 20 Dec 2019 12:38:04 -0700 Subject: [PATCH 001/146] add new deformation coefficients to registries --- src/core_atmosphere/Registry.xml | 30 +++++++++++++++++++++++++++ src/core_init_atmosphere/Registry.xml | 25 ++++++++++++++++++++++ 2 files changed, 55 insertions(+) diff --git a/src/core_atmosphere/Registry.xml b/src/core_atmosphere/Registry.xml index 4281c40bba..2ae703653d 100644 --- a/src/core_atmosphere/Registry.xml +++ b/src/core_atmosphere/Registry.xml @@ -494,6 +494,11 @@ + + + + + #ifdef MPAS_CAM_DYCORE @@ -594,6 +599,17 @@ #endif + + + + + + + + + + + @@ -1561,6 +1577,20 @@ #endif + + + + + + + + + + + + + + @@ -573,6 +578,11 @@ + + + + + @@ -1114,6 +1124,21 @@ + + + + + + + + + + From da78a8cb3384e32fb488cbb25d30eba02d18f1d6 Mon Sep 17 00:00:00 2001 From: Jimy Dudhia Date: Fri, 20 Dec 2019 15:18:27 -0700 Subject: [PATCH 002/146] initialize new deformation coefficients --- src/core_init_atmosphere/mpas_atm_advection.F | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/src/core_init_atmosphere/mpas_atm_advection.F b/src/core_init_atmosphere/mpas_atm_advection.F index f4d44c984e..59edf3b843 100644 --- a/src/core_init_atmosphere/mpas_atm_advection.F +++ b/src/core_init_atmosphere/mpas_atm_advection.F @@ -758,6 +758,8 @@ subroutine atm_initialize_deformation_weights( mesh, nCells, on_a_sphere, sphere real (kind=RKIND), dimension(:,:), pointer :: defc_a, defc_b real (kind=RKIND), dimension(:,:), pointer :: cell_gradient_coef_x, cell_gradient_coef_y + real (kind=RKIND), dimension(:,:), pointer :: deformation_coef_c2, deformation_coef_s2, deformation_coef_cs + real (kind=RKIND), dimension(:,:), pointer :: deformation_coef_c, deformation_coef_s integer, dimension(:,:), pointer :: cellsOnEdge, edgesOnCell, cellsOnCell, verticesOnCell integer, dimension(:), pointer :: nEdgesOnCell real (kind=RKIND), dimension(:), pointer :: xCell, yCell, zCell @@ -927,9 
+929,19 @@ subroutine atm_initialize_deformation_weights( mesh, nCells, on_a_sphere, sphere defc_b(i,iCell) = dl*2.*sint_cost/area_cell cell_gradient_coef_x(i,iCell) = dl*cos(thetat(i))/area_cell cell_gradient_coef_y(i,iCell) = dl*sin(thetat(i))/area_cell + deformation_coef_c2(i,iCell) = dl*cost2/area_cell + deformation_coef_s2(i,iCell) = dl*sint2/area_cell + deformation_coef_cs(i,iCell) = dl*sint_cost/area_cell + deformation_coef_c(i,iCell) = dl*cos(thetat(i))/area_cell + deformation_coef_s(i,iCell) = dl*sin(thetat(i))/area_cell if (cellsOnEdge(1,EdgesOnCell(i,iCell)) /= iCell) then defc_a(i,iCell) = - defc_a(i,iCell) defc_b(i,iCell) = - defc_b(i,iCell) + deformation_coef_c2(i,iCell) = - deformation_coef_c2(i,iCell) + deformation_coef_s2(i,iCell) = - deformation_coef_s2(i,iCell) + deformation_coef_cs(i,iCell) = - deformation_coef_cs(i,iCell) + deformation_coef_c(i,iCell) = - deformation_coef_c(i,iCell) + deformation_coef_s(i,iCell) = - deformation_coef_s(i,iCell) end if end do From b7880ce698cc608d17609505e258b3afbf6032af Mon Sep 17 00:00:00 2001 From: Jimy Dudhia Date: Fri, 20 Dec 2019 16:32:22 -0700 Subject: [PATCH 003/146] add pointers for new coefficients --- src/core_init_atmosphere/mpas_atm_advection.F | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/core_init_atmosphere/mpas_atm_advection.F b/src/core_init_atmosphere/mpas_atm_advection.F index 59edf3b843..7e04564fa0 100644 --- a/src/core_init_atmosphere/mpas_atm_advection.F +++ b/src/core_init_atmosphere/mpas_atm_advection.F @@ -787,6 +787,11 @@ subroutine atm_initialize_deformation_weights( mesh, nCells, on_a_sphere, sphere call mpas_pool_get_array(mesh, 'defc_b', defc_b) call mpas_pool_get_array(mesh, 'cell_gradient_coef_x', cell_gradient_coef_x) call mpas_pool_get_array(mesh, 'cell_gradient_coef_y', cell_gradient_coef_y) + call mpas_pool_get_array(mesh, 'deformation_coef_c2', deformation_coef_c2) + call mpas_pool_get_array(mesh, 'deformation_coef_s2', deformation_coef_s2) + call 
mpas_pool_get_array(mesh, 'deformation_coef_cs', deformation_coef_cs) + call mpas_pool_get_array(mesh, 'deformation_coef_c', deformation_coef_c) + call mpas_pool_get_array(mesh, 'deformation_coef_s', deformation_coef_s) call mpas_pool_get_array(mesh, 'nEdgesOnCell', nEdgesOnCell) call mpas_pool_get_array(mesh, 'cellsOnEdge', cellsOnEdge) call mpas_pool_get_array(mesh, 'edgesOnCell', edgesOnCell) From 235b61ce726b41daf4572d3da1d3a71463a6eeb1 Mon Sep 17 00:00:00 2001 From: Jimy Dudhia Date: Mon, 23 Dec 2019 15:07:40 -0700 Subject: [PATCH 004/146] add initialization of new arrays to zero --- src/core_init_atmosphere/mpas_atm_advection.F | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/core_init_atmosphere/mpas_atm_advection.F b/src/core_init_atmosphere/mpas_atm_advection.F index 7e04564fa0..7a954cd630 100644 --- a/src/core_init_atmosphere/mpas_atm_advection.F +++ b/src/core_init_atmosphere/mpas_atm_advection.F @@ -806,6 +806,11 @@ subroutine atm_initialize_deformation_weights( mesh, nCells, on_a_sphere, sphere defc_a(:,:) = 0. defc_b(:,:) = 0. + deformation_coef_c2(:,:) = 0. + deformation_coef_s2(:,:) = 0. + deformation_coef_cs(:,:) = 0. + deformation_coef_c(:,:) = 0. + deformation_coef_s(:,:) = 0. cell_gradient_coef_x(:,:) = 0. cell_gradient_coef_y(:,:) = 0. 
From cf17fae8fcd6986ffcc45babcbd8be70339f9b0b Mon Sep 17 00:00:00 2001 From: Jimy Dudhia Date: Mon, 23 Dec 2019 15:29:57 -0700 Subject: [PATCH 005/146] pass new arrays to dyn_tend_work - compiles --- .../dynamics/mpas_atm_time_integration.F | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/src/core_atmosphere/dynamics/mpas_atm_time_integration.F b/src/core_atmosphere/dynamics/mpas_atm_time_integration.F index 4fe2faefc4..4aa46b4879 100644 --- a/src/core_atmosphere/dynamics/mpas_atm_time_integration.F +++ b/src/core_atmosphere/dynamics/mpas_atm_time_integration.F @@ -4810,6 +4810,8 @@ subroutine atm_compute_dyn_tend(tend, tend_physics, state, diag, mesh, configs, real (kind=RKIND), dimension(:,:), pointer :: ur_cell, vr_cell real (kind=RKIND), dimension(:,:), pointer :: defc_a, defc_b + real (kind=RKIND), dimension(:,:), pointer :: deformation_coef_c2, deformation_coef_s2, deformation_coef_cs + real (kind=RKIND), dimension(:,:), pointer :: deformation_coef_c, deformation_coef_s real(kind=RKIND), dimension(:,:), pointer :: tend_w_pgf, tend_w_buoy @@ -4905,6 +4907,11 @@ subroutine atm_compute_dyn_tend(tend, tend_physics, state, diag, mesh, configs, call mpas_pool_get_array(mesh, 'angleEdge', angleEdge) call mpas_pool_get_array(mesh, 'defc_a', defc_a) call mpas_pool_get_array(mesh, 'defc_b', defc_b) + call mpas_pool_get_array(mesh, 'deformation_coef_c2', deformation_coef_c2) + call mpas_pool_get_array(mesh, 'deformation_coef_s2', deformation_coef_s2) + call mpas_pool_get_array(mesh, 'deformation_coef_cs', deformation_coef_cs) + call mpas_pool_get_array(mesh, 'deformation_coef_c', deformation_coef_c) + call mpas_pool_get_array(mesh, 'deformation_coef_s', deformation_coef_s) call mpas_pool_get_array(mesh, 'meshScalingDel2', meshScalingDel2) call mpas_pool_get_array(mesh, 'meshScalingDel4', meshScalingDel4) call mpas_pool_get_array(mesh, 'u_init', u_init) @@ -4966,6 +4973,7 @@ subroutine atm_compute_dyn_tend(tend, tend_physics, state, diag, mesh, configs, 
cellsOnEdge, verticesOnEdge, edgesOnCell, edgesOnEdge, cellsOnCell, edgesOnVertex, nEdgesOnCell, nEdgesOnEdge, & latCell, latEdge, angleEdge, u_init, v_init, advCellsForEdge, nAdvCellsForEdge, adv_coefs, adv_coefs_3rd, & rdzu, rdzw, fzm, fzp, qv_init, t_init, cf1, cf2, cf3, r_earth, ur_cell, vr_cell, defc_a, defc_b, & + deformation_coef_c2,deformation_coef_s2,deformation_coef_cs,deformation_coef_c,deformation_coef_s, & tend_w_pgf, tend_w_buoy, coef_3rd_order, c_s, config_mix_full, config_horiz_mixing, config_del4u_div_factor, & config_h_mom_eddy_visc2, config_v_mom_eddy_visc2, config_h_theta_eddy_visc2, config_v_theta_eddy_visc2, & config_h_theta_eddy_visc4, config_h_mom_eddy_visc4, config_visc4_2dsmag, config_len_disp, rk_step, dt, & @@ -4990,6 +4998,7 @@ subroutine atm_compute_dyn_tend_work(nCells, nEdges, nVertices, nVertLevels_dumm cellsOnEdge, verticesOnEdge, edgesOnCell, edgesOnEdge, cellsOnCell, edgesOnVertex, nEdgesOnCell, nEdgesOnEdge, & latCell, latEdge, angleEdge, u_init, v_init, advCellsForEdge, nAdvCellsForEdge, adv_coefs, adv_coefs_3rd, & rdzu, rdzw, fzm, fzp, qv_init, t_init, cf1, cf2, cf3, r_earth, ur_cell, vr_cell, defc_a, defc_b, & + deformation_coef_c2,deformation_coef_s2,deformation_coef_cs,deformation_coef_c,deformation_coef_s, & tend_w_pgf, tend_w_buoy, coef_3rd_order, c_s, config_mix_full, config_horiz_mixing, config_del4u_div_factor, & config_h_mom_eddy_visc2, config_v_mom_eddy_visc2, config_h_theta_eddy_visc2, config_v_theta_eddy_visc2, & config_h_theta_eddy_visc4, config_h_mom_eddy_visc4, config_visc4_2dsmag, config_len_disp, rk_step, dt, & @@ -5098,6 +5107,8 @@ subroutine atm_compute_dyn_tend_work(nCells, nEdges, nVertices, nVertLevels_dumm real (kind=RKIND), dimension(maxEdges,nCells+1) :: defc_a real (kind=RKIND), dimension(maxEdges,nCells+1) :: defc_b + real (kind=RKIND), dimension(maxEdges,nCells+1) :: deformation_coef_c2, deformation_coef_s2, deformation_coef_cs + real (kind=RKIND), dimension(maxEdges,nCells+1) :: deformation_coef_c, 
deformation_coef_s real (kind=RKIND), dimension(nVertLevels+1,nCells+1) :: tend_w_pgf real (kind=RKIND), dimension(nVertLevels+1,nCells+1) :: tend_w_buoy From bf910d67d2934316b6339a69f3b54b1549f0cbaf Mon Sep 17 00:00:00 2001 From: Michael Duda Date: Thu, 4 Dec 2025 13:52:43 -0700 Subject: [PATCH 006/146] add deformation formulation to 2d Smagorinsky kdiff --- .../dynamics/mpas_atm_time_integration.F | 24 ++++++++++++++++++- 1 file changed, 23 insertions(+), 1 deletion(-) diff --git a/src/core_atmosphere/dynamics/mpas_atm_time_integration.F b/src/core_atmosphere/dynamics/mpas_atm_time_integration.F index 4aa46b4879..b1e0241cc3 100644 --- a/src/core_atmosphere/dynamics/mpas_atm_time_integration.F +++ b/src/core_atmosphere/dynamics/mpas_atm_time_integration.F @@ -5147,6 +5147,10 @@ subroutine atm_compute_dyn_tend_work(nCells, nEdges, nVertices, nVertLevels_dumm real (kind=RKIND), dimension( nVertLevels+1 ) :: d_diag, d_off_diag, flux_arr real (kind=RKIND), dimension( nVertLevels + 1 ) :: wduz, wdwz, wdtz, dpzx real (kind=RKIND), dimension( nVertLevels ) :: ru_edge_w, q, u_mix + + real (kind=RKIND), dimension( nVertLevels+1 ) :: d_11, d_22, d_12 + real (kind=RKIND), dimension( nVertLevels+1 ) :: dudx, dudy, dvdx, dvdy + real (kind=RKIND) :: theta_turb_flux, w_turb_flux, r real (kind=RKIND) :: scalar_weight real (kind=RKIND) :: inv_r_earth @@ -5246,6 +5250,11 @@ subroutine atm_compute_dyn_tend_work(nCells, nEdges, nVertices, nVertLevels_dumm d_off_diag(k) = 0.0_RKIND end do + dudx(1:nVertLevels) = 0.0 + dudy(1:nVertLevels) = 0.0 + dvdx(1:nVertLevels) = 0.0 + dvdy(1:nVertLevels) = 0.0 + !$acc loop seq do iEdge=1,nEdgesOnCell(iCell) !$acc loop vector @@ -5254,6 +5263,14 @@ subroutine atm_compute_dyn_tend_work(nCells, nEdges, nVertices, nVertLevels_dumm - defc_b(iEdge,iCell)*v(k,EdgesOnCell(iEdge,iCell)) d_off_diag(k) = d_off_diag(k) + defc_b(iEdge,iCell)*u(k,EdgesOnCell(iEdge,iCell)) & + defc_a(iEdge,iCell)*v(k,EdgesOnCell(iEdge,iCell)) + dudx(k) = dudx(k) + 
deformation_coef_c2(iEdge,iCell)*u(k,EdgesOnCell(iEdge,iCell)) & + - deformation_coef_cs(iEdge,iCell)*v(k,EdgesOnCell(iEdge,iCell)) + dudy(k) = dudy(k) + deformation_coef_cs(iEdge,iCell)*u(k,EdgesOnCell(iEdge,iCell)) & + - deformation_coef_s2(iEdge,iCell)*v(k,EdgesOnCell(iEdge,iCell)) + dvdx(k) = dvdx(k) + deformation_coef_cs(iEdge,iCell)*u(k,EdgesOnCell(iEdge,iCell)) & + + deformation_coef_c2(iEdge,iCell)*v(k,EdgesOnCell(iEdge,iCell)) + dvdy(k) = dvdy(k) + deformation_coef_s2(iEdge,iCell)*u(k,EdgesOnCell(iEdge,iCell)) & + + deformation_coef_cs(iEdge,iCell)*v(k,EdgesOnCell(iEdge,iCell)) end do end do !DIR$ IVDEP @@ -5261,7 +5278,12 @@ subroutine atm_compute_dyn_tend_work(nCells, nEdges, nVertices, nVertLevels_dumm do k=1, nVertLevels ! here is the Smagorinsky formulation, ! followed by imposition of an upper bound on the eddy viscosity - kdiff(k,iCell) = min((c_s * config_len_disp)**2 * sqrt(d_diag(k)**2 + d_off_diag(k)**2),(0.01*config_len_disp**2) * invDt) + ! kdiff(k,iCell) = min((c_s * config_len_disp)**2 * sqrt(d_diag(k)**2 + d_off_diag(k)**2),(0.01*config_len_disp**2) * invDt) + ! deformation formulation + d_11(k) = 2*dudx(k) + d_22(k) = 2*dvdy(k) + d_12(k) = dudy(k) + dvdx(k) + kdiff(k,iCell) = min((c_s * config_len_disp)**2 * sqrt(0.25*(d_11(k)-d_22(k))**2 + d_12(k)**2),(0.01*config_len_disp**2) * invDt) end do end do !$acc end parallel From 31a507c27690953c26ff409dfa7bcff605b0beba Mon Sep 17 00:00:00 2001 From: Bill Skamarock Date: Fri, 27 Dec 2019 12:47:26 -0700 Subject: [PATCH 007/146] bug fixes for computing the deformation coefficients for the 2D Smagorinsky scheme on Cartesian planes. This also fixes periodicity on those planes for the coefficients. 
--- src/core_init_atmosphere/mpas_atm_advection.F | 66 ++++++++++++++++--- 1 file changed, 58 insertions(+), 8 deletions(-) diff --git a/src/core_init_atmosphere/mpas_atm_advection.F b/src/core_init_atmosphere/mpas_atm_advection.F index 7a954cd630..585f5443ef 100644 --- a/src/core_init_atmosphere/mpas_atm_advection.F +++ b/src/core_init_atmosphere/mpas_atm_advection.F @@ -11,6 +11,7 @@ module atm_advection use mpas_derived_types use mpas_pool_routines use mpas_constants + use mpas_vector_operations use mpas_abort, only : mpas_dmpar_global_abort use mpas_log, only : mpas_log_write @@ -764,6 +765,7 @@ subroutine atm_initialize_deformation_weights( mesh, nCells, on_a_sphere, sphere integer, dimension(:), pointer :: nEdgesOnCell real (kind=RKIND), dimension(:), pointer :: xCell, yCell, zCell real (kind=RKIND), dimension(:), pointer :: xVertex, yVertex, zVertex + real (kind=RKIND), dimension(:), pointer :: xEdge, yEdge, zEdge real (kind=RKIND), dimension(nCells) :: theta_abs @@ -774,14 +776,22 @@ subroutine atm_initialize_deformation_weights( mesh, nCells, on_a_sphere, sphere integer :: iCell real (kind=RKIND) :: pii real (kind=RKIND), dimension(25) :: xp, yp + real (kind=RKIND) :: xe, ye + real (kind=RKIND) :: length_scale integer, dimension(25) :: cell_list - integer :: iv + integer :: iv, ie logical :: do_the_cell real (kind=RKIND) :: area_cell, sint2, cost2, sint_cost, dx, dy + logical, pointer :: is_periodic + real(kind=RKIND), pointer :: x_period, y_period + + call mpas_pool_get_config(mesh, 'is_periodic', is_periodic) + call mpas_pool_get_config(mesh, 'x_period', x_period) + call mpas_pool_get_config(mesh, 'y_period', y_period) call mpas_pool_get_array(mesh, 'defc_a', defc_a) call mpas_pool_get_array(mesh, 'defc_b', defc_b) @@ -803,6 +813,9 @@ subroutine atm_initialize_deformation_weights( mesh, nCells, on_a_sphere, sphere call mpas_pool_get_array(mesh, 'xVertex', xVertex) call mpas_pool_get_array(mesh, 'yVertex', yVertex) call mpas_pool_get_array(mesh, 
'zVertex', zVertex) + call mpas_pool_get_array(mesh, 'xEdge', xEdge) + call mpas_pool_get_array(mesh, 'yEdge', yEdge) + call mpas_pool_get_array(mesh, 'zEdge', zEdge) defc_a(:,:) = 0. defc_b(:,:) = 0. @@ -900,21 +913,58 @@ subroutine atm_initialize_deformation_weights( mesh, nCells, on_a_sphere, sphere else ! On an x-y plane - theta_abs(iCell) = 0.0 + do i=1,n-1 + iv = verticesOnCell(i,iCell) + xp(i) = mpas_fix_periodicity(xVertex(iv),xCell(iCell),x_period) - xCell(iCell) + yp(i) = mpas_fix_periodicity(yVertex(iv),yCell(iCell),y_period) - yCell(iCell) + end do - xp(1) = xCell(iCell) - yp(1) = yCell(iCell) + ! if(iCell.lt.11) then + ! call mpas_log_write(' setting defc coefs, cell $i', intArgs=(/iCell/)) + ! do i=1,n-1 + ! iv = verticesOnCell(i,iCell) + ! call mpas_log_write(' xp,yp,xvc,yvc, $r $r $r $r', realArgs=(/xp(i),yp(i),xVertex(iv)-xCell(iCell),yVertex(iv)-yCell(iCell)/)) + ! end do + ! end if - do i=2,n - iv = verticesOnCell(i-1,iCell) - xp(i) = xVertex(iv) - yp(i) = yVertex(iv) + do i=1,n-1 + ie = edgesOnCell(i,iCell) + xe = mpas_fix_periodicity(xEdge(ie),xCell(iCell),x_period) - xCell(iCell) + ye = mpas_fix_periodicity(yEdge(ie),yCell(iCell),y_period) - yCell(iCell) + thetat(i) = atan2(ye,xe) end do + ! if(iCell .lt. 11) then + ! call mpas_log_write(' edge angles, plane calc, cell $i', intArgs=(/iCell/)) + ! do i=1,n-1 + ! call mpas_log_write(' edge angle $r', realArgs=(/thetat(i)*180./3.1415926/)) + ! end do + ! end if + + theta_abs(iCell) = thetat(1) + end if ! (1) compute cell area on the tangent plane used in the integrals ! (2) compute angle of cell edge normal vector. here we are repurposing thetat + thetat(1) = theta_abs(iCell) + + do i=2,n-1 + ip1 = i+1 + if (ip1 == n) ip1 = 1 + thetat(i) = plane_angle( 0.0_RKIND, 0.0_RKIND, 0.0_RKIND, & + xp(i)-xp(i-1), yp(i)-yp(i-1), 0.0_RKIND, & + xp(ip1)-xp(i), yp(ip1)-yp(i), 0.0_RKIND, & + 0.0_RKIND, 0.0_RKIND, 1.0_RKIND) + thetat(i) = thetat(i) + thetat(i-1) + end do + + if(iCell .lt. 
11) then + call mpas_log_write(' edge angles, generic calc, cell $i', intArgs=(/iCell/)) + do i=1,n-1 + call mpas_log_write(' edge angle $r', realArgs=(/thetat(i)*180./3.1415926/)) + end do + end if area_cell = 0. do i=1,n-1 From a4c08d4a85fb719bbc03af26491ec4183a5c5fde Mon Sep 17 00:00:00 2001 From: Bill Skamarock Date: Mon, 30 Dec 2019 12:26:47 -0700 Subject: [PATCH 008/146] Fixed error in w_x, w_y deformation coefficients. Added unit test for deformation coefficients on Cartesian-plane meshes. --- src/core_init_atmosphere/mpas_atm_advection.F | 243 +++++++++++++++++- 1 file changed, 234 insertions(+), 9 deletions(-) diff --git a/src/core_init_atmosphere/mpas_atm_advection.F b/src/core_init_atmosphere/mpas_atm_advection.F index 585f5443ef..3b487f3797 100644 --- a/src/core_init_atmosphere/mpas_atm_advection.F +++ b/src/core_init_atmosphere/mpas_atm_advection.F @@ -765,7 +765,8 @@ subroutine atm_initialize_deformation_weights( mesh, nCells, on_a_sphere, sphere integer, dimension(:), pointer :: nEdgesOnCell real (kind=RKIND), dimension(:), pointer :: xCell, yCell, zCell real (kind=RKIND), dimension(:), pointer :: xVertex, yVertex, zVertex - real (kind=RKIND), dimension(:), pointer :: xEdge, yEdge, zEdge + real (kind=RKIND), dimension(:), pointer :: xEdge, yEdge, zEdge, angleEdge + real (kind=RKIND), dimension(:), pointer :: areaCell real (kind=RKIND), dimension(nCells) :: theta_abs @@ -784,7 +785,11 @@ subroutine atm_initialize_deformation_weights( mesh, nCells, on_a_sphere, sphere integer :: iv, ie logical :: do_the_cell +<<<<<<< HEAD real (kind=RKIND) :: area_cell, sint2, cost2, sint_cost, dx, dy +======= + real (kind=RKIND) :: area_cell, sint2, cost2, sint_cost +>>>>>>> 94ba9031b (Fixed error in w_x, w_y deformation coefficients.) 
logical, pointer :: is_periodic real(kind=RKIND), pointer :: x_period, y_period @@ -816,6 +821,8 @@ subroutine atm_initialize_deformation_weights( mesh, nCells, on_a_sphere, sphere call mpas_pool_get_array(mesh, 'xEdge', xEdge) call mpas_pool_get_array(mesh, 'yEdge', yEdge) call mpas_pool_get_array(mesh, 'zEdge', zEdge) + call mpas_pool_get_array(mesh, 'angleEdge', angleEdge) + call mpas_pool_get_array(mesh, 'areaCell', areaCell) defc_a(:,:) = 0. defc_b(:,:) = 0. @@ -959,12 +966,12 @@ subroutine atm_initialize_deformation_weights( mesh, nCells, on_a_sphere, sphere thetat(i) = thetat(i) + thetat(i-1) end do - if(iCell .lt. 11) then - call mpas_log_write(' edge angles, generic calc, cell $i', intArgs=(/iCell/)) - do i=1,n-1 - call mpas_log_write(' edge angle $r', realArgs=(/thetat(i)*180./3.1415926/)) - end do - end if + ! if(iCell .lt. 11) then + ! call mpas_log_write(' edge angles, generic calc, cell $i', intArgs=(/iCell/)) + ! do i=1,n-1 + ! call mpas_log_write(' edge angle $r', realArgs=(/thetat(i)*180./3.1415926/)) + ! end do + ! end if area_cell = 0. do i=1,n-1 @@ -1000,14 +1007,232 @@ subroutine atm_initialize_deformation_weights( mesh, nCells, on_a_sphere, sphere deformation_coef_c2(i,iCell) = - deformation_coef_c2(i,iCell) deformation_coef_s2(i,iCell) = - deformation_coef_s2(i,iCell) deformation_coef_cs(i,iCell) = - deformation_coef_cs(i,iCell) - deformation_coef_c(i,iCell) = - deformation_coef_c(i,iCell) - deformation_coef_s(i,iCell) = - deformation_coef_s(i,iCell) +! deformation_coef_c(i,iCell) = - deformation_coef_c(i,iCell) +! 
deformation_coef_s(i,iCell) = - deformation_coef_s(i,iCell) end if end do end do + call atm_init_test_coefs( deformation_coef_c2, deformation_coef_s2, & + deformation_coef_cs, deformation_coef_c, & + deformation_coef_s, & + is_periodic, on_a_sphere, & + x_period, y_period, & + xEdge, yEdge, zEdge, & + xCell, yCell, zCell, nCells, & + angleEdge, nEdgesOnCell, edgesOnCell ) + + end subroutine atm_initialize_deformation_weights end module atm_advection +!----------------------- + + subroutine atm_init_test_coefs( deformation_coef_c2, deformation_coef_s2, & + deformation_coef_cs, deformation_coef_c, & + deformation_coef_s, & + is_periodic, on_a_sphere, & + x_period, y_period, & + xEdge, yEdge, zEdge, & + xCell, yCell, zCell, nCells, & + angleEdge, nEdgesOnCell, edgesOnCell ) + + implicit none + + logical :: is_periodic, on_a_sphere + integer :: nCells + integer, dimension(:) :: nEdgesOnCell + real (kind=RKIND) :: x_period, y_period + real (kind=RKIND), dimension(:,:) :: deformation_coef_c2, deformation_coef_s2 + real (kind=RKIND), dimension(:,:) :: deformation_coef_cs + real (kind=RKIND), dimension(:,:) :: deformation_coef_c, deformation_coef_s + integer, dimension(:,:) :: edgesOnCell + real (kind=RKIND), dimension(:) :: angleEdge, xEdge, yEdge, zEdge + real (kind=RKIND), dimension(:) :: xCell, yCell, zCell + + ! 
local variables + + integer :: iCell, iEdge, ie + real (kind=RKIND) :: cos_edge, sin_edge, ux, uy, vx, vy, wx, wy + real (kind=RKIND) :: xc, yc, xe, ye + real (kind=RKIND) :: angle_e, ue, ve, we, e_int + real (kind=RKIND) :: dudx, dudy, dvdx, dvdy, dwdx, dwdy + real (kind=RKIND) :: dudx_c, dudy_c, dvdx_c, dvdy_c, dwdx_c, dwdy_c + + real (kind=RKIND) :: dudx_err_max, dudy_err_max, dvdx_err_max, dvdy_err_max, dwdx_err_max, dwdy_err_max + real (kind=RKIND) :: dudx_err_tot, dudy_err_tot, dvdx_err_tot, dvdy_err_tot, dwdx_err_tot, dwdy_err_tot + real (kind=RKIND) :: dudx_max, dudy_max, dvdx_max, dvdy_max, dwdx_max, dwdy_max + + real (kind=RKIND) :: ang + real (kind=RKIND), parameter :: x_vel= 1.0, y_vel=1.0, w_vel=1.0 + real (kind=RKIND) :: u_edge, v_edge, w_edge, x, y, angle, xl, yl + real (kind=RKIND) :: dudx_cell, dudy_cell, dvdx_cell, dvdy_cell, dwdx_cell, dwdy_cell + + ! Test function definitions + ! + ! here are the velocity field functions and their derivatives. + ! First a simple test: U = x_vel*(x+y), V = y_vel * (x+y), W = w_vel*(x+y) + + u_edge(x,y,ang,xl,yl) = (x_vel*(x+y)) * cos(ang) + (y_vel * (x+y) * sin(ang)) + v_edge(x,y,ang,xl,yl) = -(x_vel*(x+y)) * sin(ang) + (y_vel * (x+y) * cos(ang)) + w_edge(x,y,xl,yl) = w_vel * (x+y) + + dudx_cell(x,y,xl,yl) = x_vel + dudy_cell(x,y,xl,yl) = x_vel + dvdx_cell(x,y,xl,yl) = y_vel + dvdy_cell(x,y,xl,yl) = y_vel + dwdx_cell(x,y,xl,yl) = w_vel + dwdy_cell(x,y,xl,yl) = w_vel + + ! ----------------- + + if ( (.not. on_a_sphere) .and. (is_periodic) ) then ! test is for doubly-periodic Cartesian plane only + + dudx_err_max = 0. + dudy_err_max = 0. + dvdx_err_max = 0. + dvdy_err_max = 0. + dwdx_err_max = 0. + dwdy_err_max = 0. + + dudx_err_tot = 0. + dudy_err_tot = 0. + dvdx_err_tot = 0. + dvdy_err_tot = 0. + dwdx_err_tot = 0. + dwdy_err_tot = 0. + + dudx_max = 0. + dudy_max = 0. + dvdx_max = 0. + dvdy_max = 0. + dwdx_max = 0. + dwdy_max = 0. + + do iCell = 1, nCells + + dudx = 0. + dudy = 0. + dvdx = 0. + dvdy = 0.
+ dwdx = 0. + dwdy = 0. + + xc = xCell(iCell) + yc = yCell(iCell) + + dudx_c = dudx_cell(xc,yc,x_period,y_period) + dudy_c = dudy_cell(xc,yc,x_period,y_period) + dvdx_c = dvdx_cell(xc,yc,x_period,y_period) + dvdy_c = dvdy_cell(xc,yc,x_period,y_period) + dwdx_c = dwdx_cell(xc,yc,x_period,y_period) + dwdy_c = dwdy_cell(xc,yc,x_period,y_period) + + do iEdge = 1, nEdgesOnCell(iCell) + + ie = edgesOnCell(iEdge,iCell) + angle_e = angleEdge(ie) + xe = xEdge(ie) + ye = yEdge(ie) + + xe = mpas_fix_periodicity(xe,xc,x_period) + ye = mpas_fix_periodicity(ye,yc,y_period) + + ue = u_edge(xe,ye,angle_e,x_period,y_period) + ve = v_edge(xe,ye,angle_e,x_period,y_period) + we = w_edge(xe,ye,x_period,y_period) + + dudx = dudx + deformation_coef_c2(iEdge,iCell)*ue & + - deformation_coef_cs(iEdge,iCell)*ve + dudy = dudy + deformation_coef_cs(iEdge,iCell)*ue & + - deformation_coef_s2(iEdge,iCell)*ve + dvdx = dvdx + deformation_coef_cs(iEdge,iCell)*ue & + + deformation_coef_c2(iEdge,iCell)*ve + dvdy = dvdy + deformation_coef_s2(iEdge,iCell)*ue & + + deformation_coef_cs(iEdge,iCell)*ve + + dwdx = dwdx + deformation_coef_c(iEdge,iCell)*we + dwdy = dwdy + deformation_coef_s(iEdge,iCell)*we + + end do + + ! call mpas_log_write(' u_x, u_y, $r, $r ', realArgs=(/dudx, dudy/)) + ! call mpas_log_write(' v_x, v_y, $r, $r ', realArgs=(/dvdx, dvdy/)) + ! call mpas_log_write(' w_x, w_y, $r, $r ', realArgs=(/dwdx, dwdy/)) + + ! 
check result for cell + + e_int = abs(dudx_c - dudx) + dudx_err_tot = dudx_err_tot + e_int + dudx_err_max = max(dudx_err_max, e_int) + + e_int = abs(dudy_c - dudy) + dudy_err_tot = dudy_err_tot + e_int + dudy_err_max = max(dudy_err_max, e_int) + + e_int = abs(dvdx_c - dvdx) + dvdx_err_tot = dvdx_err_tot + e_int + dvdx_err_max = max(dvdx_err_max, e_int) + + e_int = abs(dvdy_c - dvdy) + dvdy_err_tot = dvdy_err_tot + e_int + dvdy_err_max = max(dvdy_err_max, e_int) + + e_int = abs(dwdx_c - dwdx) + dwdx_err_tot = dwdx_err_tot + e_int + dwdx_err_max = max(dwdx_err_max, e_int) + + e_int = abs(dwdy_c - dwdy) + dwdy_err_tot = dwdy_err_tot + e_int + dwdy_err_max = max(dwdy_err_max, e_int) + + dudx_max = max(dudx_max, abs(dudx_c)) + dudy_max = max(dudy_max, abs(dudy_c)) + dvdx_max = max(dvdx_max, abs(dvdx_c)) + dvdy_max = max(dvdy_max, abs(dvdy_c)) + dwdx_max = max(dwdx_max, abs(dwdx_c)) + dwdy_max = max(dwdy_max, abs(dwdy_c)) + + end do + + ! scale errors + + dudx_err_max = dudx_err_max/dudx_max + dudy_err_max = dudy_err_max/dudy_max + dvdx_err_max = dvdx_err_max/dvdx_max + dvdy_err_max = dvdy_err_max/dvdy_max + dwdx_err_max = dwdx_err_max/dwdx_max + dwdy_err_max = dwdy_err_max/dwdy_max + + dudx_err_tot = dudx_err_tot/dudx_max/real(nCells) + dudy_err_tot = dudy_err_tot/dudy_max/real(nCells) + dvdx_err_tot = dvdx_err_tot/dvdx_max/real(nCells) + dvdy_err_tot = dvdy_err_tot/dvdy_max/real(nCells) + dwdx_err_tot = dwdx_err_tot/dwdx_max/real(nCells) + dwdy_err_tot = dwdy_err_tot/dwdy_max/real(nCells) + + ! 
output + + call mpas_log_write(' ') + call mpas_log_write(' deformation coefficients check ') + call mpas_log_write(' dudx check, max abs(dudx), max and avg error $r, $r, $r', & + realArgs=(/dudx_max, dudx_err_max, dudx_err_tot/)) + call mpas_log_write(' dudy check, max abs(dudy), max and avg error $r, $r, $r', & + realArgs=(/dudy_max, dudy_err_max, dudy_err_tot/)) + call mpas_log_write(' dvdx check, max abs(dvdx), max and avg error $r, $r, $r', & + realArgs=(/dvdx_max, dvdx_err_max, dvdx_err_tot/)) + call mpas_log_write(' dvdy check, max abs(dvdy), max and avg error $r, $r, $r', & + realArgs=(/dvdy_max, dvdy_err_max, dvdy_err_tot/)) + call mpas_log_write(' dwdx check, max abs(dwdx), max and avg error $r, $r, $r', & + realArgs=(/dwdx_max, dwdx_err_max, dwdx_err_tot/)) + call mpas_log_write(' dwdy check, max abs(dwdy), max and avg error $r, $r, $r', & + realArgs=(/dwdy_max, dwdy_err_max, dwdy_err_tot/)) + call mpas_log_write(' ') + + end if + + end subroutine atm_init_test_coefs + +end module atm_advection From 4fbeab4ff8213c6b545ff6c34d31a4c63b8b2e7e Mon Sep 17 00:00:00 2001 From: Bill Skamarock Date: Tue, 31 Dec 2019 10:50:15 -0700 Subject: [PATCH 009/146] New module file for the dissipation routines. The algorithm for the 2D Smagorinsky eddy viscosity coefficients are implemented in this module. Others to follow. --- .../dynamics/mpas_atm_dissipation_models.F | 74 +++++++++++++++++++ 1 file changed, 74 insertions(+) create mode 100644 src/core_atmosphere/dynamics/mpas_atm_dissipation_models.F diff --git a/src/core_atmosphere/dynamics/mpas_atm_dissipation_models.F b/src/core_atmosphere/dynamics/mpas_atm_dissipation_models.F new file mode 100644 index 0000000000..c64fd90fa3 --- /dev/null +++ b/src/core_atmosphere/dynamics/mpas_atm_dissipation_models.F @@ -0,0 +1,74 @@ +! Copyright (c) 2013, Los Alamos National Security, LLC (LANS) +! and the University Corporation for Atmospheric Research (UCAR). +! +! 
Unless noted otherwise source code is licensed under the BSD license. +! Additional copyright and license information can be found in the LICENSE file +! distributed with this code, or at http://mpas-dev.github.com/license.html +! + +module mpas_atm_dissipation_models + + use mpas_kind_types, only : RKIND + + contains + + subroutine smagorinsky_2d( kdiff, u, v, c_s, config_len_disp, & + deformation_coef_c2, deformation_coef_s2, deformation_coef_cs, & + invDt, h_mom_eddy_visc4, config_visc4_2dsmag, h_theta_eddy_visc4, & + cellStart, cellEnd, nEdgesOnCell, edgesOnCell, & + nCells, nEdges, nVertLevels, maxEdges ) + implicit none + + integer, intent(in) :: cellStart, cellEnd, nCells, nEdges, nVertLevels, maxEdges + real (kind=RKIND), dimension(nVertLevels,nEdges+1), intent(in) :: u + real (kind=RKIND), dimension(nVertLevels,nEdges+1), intent(in) :: v + real (kind=RKIND), dimension(maxEdges,nCells+1), intent(in) :: deformation_coef_c2 + real (kind=RKIND), dimension(maxEdges,nCells+1), intent(in) :: deformation_coef_s2 + real (kind=RKIND), dimension(maxEdges,nCells+1), intent(in) :: deformation_coef_cs + real (kind=RKIND), intent(in) :: c_s, config_len_disp, invDt, config_visc4_2dsmag + integer, dimension(nCells+1), intent(in) :: nEdgesOnCell + integer, dimension(maxEdges,nCells+1), intent(in) :: edgesOnCell + + real (kind=RKIND), dimension(nVertLevels,nCells+1), intent(out) :: kdiff + real (kind=RKIND), intent(out) :: h_mom_eddy_visc4, h_theta_eddy_visc4 + + ! 
local variables + + integer :: iCell, iEdge, k + real (kind=RKIND), dimension(nVertLevels) :: d_11, d_22, d_12, dudx, dudy, dvdx, dvdy + + do iCell = cellStart,cellEnd + dudx(1:nVertLevels) = 0.0 + dudy(1:nVertLevels) = 0.0 + dvdx(1:nVertLevels) = 0.0 + dvdy(1:nVertLevels) = 0.0 + do iEdge=1,nEdgesOnCell(iCell) + do k=1,nVertLevels + dudx(k) = dudx(k) + deformation_coef_c2(iEdge,iCell)*u(k,EdgesOnCell(iEdge,iCell)) & + - deformation_coef_cs(iEdge,iCell)*v(k,EdgesOnCell(iEdge,iCell)) + dudy(k) = dudy(k) + deformation_coef_cs(iEdge,iCell)*u(k,EdgesOnCell(iEdge,iCell)) & + - deformation_coef_s2(iEdge,iCell)*v(k,EdgesOnCell(iEdge,iCell)) + dvdx(k) = dvdx(k) + deformation_coef_cs(iEdge,iCell)*u(k,EdgesOnCell(iEdge,iCell)) & + + deformation_coef_c2(iEdge,iCell)*v(k,EdgesOnCell(iEdge,iCell)) + dvdy(k) = dvdy(k) + deformation_coef_s2(iEdge,iCell)*u(k,EdgesOnCell(iEdge,iCell)) & + + deformation_coef_cs(iEdge,iCell)*v(k,EdgesOnCell(iEdge,iCell)) + end do + end do +!DIR$ IVDEP + do k=1, nVertLevels + ! here is the Smagorinsky formulation, + ! followed by imposition of an upper bound on the eddy viscosity + d_11(k) = 2*dudx(k) + d_22(k) = 2*dvdy(k) + d_12(k) = dudy(k) + dvdx(k) + kdiff(k,iCell) = (c_s * config_len_disp)**2 * sqrt(0.25*(d_11(k)-d_22(k))**2 + d_12(k)**2) + kdiff(k,iCell) = min(kdiff(k,iCell),(0.01*config_len_disp**2) * invDt) + end do + end do + + h_mom_eddy_visc4 = config_visc4_2dsmag * config_len_disp**3 + h_theta_eddy_visc4 = h_mom_eddy_visc4 + + end subroutine smagorinsky_2d + +end module mpas_atm_dissipation_models From bf5e369d941fae9323c95cbc3ab0a403d3937a1b Mon Sep 17 00:00:00 2001 From: Michael Duda Date: Thu, 4 Dec 2025 13:55:05 -0700 Subject: [PATCH 010/146] (1) Changes to the Makefile to compile new dissipation module. (2) Changes in atm_compute_dyn_tend to call dissipation module subroutine to compute 2D Smagorinsky eddy viscosity. 
--- src/core_atmosphere/dynamics/Makefile | 5 +- .../dynamics/mpas_atm_time_integration.F | 58 +++---------------- 2 files changed, 10 insertions(+), 53 deletions(-) diff --git a/src/core_atmosphere/dynamics/Makefile b/src/core_atmosphere/dynamics/Makefile index 6892633c68..1bf21fff9d 100644 --- a/src/core_atmosphere/dynamics/Makefile +++ b/src/core_atmosphere/dynamics/Makefile @@ -1,11 +1,12 @@ .SUFFIXES: .F .o OBJS = mpas_atm_time_integration.o \ - mpas_atm_boundaries.o + mpas_atm_boundaries.o \ + mpas_atm_dissipation_models.o all: $(OBJS) -mpas_atm_time_integration.o: mpas_atm_boundaries.o mpas_atm_iau.o +mpas_atm_time_integration.o: mpas_atm_boundaries.o mpas_atm_iau.o mpas_atm_dissipation_models.o mpas_atm_boundaries.o: diff --git a/src/core_atmosphere/dynamics/mpas_atm_time_integration.F b/src/core_atmosphere/dynamics/mpas_atm_time_integration.F index b1e0241cc3..c2aa4cf94f 100644 --- a/src/core_atmosphere/dynamics/mpas_atm_time_integration.F +++ b/src/core_atmosphere/dynamics/mpas_atm_time_integration.F @@ -36,6 +36,7 @@ module atm_time_integration use mpas_atm_boundaries, only : nSpecZone, nRelaxZone, nBdyZone, mpas_atm_get_bdy_state, mpas_atm_get_bdy_tend ! regional_MPAS addition use mpas_atm_iau + use mpas_atm_dissipation_models ! ! Abstract interface for routine used to communicate halos of fields @@ -5236,60 +5237,15 @@ subroutine atm_compute_dyn_tend_work(nCells, nEdges, nVertices, nVertLevels_dumm !$acc end parallel ! Smagorinsky eddy viscosity, based on horizontal deformation (in this case on model coordinate surfaces). - ! The integration coefficients were precomputed and stored in defc_a and defc_b + ! 
The integration coefficients were precomputed and stored in deformation_coef_* if(config_horiz_mixing == "2d_smagorinsky") then - !$acc parallel default(present) - !$acc loop gang worker private(d_diag, d_off_diag) - do iCell = cellStart,cellEnd - - !$acc loop vector - do k = 1, nVertLevels - d_diag(k) = 0.0_RKIND - d_off_diag(k) = 0.0_RKIND - end do - - dudx(1:nVertLevels) = 0.0 - dudy(1:nVertLevels) = 0.0 - dvdx(1:nVertLevels) = 0.0 - dvdy(1:nVertLevels) = 0.0 - - !$acc loop seq - do iEdge=1,nEdgesOnCell(iCell) - !$acc loop vector - do k=1,nVertLevels - d_diag(k) = d_diag(k) + defc_a(iEdge,iCell)*u(k,EdgesOnCell(iEdge,iCell)) & - - defc_b(iEdge,iCell)*v(k,EdgesOnCell(iEdge,iCell)) - d_off_diag(k) = d_off_diag(k) + defc_b(iEdge,iCell)*u(k,EdgesOnCell(iEdge,iCell)) & - + defc_a(iEdge,iCell)*v(k,EdgesOnCell(iEdge,iCell)) - dudx(k) = dudx(k) + deformation_coef_c2(iEdge,iCell)*u(k,EdgesOnCell(iEdge,iCell)) & - - deformation_coef_cs(iEdge,iCell)*v(k,EdgesOnCell(iEdge,iCell)) - dudy(k) = dudy(k) + deformation_coef_cs(iEdge,iCell)*u(k,EdgesOnCell(iEdge,iCell)) & - - deformation_coef_s2(iEdge,iCell)*v(k,EdgesOnCell(iEdge,iCell)) - dvdx(k) = dvdx(k) + deformation_coef_cs(iEdge,iCell)*u(k,EdgesOnCell(iEdge,iCell)) & - + deformation_coef_c2(iEdge,iCell)*v(k,EdgesOnCell(iEdge,iCell)) - dvdy(k) = dvdy(k) + deformation_coef_s2(iEdge,iCell)*u(k,EdgesOnCell(iEdge,iCell)) & - + deformation_coef_cs(iEdge,iCell)*v(k,EdgesOnCell(iEdge,iCell)) - end do - end do -!DIR$ IVDEP - !$acc loop vector - do k=1, nVertLevels - ! here is the Smagorinsky formulation, - ! followed by imposition of an upper bound on the eddy viscosity - ! kdiff(k,iCell) = min((c_s * config_len_disp)**2 * sqrt(d_diag(k)**2 + d_off_diag(k)**2),(0.01*config_len_disp**2) * invDt) - ! 
deformation formulation - d_11(k) = 2*dudx(k) - d_22(k) = 2*dvdy(k) - d_12(k) = dudy(k) + dvdx(k) - kdiff(k,iCell) = min((c_s * config_len_disp)**2 * sqrt(0.25*(d_11(k)-d_22(k))**2 + d_12(k)**2),(0.01*config_len_disp**2) * invDt) - end do - end do - !$acc end parallel - - h_mom_eddy_visc4 = config_visc4_2dsmag * config_len_disp**3 - h_theta_eddy_visc4 = h_mom_eddy_visc4 + call smagorinsky_2d( kdiff, u, v, c_s, config_len_disp, & + deformation_coef_c2, deformation_coef_s2, deformation_coef_cs, & + invDt, h_mom_eddy_visc4, config_visc4_2dsmag, h_theta_eddy_visc4, & + cellStart, cellEnd, nEdgesOnCell, edgesOnCell, & + nCells, nEdges, nVertLevels, maxEdges ) else if(config_horiz_mixing == "2d_fixed") then From 597f74f0bddf7ddd17386fb29d1b3a917f7ed1e8 Mon Sep 17 00:00:00 2001 From: Bill Skamarock Date: Tue, 31 Dec 2019 11:11:04 -0700 Subject: [PATCH 011/146] Modifications to use mpas_atm_dimensions in the dissipation module so that compile time specification of Nvertlevels and maxEdges is enabled. --- src/core_atmosphere/dynamics/mpas_atm_dissipation_models.F | 7 +++++-- src/core_atmosphere/dynamics/mpas_atm_time_integration.F | 2 +- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/src/core_atmosphere/dynamics/mpas_atm_dissipation_models.F b/src/core_atmosphere/dynamics/mpas_atm_dissipation_models.F index c64fd90fa3..7b8457d927 100644 --- a/src/core_atmosphere/dynamics/mpas_atm_dissipation_models.F +++ b/src/core_atmosphere/dynamics/mpas_atm_dissipation_models.F @@ -16,10 +16,13 @@ subroutine smagorinsky_2d( kdiff, u, v, c_s, config_len_disp, deformation_coef_c2, deformation_coef_s2, deformation_coef_cs, & invDt, h_mom_eddy_visc4, config_visc4_2dsmag, h_theta_eddy_visc4, & cellStart, cellEnd, nEdgesOnCell, edgesOnCell, & - nCells, nEdges, nVertLevels, maxEdges ) + nCells, nEdges ) + + use mpas_atm_dimensions ! 
pull nVertLevels and maxEdges from here + implicit none - integer, intent(in) :: cellStart, cellEnd, nCells, nEdges, nVertLevels, maxEdges + integer, intent(in) :: cellStart, cellEnd, nCells, nEdges real (kind=RKIND), dimension(nVertLevels,nEdges+1), intent(in) :: u real (kind=RKIND), dimension(nVertLevels,nEdges+1), intent(in) :: v real (kind=RKIND), dimension(maxEdges,nCells+1), intent(in) :: deformation_coef_c2 diff --git a/src/core_atmosphere/dynamics/mpas_atm_time_integration.F b/src/core_atmosphere/dynamics/mpas_atm_time_integration.F index c2aa4cf94f..0ec79e0800 100644 --- a/src/core_atmosphere/dynamics/mpas_atm_time_integration.F +++ b/src/core_atmosphere/dynamics/mpas_atm_time_integration.F @@ -5245,7 +5245,7 @@ subroutine atm_compute_dyn_tend_work(nCells, nEdges, nVertices, nVertLevels_dumm deformation_coef_c2, deformation_coef_s2, deformation_coef_cs, & invDt, h_mom_eddy_visc4, config_visc4_2dsmag, h_theta_eddy_visc4, & cellStart, cellEnd, nEdgesOnCell, edgesOnCell, & - nCells, nEdges, nVertLevels, maxEdges ) + nCells, nEdges ) else if(config_horiz_mixing == "2d_fixed") then From b454527bc22d6425241273219ab11c000d15bb65 Mon Sep 17 00:00:00 2001 From: Michael Duda Date: Thu, 4 Dec 2025 14:00:37 -0700 Subject: [PATCH 012/146] Adding configuration string for the les models in the Registry. Made string available in atm_compute_dyn_tend and set up logic to allow for different dissipation options that now include the les models. 
--- src/core_atmosphere/Registry.xml | 5 ++ .../dynamics/mpas_atm_time_integration.F | 51 ++++++++++++------- 2 files changed, 37 insertions(+), 19 deletions(-) diff --git a/src/core_atmosphere/Registry.xml b/src/core_atmosphere/Registry.xml index 2ae703653d..94e6b43f22 100644 --- a/src/core_atmosphere/Registry.xml +++ b/src/core_atmosphere/Registry.xml @@ -146,6 +146,11 @@ description="Formulation of horizontal mixing" possible_values="`2d_fixed' or `2d_smagorinsky'"/> + + Date: Thu, 4 Dec 2025 14:06:43 -0700 Subject: [PATCH 013/146] Moved the dissipation term computations for u, theta and w from subroutine atm_compute_dyn_tend to subroutines in mpas_atm_dissipation.F. The results are no longer bit-for-bit with the modified code because we have re-arranged the order to the processes in the vertical momentum equation to accommodate doing the horizontal and vertical dissipation for w together. --- .../dynamics/mpas_atm_dissipation_models.F | 502 ++++++++++++++++++ .../dynamics/mpas_atm_time_integration.F | 447 ++-------------- 2 files changed, 533 insertions(+), 416 deletions(-) diff --git a/src/core_atmosphere/dynamics/mpas_atm_dissipation_models.F b/src/core_atmosphere/dynamics/mpas_atm_dissipation_models.F index 7b8457d927..af983af979 100644 --- a/src/core_atmosphere/dynamics/mpas_atm_dissipation_models.F +++ b/src/core_atmosphere/dynamics/mpas_atm_dissipation_models.F @@ -74,4 +74,506 @@ subroutine smagorinsky_2d( kdiff, u, v, c_s, config_len_disp, end subroutine smagorinsky_2d +!--------------------------------------- + + subroutine u_dissipation( edgeStart, edgeEnd, edgeSolveStart, edgeSolveEnd, vertexStart, vertexEnd, & + cellStart, cellEnd, nCells, nEdges, nVertices, vertexDegree, & + cellsOnEdge, verticesOnEdge, edgesOnCell, edgesOnVertex, & + nEdgesOnCell, edgesOnCell_sign, edgesOnVertex_sign, & + invAreaCell, invAreaTriangle, invDvEdge, invDcEdge, & + angleEdge, dcEdge, dvEdge, meshScalingDel2, meshScalingDel4, & + config_mix_full, h_mom_eddy_visc4, 
v_mom_eddy_visc2, & + config_del4u_div_factor, zgrid, kdiff, & + delsq_u, delsq_vorticity, delsq_divergence, & + u, divergence, vorticity, rho_edge, u_init, v_init, tend_u_euler ) + + use mpas_atm_dimensions ! pull nVertLevels and maxEdges from here + + implicit none + + integer, intent(in) :: edgeStart, edgeEnd, edgeSolveStart, edgeSolveEnd + integer, intent(in) :: vertexStart, vertexEnd, vertexDegree + integer, intent(in) :: cellStart, cellEnd + integer, intent(in) :: nCells, nEdges, nVertices + logical, intent(in) :: config_mix_full + + integer, dimension(2,nEdges+1), intent(in) :: cellsOnEdge + integer, dimension(2,nEdges+1), intent(in) :: verticesOnEdge + integer, dimension(maxEdges,nCells+1), intent(in) :: edgesOnCell + integer, dimension(nCells+1), intent(in) :: nEdgesOnCell + integer, dimension(vertexDegree,nVertices+1), intent(in) :: edgesOnVertex + + real (kind=RKIND), intent(in) :: h_mom_eddy_visc4 + real (kind=RKIND), intent(in) :: v_mom_eddy_visc2 + real (kind=RKIND), intent(in) :: config_del4u_div_factor + + real (kind=RKIND), dimension(maxEdges,nCells+1), intent(in) :: edgesOnCell_sign + real (kind=RKIND), dimension(vertexDegree,nVertices+1), intent(in) :: edgesOnVertex_sign + real (kind=RKIND), dimension(nVertices+1), intent(in) :: invAreaTriangle + real (kind=RKIND), dimension(nCells+1), intent(in) :: invAreaCell + real (kind=RKIND), dimension(nEdges+1), intent(in) :: invDcEdge + real (kind=RKIND), dimension(nEdges+1), intent(in) :: invDvEdge + real (kind=RKIND), dimension(nEdges+1), intent(in) :: angleEdge + real (kind=RKIND), dimension(nEdges+1), intent(in) :: dcEdge + real (kind=RKIND), dimension(nEdges+1), intent(in) :: dvEdge + real (kind=RKIND), dimension(nEdges+1), intent(in) :: meshScalingDel2 + real (kind=RKIND), dimension(nEdges+1), intent(in) :: meshScalingDel4 + real (kind=RKIND), dimension(nVertLevels+1,nCells+1), intent(in) :: zgrid + + real (kind=RKIND), dimension(nVertLevels,nEdges+1), intent(in) :: u + real (kind=RKIND), 
dimension(nVertLevels,nCells+1), intent(in) :: divergence + real (kind=RKIND), dimension(nVertLevels,nVertices+1), intent(in) :: vorticity + real (kind=RKIND), dimension(nVertLevels,nEdges+1), intent(in) :: rho_edge + real (kind=RKIND), dimension(nVertLevels,nCells+1), intent(in) :: kdiff + + + ! scratch space from calling routine + real (kind=RKIND), dimension(nVertLevels,nEdges+1) :: delsq_u + real (kind=RKIND), dimension(nVertLevels,nVertices+1) :: delsq_vorticity + real (kind=RKIND), dimension(nVertLevels,nCells+1) :: delsq_divergence + + real (kind=RKIND), dimension(nVertLevels), intent(in) :: u_init, v_init + + real (kind=RKIND), dimension(nVertLevels,nEdges+1), intent(out) :: tend_u_euler + + ! local variables + + integer :: iEdge, cell1, cell2, vertex1, vertex2, iVertex, iCell, i, k + real (kind=RKIND) :: r_dc, r_dv, u_diffusion, kdiffu, r, edge_sign, u_mix_scale + real (kind=RKIND) :: z1, z2, z3, z4, zm, z0, zp + real (kind=RKIND), dimension(nVertLevels) :: u_mix + +!$OMP BARRIER + + ! del^4 horizontal filter. We compute this as del^2 ( del^2 (u) ). + ! First, storage to hold the result from the first del^2 computation. + + delsq_u(1:nVertLevels,edgeStart:edgeEnd) = 0.0 + + do iEdge=edgeStart,edgeEnd + cell1 = cellsOnEdge(1,iEdge) + cell2 = cellsOnEdge(2,iEdge) + vertex1 = verticesOnEdge(1,iEdge) + vertex2 = verticesOnEdge(2,iEdge) + r_dc = invDcEdge(iEdge) + r_dv = min(invDvEdge(iEdge), 4*invDcEdge(iEdge)) + +!DIR$ IVDEP + do k=1,nVertLevels + + ! Compute diffusion, computed as \nabla divergence - k \times \nabla vorticity + ! only valid for h_mom_eddy_visc4 == constant + u_diffusion = ( divergence(k,cell2) - divergence(k,cell1) ) * r_dc & + -( vorticity(k,vertex2) - vorticity(k,vertex1) ) * r_dv + + delsq_u(k,iEdge) = delsq_u(k,iEdge) + u_diffusion + + kdiffu = 0.5*(kdiff(k,cell1)+kdiff(k,cell2)) + + ! 
include 2nd-orer diffusion here + tend_u_euler(k,iEdge) = tend_u_euler(k,iEdge) & + + rho_edge(k,iEdge)* kdiffu * u_diffusion * meshScalingDel2(iEdge) + + end do + end do + + if (h_mom_eddy_visc4 > 0.0) then ! 4th order mixing is active + +!$OMP BARRIER + + do iVertex=vertexStart,vertexEnd + delsq_vorticity(1:nVertLevels,iVertex) = 0.0 + do i=1,vertexDegree + iEdge = edgesOnVertex(i,iVertex) + edge_sign = invAreaTriangle(iVertex) * dcEdge(iEdge) * edgesOnVertex_sign(i,iVertex) + do k=1,nVertLevels + delsq_vorticity(k,iVertex) = delsq_vorticity(k,iVertex) + edge_sign * delsq_u(k,iEdge) + end do + end do + end do + + do iCell=cellStart,cellEnd + delsq_divergence(1:nVertLevels,iCell) = 0.0 + r = invAreaCell(iCell) + do i=1,nEdgesOnCell(iCell) + iEdge = edgesOnCell(i,iCell) + edge_sign = r * dvEdge(iEdge) * edgesOnCell_sign(i,iCell) + do k=1,nVertLevels + delsq_divergence(k,iCell) = delsq_divergence(k,iCell) + edge_sign * delsq_u(k,iEdge) + end do + end do + end do + +!$OMP BARRIER + + do iEdge=edgeSolveStart,edgeSolveEnd + cell1 = cellsOnEdge(1,iEdge) + cell2 = cellsOnEdge(2,iEdge) + vertex1 = verticesOnEdge(1,iEdge) + vertex2 = verticesOnEdge(2,iEdge) + + u_mix_scale = meshScalingDel4(iEdge)*h_mom_eddy_visc4 + r_dc = u_mix_scale * config_del4u_div_factor * invDcEdge(iEdge) + r_dv = u_mix_scale * min(invDvEdge(iEdge), 4*invDcEdge(iEdge)) + +!DIR$ IVDEP + do k=1,nVertLevels + + ! Compute diffusion, computed as \nabla divergence - k \times \nabla vorticity + ! only valid for h_mom_eddy_visc4 == constant + ! + ! Here, we scale the diffusion on the divergence part a factor of config_del4u_div_factor + ! relative to the rotational part. The stability constraint on the divergence component is much less + ! stringent than the rotational part, and this flexibility may be useful. + ! 
+ u_diffusion = rho_edge(k,iEdge) * ( ( delsq_divergence(k,cell2) - delsq_divergence(k,cell1) ) * r_dc & + -( delsq_vorticity(k,vertex2) - delsq_vorticity(k,vertex1) ) * r_dv ) + tend_u_euler(k,iEdge) = tend_u_euler(k,iEdge) - u_diffusion + + end do + end do + + end if ! 4th order mixing is active + + ! + ! vertical mixing for u - 2nd order filter in physical (z) space + ! + if ( v_mom_eddy_visc2 > 0.0 ) then + + if (config_mix_full) then ! mix full state + + do iEdge=edgeSolveStart,edgeSolveEnd + + cell1 = cellsOnEdge(1,iEdge) + cell2 = cellsOnEdge(2,iEdge) + + do k=2,nVertLevels-1 + + z1 = 0.5*(zgrid(k-1,cell1)+zgrid(k-1,cell2)) + z2 = 0.5*(zgrid(k ,cell1)+zgrid(k ,cell2)) + z3 = 0.5*(zgrid(k+1,cell1)+zgrid(k+1,cell2)) + z4 = 0.5*(zgrid(k+2,cell1)+zgrid(k+2,cell2)) + + zm = 0.5*(z1+z2) + z0 = 0.5*(z2+z3) + zp = 0.5*(z3+z4) + + tend_u_euler(k,iEdge) = tend_u_euler(k,iEdge) + rho_edge(k,iEdge) * v_mom_eddy_visc2*( & + (u(k+1,iEdge)-u(k ,iEdge))/(zp-z0) & + -(u(k ,iEdge)-u(k-1,iEdge))/(z0-zm) )/(0.5*(zp-zm)) + end do + end do + + else ! idealized cases where we mix on the perturbation from the initial 1-D state + + do iEdge=edgeSolveStart,edgeSolveEnd + + cell1 = cellsOnEdge(1,iEdge) + cell2 = cellsOnEdge(2,iEdge) + + do k=1,nVertLevels + u_mix(k) = u(k,iEdge) - u_init(k) * cos( angleEdge(iEdge) ) & + - v_init(k) * sin( angleEdge(iEdge) ) + end do + + do k=2,nVertLevels-1 + + z1 = 0.5*(zgrid(k-1,cell1)+zgrid(k-1,cell2)) + z2 = 0.5*(zgrid(k ,cell1)+zgrid(k ,cell2)) + z3 = 0.5*(zgrid(k+1,cell1)+zgrid(k+1,cell2)) + z4 = 0.5*(zgrid(k+2,cell1)+zgrid(k+2,cell2)) + + zm = 0.5*(z1+z2) + z0 = 0.5*(z2+z3) + zp = 0.5*(z3+z4) + + tend_u_euler(k,iEdge) = tend_u_euler(k,iEdge) + rho_edge(k,iEdge) * v_mom_eddy_visc2*( & + (u_mix(k+1)-u_mix(k ))/(zp-z0) & + -(u_mix(k )-u_mix(k-1))/(z0-zm) )/(0.5*(zp-zm)) + end do + end do + + end if ! mix perturbation state + + end if ! 
vertical mixing of horizontal momentum + + end subroutine u_dissipation + +!------------------------ + + subroutine w_dissipation( cellStart, cellEnd, cellSolveStart, cellSolveEnd, & + nCells, nEdges, & + nEdgesOnCell, edgesOnCell, cellsOnEdge, edgesOnCell_sign, & + invAreaCell, invDcEdge, dvEdge, & + meshScalingDel2, meshScalingDel4, & + rdzw, rdzu, & + v_mom_eddy_visc2, h_mom_eddy_visc4, & + delsq_w, & + w, rho_edge, kdiff, rho_zz, & + tend_w_euler ) + + + use mpas_atm_dimensions ! pull nVertLevels and maxEdges from here + + implicit none + + integer, intent(in) :: cellStart, cellEnd + integer, intent(in) :: cellSolveStart, cellSolveEnd + integer, intent(in) :: nCells, nEdges + + integer, dimension(nCells+1), intent(in) :: nEdgesOnCell + integer, dimension(maxEdges,nCells+1), intent(in) :: EdgesOnCell + + integer, dimension(2,nEdges+1), intent(in) :: cellsOnEdge + + real (kind=RKIND), intent(in) :: h_mom_eddy_visc4 + real (kind=RKIND), intent(in) :: v_mom_eddy_visc2 + + real (kind=RKIND), dimension(maxEdges,nCells+1), intent(in) :: edgesOnCell_sign + real (kind=RKIND), dimension(nCells+1), intent(in) :: invAreaCell + real (kind=RKIND), dimension(nEdges+1), intent(in) :: dvEdge + real (kind=RKIND), dimension(nEdges+1), intent(in) :: invDcEdge + real (kind=RKIND), dimension(nEdges+1), intent(in) :: meshScalingDel2 + real (kind=RKIND), dimension(nEdges+1), intent(in) :: meshScalingDel4 + real (kind=RKIND), dimension(nVertLevels), intent(in) :: rdzw + real (kind=RKIND), dimension(nVertLevels), intent(in) :: rdzu + + real (kind=RKIND), dimension(nVertLevels+1,nCells+1), intent(in) :: w + real (kind=RKIND), dimension(nVertLevels,nCells+1), intent(in) :: kdiff + real (kind=RKIND), dimension(nVertLevels,nCells+1), intent(in) :: rho_zz + real (kind=RKIND), dimension(nVertLevels,nEdges+1), intent(in) :: rho_edge + + real (kind=RKIND), dimension(nVertLevels+1,nCells+1), intent(inout) :: tend_w_euler + + ! 
storage passed in from calling routine + real (kind=RKIND), dimension(nVertLevels,nCells+1) :: delsq_w + + ! local variables + + integer :: cell1, cell2, iEdge, iCell, i, k + real (kind=RKIND) :: r_areaCell, edge_sign, w_turb_flux + +! !OMP BARRIER why is this openmp barrier here??? + + ! del^4 horizontal filter. We compute this as del^2 ( del^2 (w) ). + ! + ! First, storage to hold the result from the first del^2 computation. + ! we copied code from the theta mixing, hence the theta* names. + + + delsq_w(1:nVertLevels,cellStart:cellEnd) = 0.0 + + do iCell=cellStart,cellEnd + tend_w_euler(1:nVertLevels+1,iCell) = 0.0 + r_areaCell = invAreaCell(iCell) + do i=1,nEdgesOnCell(iCell) + iEdge = edgesOnCell(i,iCell) + + edge_sign = 0.5 * r_areaCell*edgesOnCell_sign(i,iCell) * dvEdge(iEdge) * invDcEdge(iEdge) + + cell1 = cellsOnEdge(1,iEdge) + cell2 = cellsOnEdge(2,iEdge) + +!DIR$ IVDEP + do k=2,nVertLevels + + w_turb_flux = edge_sign*(rho_edge(k,iEdge)+rho_edge(k-1,iEdge))*(w(k,cell2) - w(k,cell1)) + delsq_w(k,iCell) = delsq_w(k,iCell) + w_turb_flux + w_turb_flux = w_turb_flux * meshScalingDel2(iEdge) * 0.25 * & + (kdiff(k,cell1)+kdiff(k,cell2)+kdiff(k-1,cell1)+kdiff(k-1,cell2)) + tend_w_euler(k,iCell) = tend_w_euler(k,iCell) + w_turb_flux + end do + end do + end do + +!$OMP BARRIER + + if (h_mom_eddy_visc4 > 0.0) then ! 4th order mixing is active + + do iCell=cellSolveStart,cellSolveEnd ! Technically updating fewer cells than before... + r_areaCell = h_mom_eddy_visc4 * invAreaCell(iCell) + do i=1,nEdgesOnCell(iCell) + iEdge = edgesOnCell(i,iCell) + cell1 = cellsOnEdge(1,iEdge) + cell2 = cellsOnEdge(2,iEdge) + + edge_sign = meshScalingDel4(iEdge)*r_areaCell*dvEdge(iEdge)*edgesOnCell_sign(i,iCell) * invDcEdge(iEdge) + + do k=2,nVertLevels + tend_w_euler(k,iCell) = tend_w_euler(k,iCell) - edge_sign * (delsq_w(k,cell2) - delsq_w(k,cell1)) + end do + + end do + end do + + end if ! 4th order mixing is active + + if ( v_mom_eddy_visc2 > 0.0 ) then ! 
vertical mixing + + do iCell=cellSolveStart,cellSolveEnd +!DIR$ IVDEP + do k=2,nVertLevels + tend_w_euler(k,iCell) = tend_w_euler(k,iCell) + v_mom_eddy_visc2*0.5*(rho_zz(k,iCell)+rho_zz(k-1,iCell))*( & + (w(k+1,iCell)-w(k ,iCell))*rdzw(k) & + -(w(k ,iCell)-w(k-1,iCell))*rdzw(k-1) )*rdzu(k) + end do + end do + + end if + + end subroutine w_dissipation + +!----------------------------------------------------- + + subroutine theta_dissipation( cellStart, cellEnd, cellSolveStart, cellSolveEnd, & + nCells, nEdges, & + nEdgesOnCell, edgesOnCell, cellsOnEdge, edgesOnCell_sign, & + invAreaCell, invDcEdge, dvEdge, & + meshScalingDel2, meshScalingDel4, & + config_mix_full, t_init, zgrid, & + rdzw, rdzu, & + v_theta_eddy_visc2, h_theta_eddy_visc4, prandtl_inv, & + delsq_theta, & + theta_m, rho_edge, kdiff, rho_zz, & + tend_theta_euler ) + + + use mpas_atm_dimensions ! pull nVertLevels and maxEdges from here + implicit none + + integer, intent(in) :: cellStart, cellEnd + integer, intent(in) :: cellSolveStart, cellSolveEnd + integer, intent(in) :: nCells, nEdges + + logical, intent(in) :: config_mix_full + + integer, dimension(nCells+1), intent(in) :: nEdgesOnCell + integer, dimension(maxEdges,nCells+1), intent(in) :: EdgesOnCell + + integer, dimension(2,nEdges+1), intent(in) :: cellsOnEdge + + real (kind=RKIND), intent(in) :: h_theta_eddy_visc4 + real (kind=RKIND), intent(in) :: v_theta_eddy_visc2 + real (kind=RKIND), intent(in) :: prandtl_inv + + real (kind=RKIND), dimension(maxEdges,nCells+1), intent(in) :: edgesOnCell_sign + real (kind=RKIND), dimension(nCells+1), intent(in) :: invAreaCell + real (kind=RKIND), dimension(nEdges+1), intent(in) :: dvEdge + real (kind=RKIND), dimension(nEdges+1), intent(in) :: invDcEdge + real (kind=RKIND), dimension(nEdges+1), intent(in) :: meshScalingDel2 + real (kind=RKIND), dimension(nEdges+1), intent(in) :: meshScalingDel4 + real (kind=RKIND), dimension(nVertLevels), intent(in) :: rdzw + real (kind=RKIND), dimension(nVertLevels), 
intent(in) :: rdzu + real (kind=RKIND), dimension(nVertLevels+1, nCells+1), intent(in) :: zgrid + + real (kind=RKIND), dimension(nVertLevels,nCells+1), intent(in) :: t_init + + real (kind=RKIND), dimension(nVertLevels,nCells+1), intent(in) :: theta_m + real (kind=RKIND), dimension(nVertLevels,nCells+1), intent(in) :: kdiff + real (kind=RKIND), dimension(nVertLevels,nCells+1), intent(in) :: rho_zz + real (kind=RKIND), dimension(nVertLevels,nEdges+1), intent(in) :: rho_edge + + real (kind=RKIND), dimension(nVertLevels,nCells+1), intent(inout) :: tend_theta_euler + + ! storage passed in from calling routine + real (kind=RKIND), dimension(nVertLevels,nCells+1) :: delsq_theta + + ! local variables + integer :: cell1, cell2, iEdge, iCell, i, k + real (kind=RKIND) :: r_areaCell, edge_sign, theta_turb_flux, pr_scale + real (kind=RKIND) :: z1, z2, z3, z4, zm, z0, zp + + delsq_theta(1:nVertLevels,cellStart:cellEnd) = 0.0 + + do iCell=cellStart,cellEnd + tend_theta_euler(1:nVertLevels,iCell) = 0.0 + r_areaCell = invAreaCell(iCell) + do i=1,nEdgesOnCell(iCell) + iEdge = edgesOnCell(i,iCell) + edge_sign = r_areaCell*edgesOnCell_sign(i,iCell) * dvEdge(iEdge) * invDcEdge(iEdge) + pr_scale = prandtl_inv * meshScalingDel2(iEdge) + cell1 = cellsOnEdge(1,iEdge) + cell2 = cellsOnEdge(2,iEdge) +!DIR$ IVDEP + do k=1,nVertLevels + +! we are computing the Smagorinsky filter at more points than needed here so as to pick up the delsq_theta for 4th order filter below + + theta_turb_flux = edge_sign*(theta_m(k,cell2) - theta_m(k,cell1))*rho_edge(k,iEdge) + delsq_theta(k,iCell) = delsq_theta(k,iCell) + theta_turb_flux + theta_turb_flux = theta_turb_flux*0.5*(kdiff(k,cell1)+kdiff(k,cell2)) * pr_scale + tend_theta_euler(k,iCell) = tend_theta_euler(k,iCell) + theta_turb_flux + + end do + end do + end do + +!$OMP BARRIER + + if (h_theta_eddy_visc4 > 0.0) then ! 4th order mixing is active + + do iCell=cellSolveStart,cellSolveEnd ! Technically updating fewer cells than before... 
+ r_areaCell = h_theta_eddy_visc4 * prandtl_inv * invAreaCell(iCell) + do i=1,nEdgesOnCell(iCell) + + iEdge = edgesOnCell(i,iCell) + edge_sign = meshScalingDel4(iEdge)*r_areaCell*dvEdge(iEdge)*edgesOnCell_sign(i,iCell)*invDcEdge(iEdge) + + cell1 = cellsOnEdge(1,iEdge) + cell2 = cellsOnEdge(2,iEdge) + + do k=1,nVertLevels + tend_theta_euler(k,iCell) = tend_theta_euler(k,iCell) - edge_sign*(delsq_theta(k,cell2) - delsq_theta(k,cell1)) + end do + end do + end do + + end if ! 4th order mixing is active + + if ( v_theta_eddy_visc2 > 0.0 ) then ! vertical mixing for theta_m + + if (config_mix_full) then + + do iCell = cellSolveStart,cellSolveEnd + do k=2,nVertLevels-1 + z1 = zgrid(k-1,iCell) + z2 = zgrid(k ,iCell) + z3 = zgrid(k+1,iCell) + z4 = zgrid(k+2,iCell) + + zm = 0.5*(z1+z2) + z0 = 0.5*(z2+z3) + zp = 0.5*(z3+z4) + + tend_theta_euler(k,iCell) = tend_theta_euler(k,iCell) + v_theta_eddy_visc2*prandtl_inv*rho_zz(k,iCell)*(& + (theta_m(k+1,iCell)-theta_m(k ,iCell))/(zp-z0) & + -(theta_m(k ,iCell)-theta_m(k-1,iCell))/(z0-zm) )/(0.5*(zp-zm)) + end do + end do + + else ! 
idealized cases where we mix on the perturbation from the initial 1-D state + + do iCell = cellSolveStart,cellSolveEnd + do k=2,nVertLevels-1 + z1 = zgrid(k-1,iCell) + z2 = zgrid(k ,iCell) + z3 = zgrid(k+1,iCell) + z4 = zgrid(k+2,iCell) + + zm = 0.5*(z1+z2) + z0 = 0.5*(z2+z3) + zp = 0.5*(z3+z4) + + tend_theta_euler(k,iCell) = tend_theta_euler(k,iCell) + v_theta_eddy_visc2*prandtl_inv*rho_zz(k,iCell)*(& + ((theta_m(k+1,iCell)-t_init(k+1,iCell))-(theta_m(k ,iCell)-t_init(k,iCell)))/(zp-z0) & + -((theta_m(k ,iCell)-t_init(k,iCell))-(theta_m(k-1,iCell)-t_init(k-1,iCell)))/(z0-zm) )/(0.5*(zp-zm)) + end do + end do + + end if + + end if + + end subroutine theta_dissipation + end module mpas_atm_dissipation_models diff --git a/src/core_atmosphere/dynamics/mpas_atm_time_integration.F b/src/core_atmosphere/dynamics/mpas_atm_time_integration.F index 97f3d85e25..1ae2abb219 100644 --- a/src/core_atmosphere/dynamics/mpas_atm_time_integration.F +++ b/src/core_atmosphere/dynamics/mpas_atm_time_integration.F @@ -5461,203 +5461,16 @@ subroutine atm_compute_dyn_tend_work(nCells, nEdges, nVertices, nVertLevels_dumm !$OMP BARRIER - ! del^4 horizontal filter. We compute this as del^2 ( del^2 (u) ). - ! First, storage to hold the result from the first del^2 computation. - - !$acc parallel default(present) - !$acc loop gang worker - do iEdge = edgeStart, edgeEnd - !$acc loop vector - do k = 1, nVertLevels - delsq_u(k,iEdge) = 0.0_RKIND - end do - end do - !$acc end parallel - - !$acc parallel default(present) - !$acc loop gang worker - do iEdge=edgeStart,edgeEnd - cell1 = cellsOnEdge(1,iEdge) - cell2 = cellsOnEdge(2,iEdge) - vertex1 = verticesOnEdge(1,iEdge) - vertex2 = verticesOnEdge(2,iEdge) - r_dc = invDcEdge(iEdge) - r_dv = min(invDvEdge(iEdge), 4*invDcEdge(iEdge)) - -!DIR$ IVDEP - !$acc loop vector - do k=1,nVertLevels - - ! Compute diffusion, computed as \nabla divergence - k \times \nabla vorticity - ! 
only valid for h_mom_eddy_visc4 == constant - u_diffusion = ( divergence(k,cell2) - divergence(k,cell1) ) * r_dc & - -( vorticity(k,vertex2) - vorticity(k,vertex1) ) * r_dv - - delsq_u(k,iEdge) = delsq_u(k,iEdge) + u_diffusion - - kdiffu = 0.5*(kdiff(k,cell1)+kdiff(k,cell2)) - - ! include 2nd-orer diffusion here - tend_u_euler(k,iEdge) = tend_u_euler(k,iEdge) & - + rho_edge(k,iEdge)* kdiffu * u_diffusion * meshScalingDel2(iEdge) - - end do - end do - !$acc end parallel - - if (h_mom_eddy_visc4 > 0.0) then ! 4th order mixing is active - -!$OMP BARRIER - - !$acc parallel default(present) - !$acc loop gang worker - do iVertex=vertexStart,vertexEnd - - !$acc loop vector - do k=1,nVertLevels - delsq_vorticity(k,iVertex) = 0.0_RKIND - end do - - !$acc loop seq - do i=1,vertexDegree - iEdge = edgesOnVertex(i,iVertex) - edge_sign = invAreaTriangle(iVertex) * dcEdge(iEdge) * edgesOnVertex_sign(i,iVertex) - - !$acc loop vector - do k=1,nVertLevels - delsq_vorticity(k,iVertex) = delsq_vorticity(k,iVertex) + edge_sign * delsq_u(k,iEdge) - end do - end do - end do - - !$acc loop gang worker - do iCell=cellStart,cellEnd - - !$acc loop vector - do k=1,nVertLevels - delsq_divergence(k,iCell) = 0.0_RKIND - end do - - r = invAreaCell(iCell) - - !$acc loop seq - do i=1,nEdgesOnCell(iCell) - iEdge = edgesOnCell(i,iCell) - edge_sign = r * dvEdge(iEdge) * edgesOnCell_sign(i,iCell) - - !$acc loop vector - do k=1,nVertLevels - delsq_divergence(k,iCell) = delsq_divergence(k,iCell) + edge_sign * delsq_u(k,iEdge) - end do - end do - end do - !$acc end parallel - -!$OMP BARRIER - - !$acc parallel default(present) - !$acc loop gang worker - do iEdge=edgeSolveStart,edgeSolveEnd - cell1 = cellsOnEdge(1,iEdge) - cell2 = cellsOnEdge(2,iEdge) - vertex1 = verticesOnEdge(1,iEdge) - vertex2 = verticesOnEdge(2,iEdge) - - u_mix_scale = meshScalingDel4(iEdge)*h_mom_eddy_visc4 - r_dc = u_mix_scale * config_del4u_div_factor * invDcEdge(iEdge) - r_dv = u_mix_scale * min(invDvEdge(iEdge), 4*invDcEdge(iEdge)) 
- -!DIR$ IVDEP - !$acc loop vector - do k=1,nVertLevels - - ! Compute diffusion, computed as \nabla divergence - k \times \nabla vorticity - ! only valid for h_mom_eddy_visc4 == constant - ! - ! Here, we scale the diffusion on the divergence part a factor of config_del4u_div_factor - ! relative to the rotational part. The stability constraint on the divergence component is much less - ! stringent than the rotational part, and this flexibility may be useful. - ! - u_diffusion = rho_edge(k,iEdge) * ( ( delsq_divergence(k,cell2) - delsq_divergence(k,cell1) ) * r_dc & - -( delsq_vorticity(k,vertex2) - delsq_vorticity(k,vertex1) ) * r_dv ) - tend_u_euler(k,iEdge) = tend_u_euler(k,iEdge) - u_diffusion - - end do - end do - !$acc end parallel - - end if ! 4th order mixing is active - - ! - ! vertical mixing for u - 2nd order filter in physical (z) space - ! - if ( v_mom_eddy_visc2 > 0.0 ) then - - if (config_mix_full) then ! mix full state - - !$acc parallel default(present) - !$acc loop gang worker - do iEdge=edgeSolveStart,edgeSolveEnd - - cell1 = cellsOnEdge(1,iEdge) - cell2 = cellsOnEdge(2,iEdge) - - !$acc loop vector - do k=2,nVertLevels-1 - - z1 = 0.5*(zgrid(k-1,cell1)+zgrid(k-1,cell2)) - z2 = 0.5*(zgrid(k ,cell1)+zgrid(k ,cell2)) - z3 = 0.5*(zgrid(k+1,cell1)+zgrid(k+1,cell2)) - z4 = 0.5*(zgrid(k+2,cell1)+zgrid(k+2,cell2)) - - zm = 0.5*(z1+z2) - z0 = 0.5*(z2+z3) - zp = 0.5*(z3+z4) - - tend_u_euler(k,iEdge) = tend_u_euler(k,iEdge) + rho_edge(k,iEdge) * v_mom_eddy_visc2*( & - (u(k+1,iEdge)-u(k ,iEdge))/(zp-z0) & - -(u(k ,iEdge)-u(k-1,iEdge))/(z0-zm) )/(0.5*(zp-zm)) - end do - end do - !$acc end parallel - - else ! 
idealized cases where we mix on the perturbation from the initial 1-D state - - !$acc parallel default(present) - !$acc loop gang worker private(u_mix) - do iEdge=edgeSolveStart,edgeSolveEnd - - cell1 = cellsOnEdge(1,iEdge) - cell2 = cellsOnEdge(2,iEdge) - - !$acc loop vector - do k=1,nVertLevels - u_mix(k) = u(k,iEdge) - u_init(k) * cos( angleEdge(iEdge) ) & - - v_init(k) * sin( angleEdge(iEdge) ) - end do - - !$acc loop vector - do k=2,nVertLevels-1 - - z1 = 0.5*(zgrid(k-1,cell1)+zgrid(k-1,cell2)) - z2 = 0.5*(zgrid(k ,cell1)+zgrid(k ,cell2)) - z3 = 0.5*(zgrid(k+1,cell1)+zgrid(k+1,cell2)) - z4 = 0.5*(zgrid(k+2,cell1)+zgrid(k+2,cell2)) - - zm = 0.5*(z1+z2) - z0 = 0.5*(z2+z3) - zp = 0.5*(z3+z4) - - tend_u_euler(k,iEdge) = tend_u_euler(k,iEdge) + rho_edge(k,iEdge) * v_mom_eddy_visc2*( & - (u_mix(k+1)-u_mix(k ))/(zp-z0) & - -(u_mix(k )-u_mix(k-1))/(z0-zm) )/(0.5*(zp-zm)) - end do - end do - !$acc end parallel - - end if ! mix perturbation state - - end if ! vertical mixing of horizontal momentum + call u_dissipation( edgeStart, edgeEnd, edgeSolveStart, edgeSolveEnd, vertexStart, vertexEnd, & + cellStart, cellEnd, nCells, nEdges, nVertices, vertexDegree, & + cellsOnEdge, verticesOnEdge, edgesOnCell, edgesOnVertex, & + nEdgesOnCell, edgesOnCell_sign, edgesOnVertex_sign, & + invAreaCell, invAreaTriangle, invDvEdge, invDcEdge, & + angleEdge, dcEdge, dvEdge, meshScalingDel2, meshScalingDel4, & + config_mix_full, h_mom_eddy_visc4, v_mom_eddy_visc2, & + config_del4u_div_factor, zgrid, kdiff, & + delsq_u, delsq_vorticity, delsq_divergence, & + u, divergence, vorticity, rho_edge, u_init, v_init, tend_u_euler ) end if ! (rk_step 1 test for computing mixing terms) @@ -5785,80 +5598,16 @@ subroutine atm_compute_dyn_tend_work(nCells, nEdges, nVertices, nVertLevels_dumm if (rk_step == 1) then -! !OMP BARRIER why is this openmp barrier here??? - - ! del^4 horizontal filter. We compute this as del^2 ( del^2 (u) ). - ! - ! 
First, storage to hold the result from the first del^2 computation. - ! we copied code from the theta mixing, hence the theta* names. - - !$acc parallel default(present) - !$acc loop gang worker - do iCell=cellStart,cellEnd - - !$acc loop vector - do k=1,nVertLevels - delsq_w(k,iCell) = 0.0_RKIND - end do - - !$acc loop vector - do k=1,nVertLevels+1 - tend_w_euler(k,iCell) = 0.0_RKIND - end do - - r_areaCell = invAreaCell(iCell) - - !$acc loop seq - do i=1,nEdgesOnCell(iCell) - iEdge = edgesOnCell(i,iCell) - - edge_sign = 0.5 * r_areaCell*edgesOnCell_sign(i,iCell) * dvEdge(iEdge) * invDcEdge(iEdge) - - cell1 = cellsOnEdge(1,iEdge) - cell2 = cellsOnEdge(2,iEdge) - -!DIR$ IVDEP - !$acc loop vector - do k=2,nVertLevels - - w_turb_flux = edge_sign*(rho_edge(k,iEdge)+rho_edge(k-1,iEdge))*(w(k,cell2) - w(k,cell1)) - delsq_w(k,iCell) = delsq_w(k,iCell) + w_turb_flux - w_turb_flux = w_turb_flux * meshScalingDel2(iEdge) * 0.25 * & - (kdiff(k,cell1)+kdiff(k,cell2)+kdiff(k-1,cell1)+kdiff(k-1,cell2)) - tend_w_euler(k,iCell) = tend_w_euler(k,iCell) + w_turb_flux - end do - end do - end do - !$acc end parallel - -!$OMP BARRIER - - if (h_mom_eddy_visc4 > 0.0) then ! 4th order mixing is active - - !$acc parallel default(present) - !$acc loop gang worker - do iCell=cellSolveStart,cellSolveEnd ! Technically updating fewer cells than before... - - r_areaCell = h_mom_eddy_visc4 * invAreaCell(iCell) - - !$acc loop seq - do i=1,nEdgesOnCell(iCell) - iEdge = edgesOnCell(i,iCell) - cell1 = cellsOnEdge(1,iEdge) - cell2 = cellsOnEdge(2,iEdge) - - edge_sign = meshScalingDel4(iEdge)*r_areaCell*dvEdge(iEdge)*edgesOnCell_sign(i,iCell) * invDcEdge(iEdge) - - !$acc loop vector - do k=2,nVertLevels - tend_w_euler(k,iCell) = tend_w_euler(k,iCell) - edge_sign * (delsq_w(k,cell2) - delsq_w(k,cell1)) - end do - - end do - end do - !$acc end parallel - - end if ! 
4th order mixing is active + call w_dissipation( cellStart, cellEnd, cellSolveStart, cellSolveEnd, & + nCells, nEdges, & + nEdgesOnCell, edgesOnCell, cellsOnEdge, edgesOnCell_sign, & + invAreaCell, invDcEdge, dvEdge, & + meshScalingDel2, meshScalingDel4, & + rdzw, rdzu, & + v_mom_eddy_visc2, h_mom_eddy_visc4, & + delsq_w, & + w, rho_edge, kdiff, rho_zz, & + tend_w_euler ) end if ! horizontal mixing for w computed in first rk_step @@ -5913,27 +5662,6 @@ subroutine atm_compute_dyn_tend_work(nCells, nEdges, nVertices, nVertLevels_dumm end do !$acc end parallel - if (rk_step == 1) then - - if ( v_mom_eddy_visc2 > 0.0 ) then - - !$acc parallel default(present) - !$acc loop gang worker - do iCell=cellSolveStart,cellSolveEnd -!DIR$ IVDEP - !$acc loop vector - do k=2,nVertLevels - tend_w_euler(k,iCell) = tend_w_euler(k,iCell) + v_mom_eddy_visc2*0.5*(rho_zz(k,iCell)+rho_zz(k-1,iCell))*( & - (w(k+1,iCell)-w(k ,iCell))*rdzw(k) & - -(w(k ,iCell)-w(k-1,iCell))*rdzw(k-1) )*rdzu(k) - end do - end do - !$acc end parallel - - end if - - end if ! mixing term computed first rk_step - ! add in mixing terms for w !$acc parallel default(present) @@ -6024,69 +5752,17 @@ subroutine atm_compute_dyn_tend_work(nCells, nEdges, nVertices, nVertLevels_dumm if (rk_step == 1) then - !$acc parallel default(present) - !$acc loop gang worker - do iCell=cellStart,cellEnd - - !$acc loop vector - do k=1,nVertLevels - delsq_theta(k,iCell) = 0.0_RKIND - tend_theta_euler(k,iCell) = 0.0_RKIND - end do - - r_areaCell = invAreaCell(iCell) - - !$acc loop seq - do i=1,nEdgesOnCell(iCell) - iEdge = edgesOnCell(i,iCell) - edge_sign = r_areaCell*edgesOnCell_sign(i,iCell) * dvEdge(iEdge) * invDcEdge(iEdge) - pr_scale = prandtl_inv * meshScalingDel2(iEdge) - cell1 = cellsOnEdge(1,iEdge) - cell2 = cellsOnEdge(2,iEdge) -!DIR$ IVDEP - !$acc loop vector - do k=1,nVertLevels - -! 
we are computing the Smagorinsky filter at more points than needed here so as to pick up the delsq_theta for 4th order filter below - - theta_turb_flux = edge_sign*(theta_m(k,cell2) - theta_m(k,cell1))*rho_edge(k,iEdge) - delsq_theta(k,iCell) = delsq_theta(k,iCell) + theta_turb_flux - theta_turb_flux = theta_turb_flux*0.5*(kdiff(k,cell1)+kdiff(k,cell2)) * pr_scale - tend_theta_euler(k,iCell) = tend_theta_euler(k,iCell) + theta_turb_flux - - end do - end do - end do - !$acc end parallel - -!$OMP BARRIER - - if (h_theta_eddy_visc4 > 0.0) then ! 4th order mixing is active - - !$acc parallel default(present) - !$acc loop gang worker - do iCell=cellSolveStart,cellSolveEnd ! Technically updating fewer cells than before... - - r_areaCell = h_theta_eddy_visc4 * prandtl_inv * invAreaCell(iCell) - - !$acc loop seq - do i=1,nEdgesOnCell(iCell) - - iEdge = edgesOnCell(i,iCell) - edge_sign = meshScalingDel4(iEdge)*r_areaCell*dvEdge(iEdge)*edgesOnCell_sign(i,iCell)*invDcEdge(iEdge) - - cell1 = cellsOnEdge(1,iEdge) - cell2 = cellsOnEdge(2,iEdge) - - !$acc loop vector - do k=1,nVertLevels - tend_theta_euler(k,iCell) = tend_theta_euler(k,iCell) - edge_sign*(delsq_theta(k,cell2) - delsq_theta(k,cell1)) - end do - end do - end do - !$acc end parallel - - end if ! 4th order mixing is active + call theta_dissipation( cellStart, cellEnd, cellSolveStart, cellSolveEnd, & + nCells, nEdges, & + nEdgesOnCell, edgesOnCell, cellsOnEdge, edgesOnCell_sign, & + invAreaCell, invDcEdge, dvEdge, & + meshScalingDel2, meshScalingDel4, & + config_mix_full, t_init, zgrid, & + rdzw, rdzu, & + v_theta_eddy_visc2, h_theta_eddy_visc4, prandtl_inv, & + delsq_theta, & + theta_m, rho_edge, kdiff, rho_zz, & + tend_theta_euler ) end if ! theta mixing calculated first rk_step @@ -6127,73 +5803,12 @@ subroutine atm_compute_dyn_tend_work(nCells, nEdges, nVertices, nVertLevels_dumm end do !$acc end parallel - ! - ! vertical mixing for theta - 2nd order - ! 
- - if (rk_step == 1) then - - if ( v_theta_eddy_visc2 > 0.0 ) then ! vertical mixing for theta_m - - if (config_mix_full) then - - !$acc parallel default(present) - !$acc loop gang worker - do iCell = cellSolveStart,cellSolveEnd - - !$acc loop vector - do k=2,nVertLevels-1 - z1 = zgrid(k-1,iCell) - z2 = zgrid(k ,iCell) - z3 = zgrid(k+1,iCell) - z4 = zgrid(k+2,iCell) - - zm = 0.5*(z1+z2) - z0 = 0.5*(z2+z3) - zp = 0.5*(z3+z4) - - tend_theta_euler(k,iCell) = tend_theta_euler(k,iCell) + v_theta_eddy_visc2*prandtl_inv*rho_zz(k,iCell)*(& - (theta_m(k+1,iCell)-theta_m(k ,iCell))/(zp-z0) & - -(theta_m(k ,iCell)-theta_m(k-1,iCell))/(z0-zm) )/(0.5*(zp-zm)) - end do - end do - !$acc end parallel - - else ! idealized cases where we mix on the perturbation from the initial 1-D state - - !$acc parallel default(present) - !$acc loop gang worker - do iCell = cellSolveStart,cellSolveEnd - do k=2,nVertLevels-1 - z1 = zgrid(k-1,iCell) - z2 = zgrid(k ,iCell) - z3 = zgrid(k+1,iCell) - z4 = zgrid(k+2,iCell) - - zm = 0.5*(z1+z2) - z0 = 0.5*(z2+z3) - zp = 0.5*(z3+z4) - - tend_theta_euler(k,iCell) = tend_theta_euler(k,iCell) + v_theta_eddy_visc2*prandtl_inv*rho_zz(k,iCell)*(& - ((theta_m(k+1,iCell)-t_init(k+1,iCell))-(theta_m(k ,iCell)-t_init(k,iCell)))/(zp-z0) & - -((theta_m(k ,iCell)-t_init(k,iCell))-(theta_m(k-1,iCell)-t_init(k-1,iCell)))/(z0-zm) )/(0.5*(zp-zm)) - end do - end do - !$acc end parallel - - end if - - end if - - end if ! compute vertical theta mixing on first rk_step - !$acc parallel default(present) !$acc loop gang worker do iCell = cellSolveStart,cellSolveEnd !DIR$ IVDEP !$acc loop vector do k=1,nVertLevels -! 
tend_theta(k,iCell) = tend_theta(k,iCell) + tend_theta_euler(k,iCell) tend_theta(k,iCell) = tend_theta(k,iCell) + tend_theta_euler(k,iCell) + tend_rtheta_physics(k,iCell) end do end do From 93d2ac51f461aad9d61e46ee0628496675b97ed7 Mon Sep 17 00:00:00 2001 From: Bill Skamarock Date: Mon, 6 Jan 2020 13:16:23 -0700 Subject: [PATCH 014/146] Added initialization for convective boundary layer LES case. This is config_init_case = 10 --- .../mpas_init_atm_cases.F | 577 +++++++++++++++++- 1 file changed, 576 insertions(+), 1 deletion(-) diff --git a/src/core_init_atmosphere/mpas_init_atm_cases.F b/src/core_init_atmosphere/mpas_init_atm_cases.F index 673ebfc525..82b2517e76 100644 --- a/src/core_init_atmosphere/mpas_init_atm_cases.F +++ b/src/core_init_atmosphere/mpas_init_atm_cases.F @@ -393,13 +393,30 @@ subroutine init_atm_setup_case(domain, stream_manager) call init_atm_case_cam_mpas(stream_manager, domain % dminfo, block_ptr, & mesh, block_ptr % dimensions, block_ptr % configs, nVertLevels) + else if (config_init_case == 10) then + + call mpas_log_write(' les test case ') + block_ptr => domain % blocklist + do while (associated(block_ptr)) + + call mpas_pool_get_dimension(block_ptr % dimensions, 'nCells', nCells) + call mpas_pool_get_dimension(block_ptr % dimensions, 'nVertLevels', nVertLevels) + + call mpas_pool_get_subpool(block_ptr % structs, 'mesh', mesh) + call mpas_pool_get_subpool(block_ptr % structs, 'state', state) + call mpas_pool_get_subpool(block_ptr % structs, 'diag', diag) + + call mpas_log_write(' calling test case setup ') + call init_atm_case_les(domain % dminfo, mesh, nCells, nVertLevels, state, diag, config_init_case, block_ptr % configs) + call decouple_variables(mesh, nCells, nVertLevels, state, diag) + call mpas_log_write(' returned from test case setup ') block_ptr => block_ptr % next end do else call mpas_log_write(' ***********************************************************', messageType=MPAS_LOG_ERR) - call mpas_log_write(' Only test cases 1 
through 9 and 13 are currently supported.', messageType=MPAS_LOG_ERR) + call mpas_log_write(' Only test cases 1 through 10 and 13 are currently supported.', messageType=MPAS_LOG_ERR) call mpas_log_write(' ***********************************************************', messageType=MPAS_LOG_CRIT) end if @@ -6184,6 +6201,564 @@ subroutine init_atm_case_lbc(timestamp, block, mesh, nCells, nEdges, nVertLevels end subroutine init_atm_case_lbc +!--------------------- + + subroutine init_atm_case_les(dminfo, mesh, nCells, nVertLevels, state, diag, test_case, configs) + !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! + ! Large Eddy Simulation (les) test case setup + !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! + + implicit none + + type (dm_info), intent(in) :: dminfo + type (mpas_pool_type), intent(inout) :: mesh + integer, intent(in) :: nCells + integer, intent(in) :: nVertLevels + type (mpas_pool_type), intent(inout) :: state + type (mpas_pool_type), intent(inout) :: diag + type (mpas_pool_type), intent(inout):: configs + + integer, intent(in) :: test_case + + real (kind=RKIND), dimension(:), pointer :: rdzw, dzu, rdzu, fzm, fzp + real (kind=RKIND), dimension(:,:), pointer :: zgrid, zxu, zz, hx, cqw + real (kind=RKIND), dimension(:,:), pointer :: ppb, pb, rho_zz, rb, rr, tb, rtb, p, pp, dss, t, rt, u, ru + real (kind=RKIND), dimension(:,:,:), pointer :: scalars + real (kind=RKIND), dimension(:,:,:), pointer :: zb, zb3 + + !This is temporary variable here. It just need when calculate tangential velocity v. 
+ integer :: eoe, j + integer, dimension(:), pointer :: nEdgesOnEdge + integer, dimension(:,:), pointer :: edgesOnEdge, cellsOnEdge + real (kind=RKIND), dimension(:,:), pointer :: weightsOnEdge + + integer :: iCell, iCell1, iCell2 , iEdge, vtx1, vtx2, ivtx, i, k, nz, nz1, itr, cell1, cell2 + integer, pointer :: nEdges, nVertices, maxEdges, nCellsSolve + integer, pointer :: index_qv + + real (kind=RKIND), dimension(nVertLevels + 1 ) :: znu, znw, znwc, znwv + real (kind=RKIND), dimension(nVertLevels + 1 ) :: znuc, znuv + + real (kind=RKIND), dimension(nVertLevels + 1 ) :: zc, zw, ah + real (kind=RKIND), dimension(nVertLevels ) :: zu, dzw, rdzwp, rdzwm + + real (kind=RKIND), dimension(nVertLevels, nCells) :: relhum, thi, tbi, cqwb + + real (kind=RKIND) :: r, xnutr + real (kind=RKIND) :: ztemp, zd, zt, dz, str + + real (kind=RKIND), dimension(nVertLevels ) :: qvb + real (kind=RKIND), dimension(nVertLevels ) :: t_init_1d + + real (kind=RKIND) :: d1, d2, d3, cof1, cof2 + real (kind=RKIND), pointer :: cf1, cf2, cf3 + real (kind=RKIND) :: ztr, thetar, ttr, thetas, um, us, zts, pitop, pibtop, ptopb, ptop, rcp, rcv, p0 + real (kind=RKIND) :: radx, radz, zcent, xmid, delt, xloc, rad, yloc, ymid, a_scale + real (kind=RKIND) :: pres, temp, es, qvs + + real (kind=RKIND), dimension(:), pointer :: xCell, yCell, zCell + real (kind=RKIND), dimension(:), pointer :: xEdge, yEdge, zEdge + real (kind=RKIND), dimension(:), pointer :: xVertex, yVertex, zVertex + real (kind=RKIND), dimension(:), pointer :: dcEdge, dvEdge, areaCell, areaTriangle + real (kind=RKIND), dimension(:,:), pointer :: kiteAreasOnVertex + logical, pointer :: on_a_sphere + real (kind=RKIND), pointer :: sphere_radius + real (kind=RKIND), pointer :: config_ztop + + real (kind=RKIND), dimension(:,:), pointer :: t_init, w, rw, v, rho, theta + real (kind=RKIND), dimension(:), pointer :: u_init, qv_init, angleEdge, fEdge, fVertex + real (kind=RKIND) :: u_vel, v_vel, randx + + call mpas_pool_get_array(mesh, 'xCell', xCell) + 
call mpas_pool_get_array(mesh, 'yCell', yCell) + call mpas_pool_get_array(mesh, 'zCell', zCell) + call mpas_pool_get_array(mesh, 'xEdge', xEdge) + call mpas_pool_get_array(mesh, 'yEdge', yEdge) + call mpas_pool_get_array(mesh, 'zEdge', zEdge) + call mpas_pool_get_array(mesh, 'xVertex', xVertex) + call mpas_pool_get_array(mesh, 'yVertex', yVertex) + call mpas_pool_get_array(mesh, 'zVertex', zVertex) + call mpas_pool_get_array(mesh, 'dvEdge', dvEdge) + call mpas_pool_get_array(mesh, 'dcEdge', dcEdge) + call mpas_pool_get_array(mesh, 'areaCell', areaCell) + call mpas_pool_get_array(mesh, 'areaTriangle', areaTriangle) + call mpas_pool_get_array(mesh, 'kiteAreasOnVertex', kiteAreasOnVertex) + + call mpas_pool_get_config(mesh, 'on_a_sphere', on_a_sphere) + call mpas_pool_get_config(mesh, 'sphere_radius', sphere_radius) + call mpas_pool_get_config(configs, 'config_ztop', config_ztop) + + ! + ! Scale all distances + ! + + a_scale = 1.0 + + xCell(:) = xCell(:) * a_scale + yCell(:) = yCell(:) * a_scale + zCell(:) = zCell(:) * a_scale + xVertex(:) = xVertex(:) * a_scale + yVertex(:) = yVertex(:) * a_scale + zVertex(:) = zVertex(:) * a_scale + xEdge(:) = xEdge(:) * a_scale + yEdge(:) = yEdge(:) * a_scale + zEdge(:) = zEdge(:) * a_scale + dvEdge(:) = dvEdge(:) * a_scale + dcEdge(:) = dcEdge(:) * a_scale + areaCell(:) = areaCell(:) * a_scale**2.0 + areaTriangle(:) = areaTriangle(:) * a_scale**2.0 + kiteAreasOnVertex(:,:) = kiteAreasOnVertex(:,:) * a_scale**2.0 + + call mpas_pool_get_array(mesh, 'weightsOnEdge', weightsOnEdge) + call mpas_pool_get_array(mesh, 'nEdgesOnEdge', nEdgesOnEdge) + call mpas_pool_get_array(mesh, 'edgesOnEdge', edgesOnEdge) + call mpas_pool_get_array(mesh, 'cellsOnEdge', cellsOnEdge) + + call mpas_pool_get_dimension(mesh, 'nEdges', nEdges) + call mpas_pool_get_dimension(mesh, 'nVertices', nVertices) + call mpas_pool_get_dimension(mesh, 'nCellsSolve', nCellsSolve) + call mpas_pool_get_dimension(mesh, 'maxEdges', maxEdges) + nz1 = nVertLevels + nz = nz1 + 1 
+ + call mpas_pool_get_array(mesh, 'zgrid', zgrid) + call mpas_pool_get_array(mesh, 'zb', zb) + call mpas_pool_get_array(mesh, 'zb3', zb3) + call mpas_pool_get_array(mesh, 'rdzw', rdzw) + call mpas_pool_get_array(mesh, 'dzu', dzu) + call mpas_pool_get_array(mesh, 'rdzu', rdzu) + call mpas_pool_get_array(mesh, 'fzm', fzm) + call mpas_pool_get_array(mesh, 'fzp', fzp) + call mpas_pool_get_array(mesh, 'zxu', zxu) + call mpas_pool_get_array(mesh, 'zz', zz) + call mpas_pool_get_array(mesh, 'hx', hx) + call mpas_pool_get_array(mesh, 'dss', dss) + call mpas_pool_get_array(mesh, 't_init', t_init) + call mpas_pool_get_array(mesh, 'u_init', u_init) + call mpas_pool_get_array(mesh, 'qv_init', qv_init) + call mpas_pool_get_array(mesh, 'angleEdge', angleEdge) + call mpas_pool_get_array(mesh, 'fEdge', fEdge) + call mpas_pool_get_array(mesh, 'fVertex', fVertex) + + call mpas_pool_get_array(mesh, 'cf1', cf1) + call mpas_pool_get_array(mesh, 'cf2', cf2) + call mpas_pool_get_array(mesh, 'cf3', cf3) + + call mpas_pool_get_array(diag, 'pressure_base', ppb) + call mpas_pool_get_array(diag, 'exner_base', pb) + call mpas_pool_get_array(diag, 'rho_base', rb) + call mpas_pool_get_array(diag, 'theta_base', tb) + call mpas_pool_get_array(diag, 'rtheta_base', rtb) + call mpas_pool_get_array(diag, 'exner', p) + call mpas_pool_get_array(diag, 'cqw', cqw) + + call mpas_pool_get_array(diag, 'pressure_p', pp) + call mpas_pool_get_array(diag, 'rho_p', rr) + call mpas_pool_get_array(diag, 'rtheta_p', rt) + call mpas_pool_get_array(diag, 'ru', ru) + call mpas_pool_get_array(diag, 'rw', rw) + call mpas_pool_get_array(diag, 'v', v) + call mpas_pool_get_array(diag, 'rho', rho) + call mpas_pool_get_array(diag, 'theta', theta) + + call mpas_pool_get_array(state, 'rho_zz', rho_zz) + call mpas_pool_get_array(state, 'theta_m', t) + call mpas_pool_get_array(state, 'u', u) + call mpas_pool_get_array(state, 'w', w) + call mpas_pool_get_array(state, 'scalars', scalars) + + call mpas_pool_get_dimension(state, 
'index_qv', index_qv) + + scalars(:,:,:) = 0. + + call atm_initialize_advection_rk(mesh, nCells, nEdges, maxEdges, on_a_sphere, sphere_radius ) + call atm_initialize_deformation_weights(mesh, nCells, on_a_sphere, sphere_radius) + + xnutr = 0. + zd = 12000. + + p0 = 1.e+05 + rcp = rgas/cp + rcv = rgas/(cp-rgas) + + call mpas_log_write(' point 1 in test case setup ') + +! We may pass in an hx(:,:) that has been precomputed elsewhere. +! For now it is independent of k + + do iCell=1,nCells + do k=1,nz + hx(k,iCell) = 0. ! les on a flat Cartesian plane + end do + end do + +! write(0,*) ' dz = ',dz + call mpas_log_write(' hx computation complete ') + + ! metrics for hybrid coordinate and vertical stretching + + str = 1.0 ! no stretching in les case: constant dz + ! zt = 20000. + zt = config_ztop + dz = zt/float(nz1) + + + do k=1,nz + + ! zw(k) = zt*(real(k-1)*dz/zt)**str + zw(k) = float(k-1)*dz + zc(k) = zw(k) +! +! ah(k) governs the transition between terrain-following +! and pureheight coordinates +! ah(k) = 0 is a terrain-following coordinate +! ah(k) = 1 is a height coordinate + +! ah(k) = 1.-cos(.5*pii*(k-1)*dz/zt)**6 + ah(k) = 1. +! call mpas_log_write(' k, zc, zw, ah = $i $r $r $r', intArgs=(/k/), realArgs=(/zc(k),zw(k),ah(k)/)) + end do + do k=1,nz1 + dzw (k) = zw(k+1)-zw(k) + rdzw(k) = 1./dzw(k) + zu(k ) = .5*(zw(k)+zw(k+1)) + end do + do k=2,nz1 + dzu (k) = .5*(dzw(k)+dzw(k-1)) + rdzu(k) = 1./dzu(k) + fzp (k) = .5* dzw(k )/dzu(k) + fzm (k) = .5* dzw(k-1)/dzu(k) + rdzwp(k) = dzw(k-1)/(dzw(k )*(dzw(k)+dzw(k-1))) + rdzwm(k) = dzw(k )/(dzw(k-1)*(dzw(k)+dzw(k-1))) + end do + +!********** how are we storing cf1, cf2 and cf3? 
+ + COF1 = (2.*DZU(2)+DZU(3))/(DZU(2)+DZU(3))*DZW(1)/DZU(2) + COF2 = DZU(2) /(DZU(2)+DZU(3))*DZW(1)/DZU(3) + CF1 = FZP(2) + COF1 + CF2 = FZM(2) - COF1 - COF2 + CF3 = COF2 + + do iCell=1,nCells + do k=1,nz + zgrid(k,iCell) = ah(k)*(zc(k)*(1.-hx(k,iCell)/zt)+hx(k,iCell)) & + + (1.-ah(k)) * zc(k) + end do + do k=1,nz1 + zz (k,iCell) = (zw(k+1)-zw(k))/(zgrid(k+1,iCell)-zgrid(k,iCell)) + end do + end do + + do i=1, nEdges + iCell1 = cellsOnEdge(1,i) + iCell2 = cellsOnEdge(2,i) + do k=1,nz1 + zxu (k,i) = 0.5 * (zgrid(k,iCell2)-zgrid(k,iCell1) + zgrid(k+1,iCell2)-zgrid(k+1,iCell1)) / dcEdge(i) + end do + end do + do i=1, nCells + do k=1,nz1 + ztemp = .5*(zgrid(k+1,i)+zgrid(k,i)) + dss(k,i) = 0. + ztemp = zgrid(k,i) + if(ztemp.gt.zd+.1) then + dss(k,i) = dss(k,i)+xnutr*sin(.5*pii*(ztemp-zd)/(zt-zd))**2 + end if + end do + end do + +! +! initialization +! + do i=1,nCells + do k=1,nz1 + + ztemp = .5*(zgrid(k,i)+zgrid(k+1,i)) + + ! if(ztemp .gt. ztr) then + ! t (k,i) = thetar*exp(9.8*(ztemp-ztr)/(1003.*ttr)) + ! relhum(k,i) = 0.25 + ! else + ! t (k,i) = 300.+43.*(ztemp/ztr)**1.25 + ! relhum(k,i) = (1.-0.75*(ztemp/ztr)**1.25) + ! if(t(k,i).lt.thetas) t(k,i) = thetas + ! end if + + t(k,i) = atm_get_sounding('theta',ztemp) + scalars(index_qv,k,i) = atm_get_sounding('qv',ztemp) + + tb(k,i) = t(k,i) + thi(k,i) = t(k,i) + tbi(k,i) = t(k,i) + cqw(k,i) = 1. + cqwb(k,i) = 1. + end do + end do + +! set the velocity field - we are on a plane here. + + do i=1, nEdges + cell1 = cellsOnEdge(1,i) + cell2 = cellsOnEdge(2,i) + if(cell1 <= nCellsSolve .or. cell2 <= nCellsSolve) then + do k=1,nz1 + ztemp = .25*( zgrid(k,cell1 )+zgrid(k+1,cell1 ) & + +zgrid(k,cell2)+zgrid(k+1,cell2)) + u_vel = atm_get_sounding('u',ztemp) + v_vel = atm_get_sounding('v',ztemp) + u(k,i) = cos(angleEdge(i))*u_vel - sin(angleEdge(i))*v_vel + if(i == 1 ) u_init(k) = u(k,i) + end do + end if + end do + + call mpas_dmpar_bcast_reals(dminfo, nz1, u_init) + +! +! for reference sounding +! 
+ do itr=1,30 + + pitop = 1.-.5*dzw(1)*gravity*(1.+scalars(index_qv,1,1))/(cp*t(1,1)*zz(1,1)) + pibtop = 1.-.5*dzw(1)*gravity*(1.+qvb(1))/(cp*tb(1,1)*zz(1,1)) + do k=2,nz1 + pitop = pitop-dzu(k)*gravity/(cp*cqw(k,1)*.5*(t(k,1)+t(k-1,1)) & + *.5*(zz(k,1)+zz(k-1,1))) + pibtop = pibtop-dzu(k)*gravity/(cp*cqwb(k,1)*.5*(tb(k,1)+tb(k-1,1)) & + *.5*(zz(k,1)+zz(k-1,1))) + + !call mpas_log_write('$i $r $r $r $r', intArgs=(/k/), realArgs=(/pitop,tb(k,1),dzu(k),tb(k,1)/)) + end do + pitop = pitop-.5*dzw(nz1)*gravity*(1.+scalars(index_qv,nz1,1))/(cp*t(nz1,1)*zz(nz1,1)) + pibtop = pibtop-.5*dzw(nz1)*gravity*(1.+qvb(nz1))/(cp*tb(nz1,1)*zz(nz1,1)) + + call mpas_dmpar_bcast_real(dminfo, pitop) + call mpas_dmpar_bcast_real(dminfo, pibtop) + + ptopb = p0*pibtop**(1./rcp) + call mpas_log_write('ptopb = $r', realArgs=(/0.01_RKIND*ptopb/)) + + do i=1, nCells + pb(nz1,i) = pibtop+.5*dzw(nz1)*gravity*(1.+qvb(nz1))/(cp*tb(nz1,i)*zz(nz1,i)) + p (nz1,i) = pitop+.5*dzw(nz1)*gravity*(1.+scalars(index_qv,nz1,i))/(cp*t (nz1,i)*zz(nz1,i)) + do k=nz1-1,1,-1 + pb(k,i) = pb(k+1,i) + dzu(k+1)*gravity/(cp*cqwb(k+1,i)*.5*(tb(k,i)+tb(k+1,i)) & + *.5*(zz(k,i)+zz(k+1,i))) + p (k,i) = p (k+1,i) + dzu(k+1)*gravity/(cp*cqw(k+1,i)*.5*(t (k,i)+t (k+1,i)) & + *.5*(zz(k,i)+zz(k+1,i))) + end do + do k=1,nz1 + rb (k,i) = pb(k,i)**(1./rcv)/((rgas/p0)*tb(k,i)*zz(k,i)) + rtb(k,i) = rb(k,i)*tb(k,i) + rr (k,i) = p (k,i)**(1./rcv)/((rgas/p0)*t (k,i)*zz(k,i))-rb(k,i) + ppb(k,i) = p0*(zz(k,i)*rgas*rtb(k,i)/p0)**(cp/cv) + end do + end do + + ! + ! update water vapor mixing ratio from humidity profile + ! + ! do i= 1,nCells + ! do k=1,nz1 + ! temp = p(k,i)*thi(k,i) + ! pres = p0*p(k,i)**(1./rcp) + ! qvs = 380.*exp(17.27*(temp-273.)/(temp-36.))/pres + ! scalars(index_qv,k,i) = min(0.014_RKIND,relhum(k,i)*qvs) + ! end do + ! end do + + do k=1,nz1 +!********************************************************************* +! QVB = QV INCLUDES MOISTURE IN REFERENCE STATE +! qvb(k) = scalars(index_qv,k,1) +! 
QVB = 0 PRODUCES DRY REFERENCE STATE + qvb(k) = 0. +!********************************************************************* + end do + + do i= 1,nCells + do k=1,nz1 + t (k,i) = thi(k,i)*(1.+1.61*scalars(index_qv,k,i)) + tb(k,i) = tbi(k,i)*(1.+1.61*qvb(k)) + end do + do k=2,nz1 + cqw (k,i) = 1./(1.+.5*(scalars(index_qv,k,i)+scalars(index_qv,k-1,i))) + cqwb(k,i) = 1./(1.+.5*(qvb(k)+qvb(k-1))) + end do + end do + + end do !end of iteration loop + + call mpas_log_write(' base state sounding ') + call mpas_log_write(' k, pb, rb, tb, rtb, t, rr, p, qvb') + do k=1,nVertLevels + call mpas_log_write('$i $r $r $r $r $r $r $r $r', intArgs=(/k/), realArgs=(/pb(k,1),rb(k,1),tb(k,1),rtb(k,1),t(k,1),rr(k,1),p(k,1),qvb(k)/)) + end do + +! +! potential temperature perturbation +! + + do i=1,nCells + do k = 1,4 ! same as in WRF + call random_number(randx) + thi(k,i) = thi(k,i) + 0.1*(randx-0.5) + t (k,i) = thi(k,i)*(1.+1.61*scalars(index_qv,k,i)) + end do + end do + + do itr=1,30 + + pitop = 1.-.5*dzw(1)*gravity*(1.+scalars(index_qv,1,1))/(cp*t(1,1)*zz(1,1)) + do k=2,nz1 + pitop = pitop-dzu(k)*gravity/(cp*cqw(k,1)*.5*(t (k,1)+t (k-1,1)) & + *.5*(zz(k,1)+zz(k-1,1))) + end do + pitop = pitop - .5*dzw(nz1)*gravity*(1.+scalars(index_qv,nz1,1))/(cp*t(nz1,1)*zz(nz1,1)) + ptop = p0*pitop**(1./rcp) + call mpas_log_write('ptop = $r $r', realArgs=(/0.01_RKIND*ptop, 0.01_RKIND*ptopb/)) + + call mpas_dmpar_bcast_real(dminfo, ptop) + + do i = 1, nCells + + pp(nz1,i) = ptop-ptopb+.5*dzw(nz1)*gravity* & + (rr(nz1,i)+(rr(nz1,i)+rb(nz1,i))*scalars(index_qv,nz1,i)) + do k=nz1-1,1,-1 +! pp(k,i) = pp(k+1,i)+.5*dzu(k+1)*gravity* & +! (rr(k ,i)+(rr(k ,i)+rb(k ,i))*scalars(index_qv,k ,i) & +! 
+rr(k+1,i)+(rr(k+1,i)+rb(k+1,i))*scalars(index_qv,k+1,i)) + pp(k,i) = pp(k+1,i)+dzu(k+1)*gravity*( & + fzm(k+1)*(rb(k+1,i)*(scalars(index_qv,k+1,i)-qvb(k+1)) & + +rr(k+1,i)*(1.+scalars(index_qv,k+1,i))) & + +fzp(k+1)*(rb(k ,i)*(scalars(index_qv,k ,i)-qvb(k)) & + +rr(k ,i)*(1.+scalars(index_qv,k ,i)))) + end do + if (itr==1.and.i==1) then + do k=1,nz1 + call mpas_log_write('pp-check $r', realArgs=(/pp(k,i)/)) + end do + end if + do k=1,nz1 + rt(k,i) = (pp(k,i)/(rgas*zz(k,i)) & + -rtb(k,i)*(p(k,i)-pb(k,i)))/p(k,i) + p (k,i) = (zz(k,i)*(rgas/p0)*(rtb(k,i)+rt(k,i)))**rcv + rr(k,i) = (rt(k,i)-rb(k,i)*(t(k,i)-tb(k,i)))/t(k,i) + end do + + end do ! loop over cells + + end do ! iteration loop +!---------------------------------------------------------------------- +! + do k=1,nz1 + qv_init(k) = scalars(index_qv,k,1) + end do + + t_init_1d(:) = t(:,1) + call mpas_dmpar_bcast_reals(dminfo, nz1, t_init_1d) + call mpas_dmpar_bcast_reals(dminfo, nz1, qv_init) + + do i=1,nCells + do k=1,nz1 + t_init(k,i) = t_init_1d(k) + rho_zz(k,i) = rb(k,i)+rr(k,i) + end do + end do + + do i=1,nEdges + cell1 = cellsOnEdge(1,i) + cell2 = cellsOnEdge(2,i) + if(cell1 <= nCellsSolve .or. cell2 <= nCellsSolve) then + do k=1,nz1 + ru (k,i) = 0.5*(rho_zz(k,cell1)+rho_zz(k,cell2))*u(k,i) + end do + end if + end do + + + ! + ! we are assuming w and rw are zero for this initialization + ! i.e., no terrain + ! + rw = 0.0 + w = 0.0 + + zb = 0.0 + zb3 = 0.0 + + ! + ! Generate rotated Coriolis field - same settings as in WRF + ! + do iEdge=1,nEdges + fEdge(iEdge) = 1.e-04 + end do + + do iVtx=1,nVertices + fVertex(iVtx) = 1.e-04 + end do + + ! + ! Compute mass fluxes tangential to each edge (i.e., through the faces of dual grid cells) + ! + v(:,:) = 0.0 + do iEdge = 1, nEdges + do i=1,nEdgesOnEdge(iEdge) + eoe = edgesOnEdge(i,iEdge) + if (eoe > 0) then + do k = 1, nVertLevels + v(k,iEdge) = v(k,iEdge) + weightsOnEdge(i,iEdge) * u(k, eoe) + end do + end if + end do + end do + + ! 
call mpas_log_write(' k,u_init, t_init, qv_init ') + ! do k=1,nVertLevels + ! call mpas_log_write('$i $r $r $r', intArgs=(/k/), realArgs=(/u_init(k),t_init(k,1),qv_init(k)/)) + ! end do + + ! Compute rho and theta from rho_zz and theta_m + do iCell=1,nCells + do k=1,nVertLevels + rho(k,iCell) = rho_zz(k,iCell) * zz(k,iCell) + theta(k,iCell) = t(k,iCell) / (1.0 + 1.61 * scalars(index_qv,k,iCell)) + end do + end do + + end subroutine init_atm_case_les + + + real (kind=RKIND) function atm_get_sounding( variable, height ) + + implicit none + real (kind=RKIND), intent(in) :: height + character(len=*), intent(in) :: variable + + atm_get_sounding = -999. + + if(variable == 'u') then + atm_get_sounding = 0. + else if (variable == 'v') then + atm_get_sounding = 0. + else if (variable == 'qv') then + atm_get_sounding = 0. + else if (variable == 'theta') then + + if(height .le. 1000.) then + atm_get_sounding = 300. + else if(height .le. 1150.) then + atm_get_sounding = 300. + (height-1000.)*8./150. + else + atm_get_sounding = 308. + (height-1150.)*3./1000. + end if + + end if + + end function atm_get_sounding + +!----------- !----------------------------------------------------------------------- ! 
routine init_atm_case_cam_mpas From c5eb44e4615ababeaace7e7407b741d474346f67 Mon Sep 17 00:00:00 2001 From: Jimy Dudhia Date: Tue, 7 Jan 2020 16:13:08 -0700 Subject: [PATCH 015/146] add calcalate_n2 routine (no functional code yet) --- .../dynamics/mpas_atm_dissipation_models.F | 15 +++++++++++++ .../dynamics/mpas_atm_time_integration.F | 21 +++++++++++++------ 2 files changed, 30 insertions(+), 6 deletions(-) diff --git a/src/core_atmosphere/dynamics/mpas_atm_dissipation_models.F b/src/core_atmosphere/dynamics/mpas_atm_dissipation_models.F index af983af979..9431a6295f 100644 --- a/src/core_atmosphere/dynamics/mpas_atm_dissipation_models.F +++ b/src/core_atmosphere/dynamics/mpas_atm_dissipation_models.F @@ -9,6 +9,7 @@ module mpas_atm_dissipation_models use mpas_kind_types, only : RKIND + use mpas_atmphys_constants contains @@ -74,6 +75,20 @@ subroutine smagorinsky_2d( kdiff, u, v, c_s, config_len_disp, end subroutine smagorinsky_2d +!--------------------------------------- + + subroutine calculate_n2(bn2, theta, scalars, num_scalars, index_qv, cellStart, cellEnd, nCells) + + use mpas_atm_dimensions ! 
pull nVertLevels and maxEdges from here + + integer, intent(in) :: cellStart, cellEnd, nCells + integer, intent(in) :: index_qv + real (kind=RKIND), dimension(nVertLevels,nCells+1), intent(out) :: bn2 + real (kind=RKIND), dimension(nVertLevels,nCells+1), intent(in) :: theta + real (kind=RKIND), dimension(:,:,:) :: scalars + + end subroutine calculate_n2 + !--------------------------------------- subroutine u_dissipation( edgeStart, edgeEnd, edgeSolveStart, edgeSolveEnd, vertexStart, vertexEnd, & diff --git a/src/core_atmosphere/dynamics/mpas_atm_time_integration.F b/src/core_atmosphere/dynamics/mpas_atm_time_integration.F index 1ae2abb219..d083f25233 100644 --- a/src/core_atmosphere/dynamics/mpas_atm_time_integration.F +++ b/src/core_atmosphere/dynamics/mpas_atm_time_integration.F @@ -4777,7 +4777,7 @@ subroutine atm_compute_dyn_tend(tend, tend_physics, state, diag, mesh, configs, real (kind=RKIND), dimension(:), pointer :: fEdge, dvEdge, dcEdge, invDcEdge, invDvEdge, invAreaCell, invAreaTriangle, & meshScalingDel2, meshScalingDel4 real (kind=RKIND), dimension(:,:), pointer :: weightsOnEdge, zgrid, rho_edge, rho_zz, ru, u, v, tend_u, & - divergence, vorticity, ke, pv_edge, theta_m, rw, tend_rho, & + divergence, vorticity, ke, pv_edge, theta_m, theta, rw, tend_rho, & rt_diabatic_tend, tend_theta, tend_w, w, cqw, rb, rr, pp, pressure_b, zz, zxu, cqu, & h_divergence, kdiff, edgesOnCell_sign, edgesOnVertex_sign, rw_save, ru_save @@ -4832,6 +4832,10 @@ subroutine atm_compute_dyn_tend(tend, tend_physics, state, diag, mesh, configs, logical, pointer :: config_rayleigh_damp_u real (kind=RKIND), pointer :: config_rayleigh_damp_u_timescale_days integer, pointer :: config_number_rayleigh_damp_u_levels, config_number_cam_damping_levels + integer, pointer :: config_number_rayleigh_damp_u_levels + integer, pointer :: index_qv + + logical :: inactive_rthdynten call mpas_pool_get_config(mesh, 'sphere_radius', r_earth) @@ -4860,6 +4864,7 @@ subroutine atm_compute_dyn_tend(tend, 
tend_physics, state, diag, mesh, configs, call mpas_pool_get_array(state, 'w', w, 2) call mpas_pool_get_array(state, 'theta_m', theta_m, 2) call mpas_pool_get_array(state, 'theta_m', theta_m_save, 1) + call mpas_pool_get_array(state, 'theta', theta, 2) call mpas_pool_get_array(state, 'scalars', scalars, 2) call mpas_pool_get_array(diag, 'uReconstructZonal', ur_cell) @@ -4954,6 +4959,7 @@ subroutine atm_compute_dyn_tend(tend, tend_physics, state, diag, mesh, configs, call mpas_pool_get_dimension(state, 'num_scalars', num_scalars) call mpas_pool_get_dimension(state, 'moist_start', moist_start) call mpas_pool_get_dimension(state, 'moist_end', moist_end) + call mpas_pool_get_dimension(state, 'index_qv', index_qv) call mpas_pool_get_array(mesh, 'nEdgesOnCell', nEdgesOnCell) call mpas_pool_get_array(mesh, 'nAdvCellsForEdge', nAdvCellsForEdge) @@ -4966,10 +4972,10 @@ subroutine atm_compute_dyn_tend(tend, tend_physics, state, diag, mesh, configs, call mpas_pool_get_array(mesh, 'cf3', cf3) call atm_compute_dyn_tend_work(nCells, nEdges, nVertices, nVertLevels, & - nCellsSolve, nEdgesSolve, vertexDegree, maxEdges, maxEdges2, num_scalars, moist_start, moist_end, & + nCellsSolve, nEdgesSolve, vertexDegree, maxEdges, maxEdges2, num_scalars, index_qv, moist_start, moist_end, & fEdge, dvEdge, dcEdge, invDcEdge, invDvEdge, invAreaCell, invAreaTriangle, meshScalingDel2, meshScalingDel4, & weightsOnEdge, zgrid, rho_edge, rho_zz, ru, u, v, tend_u, & - divergence, vorticity, ke, pv_edge, theta_m, rw, tend_rho, & + divergence, vorticity, ke, pv_edge, theta_m, theta, rw, tend_rho, & rt_diabatic_tend, tend_theta, tend_w, w, cqw, rb, rr, pp, pressure_b, zz, zxu, cqu, & h_divergence, kdiff, edgesOnCell_sign, edgesOnVertex_sign, rw_save, ru_save, & theta_m_save, exner, rr_save, scalars, tend_u_euler, tend_w_euler, tend_theta_euler, deriv_two, & @@ -4992,10 +4998,10 @@ end subroutine atm_compute_dyn_tend subroutine atm_compute_dyn_tend_work(nCells, nEdges, nVertices, nVertLevels_dummy, & - 
nCellsSolve, nEdgesSolve, vertexDegree, maxEdges_dummy, maxEdges2_dummy, num_scalars_dummy, moist_start, moist_end, & + nCellsSolve, nEdgesSolve, vertexDegree, maxEdges_dummy, maxEdges2_dummy, num_scalars_dummy, index_qv, moist_start, moist_end, & fEdge, dvEdge, dcEdge, invDcEdge, invDvEdge, invAreaCell, invAreaTriangle, meshScalingDel2, meshScalingDel4, & weightsOnEdge, zgrid, rho_edge, rho_zz, ru, u, v, tend_u, & - divergence, vorticity, ke, pv_edge, theta_m, rw, tend_rho, & + divergence, vorticity, ke, pv_edge, theta_m, theta, rw, tend_rho, & rt_diabatic_tend, tend_theta, tend_w, w, cqw, rb, rr, pp, pressure_b, zz, zxu, cqu, & h_divergence, kdiff, edgesOnCell_sign, edgesOnVertex_sign, rw_save, ru_save, & theta_m_save, exner, rr_save, scalars, tend_u_euler, tend_w_euler, tend_theta_euler, deriv_two, & @@ -5025,7 +5031,7 @@ subroutine atm_compute_dyn_tend_work(nCells, nEdges, nVertices, nVertLevels_dumm ! Dummy arguments ! integer :: nCells, nEdges, nVertices, nVertLevels_dummy, nCellsSolve, nEdgesSolve, vertexDegree, & - maxEdges_dummy, maxEdges2_dummy, num_scalars_dummy, moist_start, moist_end + maxEdges_dummy, maxEdges2_dummy, num_scalars_dummy, index_qv, moist_start, moist_end real (kind=RKIND), dimension(nEdges+1) :: fEdge real (kind=RKIND), dimension(nEdges+1) :: dvEdge @@ -5049,6 +5055,7 @@ subroutine atm_compute_dyn_tend_work(nCells, nEdges, nVertices, nVertLevels_dumm real (kind=RKIND), dimension(nVertLevels,nCells+1) :: ke real (kind=RKIND), dimension(nVertLevels,nEdges+1) :: pv_edge real (kind=RKIND), dimension(nVertLevels,nCells+1) :: theta_m + real (kind=RKIND), dimension(nVertLevels,nCells+1) :: theta real (kind=RKIND), dimension(nVertLevels+1,nCells+1) :: rw real (kind=RKIND), dimension(nVertLevels,nCells+1) :: tend_rho real (kind=RKIND), dimension(nVertLevels,nCells+1) :: rt_diabatic_tend @@ -5065,6 +5072,7 @@ subroutine atm_compute_dyn_tend_work(nCells, nEdges, nVertices, nVertLevels_dumm real (kind=RKIND), dimension(nVertLevels,nEdges+1) :: cqu 
real (kind=RKIND), dimension(nVertLevels,nCells+1) :: h_divergence real (kind=RKIND), dimension(nVertLevels,nCells+1) :: kdiff + real (kind=RKIND), dimension(nVertLevels,nCells+1) :: bn2 real (kind=RKIND), dimension(maxEdges,nCells+1) :: edgesOnCell_sign real (kind=RKIND), dimension(vertexDegree,nVertices+1) :: edgesOnVertex_sign real (kind=RKIND), dimension(nVertLevels+1,nCells+1) :: rw_save @@ -5274,6 +5282,7 @@ subroutine atm_compute_dyn_tend_work(nCells, nEdges, nVertices, nVertLevels_dumm else if (config_les_model == "3d_smagorinsky") then ! call to 3D smagorinsky here... + call calculate_n2(bn2, theta, scalars, num_scalars, index_qv, cellStart, cellEnd, nCells) end if From b18f3e2e29f9a753d2f5cc3abc18d83ac2c0493b Mon Sep 17 00:00:00 2001 From: Jimy Dudhia Date: Wed, 8 Jan 2020 14:00:15 -0700 Subject: [PATCH 016/146] add to arg list of calculate_n2 --- .../dynamics/mpas_atm_dissipation_models.F | 17 +++++++++++++---- .../dynamics/mpas_atm_time_integration.F | 3 ++- 2 files changed, 15 insertions(+), 5 deletions(-) diff --git a/src/core_atmosphere/dynamics/mpas_atm_dissipation_models.F b/src/core_atmosphere/dynamics/mpas_atm_dissipation_models.F index 9431a6295f..8d82678441 100644 --- a/src/core_atmosphere/dynamics/mpas_atm_dissipation_models.F +++ b/src/core_atmosphere/dynamics/mpas_atm_dissipation_models.F @@ -77,16 +77,25 @@ end subroutine smagorinsky_2d !--------------------------------------- - subroutine calculate_n2(bn2, theta, scalars, num_scalars, index_qv, cellStart, cellEnd, nCells) + subroutine calculate_n2( bn2, theta, exner, pressure_b, pp, zgrid, scalars, index_qv, & + cellStart, cellEnd, nCells) - use mpas_atm_dimensions ! pull nVertLevels and maxEdges from here + use mpas_atm_dimensions ! 
pull nVertLevels and num_scalars from here integer, intent(in) :: cellStart, cellEnd, nCells integer, intent(in) :: index_qv real (kind=RKIND), dimension(nVertLevels,nCells+1), intent(out) :: bn2 - real (kind=RKIND), dimension(nVertLevels,nCells+1), intent(in) :: theta - real (kind=RKIND), dimension(:,:,:) :: scalars + real (kind=RKIND), dimension(nVertLevels,nCells+1), intent(in) :: theta, exner, pressure_b, pp + real (kind=RKIND), dimension(nVertLevels+1,nCells+1), intent(in) :: zgrid + real (kind=RKIND), dimension(num_scalars,nVertLevels,nCells+1), intent(in) :: scalars + do iCell = cellStart,cellEnd +!DIR$ IVDEP + do k=1, nVertLevels + ! Moist Brunt-Vaisala frequency according to Durran and Klemp (1982) Eq. 36 + bn2(k,iCell) = 0. + end do + end do end subroutine calculate_n2 !--------------------------------------- diff --git a/src/core_atmosphere/dynamics/mpas_atm_time_integration.F b/src/core_atmosphere/dynamics/mpas_atm_time_integration.F index d083f25233..0f62fc2aa1 100644 --- a/src/core_atmosphere/dynamics/mpas_atm_time_integration.F +++ b/src/core_atmosphere/dynamics/mpas_atm_time_integration.F @@ -5282,7 +5282,8 @@ subroutine atm_compute_dyn_tend_work(nCells, nEdges, nVertices, nVertLevels_dumm else if (config_les_model == "3d_smagorinsky") then ! call to 3D smagorinsky here... 
- call calculate_n2(bn2, theta, scalars, num_scalars, index_qv, cellStart, cellEnd, nCells) + call calculate_n2( bn2, theta, exner, pressure_b, pp, zgrid, scalars, index_qv, & + cellStart, cellEnd, nCells) end if From a6336368bde6fb76e1be44babe129346a5c1c39c Mon Sep 17 00:00:00 2001 From: Jimy Dudhia Date: Thu, 16 Jan 2020 10:54:29 -0700 Subject: [PATCH 017/146] add dry N^2 --- .../dynamics/mpas_atm_dissipation_models.F | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/src/core_atmosphere/dynamics/mpas_atm_dissipation_models.F b/src/core_atmosphere/dynamics/mpas_atm_dissipation_models.F index 8d82678441..43b89248b6 100644 --- a/src/core_atmosphere/dynamics/mpas_atm_dissipation_models.F +++ b/src/core_atmosphere/dynamics/mpas_atm_dissipation_models.F @@ -88,13 +88,19 @@ subroutine calculate_n2( bn2, theta, exner, pressure_b, pp, zgrid, scalars, inde real (kind=RKIND), dimension(nVertLevels,nCells+1), intent(in) :: theta, exner, pressure_b, pp real (kind=RKIND), dimension(nVertLevels+1,nCells+1), intent(in) :: zgrid real (kind=RKIND), dimension(num_scalars,nVertLevels,nCells+1), intent(in) :: scalars +! local + real (kind=RKIND) :: dz + do iCell = cellStart,cellEnd !DIR$ IVDEP - do k=1, nVertLevels + do k=2, nVertLevels-1 ! Moist Brunt-Vaisala frequency according to Durran and Klemp (1982) Eq. 36 - bn2(k,iCell) = 0. 
+ dz = 0.5 * (zgrid(k+2,iCell)+zgrid(k+1,iCell)) - 0.5 * (zgrid(k,iCell)+zgrid(k-1,iCell)) + bn2(k,iCell) = gravity * (theta(k+1,iCell) - theta(k-1,iCell) ) / theta(k,iCell) / dz end do + bn2(1,iCell) = bn2(2,iCell) + bn2(nVertLevels,iCell) = bn2(nVertLevels-1,iCell) end do end subroutine calculate_n2 From d42a459d967d230e4a999a1c3a29c31e1dec5cf4 Mon Sep 17 00:00:00 2001 From: Jimy Dudhia Date: Fri, 17 Jan 2020 11:31:38 -0700 Subject: [PATCH 018/146] fix theta to be in diag pool --- src/core_atmosphere/dynamics/mpas_atm_time_integration.F | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/core_atmosphere/dynamics/mpas_atm_time_integration.F b/src/core_atmosphere/dynamics/mpas_atm_time_integration.F index 0f62fc2aa1..f8c0ee7d4b 100644 --- a/src/core_atmosphere/dynamics/mpas_atm_time_integration.F +++ b/src/core_atmosphere/dynamics/mpas_atm_time_integration.F @@ -4864,9 +4864,9 @@ subroutine atm_compute_dyn_tend(tend, tend_physics, state, diag, mesh, configs, call mpas_pool_get_array(state, 'w', w, 2) call mpas_pool_get_array(state, 'theta_m', theta_m, 2) call mpas_pool_get_array(state, 'theta_m', theta_m_save, 1) - call mpas_pool_get_array(state, 'theta', theta, 2) call mpas_pool_get_array(state, 'scalars', scalars, 2) + call mpas_pool_get_array(diag, 'theta', theta) call mpas_pool_get_array(diag, 'uReconstructZonal', ur_cell) call mpas_pool_get_array(diag, 'uReconstructMeridional', vr_cell) call mpas_pool_get_array(diag, 'rho_edge', rho_edge) From f0fc10df7424091c79d53ba52bf454bf1a83fbf0 Mon Sep 17 00:00:00 2001 From: Jimy Dudhia Date: Fri, 17 Jan 2020 15:52:59 -0700 Subject: [PATCH 019/146] fix to use theta_m and qv instead of non-updated theta --- .../dynamics/mpas_atm_dissipation_models.F | 11 +++++++---- .../dynamics/mpas_atm_time_integration.F | 10 ++++------ 2 files changed, 11 insertions(+), 10 deletions(-) diff --git a/src/core_atmosphere/dynamics/mpas_atm_dissipation_models.F b/src/core_atmosphere/dynamics/mpas_atm_dissipation_models.F 
index 43b89248b6..df7df75521 100644 --- a/src/core_atmosphere/dynamics/mpas_atm_dissipation_models.F +++ b/src/core_atmosphere/dynamics/mpas_atm_dissipation_models.F @@ -77,7 +77,7 @@ end subroutine smagorinsky_2d !--------------------------------------- - subroutine calculate_n2( bn2, theta, exner, pressure_b, pp, zgrid, scalars, index_qv, & + subroutine calculate_n2( bn2, theta_m, exner, pressure_b, pp, zgrid, scalars, index_qv, & cellStart, cellEnd, nCells) use mpas_atm_dimensions ! pull nVertLevels and num_scalars from here @@ -85,19 +85,22 @@ subroutine calculate_n2( bn2, theta, exner, pressure_b, pp, zgrid, scalars, inde integer, intent(in) :: cellStart, cellEnd, nCells integer, intent(in) :: index_qv real (kind=RKIND), dimension(nVertLevels,nCells+1), intent(out) :: bn2 - real (kind=RKIND), dimension(nVertLevels,nCells+1), intent(in) :: theta, exner, pressure_b, pp + real (kind=RKIND), dimension(nVertLevels,nCells+1), intent(in) :: theta_m, exner, pressure_b, pp real (kind=RKIND), dimension(nVertLevels+1,nCells+1), intent(in) :: zgrid real (kind=RKIND), dimension(num_scalars,nVertLevels,nCells+1), intent(in) :: scalars ! local - real (kind=RKIND) :: dz + real (kind=RKIND) :: dz, theta do iCell = cellStart,cellEnd !DIR$ IVDEP do k=2, nVertLevels-1 ! Moist Brunt-Vaisala frequency according to Durran and Klemp (1982) Eq. 
36 + theta = theta_m(k,iCell) / (1._RKIND + rvord * scalars(index_qv,k,iCell)) + thetap = theta_m(k+1,iCell) / (1._RKIND + rvord * scalars(index_qv,k+1,iCell)) + thetam = theta_m(k-1,iCell) / (1._RKIND + rvord * scalars(index_qv,k-1,iCell)) dz = 0.5 * (zgrid(k+2,iCell)+zgrid(k+1,iCell)) - 0.5 * (zgrid(k,iCell)+zgrid(k-1,iCell)) - bn2(k,iCell) = gravity * (theta(k+1,iCell) - theta(k-1,iCell) ) / theta(k,iCell) / dz + bn2(k,iCell) = gravity * (thetap - thetam ) / theta / dz end do bn2(1,iCell) = bn2(2,iCell) bn2(nVertLevels,iCell) = bn2(nVertLevels-1,iCell) diff --git a/src/core_atmosphere/dynamics/mpas_atm_time_integration.F b/src/core_atmosphere/dynamics/mpas_atm_time_integration.F index f8c0ee7d4b..00f935e311 100644 --- a/src/core_atmosphere/dynamics/mpas_atm_time_integration.F +++ b/src/core_atmosphere/dynamics/mpas_atm_time_integration.F @@ -4777,7 +4777,7 @@ subroutine atm_compute_dyn_tend(tend, tend_physics, state, diag, mesh, configs, real (kind=RKIND), dimension(:), pointer :: fEdge, dvEdge, dcEdge, invDcEdge, invDvEdge, invAreaCell, invAreaTriangle, & meshScalingDel2, meshScalingDel4 real (kind=RKIND), dimension(:,:), pointer :: weightsOnEdge, zgrid, rho_edge, rho_zz, ru, u, v, tend_u, & - divergence, vorticity, ke, pv_edge, theta_m, theta, rw, tend_rho, & + divergence, vorticity, ke, pv_edge, theta_m, rw, tend_rho, & rt_diabatic_tend, tend_theta, tend_w, w, cqw, rb, rr, pp, pressure_b, zz, zxu, cqu, & h_divergence, kdiff, edgesOnCell_sign, edgesOnVertex_sign, rw_save, ru_save @@ -4866,7 +4866,6 @@ subroutine atm_compute_dyn_tend(tend, tend_physics, state, diag, mesh, configs, call mpas_pool_get_array(state, 'theta_m', theta_m_save, 1) call mpas_pool_get_array(state, 'scalars', scalars, 2) - call mpas_pool_get_array(diag, 'theta', theta) call mpas_pool_get_array(diag, 'uReconstructZonal', ur_cell) call mpas_pool_get_array(diag, 'uReconstructMeridional', vr_cell) call mpas_pool_get_array(diag, 'rho_edge', rho_edge) @@ -4975,7 +4974,7 @@ subroutine 
atm_compute_dyn_tend(tend, tend_physics, state, diag, mesh, configs, nCellsSolve, nEdgesSolve, vertexDegree, maxEdges, maxEdges2, num_scalars, index_qv, moist_start, moist_end, & fEdge, dvEdge, dcEdge, invDcEdge, invDvEdge, invAreaCell, invAreaTriangle, meshScalingDel2, meshScalingDel4, & weightsOnEdge, zgrid, rho_edge, rho_zz, ru, u, v, tend_u, & - divergence, vorticity, ke, pv_edge, theta_m, theta, rw, tend_rho, & + divergence, vorticity, ke, pv_edge, theta_m, rw, tend_rho, & rt_diabatic_tend, tend_theta, tend_w, w, cqw, rb, rr, pp, pressure_b, zz, zxu, cqu, & h_divergence, kdiff, edgesOnCell_sign, edgesOnVertex_sign, rw_save, ru_save, & theta_m_save, exner, rr_save, scalars, tend_u_euler, tend_w_euler, tend_theta_euler, deriv_two, & @@ -5001,7 +5000,7 @@ subroutine atm_compute_dyn_tend_work(nCells, nEdges, nVertices, nVertLevels_dumm nCellsSolve, nEdgesSolve, vertexDegree, maxEdges_dummy, maxEdges2_dummy, num_scalars_dummy, index_qv, moist_start, moist_end, & fEdge, dvEdge, dcEdge, invDcEdge, invDvEdge, invAreaCell, invAreaTriangle, meshScalingDel2, meshScalingDel4, & weightsOnEdge, zgrid, rho_edge, rho_zz, ru, u, v, tend_u, & - divergence, vorticity, ke, pv_edge, theta_m, theta, rw, tend_rho, & + divergence, vorticity, ke, pv_edge, theta_m, rw, tend_rho, & rt_diabatic_tend, tend_theta, tend_w, w, cqw, rb, rr, pp, pressure_b, zz, zxu, cqu, & h_divergence, kdiff, edgesOnCell_sign, edgesOnVertex_sign, rw_save, ru_save, & theta_m_save, exner, rr_save, scalars, tend_u_euler, tend_w_euler, tend_theta_euler, deriv_two, & @@ -5055,7 +5054,6 @@ subroutine atm_compute_dyn_tend_work(nCells, nEdges, nVertices, nVertLevels_dumm real (kind=RKIND), dimension(nVertLevels,nCells+1) :: ke real (kind=RKIND), dimension(nVertLevels,nEdges+1) :: pv_edge real (kind=RKIND), dimension(nVertLevels,nCells+1) :: theta_m - real (kind=RKIND), dimension(nVertLevels,nCells+1) :: theta real (kind=RKIND), dimension(nVertLevels+1,nCells+1) :: rw real (kind=RKIND), dimension(nVertLevels,nCells+1) 
:: tend_rho real (kind=RKIND), dimension(nVertLevels,nCells+1) :: rt_diabatic_tend @@ -5282,7 +5280,7 @@ subroutine atm_compute_dyn_tend_work(nCells, nEdges, nVertices, nVertLevels_dumm else if (config_les_model == "3d_smagorinsky") then ! call to 3D smagorinsky here... - call calculate_n2( bn2, theta, exner, pressure_b, pp, zgrid, scalars, index_qv, & + call calculate_n2( bn2, theta_m, exner, pressure_b, pp, zgrid, scalars, index_qv, & cellStart, cellEnd, nCells) end if From 116368b7943c13ad4f3debf55dde5ec7c7329b3d Mon Sep 17 00:00:00 2001 From: Jimy Dudhia Date: Fri, 17 Jan 2020 16:15:04 -0700 Subject: [PATCH 020/146] use local 1d theta array for efficiency --- .../dynamics/mpas_atm_dissipation_models.F | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/src/core_atmosphere/dynamics/mpas_atm_dissipation_models.F b/src/core_atmosphere/dynamics/mpas_atm_dissipation_models.F index df7df75521..987611a584 100644 --- a/src/core_atmosphere/dynamics/mpas_atm_dissipation_models.F +++ b/src/core_atmosphere/dynamics/mpas_atm_dissipation_models.F @@ -89,22 +89,24 @@ subroutine calculate_n2( bn2, theta_m, exner, pressure_b, pp, zgrid, scalars, in real (kind=RKIND), dimension(nVertLevels+1,nCells+1), intent(in) :: zgrid real (kind=RKIND), dimension(num_scalars,nVertLevels,nCells+1), intent(in) :: scalars ! local - real (kind=RKIND) :: dz, theta + real (kind=RKIND) :: dz + real (kind=RKIND), dimension(nVertLevels) :: theta do iCell = cellStart,cellEnd !DIR$ IVDEP + do k=1, nVertLevels + theta(k) = theta_m(k,iCell) / (1._RKIND + rvord * scalars(index_qv,k,iCell)) + end do do k=2, nVertLevels-1 ! Moist Brunt-Vaisala frequency according to Durran and Klemp (1982) Eq. 
36 - theta = theta_m(k,iCell) / (1._RKIND + rvord * scalars(index_qv,k,iCell)) - thetap = theta_m(k+1,iCell) / (1._RKIND + rvord * scalars(index_qv,k+1,iCell)) - thetam = theta_m(k-1,iCell) / (1._RKIND + rvord * scalars(index_qv,k-1,iCell)) dz = 0.5 * (zgrid(k+2,iCell)+zgrid(k+1,iCell)) - 0.5 * (zgrid(k,iCell)+zgrid(k-1,iCell)) - bn2(k,iCell) = gravity * (thetap - thetam ) / theta / dz + bn2(k,iCell) = gravity * (theta(k+1) - theta(k-1) ) / theta(k) / dz end do bn2(1,iCell) = bn2(2,iCell) bn2(nVertLevels,iCell) = bn2(nVertLevels-1,iCell) end do + end subroutine calculate_n2 !--------------------------------------- From b673e28325a2f024dc7207d6a74fd05aecf4df97 Mon Sep 17 00:00:00 2001 From: Jimy Dudhia Date: Fri, 24 Jan 2020 11:16:18 -0700 Subject: [PATCH 021/146] add qtot drag term to thetav in dry case --- .../dynamics/mpas_atm_dissipation_models.F | 12 +++++++----- .../dynamics/mpas_atm_time_integration.F | 2 +- 2 files changed, 8 insertions(+), 6 deletions(-) diff --git a/src/core_atmosphere/dynamics/mpas_atm_dissipation_models.F b/src/core_atmosphere/dynamics/mpas_atm_dissipation_models.F index 987611a584..db0492e262 100644 --- a/src/core_atmosphere/dynamics/mpas_atm_dissipation_models.F +++ b/src/core_atmosphere/dynamics/mpas_atm_dissipation_models.F @@ -77,7 +77,7 @@ end subroutine smagorinsky_2d !--------------------------------------- - subroutine calculate_n2( bn2, theta_m, exner, pressure_b, pp, zgrid, scalars, index_qv, & + subroutine calculate_n2( bn2, theta_m, exner, pressure_b, pp, zgrid, scalars, index_qv, qtot, & cellStart, cellEnd, nCells) use mpas_atm_dimensions ! 
pull nVertLevels and num_scalars from here @@ -85,23 +85,25 @@ subroutine calculate_n2( bn2, theta_m, exner, pressure_b, pp, zgrid, scalars, in integer, intent(in) :: cellStart, cellEnd, nCells integer, intent(in) :: index_qv real (kind=RKIND), dimension(nVertLevels,nCells+1), intent(out) :: bn2 - real (kind=RKIND), dimension(nVertLevels,nCells+1), intent(in) :: theta_m, exner, pressure_b, pp + real (kind=RKIND), dimension(nVertLevels,nCells+1), intent(in) :: theta_m, exner, pressure_b, pp, qtot real (kind=RKIND), dimension(nVertLevels+1,nCells+1), intent(in) :: zgrid real (kind=RKIND), dimension(num_scalars,nVertLevels,nCells+1), intent(in) :: scalars ! local real (kind=RKIND) :: dz - real (kind=RKIND), dimension(nVertLevels) :: theta + real (kind=RKIND), dimension(nVertLevels) :: thetav do iCell = cellStart,cellEnd !DIR$ IVDEP do k=1, nVertLevels - theta(k) = theta_m(k,iCell) / (1._RKIND + rvord * scalars(index_qv,k,iCell)) +! qtot is qv plus all species - thetav includes moist loading (rvord-1 gives 0.61 * qv) + thetav(k) = theta_m(k,iCell) / (1._RKIND + rvord * scalars(index_qv,k,iCell)) & + * (1._RKIND + rvord * scalars(index_qv,k,iCell) - qtot(k, iCell)) end do do k=2, nVertLevels-1 ! Moist Brunt-Vaisala frequency according to Durran and Klemp (1982) Eq. 
36 dz = 0.5 * (zgrid(k+2,iCell)+zgrid(k+1,iCell)) - 0.5 * (zgrid(k,iCell)+zgrid(k-1,iCell)) - bn2(k,iCell) = gravity * (theta(k+1) - theta(k-1) ) / theta(k) / dz + bn2(k,iCell) = gravity * (thetav(k+1) - thetav(k-1) ) / thetav(k) / dz end do bn2(1,iCell) = bn2(2,iCell) bn2(nVertLevels,iCell) = bn2(nVertLevels-1,iCell) diff --git a/src/core_atmosphere/dynamics/mpas_atm_time_integration.F b/src/core_atmosphere/dynamics/mpas_atm_time_integration.F index 00f935e311..2c6a1f4a57 100644 --- a/src/core_atmosphere/dynamics/mpas_atm_time_integration.F +++ b/src/core_atmosphere/dynamics/mpas_atm_time_integration.F @@ -5280,7 +5280,7 @@ subroutine atm_compute_dyn_tend_work(nCells, nEdges, nVertices, nVertLevels_dumm else if (config_les_model == "3d_smagorinsky") then ! call to 3D smagorinsky here... - call calculate_n2( bn2, theta_m, exner, pressure_b, pp, zgrid, scalars, index_qv, & + call calculate_n2( bn2, theta_m, exner, pressure_b, pp, zgrid, scalars, index_qv, qtot, & cellStart, cellEnd, nCells) end if From a9d7bfa108cf1468350658671ac16d5f5220dd9f Mon Sep 17 00:00:00 2001 From: Jimy Dudhia Date: Mon, 27 Jan 2020 15:07:00 -0700 Subject: [PATCH 022/146] add cloudy conditions N2 calculation following WRF and Durran and Klemp (1982) --- .../dynamics/mpas_atm_dissipation_models.F | 40 ++++++++++++++----- .../dynamics/mpas_atm_time_integration.F | 11 ++--- 2 files changed, 37 insertions(+), 14 deletions(-) diff --git a/src/core_atmosphere/dynamics/mpas_atm_dissipation_models.F b/src/core_atmosphere/dynamics/mpas_atm_dissipation_models.F index db0492e262..6605e0bad4 100644 --- a/src/core_atmosphere/dynamics/mpas_atm_dissipation_models.F +++ b/src/core_atmosphere/dynamics/mpas_atm_dissipation_models.F @@ -77,36 +77,58 @@ end subroutine smagorinsky_2d !--------------------------------------- - subroutine calculate_n2( bn2, theta_m, exner, pressure_b, pp, zgrid, scalars, index_qv, qtot, & + subroutine calculate_n2( bn2, theta_m, exner, pressure_b, pp, zgrid, scalars, index_qv, 
index_qc, qtot, & cellStart, cellEnd, nCells) use mpas_atm_dimensions ! pull nVertLevels and num_scalars from here integer, intent(in) :: cellStart, cellEnd, nCells - integer, intent(in) :: index_qv + integer, intent(in) :: index_qv, index_qc real (kind=RKIND), dimension(nVertLevels,nCells+1), intent(out) :: bn2 real (kind=RKIND), dimension(nVertLevels,nCells+1), intent(in) :: theta_m, exner, pressure_b, pp, qtot real (kind=RKIND), dimension(nVertLevels+1,nCells+1), intent(in) :: zgrid real (kind=RKIND), dimension(num_scalars,nVertLevels,nCells+1), intent(in) :: scalars ! local - real (kind=RKIND) :: dz - real (kind=RKIND), dimension(nVertLevels) :: thetav - + real (kind=RKIND) :: dz, esw, qc_cr + real (kind=RKIND), dimension(nVertLevels) :: theta, thetav, qvsw, temp, coefa + qc_cr = 0.00001 ! in kg/kg + do iCell = cellStart,cellEnd !DIR$ IVDEP do k=1, nVertLevels ! qtot is qv plus all species - thetav includes moist loading (rvord-1 gives 0.61 * qv) - thetav(k) = theta_m(k,iCell) / (1._RKIND + rvord * scalars(index_qv,k,iCell)) & - * (1._RKIND + rvord * scalars(index_qv,k,iCell) - qtot(k, iCell)) + theta(k) = theta_m(k,iCell) / (1._RKIND + rvord * scalars(index_qv,k,iCell)) + thetav(k) = theta(k) * (1._RKIND + rvord * scalars(index_qv,k,iCell) - qtot(k, iCell)) + + temp(k) = exner(k,iCell) * theta(k) + + p = pressure_b(k,iCell) + pp(k,iCell) + esw = 1000. * svp1 * exp(svp2 * (temp(k) - svpt0) / (temp(k) - svp3)) + if (p < esw) esw = p * 0.99 ! fix for pressure < esw + qvsw(k) = ep_2 * esw / (p - esw) + + coefa(k) = ( 1.0 + xlv * qvsw(k)/ R_d / temp(k) ) / & + ( 1.0 + xlv * xlv *qvsw(k) / Cp / R_v / temp(k) / temp(k) ) + end do + do k=2, nVertLevels-1 - ! Moist Brunt-Vaisala frequency according to Durran and Klemp (1982) Eq. 36 dz = 0.5 * (zgrid(k+2,iCell)+zgrid(k+1,iCell)) - 0.5 * (zgrid(k,iCell)+zgrid(k-1,iCell)) - bn2(k,iCell) = gravity * (thetav(k+1) - thetav(k-1) ) / thetav(k) / dz + + if ( scalars(index_qc,k,iCell) < qc_cr ) then + ! 
Dry Brunt-Vaisala frequency + bn2(k,iCell) = gravity * (thetav(k+1) - thetav(k-1) ) / thetav(k) / dz + else + ! Moist Brunt-Vaisala frequency according to Durran and Klemp (1982) Eq. 36 + bn2(k,iCell) = gravity * ( coefa(k) * ((theta(k+1) - theta(k-1) ) / theta(k) / dz & + + xlv / cp / temp(k) * ( qvsw(k+1) - qvsw(k-1)) / dz ) & + - ( qtot(k+1, iCell) - qtot(k-1, iCell) ) / dz ) + endif end do bn2(1,iCell) = bn2(2,iCell) bn2(nVertLevels,iCell) = bn2(nVertLevels-1,iCell) + end do end subroutine calculate_n2 diff --git a/src/core_atmosphere/dynamics/mpas_atm_time_integration.F b/src/core_atmosphere/dynamics/mpas_atm_time_integration.F index 2c6a1f4a57..fc875548f5 100644 --- a/src/core_atmosphere/dynamics/mpas_atm_time_integration.F +++ b/src/core_atmosphere/dynamics/mpas_atm_time_integration.F @@ -4833,7 +4833,7 @@ subroutine atm_compute_dyn_tend(tend, tend_physics, state, diag, mesh, configs, real (kind=RKIND), pointer :: config_rayleigh_damp_u_timescale_days integer, pointer :: config_number_rayleigh_damp_u_levels, config_number_cam_damping_levels integer, pointer :: config_number_rayleigh_damp_u_levels - integer, pointer :: index_qv + integer, pointer :: index_qv, index_qc logical :: inactive_rthdynten @@ -4959,6 +4959,7 @@ subroutine atm_compute_dyn_tend(tend, tend_physics, state, diag, mesh, configs, call mpas_pool_get_dimension(state, 'moist_start', moist_start) call mpas_pool_get_dimension(state, 'moist_end', moist_end) call mpas_pool_get_dimension(state, 'index_qv', index_qv) + call mpas_pool_get_dimension(state, 'index_qc', index_qc) call mpas_pool_get_array(mesh, 'nEdgesOnCell', nEdgesOnCell) call mpas_pool_get_array(mesh, 'nAdvCellsForEdge', nAdvCellsForEdge) @@ -4971,7 +4972,7 @@ subroutine atm_compute_dyn_tend(tend, tend_physics, state, diag, mesh, configs, call mpas_pool_get_array(mesh, 'cf3', cf3) call atm_compute_dyn_tend_work(nCells, nEdges, nVertices, nVertLevels, & - nCellsSolve, nEdgesSolve, vertexDegree, maxEdges, maxEdges2, num_scalars, 
index_qv, moist_start, moist_end, & + nCellsSolve, nEdgesSolve, vertexDegree, maxEdges, maxEdges2, num_scalars, index_qv, index_qc, moist_start, moist_end, & fEdge, dvEdge, dcEdge, invDcEdge, invDvEdge, invAreaCell, invAreaTriangle, meshScalingDel2, meshScalingDel4, & weightsOnEdge, zgrid, rho_edge, rho_zz, ru, u, v, tend_u, & divergence, vorticity, ke, pv_edge, theta_m, rw, tend_rho, & @@ -4997,7 +4998,7 @@ end subroutine atm_compute_dyn_tend subroutine atm_compute_dyn_tend_work(nCells, nEdges, nVertices, nVertLevels_dummy, & - nCellsSolve, nEdgesSolve, vertexDegree, maxEdges_dummy, maxEdges2_dummy, num_scalars_dummy, index_qv, moist_start, moist_end, & + nCellsSolve, nEdgesSolve, vertexDegree, maxEdges_dummy, maxEdges2_dummy, num_scalars_dummy, index_qv, index_qc, moist_start, moist_end, & fEdge, dvEdge, dcEdge, invDcEdge, invDvEdge, invAreaCell, invAreaTriangle, meshScalingDel2, meshScalingDel4, & weightsOnEdge, zgrid, rho_edge, rho_zz, ru, u, v, tend_u, & divergence, vorticity, ke, pv_edge, theta_m, rw, tend_rho, & @@ -5030,7 +5031,7 @@ subroutine atm_compute_dyn_tend_work(nCells, nEdges, nVertices, nVertLevels_dumm ! Dummy arguments ! integer :: nCells, nEdges, nVertices, nVertLevels_dummy, nCellsSolve, nEdgesSolve, vertexDegree, & - maxEdges_dummy, maxEdges2_dummy, num_scalars_dummy, index_qv, moist_start, moist_end + maxEdges_dummy, maxEdges2_dummy, num_scalars_dummy, index_qv, index_qc, moist_start, moist_end real (kind=RKIND), dimension(nEdges+1) :: fEdge real (kind=RKIND), dimension(nEdges+1) :: dvEdge @@ -5280,7 +5281,7 @@ subroutine atm_compute_dyn_tend_work(nCells, nEdges, nVertices, nVertLevels_dumm else if (config_les_model == "3d_smagorinsky") then ! call to 3D smagorinsky here... 
- call calculate_n2( bn2, theta_m, exner, pressure_b, pp, zgrid, scalars, index_qv, qtot, & + call calculate_n2( bn2, theta_m, exner, pressure_b, pp, zgrid, scalars, index_qv, index_qc, qtot, & cellStart, cellEnd, nCells) end if From f12589da0f1c779fcf61f34c05a5e3d3db16ed3a Mon Sep 17 00:00:00 2001 From: Jimy Dudhia Date: Thu, 30 Jan 2020 09:43:20 -0700 Subject: [PATCH 023/146] remove thetav use --- .../dynamics/mpas_atm_dissipation_models.F | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/src/core_atmosphere/dynamics/mpas_atm_dissipation_models.F b/src/core_atmosphere/dynamics/mpas_atm_dissipation_models.F index 6605e0bad4..c53904413d 100644 --- a/src/core_atmosphere/dynamics/mpas_atm_dissipation_models.F +++ b/src/core_atmosphere/dynamics/mpas_atm_dissipation_models.F @@ -90,16 +90,15 @@ subroutine calculate_n2( bn2, theta_m, exner, pressure_b, pp, zgrid, scalars, in real (kind=RKIND), dimension(num_scalars,nVertLevels,nCells+1), intent(in) :: scalars ! local real (kind=RKIND) :: dz, esw, qc_cr - real (kind=RKIND), dimension(nVertLevels) :: theta, thetav, qvsw, temp, coefa + real (kind=RKIND), dimension(nVertLevels) :: theta, qvsw, temp, coefa qc_cr = 0.00001 ! in kg/kg do iCell = cellStart,cellEnd !DIR$ IVDEP do k=1, nVertLevels -! qtot is qv plus all species - thetav includes moist loading (rvord-1 gives 0.61 * qv) + theta(k) = theta_m(k,iCell) / (1._RKIND + rvord * scalars(index_qv,k,iCell)) - thetav(k) = theta(k) * (1._RKIND + rvord * scalars(index_qv,k,iCell) - qtot(k, iCell)) temp(k) = exner(k,iCell) * theta(k) @@ -118,7 +117,9 @@ subroutine calculate_n2( bn2, theta_m, exner, pressure_b, pp, zgrid, scalars, in if ( scalars(index_qc,k,iCell) < qc_cr ) then ! 
Dry Brunt-Vaisala frequency - bn2(k,iCell) = gravity * (thetav(k+1) - thetav(k-1) ) / thetav(k) / dz + bn2(k,iCell) = gravity * ((theta(k+1) - theta(k-1) ) / theta(k) / dz & + + rvord * (scalars(index_qv,k+1,iCell) - scalars(index_qv,k-1,iCell)) / dz & + - ( qtot(k+1, iCell) - qtot(k-1, iCell) ) / dz ) else ! Moist Brunt-Vaisala frequency according to Durran and Klemp (1982) Eq. 36 bn2(k,iCell) = gravity * ( coefa(k) * ((theta(k+1) - theta(k-1) ) / theta(k) / dz & From 7abd792ab4aa31c702637e94cd40feba4cba876d Mon Sep 17 00:00:00 2001 From: Michael Duda Date: Thu, 4 Dec 2025 14:09:06 -0700 Subject: [PATCH 024/146] add bn2 to output and commented out test call --- src/core_atmosphere/Registry.xml | 6 ++++++ .../dynamics/mpas_atm_time_integration.F | 12 +++++++++--- 2 files changed, 15 insertions(+), 3 deletions(-) diff --git a/src/core_atmosphere/Registry.xml b/src/core_atmosphere/Registry.xml index 94e6b43f22..23788430c1 100644 --- a/src/core_atmosphere/Registry.xml +++ b/src/core_atmosphere/Registry.xml @@ -1959,6 +1959,12 @@ + + + + diff --git a/src/core_atmosphere/dynamics/mpas_atm_time_integration.F b/src/core_atmosphere/dynamics/mpas_atm_time_integration.F index fc875548f5..5b5edebf62 100644 --- a/src/core_atmosphere/dynamics/mpas_atm_time_integration.F +++ b/src/core_atmosphere/dynamics/mpas_atm_time_integration.F @@ -4779,7 +4779,7 @@ subroutine atm_compute_dyn_tend(tend, tend_physics, state, diag, mesh, configs, real (kind=RKIND), dimension(:,:), pointer :: weightsOnEdge, zgrid, rho_edge, rho_zz, ru, u, v, tend_u, & divergence, vorticity, ke, pv_edge, theta_m, rw, tend_rho, & rt_diabatic_tend, tend_theta, tend_w, w, cqw, rb, rr, pp, pressure_b, zz, zxu, cqu, & - h_divergence, kdiff, edgesOnCell_sign, edgesOnVertex_sign, rw_save, ru_save + h_divergence, kdiff, bn2, edgesOnCell_sign, edgesOnVertex_sign, rw_save, ru_save real (kind=RKIND), dimension(:,:), pointer :: theta_m_save @@ -4874,6 +4874,7 @@ subroutine atm_compute_dyn_tend(tend, tend_physics, state, 
diag, mesh, configs, call mpas_pool_get_array(diag, 'rho_p_save', rr_save) call mpas_pool_get_array(diag, 'v', v) call mpas_pool_get_array(diag, 'kdiff', kdiff) + call mpas_pool_get_array(diag, 'bn2', bn2) call mpas_pool_get_array(diag, 'ru', ru) call mpas_pool_get_array(diag, 'ru_save', ru_save) call mpas_pool_get_array(diag, 'rw', rw) @@ -4977,7 +4978,7 @@ subroutine atm_compute_dyn_tend(tend, tend_physics, state, diag, mesh, configs, weightsOnEdge, zgrid, rho_edge, rho_zz, ru, u, v, tend_u, & divergence, vorticity, ke, pv_edge, theta_m, rw, tend_rho, & rt_diabatic_tend, tend_theta, tend_w, w, cqw, rb, rr, pp, pressure_b, zz, zxu, cqu, & - h_divergence, kdiff, edgesOnCell_sign, edgesOnVertex_sign, rw_save, ru_save, & + h_divergence, kdiff, bn2, edgesOnCell_sign, edgesOnVertex_sign, rw_save, ru_save, & theta_m_save, exner, rr_save, scalars, tend_u_euler, tend_w_euler, tend_theta_euler, deriv_two, & cellsOnEdge, verticesOnEdge, edgesOnCell, edgesOnEdge, cellsOnCell, edgesOnVertex, nEdgesOnCell, nEdgesOnEdge, & latCell, latEdge, angleEdge, u_init, v_init, advCellsForEdge, nAdvCellsForEdge, adv_coefs, adv_coefs_3rd, & @@ -5003,7 +5004,7 @@ subroutine atm_compute_dyn_tend_work(nCells, nEdges, nVertices, nVertLevels_dumm weightsOnEdge, zgrid, rho_edge, rho_zz, ru, u, v, tend_u, & divergence, vorticity, ke, pv_edge, theta_m, rw, tend_rho, & rt_diabatic_tend, tend_theta, tend_w, w, cqw, rb, rr, pp, pressure_b, zz, zxu, cqu, & - h_divergence, kdiff, edgesOnCell_sign, edgesOnVertex_sign, rw_save, ru_save, & + h_divergence, kdiff, bn2, edgesOnCell_sign, edgesOnVertex_sign, rw_save, ru_save, & theta_m_save, exner, rr_save, scalars, tend_u_euler, tend_w_euler, tend_theta_euler, deriv_two, & cellsOnEdge, verticesOnEdge, edgesOnCell, edgesOnEdge, cellsOnCell, edgesOnVertex, nEdgesOnCell, nEdgesOnEdge, & latCell, latEdge, angleEdge, u_init, v_init, advCellsForEdge, nAdvCellsForEdge, adv_coefs, adv_coefs_3rd, & @@ -5263,6 +5264,11 @@ subroutine atm_compute_dyn_tend_work(nCells, 
nEdges, nVertices, nVertLevels_dumm else if(config_horiz_mixing == "2d_fixed") then +! testing +! call calculate_n2( bn2, theta_m, exner, pressure_b, pp, zgrid, scalars, index_qv, index_qc, qtot, & +! cellStart, cellEnd, nCells) +! testing + !$acc parallel default(present) !$acc loop gang worker do iCell = cellStart, cellEnd From a6faf86febfc6dbde29dcce876e2ea8c328db3c8 Mon Sep 17 00:00:00 2001 From: Bill Skamarock Date: Fri, 6 Mar 2020 08:46:26 -0700 Subject: [PATCH 025/146] added a 3D vertical eddy viscosity to the Registry for use in LES appliations. added a new 3D Smagorinsky eddy viscosity computation and vertical mixing for the dynamics variables. All code compiles but not tested. --- src/core_atmosphere/Registry.xml | 3 + .../dynamics/mpas_atm_dissipation_models.F | 932 ++++++++++++++++-- 2 files changed, 838 insertions(+), 97 deletions(-) diff --git a/src/core_atmosphere/Registry.xml b/src/core_atmosphere/Registry.xml index 23788430c1..ad6a27b4f3 100644 --- a/src/core_atmosphere/Registry.xml +++ b/src/core_atmosphere/Registry.xml @@ -1959,6 +1959,9 @@ + + diff --git a/src/core_atmosphere/dynamics/mpas_atm_dissipation_models.F b/src/core_atmosphere/dynamics/mpas_atm_dissipation_models.F index c53904413d..37a9d3aabf 100644 --- a/src/core_atmosphere/dynamics/mpas_atm_dissipation_models.F +++ b/src/core_atmosphere/dynamics/mpas_atm_dissipation_models.F @@ -10,6 +10,7 @@ module mpas_atm_dissipation_models use mpas_kind_types, only : RKIND use mpas_atmphys_constants + use mpas_constants contains @@ -75,6 +76,131 @@ subroutine smagorinsky_2d( kdiff, u, v, c_s, config_len_disp, end subroutine smagorinsky_2d +!--------------------------------------- + + subroutine smagorinsky_3d( eddy_visc_horz, eddy_visc_vert, u, v, uCell, vCell, & + w, c_s, bv_freq2, zgrid, config_len_disp, & + deformation_coef_c2, deformation_coef_s2, deformation_coef_cs, & + deformation_coef_c, deformation_coef_s, & + invDt, h_mom_eddy_visc4, config_visc4_2dsmag, h_theta_eddy_visc4, & + 
cellStart, cellEnd, nEdgesOnCell, edgesOnCell, cellsOnEdge, & + nCells, nEdges, nVertLevels, maxEdges ) + + implicit none + + integer, intent(in) :: cellStart, cellEnd, nCells, nEdges, nVertLevels, maxEdges + real (kind=RKIND), dimension(nVertLevels,nEdges+1), intent(in) :: u + real (kind=RKIND), dimension(nVertLevels,nEdges+1), intent(in) :: v + real (kind=RKIND), dimension(nVertLevels,nCells+1), intent(in) :: uCell + real (kind=RKIND), dimension(nVertLevels,nCells+1), intent(in) :: vCell + real (kind=RKIND), dimension(nVertLevels,nCells+1), intent(in) :: bv_freq2 + real (kind=RKIND), dimension(nVertLevels+1,nCells+1), intent(in) :: w + real (kind=RKIND), dimension(nVertLevels+1,nCells+1), intent(in) :: zgrid + real (kind=RKIND), dimension(maxEdges,nCells+1), intent(in) :: deformation_coef_c2 + real (kind=RKIND), dimension(maxEdges,nCells+1), intent(in) :: deformation_coef_s2 + real (kind=RKIND), dimension(maxEdges,nCells+1), intent(in) :: deformation_coef_cs + real (kind=RKIND), dimension(maxEdges,nCells+1), intent(in) :: deformation_coef_c + real (kind=RKIND), dimension(maxEdges,nCells+1), intent(in) :: deformation_coef_s + real (kind=RKIND), intent(in) :: c_s, config_len_disp, invDt, config_visc4_2dsmag + integer, dimension(nCells+1), intent(in) :: nEdgesOnCell + integer, dimension(maxEdges,nCells+1), intent(in) :: edgesOnCell + integer, dimension(2,nEdges+1), intent(in) :: cellsOnEdge + + real (kind=RKIND), dimension(nVertLevels,nCells+1), intent(out) :: eddy_visc_horz, eddy_visc_vert + real (kind=RKIND), intent(in) :: h_mom_eddy_visc4, h_theta_eddy_visc4 + + ! 
local variables + + integer :: iCell, iEdge, k, ie, cell1, cell2 + real (kind=RKIND), dimension(nVertLevels) :: d_11, d_22, d_33, d_12, d_13, d_23 + real (kind=RKIND), dimension(nVertLevels) :: dudx, dudy, dvdx, dvdy + real (kind=RKIND), dimension(nVertLevels+1) :: dwdx, dwdy + real (kind=RKIND), dimension(nVertLevels) :: dudz, dvdz, dwdz + real (kind=RKIND) :: rdz, def2, pr_inv, wk + + pr_inv = 1./prandtl + + do iCell = cellStart,cellEnd + + dudx(1:nVertLevels) = 0.0 + dudy(1:nVertLevels) = 0.0 + dvdx(1:nVertLevels) = 0.0 + dvdy(1:nVertLevels) = 0.0 + dwdx(1:nVertLevels+1) = 0.0 + dwdy(1:nVertLevels+1) = 0.0 + + dudz(1:nVertLevels) = 0.0 + dvdz(1:nVertLevels) = 0.0 + dwdz(1:nVertLevels) = 0.0 + + do iEdge=1,nEdgesOnCell(iCell) + + ie = EdgesOnCell(iEdge,iCell) + cell1 = edgesOnCell(1,ie) + cell2 = edgesOnCell(2,ie) + + do k=1,nVertLevels + dudx(k) = dudx(k) + deformation_coef_c2(iEdge,iCell)*u(k,ie) & + - deformation_coef_cs(iEdge,iCell)*v(k,ie) + dudy(k) = dudy(k) + deformation_coef_cs(iEdge,iCell)*u(k,ie) & + - deformation_coef_s2(iEdge,iCell)*v(k,ie) + dvdx(k) = dvdx(k) + deformation_coef_cs(iEdge,iCell)*u(k,ie) & + + deformation_coef_c2(iEdge,iCell)*v(k,ie) + dvdy(k) = dvdy(k) + deformation_coef_s2(iEdge,iCell)*u(k,ie) & + + deformation_coef_cs(iEdge,iCell)*v(k,ie) + end do + + do k=1,nVertLevels+1 + wk = 0.5*(w(k,cell1)+w(k,cell2)) + dwdx(k) = dwdx(k) + deformation_coef_c(iEdge,iCell)*wk + dwdy(k) = dwdy(k) + deformation_coef_s(iEdge,iCell)*wk + end do + + end do + + do k=1,nVertLevels + rdz = 1./(zgrid(k+1,iCell)-zgrid(k,iCell)) + dwdz(k) = (w(k+1,iCell)-w(k,iCell))*rdz + end do + + do k=2,nVertLevels-1 + rdz = 2./(zgrid(k+2,iCell)+zgrid(k+1,iCell)-zgrid(k,iCell)-zgrid(k-1,iCell)) + dudz(k) = (u(k+1,iCell)-u(k-1,iCell))*rdz + dvdz(k) = (v(k+1,iCell)-v(k-1,iCell))*rdz + end do + + k = 1 + rdz = 1./(zgrid(k+1,iCell)-zgrid(k,iCell)) + dudz(k) = (u(k+1,iCell)-u(k,iCell))*rdz + dvdz(k) = (v(k+1,iCell)-v(k,iCell))*rdz + + k = nVertLevels-1 + rdz = 
1./(zgrid(k+1,iCell)-zgrid(k,iCell)) + dudz(k+1) = (u(k+1,iCell)-u(k,iCell))*rdz + dvdz(k+1) = (v(k+1,iCell)-v(k,iCell))*rdz + + do k=1, nVertLevels + ! here is the 3D Smagorinsky formulation, + ! followed by imposition of an upper bound on the eddy viscosity + d_11(k) = 2*dudx(k) + d_22(k) = 2*dvdy(k) + d_33(k) = 2*dwdz(k) + d_12(k) = dudy(k) + dvdx(k) + d_13(k) = dwdx(k) + dudz(k) + d_23(k) = dwdy(k) + dvdz(k) + + def2 = 0.5*(d_11(k)**2 + d_22(k)**2 + d_33(k)**2) + d_12(k)**2 + d_13(k)**2 + d_23(k)**2 + + eddy_visc_horz(k,iCell) = (c_s * config_len_disp)**2 * max(0.,def2 - pr_inv*bv_freq2(k,iCell)) + eddy_visc_horz(k,iCell) = min(eddy_visc_horz(k,iCell),(0.01*config_len_disp**2) * invDt) + eddy_visc_vert(k,iCell) = eddy_visc_horz(k,iCell) + + end do + + end do ! loop over all owned cells (columns) + + end subroutine smagorinsky_3d + !--------------------------------------- subroutine calculate_n2( bn2, theta_m, exner, pressure_b, pp, zgrid, scalars, index_qv, index_qc, qtot, & @@ -361,165 +487,587 @@ end subroutine u_dissipation !------------------------ - subroutine w_dissipation( cellStart, cellEnd, cellSolveStart, cellSolveEnd, & - nCells, nEdges, & - nEdgesOnCell, edgesOnCell, cellsOnEdge, edgesOnCell_sign, & - invAreaCell, invDcEdge, dvEdge, & - meshScalingDel2, meshScalingDel4, & - rdzw, rdzu, & - v_mom_eddy_visc2, h_mom_eddy_visc4, & - delsq_w, & - w, rho_edge, kdiff, rho_zz, & - tend_w_euler ) - + subroutine u_dissipation_3d( edgeStart, edgeEnd, edgeSolveStart, edgeSolveEnd, vertexStart, vertexEnd, & + cellStart, cellEnd, nCells, nEdges, nVertices, vertexDegree, & + cellsOnEdge, verticesOnEdge, edgesOnCell, edgesOnVertex, & + nEdgesOnCell, edgesOnCell_sign, edgesOnVertex_sign, & + invAreaCell, invAreaTriangle, invDvEdge, invDcEdge, & + angleEdge, dcEdge, dvEdge, meshScalingDel2, meshScalingDel4, & + config_mix_full, h_mom_eddy_visc4, v_mom_eddy_visc2, & + config_del4u_div_factor, zgrid, & + eddy_visc_horz, eddy_visc_vert, zz, rdzu, rdzw, & + fzm, fzp, 
config_les_model, & + delsq_u, delsq_vorticity, delsq_divergence, & + u, divergence, vorticity, rho_edge, rho_zz, u_init, v_init, tend_u_euler ) use mpas_atm_dimensions ! pull nVertLevels and maxEdges from here implicit none + integer, intent(in) :: edgeStart, edgeEnd, edgeSolveStart, edgeSolveEnd + integer, intent(in) :: vertexStart, vertexEnd, vertexDegree integer, intent(in) :: cellStart, cellEnd - integer, intent(in) :: cellSolveStart, cellSolveEnd - integer, intent(in) :: nCells, nEdges + integer, intent(in) :: nCells, nEdges, nVertices + logical, intent(in) :: config_mix_full - integer, dimension(nCells+1), intent(in) :: nEdgesOnCell - integer, dimension(maxEdges,nCells+1), intent(in) :: EdgesOnCell + character (len=StrKIND) :: config_les_model integer, dimension(2,nEdges+1), intent(in) :: cellsOnEdge + integer, dimension(2,nEdges+1), intent(in) :: verticesOnEdge + integer, dimension(maxEdges,nCells+1), intent(in) :: edgesOnCell + integer, dimension(nCells+1), intent(in) :: nEdgesOnCell + integer, dimension(vertexDegree,nVertices+1), intent(in) :: edgesOnVertex real (kind=RKIND), intent(in) :: h_mom_eddy_visc4 real (kind=RKIND), intent(in) :: v_mom_eddy_visc2 + real (kind=RKIND), intent(in) :: config_del4u_div_factor real (kind=RKIND), dimension(maxEdges,nCells+1), intent(in) :: edgesOnCell_sign + real (kind=RKIND), dimension(vertexDegree,nVertices+1), intent(in) :: edgesOnVertex_sign + real (kind=RKIND), dimension(nVertices+1), intent(in) :: invAreaTriangle real (kind=RKIND), dimension(nCells+1), intent(in) :: invAreaCell - real (kind=RKIND), dimension(nEdges+1), intent(in) :: dvEdge real (kind=RKIND), dimension(nEdges+1), intent(in) :: invDcEdge + real (kind=RKIND), dimension(nEdges+1), intent(in) :: invDvEdge + real (kind=RKIND), dimension(nEdges+1), intent(in) :: angleEdge + real (kind=RKIND), dimension(nEdges+1), intent(in) :: dcEdge + real (kind=RKIND), dimension(nEdges+1), intent(in) :: dvEdge real (kind=RKIND), dimension(nEdges+1), intent(in) :: 
meshScalingDel2 real (kind=RKIND), dimension(nEdges+1), intent(in) :: meshScalingDel4 - real (kind=RKIND), dimension(nVertLevels), intent(in) :: rdzw - real (kind=RKIND), dimension(nVertLevels), intent(in) :: rdzu + real (kind=RKIND), dimension(nVertLevels+1,nCells+1), intent(in) :: zgrid - real (kind=RKIND), dimension(nVertLevels+1,nCells+1), intent(in) :: w - real (kind=RKIND), dimension(nVertLevels,nCells+1), intent(in) :: kdiff - real (kind=RKIND), dimension(nVertLevels,nCells+1), intent(in) :: rho_zz + real (kind=RKIND), dimension(nVertLevels,nEdges+1), intent(in) :: u + real (kind=RKIND), dimension(nVertLevels,nCells+1), intent(in) :: divergence + real (kind=RKIND), dimension(nVertLevels,nVertices+1), intent(in) :: vorticity real (kind=RKIND), dimension(nVertLevels,nEdges+1), intent(in) :: rho_edge + real (kind=RKIND), dimension(nVertLevels,nCells+1), intent(in) :: eddy_visc_horz + real (kind=RKIND), dimension(nVertLevels,nCells+1), intent(in) :: eddy_visc_vert + real (kind=RKIND), dimension(nVertLevels,nCells+1), intent(in) :: zz + real (kind=RKIND), dimension(nVertLevels,nCells+1), intent(in) :: rho_zz + real (kind=RKIND), dimension(nVertLevels), intent(in) :: rdzu + real (kind=RKIND), dimension(nVertLevels), intent(in) :: rdzw + real (kind=RKIND), dimension(nVertLevels), intent(in) :: fzm + real (kind=RKIND), dimension(nVertLevels), intent(in) :: fzp - real (kind=RKIND), dimension(nVertLevels+1,nCells+1), intent(inout) :: tend_w_euler - ! storage passed in from calling routine - real (kind=RKIND), dimension(nVertLevels,nCells+1) :: delsq_w + ! scratch space from calling routine + real (kind=RKIND), dimension(nVertLevels,nEdges+1) :: delsq_u + real (kind=RKIND), dimension(nVertLevels,nVertices+1) :: delsq_vorticity + real (kind=RKIND), dimension(nVertLevels,nCells+1) :: delsq_divergence - ! 
local variables + real (kind=RKIND), dimension(nVertLevels), intent(in) :: u_init, v_init - integer :: cell1, cell2, iEdge, iCell, i, k - real (kind=RKIND) :: r_areaCell, edge_sign, w_turb_flux + real (kind=RKIND), dimension(nVertLevels,nEdges+1), intent(out) :: tend_u_euler -! !OMP BARRIER why is this openmp barrier here??? + ! local variables - ! del^4 horizontal filter. We compute this as del^2 ( del^2 (w) ). - ! - ! First, storage to hold the result from the first del^2 computation. - ! we copied code from the theta mixing, hence the theta* names. + integer :: iEdge, cell1, cell2, vertex1, vertex2, iVertex, iCell, i, k + real (kind=RKIND) :: r_dc, r_dv, u_diffusion, kdiffu, r, edge_sign, u_mix_scale + real (kind=RKIND) :: z1, z2, z3, z4, zm, z0, zp + real (kind=RKIND), dimension(nVertLevels) :: u_mix + real (kind=RKIND), dimension(nVertLevels+1) :: turb_vflux + real (kind=RKIND) :: rho_k_cell1, rho_k_cell2, rho_k_at_w + real (kind=RKIND) :: zz_cell1, zz_cell2, zz_at_w - delsq_w(1:nVertLevels,cellStart:cellEnd) = 0.0 +!$OMP BARRIER - do iCell=cellStart,cellEnd - tend_w_euler(1:nVertLevels+1,iCell) = 0.0 - r_areaCell = invAreaCell(iCell) - do i=1,nEdgesOnCell(iCell) - iEdge = edgesOnCell(i,iCell) + ! del^4 horizontal filter. We compute this as del^2 ( del^2 (u) ). + ! First, storage to hold the result from the first del^2 computation. - edge_sign = 0.5 * r_areaCell*edgesOnCell_sign(i,iCell) * dvEdge(iEdge) * invDcEdge(iEdge) + delsq_u(1:nVertLevels,edgeStart:edgeEnd) = 0.0 - cell1 = cellsOnEdge(1,iEdge) - cell2 = cellsOnEdge(2,iEdge) + do iEdge=edgeStart,edgeEnd + cell1 = cellsOnEdge(1,iEdge) + cell2 = cellsOnEdge(2,iEdge) + vertex1 = verticesOnEdge(1,iEdge) + vertex2 = verticesOnEdge(2,iEdge) + r_dc = invDcEdge(iEdge) + r_dv = min(invDvEdge(iEdge), 4*invDcEdge(iEdge)) !DIR$ IVDEP - do k=2,nVertLevels + do k=1,nVertLevels + + ! Compute diffusion, computed as \nabla divergence - k \times \nabla vorticity + ! 
only valid for h_mom_eddy_visc4 == constant + u_diffusion = ( divergence(k,cell2) - divergence(k,cell1) ) * r_dc & + -( vorticity(k,vertex2) - vorticity(k,vertex1) ) * r_dv + + delsq_u(k,iEdge) = delsq_u(k,iEdge) + u_diffusion + + kdiffu = 0.5*(eddy_visc_horz(k,cell1)+eddy_visc_horz(k,cell2)) + + ! include 2nd-orer diffusion here + tend_u_euler(k,iEdge) = tend_u_euler(k,iEdge) & + + rho_edge(k,iEdge)* kdiffu * u_diffusion * meshScalingDel2(iEdge) - w_turb_flux = edge_sign*(rho_edge(k,iEdge)+rho_edge(k-1,iEdge))*(w(k,cell2) - w(k,cell1)) - delsq_w(k,iCell) = delsq_w(k,iCell) + w_turb_flux - w_turb_flux = w_turb_flux * meshScalingDel2(iEdge) * 0.25 * & - (kdiff(k,cell1)+kdiff(k,cell2)+kdiff(k-1,cell1)+kdiff(k-1,cell2)) - tend_w_euler(k,iCell) = tend_w_euler(k,iCell) + w_turb_flux - end do end do end do + if (h_mom_eddy_visc4 > 0.0) then ! 4th order mixing is active + !$OMP BARRIER - if (h_mom_eddy_visc4 > 0.0) then ! 4th order mixing is active + do iVertex=vertexStart,vertexEnd + delsq_vorticity(1:nVertLevels,iVertex) = 0.0 + do i=1,vertexDegree + iEdge = edgesOnVertex(i,iVertex) + edge_sign = invAreaTriangle(iVertex) * dcEdge(iEdge) * edgesOnVertex_sign(i,iVertex) + do k=1,nVertLevels + delsq_vorticity(k,iVertex) = delsq_vorticity(k,iVertex) + edge_sign * delsq_u(k,iEdge) + end do + end do + end do - do iCell=cellSolveStart,cellSolveEnd ! Technically updating fewer cells than before... 
- r_areaCell = h_mom_eddy_visc4 * invAreaCell(iCell) + do iCell=cellStart,cellEnd + delsq_divergence(1:nVertLevels,iCell) = 0.0 + r = invAreaCell(iCell) do i=1,nEdgesOnCell(iCell) iEdge = edgesOnCell(i,iCell) - cell1 = cellsOnEdge(1,iEdge) - cell2 = cellsOnEdge(2,iEdge) - - edge_sign = meshScalingDel4(iEdge)*r_areaCell*dvEdge(iEdge)*edgesOnCell_sign(i,iCell) * invDcEdge(iEdge) - - do k=2,nVertLevels - tend_w_euler(k,iCell) = tend_w_euler(k,iCell) - edge_sign * (delsq_w(k,cell2) - delsq_w(k,cell1)) + edge_sign = r * dvEdge(iEdge) * edgesOnCell_sign(i,iCell) + do k=1,nVertLevels + delsq_divergence(k,iCell) = delsq_divergence(k,iCell) + edge_sign * delsq_u(k,iEdge) end do - end do end do - end if ! 4th order mixing is active +!$OMP BARRIER - if ( v_mom_eddy_visc2 > 0.0 ) then ! vertical mixing + do iEdge=edgeSolveStart,edgeSolveEnd + cell1 = cellsOnEdge(1,iEdge) + cell2 = cellsOnEdge(2,iEdge) + vertex1 = verticesOnEdge(1,iEdge) + vertex2 = verticesOnEdge(2,iEdge) + + u_mix_scale = meshScalingDel4(iEdge)*h_mom_eddy_visc4 + r_dc = u_mix_scale * config_del4u_div_factor * invDcEdge(iEdge) + r_dv = u_mix_scale * min(invDvEdge(iEdge), 4*invDcEdge(iEdge)) - do iCell=cellSolveStart,cellSolveEnd !DIR$ IVDEP - do k=2,nVertLevels - tend_w_euler(k,iCell) = tend_w_euler(k,iCell) + v_mom_eddy_visc2*0.5*(rho_zz(k,iCell)+rho_zz(k-1,iCell))*( & - (w(k+1,iCell)-w(k ,iCell))*rdzw(k) & - -(w(k ,iCell)-w(k-1,iCell))*rdzw(k-1) )*rdzu(k) + do k=1,nVertLevels + + ! Compute diffusion, computed as \nabla divergence - k \times \nabla vorticity + ! only valid for h_mom_eddy_visc4 == constant + ! + ! Here, we scale the diffusion on the divergence part a factor of config_del4u_div_factor + ! relative to the rotational part. The stability constraint on the divergence component is much less + ! stringent than the rotational part, and this flexibility may be useful. + ! 
+ u_diffusion = rho_edge(k,iEdge) * ( ( delsq_divergence(k,cell2) - delsq_divergence(k,cell1) ) * r_dc & + -( delsq_vorticity(k,vertex2) - delsq_vorticity(k,vertex1) ) * r_dv ) + tend_u_euler(k,iEdge) = tend_u_euler(k,iEdge) - u_diffusion + + end do end do - end do - end if + end if ! 4th order mixing is active - end subroutine w_dissipation + ! + ! vertical mixing for u - 2nd order filter in physical (z) space + ! + if ( v_mom_eddy_visc2 > 0.0 ) then -!----------------------------------------------------- + if (config_mix_full) then ! mix full state - subroutine theta_dissipation( cellStart, cellEnd, cellSolveStart, cellSolveEnd, & - nCells, nEdges, & - nEdgesOnCell, edgesOnCell, cellsOnEdge, edgesOnCell_sign, & - invAreaCell, invDcEdge, dvEdge, & - meshScalingDel2, meshScalingDel4, & - config_mix_full, t_init, zgrid, & - rdzw, rdzu, & - v_theta_eddy_visc2, h_theta_eddy_visc4, prandtl_inv, & - delsq_theta, & - theta_m, rho_edge, kdiff, rho_zz, & - tend_theta_euler ) + do iEdge=edgeSolveStart,edgeSolveEnd + cell1 = cellsOnEdge(1,iEdge) + cell2 = cellsOnEdge(2,iEdge) - use mpas_atm_dimensions ! 
pull nVertLevels and maxEdges from here - implicit none + do k=2,nVertLevels-1 - integer, intent(in) :: cellStart, cellEnd - integer, intent(in) :: cellSolveStart, cellSolveEnd - integer, intent(in) :: nCells, nEdges + z1 = 0.5*(zgrid(k-1,cell1)+zgrid(k-1,cell2)) + z2 = 0.5*(zgrid(k ,cell1)+zgrid(k ,cell2)) + z3 = 0.5*(zgrid(k+1,cell1)+zgrid(k+1,cell2)) + z4 = 0.5*(zgrid(k+2,cell1)+zgrid(k+2,cell2)) - logical, intent(in) :: config_mix_full + zm = 0.5*(z1+z2) + z0 = 0.5*(z2+z3) + zp = 0.5*(z3+z4) - integer, dimension(nCells+1), intent(in) :: nEdgesOnCell - integer, dimension(maxEdges,nCells+1), intent(in) :: EdgesOnCell + tend_u_euler(k,iEdge) = tend_u_euler(k,iEdge) + rho_edge(k,iEdge) * v_mom_eddy_visc2*( & + (u(k+1,iEdge)-u(k ,iEdge))/(zp-z0) & + -(u(k ,iEdge)-u(k-1,iEdge))/(z0-zm) )/(0.5*(zp-zm)) + end do + end do - integer, dimension(2,nEdges+1), intent(in) :: cellsOnEdge + else ! idealized cases where we mix on the perturbation from the initial 1-D state - real (kind=RKIND), intent(in) :: h_theta_eddy_visc4 - real (kind=RKIND), intent(in) :: v_theta_eddy_visc2 - real (kind=RKIND), intent(in) :: prandtl_inv + do iEdge=edgeSolveStart,edgeSolveEnd - real (kind=RKIND), dimension(maxEdges,nCells+1), intent(in) :: edgesOnCell_sign - real (kind=RKIND), dimension(nCells+1), intent(in) :: invAreaCell - real (kind=RKIND), dimension(nEdges+1), intent(in) :: dvEdge - real (kind=RKIND), dimension(nEdges+1), intent(in) :: invDcEdge + cell1 = cellsOnEdge(1,iEdge) + cell2 = cellsOnEdge(2,iEdge) + + do k=1,nVertLevels + u_mix(k) = u(k,iEdge) - u_init(k) * cos( angleEdge(iEdge) ) & + - v_init(k) * sin( angleEdge(iEdge) ) + end do + + do k=2,nVertLevels-1 + + z1 = 0.5*(zgrid(k-1,cell1)+zgrid(k-1,cell2)) + z2 = 0.5*(zgrid(k ,cell1)+zgrid(k ,cell2)) + z3 = 0.5*(zgrid(k+1,cell1)+zgrid(k+1,cell2)) + z4 = 0.5*(zgrid(k+2,cell1)+zgrid(k+2,cell2)) + + zm = 0.5*(z1+z2) + z0 = 0.5*(z2+z3) + zp = 0.5*(z3+z4) + + tend_u_euler(k,iEdge) = tend_u_euler(k,iEdge) + rho_edge(k,iEdge) * 
v_mom_eddy_visc2*( & + (u_mix(k+1)-u_mix(k ))/(zp-z0) & + -(u_mix(k )-u_mix(k-1))/(z0-zm) )/(0.5*(zp-zm)) + end do + end do + + end if ! mix perturbation state + + end if ! vertical mixing of horizontal momentum for fixe viscosity + + if ( config_les_model == "3d_smagorinsky") then + + do iEdge=edgeSolveStart,edgeSolveEnd + + cell1 = cellsOnEdge(1,iEdge) + cell2 = cellsOnEdge(2,iEdge) + turb_vflux(nVertlevels+1) = 0. ! no turbulent flux out of the domain + turb_vflux(1) = 0. ! lower bc flux handled where ??? + + do k=2,nVertLevels + rho_k_cell1 = fzm(k)*rho_zz(k ,cell1)*zz(k ,cell1)*eddy_visc_vert(k ,cell1) & + +fzp(k)*rho_zz(k-1,cell1)*zz(k-1,cell1)*eddy_visc_vert(k-1,cell1) + rho_k_cell2 = fzm(k)*rho_zz(k ,cell2)*zz(k ,cell2)*eddy_visc_vert(k ,cell2) & + +fzp(k)*rho_zz(k-1,cell2)*zz(k-1,cell2)*eddy_visc_vert(k-1,cell2) + rho_k_at_w = 0.5*(rho_k_cell1+rho_k_cell2) + + zz_cell1 = fzm(k)*zz(k,cell1)+fzp(k)*zz(k-1,cell1) + zz_cell2 = fzm(k)*zz(k,cell2)+fzp(k)*zz(k-1,cell2) + zz_at_w = 0.5*(zz_cell1+zz_cell2) + turb_vflux(k) = rho_k_at_w*zz_at_w*rdzu(k)*(u(k,iEdge)-u(k-1,iEdge)) + end do + + do k=1,nVertLevels + tend_u_euler(k,iEdge) = tend_u_euler(k,iEdge) + rdzw(k)*(turb_vflux(k+1)-turb_vflux(k)) + end do + + end do + + end if + + + end subroutine u_dissipation_3d + +!------------------------ + + subroutine w_dissipation( cellStart, cellEnd, cellSolveStart, cellSolveEnd, & + nCells, nEdges, & + nEdgesOnCell, edgesOnCell, cellsOnEdge, edgesOnCell_sign, & + invAreaCell, invDcEdge, dvEdge, & + meshScalingDel2, meshScalingDel4, & + rdzw, rdzu, & + v_mom_eddy_visc2, h_mom_eddy_visc4, & + delsq_w, & + w, rho_edge, kdiff, rho_zz, & + tend_w_euler ) + + + use mpas_atm_dimensions ! 
pull nVertLevels and maxEdges from here + + implicit none + + integer, intent(in) :: cellStart, cellEnd + integer, intent(in) :: cellSolveStart, cellSolveEnd + integer, intent(in) :: nCells, nEdges + + integer, dimension(nCells+1), intent(in) :: nEdgesOnCell + integer, dimension(maxEdges,nCells+1), intent(in) :: EdgesOnCell + + integer, dimension(2,nEdges+1), intent(in) :: cellsOnEdge + + real (kind=RKIND), intent(in) :: h_mom_eddy_visc4 + real (kind=RKIND), intent(in) :: v_mom_eddy_visc2 + + real (kind=RKIND), dimension(maxEdges,nCells+1), intent(in) :: edgesOnCell_sign + real (kind=RKIND), dimension(nCells+1), intent(in) :: invAreaCell + real (kind=RKIND), dimension(nEdges+1), intent(in) :: dvEdge + real (kind=RKIND), dimension(nEdges+1), intent(in) :: invDcEdge + real (kind=RKIND), dimension(nEdges+1), intent(in) :: meshScalingDel2 + real (kind=RKIND), dimension(nEdges+1), intent(in) :: meshScalingDel4 + real (kind=RKIND), dimension(nVertLevels), intent(in) :: rdzw + real (kind=RKIND), dimension(nVertLevels), intent(in) :: rdzu + + real (kind=RKIND), dimension(nVertLevels+1,nCells+1), intent(in) :: w + real (kind=RKIND), dimension(nVertLevels,nCells+1), intent(in) :: kdiff + real (kind=RKIND), dimension(nVertLevels,nCells+1), intent(in) :: rho_zz + real (kind=RKIND), dimension(nVertLevels,nEdges+1), intent(in) :: rho_edge + + real (kind=RKIND), dimension(nVertLevels+1,nCells+1), intent(inout) :: tend_w_euler + + ! storage passed in from calling routine + real (kind=RKIND), dimension(nVertLevels,nCells+1) :: delsq_w + + ! local variables + + integer :: cell1, cell2, iEdge, iCell, i, k + real (kind=RKIND) :: r_areaCell, edge_sign, w_turb_flux + +! !OMP BARRIER why is this openmp barrier here??? + + ! del^4 horizontal filter. We compute this as del^2 ( del^2 (w) ). + ! + ! First, storage to hold the result from the first del^2 computation. + ! we copied code from the theta mixing, hence the theta* names. 
+ + + delsq_w(1:nVertLevels,cellStart:cellEnd) = 0.0 + + do iCell=cellStart,cellEnd + tend_w_euler(1:nVertLevels+1,iCell) = 0.0 + r_areaCell = invAreaCell(iCell) + do i=1,nEdgesOnCell(iCell) + iEdge = edgesOnCell(i,iCell) + + edge_sign = 0.5 * r_areaCell*edgesOnCell_sign(i,iCell) * dvEdge(iEdge) * invDcEdge(iEdge) + + cell1 = cellsOnEdge(1,iEdge) + cell2 = cellsOnEdge(2,iEdge) + +!DIR$ IVDEP + do k=2,nVertLevels + + w_turb_flux = edge_sign*(rho_edge(k,iEdge)+rho_edge(k-1,iEdge))*(w(k,cell2) - w(k,cell1)) + delsq_w(k,iCell) = delsq_w(k,iCell) + w_turb_flux + w_turb_flux = w_turb_flux * meshScalingDel2(iEdge) * 0.25 * & + (kdiff(k,cell1)+kdiff(k,cell2)+kdiff(k-1,cell1)+kdiff(k-1,cell2)) + tend_w_euler(k,iCell) = tend_w_euler(k,iCell) + w_turb_flux + end do + end do + end do + +!$OMP BARRIER + + if (h_mom_eddy_visc4 > 0.0) then ! 4th order mixing is active + + do iCell=cellSolveStart,cellSolveEnd ! Technically updating fewer cells than before... + r_areaCell = h_mom_eddy_visc4 * invAreaCell(iCell) + do i=1,nEdgesOnCell(iCell) + iEdge = edgesOnCell(i,iCell) + cell1 = cellsOnEdge(1,iEdge) + cell2 = cellsOnEdge(2,iEdge) + + edge_sign = meshScalingDel4(iEdge)*r_areaCell*dvEdge(iEdge)*edgesOnCell_sign(i,iCell) * invDcEdge(iEdge) + + do k=2,nVertLevels + tend_w_euler(k,iCell) = tend_w_euler(k,iCell) - edge_sign * (delsq_w(k,cell2) - delsq_w(k,cell1)) + end do + + end do + end do + + end if ! 4th order mixing is active + + if ( v_mom_eddy_visc2 > 0.0 ) then ! 
vertical mixing + + do iCell=cellSolveStart,cellSolveEnd +!DIR$ IVDEP + do k=2,nVertLevels + tend_w_euler(k,iCell) = tend_w_euler(k,iCell) + v_mom_eddy_visc2*0.5*(rho_zz(k,iCell)+rho_zz(k-1,iCell))*( & + (w(k+1,iCell)-w(k ,iCell))*rdzw(k) & + -(w(k ,iCell)-w(k-1,iCell))*rdzw(k-1) )*rdzu(k) + end do + end do + + end if + + end subroutine w_dissipation + +!------------------------ + + subroutine w_dissipation_3d( cellStart, cellEnd, cellSolveStart, cellSolveEnd, & + nCells, nEdges, & + nEdgesOnCell, edgesOnCell, cellsOnEdge, edgesOnCell_sign, & + invAreaCell, invDcEdge, dvEdge, & + meshScalingDel2, meshScalingDel4, & + rdzw, rdzu, & + v_mom_eddy_visc2, h_mom_eddy_visc4, & + delsq_w, & + w, rho_edge, rho_zz, zz, & + eddy_visc_horz, eddy_visc_vert, & + config_les_model, & + tend_w_euler ) + + + ! 3D w dissipation using the 3D smagorinsky eddy viscosities. + ! This routine also includes the simpler mixing models, and the 4th-order horizontal filter + + use mpas_atm_dimensions ! pull nVertLevels and maxEdges from here + + implicit none + + integer, intent(in) :: cellStart, cellEnd + integer, intent(in) :: cellSolveStart, cellSolveEnd + integer, intent(in) :: nCells, nEdges + + integer, dimension(nCells+1), intent(in) :: nEdgesOnCell + integer, dimension(maxEdges,nCells+1), intent(in) :: EdgesOnCell + + integer, dimension(2,nEdges+1), intent(in) :: cellsOnEdge + + character (len=StrKIND) :: config_les_model + + real (kind=RKIND), intent(in) :: h_mom_eddy_visc4 + real (kind=RKIND), intent(in) :: v_mom_eddy_visc2 + + real (kind=RKIND), dimension(maxEdges,nCells+1), intent(in) :: edgesOnCell_sign + real (kind=RKIND), dimension(nCells+1), intent(in) :: invAreaCell + real (kind=RKIND), dimension(nEdges+1), intent(in) :: dvEdge + real (kind=RKIND), dimension(nEdges+1), intent(in) :: invDcEdge + real (kind=RKIND), dimension(nEdges+1), intent(in) :: meshScalingDel2 + real (kind=RKIND), dimension(nEdges+1), intent(in) :: meshScalingDel4 + real (kind=RKIND), dimension(nVertLevels), 
intent(in) :: rdzw + real (kind=RKIND), dimension(nVertLevels), intent(in) :: rdzu + + real (kind=RKIND), dimension(nVertLevels+1,nCells+1), intent(in) :: w + real (kind=RKIND), dimension(nVertLevels,nCells+1), intent(in) :: eddy_visc_horz + real (kind=RKIND), dimension(nVertLevels,nCells+1), intent(in) :: eddy_visc_vert + real (kind=RKIND), dimension(nVertLevels,nCells+1), intent(in) :: rho_zz + real (kind=RKIND), dimension(nVertLevels,nCells+1), intent(in) :: zz + real (kind=RKIND), dimension(nVertLevels,nEdges+1), intent(in) :: rho_edge + + real (kind=RKIND), dimension(nVertLevels+1,nCells+1), intent(inout) :: tend_w_euler + + ! storage passed in from calling routine + real (kind=RKIND), dimension(nVertLevels,nCells+1) :: delsq_w + real (kind=RKIND), dimension(nVertLevels+1) :: turb_vflux + + ! local variables + + integer :: cell1, cell2, iEdge, iCell, i, k + real (kind=RKIND) :: r_areaCell, edge_sign, w_turb_flux + +! !OMP BARRIER why is this openmp barrier here??? + + ! del^4 horizontal filter. We compute this as del^2 ( del^2 (w) ). + ! + ! First, storage to hold the result from the first del^2 computation. + ! we copied code from the theta mixing, hence the theta* names. 
+ + + delsq_w(1:nVertLevels,cellStart:cellEnd) = 0.0 + + do iCell=cellStart,cellEnd + tend_w_euler(1:nVertLevels+1,iCell) = 0.0 + r_areaCell = invAreaCell(iCell) + do i=1,nEdgesOnCell(iCell) + iEdge = edgesOnCell(i,iCell) + + edge_sign = 0.5 * r_areaCell*edgesOnCell_sign(i,iCell) * dvEdge(iEdge) * invDcEdge(iEdge) + + cell1 = cellsOnEdge(1,iEdge) + cell2 = cellsOnEdge(2,iEdge) + +!DIR$ IVDEP + do k=2,nVertLevels + + w_turb_flux = edge_sign*(rho_edge(k,iEdge)+rho_edge(k-1,iEdge))*(w(k,cell2) - w(k,cell1)) + delsq_w(k,iCell) = delsq_w(k,iCell) + w_turb_flux + w_turb_flux = w_turb_flux * meshScalingDel2(iEdge) * 0.25 * & + ( eddy_visc_horz(k ,cell1)+eddy_visc_horz(k ,cell2) & + +eddy_visc_horz(k-1,cell1)+eddy_visc_horz(k-1,cell2) ) + tend_w_euler(k,iCell) = tend_w_euler(k,iCell) + w_turb_flux + end do + end do + end do + +!$OMP BARRIER + + if (h_mom_eddy_visc4 > 0.0) then ! 4th order mixing is active + + do iCell=cellSolveStart,cellSolveEnd ! Technically updating fewer cells than before... + r_areaCell = h_mom_eddy_visc4 * invAreaCell(iCell) + do i=1,nEdgesOnCell(iCell) + iEdge = edgesOnCell(i,iCell) + cell1 = cellsOnEdge(1,iEdge) + cell2 = cellsOnEdge(2,iEdge) + + edge_sign = meshScalingDel4(iEdge)*r_areaCell*dvEdge(iEdge)*edgesOnCell_sign(i,iCell) * invDcEdge(iEdge) + + do k=2,nVertLevels + tend_w_euler(k,iCell) = tend_w_euler(k,iCell) - edge_sign * (delsq_w(k,cell2) - delsq_w(k,cell1)) + end do + + end do + end do + + end if ! 4th order mixing is active + + if ( v_mom_eddy_visc2 > 0.0 ) then ! vertical mixing + + do iCell=cellSolveStart,cellSolveEnd +!DIR$ IVDEP + do k=2,nVertLevels + tend_w_euler(k,iCell) = tend_w_euler(k,iCell) + v_mom_eddy_visc2*0.5*(rho_zz(k,iCell)+rho_zz(k-1,iCell))*( & + (w(k+1,iCell)-w(k ,iCell))*rdzw(k) & + -(w(k ,iCell)-w(k-1,iCell))*rdzw(k-1) )*rdzu(k) + end do + end do + + end if + + if ( config_les_model == "3d_smagorinsky") then + + do iCell = cellSolveStart,cellSolveEnd ! vertical mixing for each column + ! 
compute turbulent fluxes + do k=1,nVertLevels + turb_vflux(k) = rho_zz(k,iCell)*eddy_visc_vert(k,iCell)*zz(k,iCell)*zz(k,iCell) & + *rdzu(k)*(w(k+1,iCell)-w(k,iCell)) + end do + do k=2,nVertLevels + tend_w_euler(k,iCell) = tend_w_euler(k,iCell) & + + rdzw(k)*(turb_vflux(k+1)-turb_vflux(k)) + end do + end do + + end if + + end subroutine w_dissipation_3d + +!----------------------------------------------------- + + subroutine theta_dissipation( cellStart, cellEnd, cellSolveStart, cellSolveEnd, & + nCells, nEdges, & + nEdgesOnCell, edgesOnCell, cellsOnEdge, edgesOnCell_sign, & + invAreaCell, invDcEdge, dvEdge, & + meshScalingDel2, meshScalingDel4, & + config_mix_full, t_init, zgrid, & + rdzw, rdzu, & + v_theta_eddy_visc2, h_theta_eddy_visc4, prandtl_inv, & + delsq_theta, & + theta_m, rho_edge, kdiff, rho_zz, & + tend_theta_euler ) + + + use mpas_atm_dimensions ! pull nVertLevels and maxEdges from here + implicit none + + integer, intent(in) :: cellStart, cellEnd + integer, intent(in) :: cellSolveStart, cellSolveEnd + integer, intent(in) :: nCells, nEdges + + logical, intent(in) :: config_mix_full + + integer, dimension(nCells+1), intent(in) :: nEdgesOnCell + integer, dimension(maxEdges,nCells+1), intent(in) :: EdgesOnCell + + integer, dimension(2,nEdges+1), intent(in) :: cellsOnEdge + + real (kind=RKIND), intent(in) :: h_theta_eddy_visc4 + real (kind=RKIND), intent(in) :: v_theta_eddy_visc2 + real (kind=RKIND), intent(in) :: prandtl_inv + + real (kind=RKIND), dimension(maxEdges,nCells+1), intent(in) :: edgesOnCell_sign + real (kind=RKIND), dimension(nCells+1), intent(in) :: invAreaCell + real (kind=RKIND), dimension(nEdges+1), intent(in) :: dvEdge + real (kind=RKIND), dimension(nEdges+1), intent(in) :: invDcEdge real (kind=RKIND), dimension(nEdges+1), intent(in) :: meshScalingDel2 real (kind=RKIND), dimension(nEdges+1), intent(in) :: meshScalingDel4 real (kind=RKIND), dimension(nVertLevels), intent(in) :: rdzw @@ -636,4 +1184,194 @@ subroutine theta_dissipation( 
cellStart, cellEnd, cellSolveStart, cellSolveEnd, end subroutine theta_dissipation +!----------------------------------------------------- + + subroutine theta_dissipation_3d( cellStart, cellEnd, cellSolveStart, cellSolveEnd, & + nCells, nEdges, & + nEdgesOnCell, edgesOnCell, cellsOnEdge, edgesOnCell_sign, & + invAreaCell, invDcEdge, dvEdge, & + meshScalingDel2, meshScalingDel4, & + config_mix_full, t_init, zgrid, & + rdzw, rdzu, fzm, fzp, & + v_theta_eddy_visc2, h_theta_eddy_visc4, prandtl_inv, & + delsq_theta, & + theta_m, rho_edge, rho_zz, zz, & + eddy_visc_horz, eddy_visc_vert, & + config_les_model, & + tend_theta_euler ) + + + ! 3D theta_m dissipation using the 3D smagorinsky eddy viscosities. + ! This routine also includes the simpler mixing models, and the 4th-order horizontal filter + + use mpas_atm_dimensions ! pull nVertLevels and maxEdges from here + implicit none + + integer, intent(in) :: cellStart, cellEnd + integer, intent(in) :: cellSolveStart, cellSolveEnd + integer, intent(in) :: nCells, nEdges + + logical, intent(in) :: config_mix_full + + character (len=StrKIND) :: config_les_model + + integer, dimension(nCells+1), intent(in) :: nEdgesOnCell + integer, dimension(maxEdges,nCells+1), intent(in) :: EdgesOnCell + + integer, dimension(2,nEdges+1), intent(in) :: cellsOnEdge + + real (kind=RKIND), intent(in) :: h_theta_eddy_visc4 + real (kind=RKIND), intent(in) :: v_theta_eddy_visc2 + real (kind=RKIND), intent(in) :: prandtl_inv + + real (kind=RKIND), dimension(maxEdges,nCells+1), intent(in) :: edgesOnCell_sign + real (kind=RKIND), dimension(nCells+1), intent(in) :: invAreaCell + real (kind=RKIND), dimension(nEdges+1), intent(in) :: dvEdge + real (kind=RKIND), dimension(nEdges+1), intent(in) :: invDcEdge + real (kind=RKIND), dimension(nEdges+1), intent(in) :: meshScalingDel2 + real (kind=RKIND), dimension(nEdges+1), intent(in) :: meshScalingDel4 + real (kind=RKIND), dimension(nVertLevels), intent(in) :: rdzw + real (kind=RKIND), dimension(nVertLevels), 
intent(in) :: rdzu + real (kind=RKIND), dimension(nVertLevels), intent(in) :: fzm + real (kind=RKIND), dimension(nVertLevels), intent(in) :: fzp + real (kind=RKIND), dimension(nVertLevels+1, nCells+1), intent(in) :: zgrid + real (kind=RKIND), dimension(nVertLevels,nCells+1), intent(in) :: zz + + real (kind=RKIND), dimension(nVertLevels,nCells+1), intent(in) :: t_init + + real (kind=RKIND), dimension(nVertLevels,nCells+1), intent(in) :: theta_m + real (kind=RKIND), dimension(nVertLevels,nCells+1), intent(in) :: eddy_visc_horz + real (kind=RKIND), dimension(nVertLevels,nCells+1), intent(in) :: eddy_visc_vert + real (kind=RKIND), dimension(nVertLevels,nCells+1), intent(in) :: rho_zz + real (kind=RKIND), dimension(nVertLevels,nEdges+1), intent(in) :: rho_edge + + real (kind=RKIND), dimension(nVertLevels,nCells+1), intent(inout) :: tend_theta_euler + + ! storage passed in from calling routine + real (kind=RKIND), dimension(nVertLevels,nCells+1) :: delsq_theta + + ! local variables + integer :: cell1, cell2, iEdge, iCell, i, k + real (kind=RKIND) :: r_areaCell, edge_sign, theta_turb_flux, pr_scale + real (kind=RKIND) :: z1, z2, z3, z4, zm, z0, zp + real (kind=RKIND), dimension(nVertLevels+1) :: turb_vflux + real (kind=RKIND) :: rho_k_at_w, zz_at_w + + delsq_theta(1:nVertLevels,cellStart:cellEnd) = 0.0 + + do iCell=cellStart,cellEnd + tend_theta_euler(1:nVertLevels,iCell) = 0.0 + r_areaCell = invAreaCell(iCell) + do i=1,nEdgesOnCell(iCell) + iEdge = edgesOnCell(i,iCell) + edge_sign = r_areaCell*edgesOnCell_sign(i,iCell) * dvEdge(iEdge) * invDcEdge(iEdge) + pr_scale = prandtl_inv * meshScalingDel2(iEdge) + cell1 = cellsOnEdge(1,iEdge) + cell2 = cellsOnEdge(2,iEdge) +!DIR$ IVDEP + do k=1,nVertLevels + +! we are computing the Smagorinsky filter at more points than needed here so as to pick up the delsq_theta for 4th order filter below. +! This is in conservative form. 
+ + theta_turb_flux = edge_sign*(theta_m(k,cell2) - theta_m(k,cell1))*rho_edge(k,iEdge) + delsq_theta(k,iCell) = delsq_theta(k,iCell) + theta_turb_flux + theta_turb_flux = theta_turb_flux*0.5*(eddy_visc_horz(k,cell1)+eddy_visc_vert(k,cell2)) * pr_scale + tend_theta_euler(k,iCell) = tend_theta_euler(k,iCell) + theta_turb_flux + + end do + end do + end do + +!$OMP BARRIER + + if (h_theta_eddy_visc4 > 0.0) then ! 4th order mixing is active + + do iCell=cellSolveStart,cellSolveEnd ! Technically updating fewer cells than before... + r_areaCell = h_theta_eddy_visc4 * prandtl_inv * invAreaCell(iCell) + do i=1,nEdgesOnCell(iCell) + + iEdge = edgesOnCell(i,iCell) + edge_sign = meshScalingDel4(iEdge)*r_areaCell*dvEdge(iEdge)*edgesOnCell_sign(i,iCell)*invDcEdge(iEdge) + + cell1 = cellsOnEdge(1,iEdge) + cell2 = cellsOnEdge(2,iEdge) + + do k=1,nVertLevels + tend_theta_euler(k,iCell) = tend_theta_euler(k,iCell) - edge_sign*(delsq_theta(k,cell2) - delsq_theta(k,cell1)) + end do + end do + end do + + end if ! 4th order mixing is active + + ! idealized case vertical mixing + + if ( v_theta_eddy_visc2 > 0.0 ) then ! vertical mixing for theta_m + + if (config_mix_full) then + + do iCell = cellSolveStart,cellSolveEnd + do k=2,nVertLevels-1 + z1 = zgrid(k-1,iCell) + z2 = zgrid(k ,iCell) + z3 = zgrid(k+1,iCell) + z4 = zgrid(k+2,iCell) + + zm = 0.5*(z1+z2) + z0 = 0.5*(z2+z3) + zp = 0.5*(z3+z4) + + tend_theta_euler(k,iCell) = tend_theta_euler(k,iCell) + v_theta_eddy_visc2*prandtl_inv*rho_zz(k,iCell)*(& + (theta_m(k+1,iCell)-theta_m(k ,iCell))/(zp-z0) & + -(theta_m(k ,iCell)-theta_m(k-1,iCell))/(z0-zm) )/(0.5*(zp-zm)) + end do + end do + + else ! 
idealized cases where we mix on the perturbation from the initial 1-D state + + do iCell = cellSolveStart,cellSolveEnd + do k=2,nVertLevels-1 + z1 = zgrid(k-1,iCell) + z2 = zgrid(k ,iCell) + z3 = zgrid(k+1,iCell) + z4 = zgrid(k+2,iCell) + + zm = 0.5*(z1+z2) + z0 = 0.5*(z2+z3) + zp = 0.5*(z3+z4) + + tend_theta_euler(k,iCell) = tend_theta_euler(k,iCell) + v_theta_eddy_visc2*prandtl_inv*rho_zz(k,iCell)*(& + ((theta_m(k+1,iCell)-t_init(k+1,iCell))-(theta_m(k ,iCell)-t_init(k,iCell)))/(zp-z0) & + -((theta_m(k ,iCell)-t_init(k,iCell))-(theta_m(k-1,iCell)-t_init(k-1,iCell)))/(z0-zm) )/(0.5*(zp-zm)) + end do + end do + + end if + + end if + + if ( config_les_model == "3d_smagorinsky") then + + do iCell = cellSolveStart,cellSolveEnd ! vertical mixing for each column + ! compute turbulent fluxes + turb_vflux(nVertlevels+1) = 0. ! no turbulent flux out of the domain + turb_vflux(1) = 0. ! lower bc flux handled where ??? + do k=2,nVertLevels + rho_k_at_w = fzm(k)*rho_zz(k ,iCell)*zz(k ,iCell)*eddy_visc_vert(k ,iCell) & + +fzp(k)*rho_zz(k-1,iCell)*zz(k-1,iCell)*eddy_visc_vert(k-1,iCell) + zz_at_w = fzm(k)*zz(k,iCell)+fzp(k)*zz(k-1,iCell) + turb_vflux(k) = rho_k_at_w*zz_at_w*rdzu(k)*(theta_m(k,iCell)-theta_m(k-1,iCell)) + end do + + do k=1,nVertLevels + tend_theta_euler(k,iCell) = tend_theta_euler(k,iCell) & + + rdzw(k)*(turb_vflux(k+1)-turb_vflux(k)) + end do + end do + + end if + + end subroutine theta_dissipation_3d + end module mpas_atm_dissipation_models From 88aa0b2246fba3155c45154583d0db5265a42b60 Mon Sep 17 00:00:00 2001 From: Bill Skamarock Date: Mon, 9 Mar 2020 15:12:55 -0600 Subject: [PATCH 026/146] Added a horizontal eddy_viscosity array to go along with the vertical eddy viscosity array. For use in the LES models. 
--- src/core_atmosphere/Registry.xml | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/core_atmosphere/Registry.xml b/src/core_atmosphere/Registry.xml index ad6a27b4f3..7d56a042c5 100644 --- a/src/core_atmosphere/Registry.xml +++ b/src/core_atmosphere/Registry.xml @@ -1960,7 +1960,10 @@ description="Smagorinsky horizontal eddy viscosity"/> + description="vertical eddy viscosity for les models"/> + + From 05f206c6e516ae2237001a044af9350a39872a9f Mon Sep 17 00:00:00 2001 From: Bill Skamarock Date: Mon, 9 Mar 2020 15:14:22 -0600 Subject: [PATCH 027/146] Bug fixes in the new LES mixing routines. --- .../dynamics/mpas_atm_dissipation_models.F | 47 ++++++++++++------- 1 file changed, 30 insertions(+), 17 deletions(-) diff --git a/src/core_atmosphere/dynamics/mpas_atm_dissipation_models.F b/src/core_atmosphere/dynamics/mpas_atm_dissipation_models.F index 37a9d3aabf..e42d3551f9 100644 --- a/src/core_atmosphere/dynamics/mpas_atm_dissipation_models.F +++ b/src/core_atmosphere/dynamics/mpas_atm_dissipation_models.F @@ -119,6 +119,8 @@ subroutine smagorinsky_3d( eddy_visc_horz, eddy_visc_vert, u, v, uCell, vCell, real (kind=RKIND) :: rdz, def2, pr_inv, wk pr_inv = 1./prandtl + ! testing + pr_inv = 0. 
do iCell = cellStart,cellEnd @@ -136,8 +138,8 @@ subroutine smagorinsky_3d( eddy_visc_horz, eddy_visc_vert, u, v, uCell, vCell, do iEdge=1,nEdgesOnCell(iCell) ie = EdgesOnCell(iEdge,iCell) - cell1 = edgesOnCell(1,ie) - cell2 = edgesOnCell(2,ie) + cell1 = cellsOnEdge(1,ie) + cell2 = cellsOnEdge(2,ie) do k=1,nVertLevels dudx(k) = dudx(k) + deformation_coef_c2(iEdge,iCell)*u(k,ie) & @@ -152,8 +154,8 @@ subroutine smagorinsky_3d( eddy_visc_horz, eddy_visc_vert, u, v, uCell, vCell, do k=1,nVertLevels+1 wk = 0.5*(w(k,cell1)+w(k,cell2)) - dwdx(k) = dwdx(k) + deformation_coef_c(iEdge,iCell)*wk - dwdy(k) = dwdy(k) + deformation_coef_s(iEdge,iCell)*wk + dwdx(k) = dwdx(k) + deformation_coef_c(iEdge,iCell)*wk + dwdy(k) = dwdy(k) + deformation_coef_s(iEdge,iCell)*wk end do end do @@ -165,33 +167,33 @@ subroutine smagorinsky_3d( eddy_visc_horz, eddy_visc_vert, u, v, uCell, vCell, do k=2,nVertLevels-1 rdz = 2./(zgrid(k+2,iCell)+zgrid(k+1,iCell)-zgrid(k,iCell)-zgrid(k-1,iCell)) - dudz(k) = (u(k+1,iCell)-u(k-1,iCell))*rdz - dvdz(k) = (v(k+1,iCell)-v(k-1,iCell))*rdz + dudz(k) = (uCell(k+1,iCell)-uCell(k-1,iCell))*rdz + dvdz(k) = (vCell(k+1,iCell)-vCell(k-1,iCell))*rdz end do k = 1 rdz = 1./(zgrid(k+1,iCell)-zgrid(k,iCell)) - dudz(k) = (u(k+1,iCell)-u(k,iCell))*rdz - dvdz(k) = (v(k+1,iCell)-v(k,iCell))*rdz + dudz(k) = (uCell(k+1,iCell)-uCell(k,iCell))*rdz + dvdz(k) = (vCell(k+1,iCell)-vCell(k,iCell))*rdz k = nVertLevels-1 rdz = 1./(zgrid(k+1,iCell)-zgrid(k,iCell)) - dudz(k+1) = (u(k+1,iCell)-u(k,iCell))*rdz - dvdz(k+1) = (v(k+1,iCell)-v(k,iCell))*rdz + dudz(k+1) = (uCell(k+1,iCell)-uCell(k,iCell))*rdz + dvdz(k+1) = (vCell(k+1,iCell)-vCell(k,iCell))*rdz do k=1, nVertLevels ! here is the 3D Smagorinsky formulation, ! 
followed by imposition of an upper bound on the eddy viscosity - d_11(k) = 2*dudx(k) - d_22(k) = 2*dvdy(k) - d_33(k) = 2*dwdz(k) + d_11(k) = 2.*dudx(k) + d_22(k) = 2.*dvdy(k) + d_33(k) = 2.*dwdz(k) d_12(k) = dudy(k) + dvdx(k) d_13(k) = dwdx(k) + dudz(k) d_23(k) = dwdy(k) + dvdz(k) def2 = 0.5*(d_11(k)**2 + d_22(k)**2 + d_33(k)**2) + d_12(k)**2 + d_13(k)**2 + d_23(k)**2 - eddy_visc_horz(k,iCell) = (c_s * config_len_disp)**2 * max(0.,def2 - pr_inv*bv_freq2(k,iCell)) + eddy_visc_horz(k,iCell) = (c_s * config_len_disp)**2 * sqrt(max(0.,def2 - pr_inv*bv_freq2(k,iCell))) eddy_visc_horz(k,iCell) = min(eddy_visc_horz(k,iCell),(0.01*config_len_disp**2) * invDt) eddy_visc_vert(k,iCell) = eddy_visc_horz(k,iCell) @@ -745,6 +747,11 @@ subroutine u_dissipation_3d( edgeStart, edgeEnd, edgeSolveStart, edgeSolveEnd, v turb_vflux(k) = rho_k_at_w*zz_at_w*rdzu(k)*(u(k,iEdge)-u(k-1,iEdge)) end do + ! test conditions for supercell case + turb_vflux(1) = turb_vflux(2) + turb_vflux(nVertLevels+1) = turb_vflux(nVertLevels) + ! end test conditions + do k=1,nVertLevels tend_u_euler(k,iEdge) = tend_u_euler(k,iEdge) + rdzw(k)*(turb_vflux(k+1)-turb_vflux(k)) end do @@ -1276,7 +1283,7 @@ subroutine theta_dissipation_3d( cellStart, cellEnd, cellSolveStart, cellSolveEn theta_turb_flux = edge_sign*(theta_m(k,cell2) - theta_m(k,cell1))*rho_edge(k,iEdge) delsq_theta(k,iCell) = delsq_theta(k,iCell) + theta_turb_flux - theta_turb_flux = theta_turb_flux*0.5*(eddy_visc_horz(k,cell1)+eddy_visc_vert(k,cell2)) * pr_scale + theta_turb_flux = theta_turb_flux*0.5*(eddy_visc_horz(k,cell1)+eddy_visc_horz(k,cell2)) * pr_scale tend_theta_euler(k,iCell) = tend_theta_euler(k,iCell) + theta_turb_flux end do @@ -1351,7 +1358,8 @@ subroutine theta_dissipation_3d( cellStart, cellEnd, cellSolveStart, cellSolveEn end if - if ( config_les_model == "3d_smagorinsky") then + ! test section + ! if ( config_les_model == "3d_smagorinsky") then do iCell = cellSolveStart,cellSolveEnd ! vertical mixing for each column ! 
compute turbulent fluxes @@ -1364,13 +1372,18 @@ subroutine theta_dissipation_3d( cellStart, cellEnd, cellSolveStart, cellSolveEn turb_vflux(k) = rho_k_at_w*zz_at_w*rdzu(k)*(theta_m(k,iCell)-theta_m(k-1,iCell)) end do + ! test boundary conditions for supercell case + turb_vflux(1) = turb_vflux(2) + turb_vflux(nVertLevels+1) = turb_vflux(nVertLevels) + do k=1,nVertLevels tend_theta_euler(k,iCell) = tend_theta_euler(k,iCell) & + rdzw(k)*(turb_vflux(k+1)-turb_vflux(k)) end do end do - end if + ! end if + ! end test section end subroutine theta_dissipation_3d From 92fbf81b8e16a0b28502e4e227c9159b76267344 Mon Sep 17 00:00:00 2001 From: Michael Duda Date: Thu, 4 Dec 2025 14:15:30 -0700 Subject: [PATCH 028/146] added test calls to the new 3D LES schemes. This is currently hardwired to always call these LES routines, in this case for the sueprcell test case. Code compiles and runs for the supercell test case. --- .../dynamics/mpas_atm_time_integration.F | 126 ++++++++++++++---- 1 file changed, 103 insertions(+), 23 deletions(-) diff --git a/src/core_atmosphere/dynamics/mpas_atm_time_integration.F b/src/core_atmosphere/dynamics/mpas_atm_time_integration.F index 5b5edebf62..9406ac6d2b 100644 --- a/src/core_atmosphere/dynamics/mpas_atm_time_integration.F +++ b/src/core_atmosphere/dynamics/mpas_atm_time_integration.F @@ -4781,6 +4781,8 @@ subroutine atm_compute_dyn_tend(tend, tend_physics, state, diag, mesh, configs, rt_diabatic_tend, tend_theta, tend_w, w, cqw, rb, rr, pp, pressure_b, zz, zxu, cqu, & h_divergence, kdiff, bn2, edgesOnCell_sign, edgesOnVertex_sign, rw_save, ru_save + real (kind=RKIND), dimension(:,:), pointer :: eddy_visc_horz, eddy_visc_vert + real (kind=RKIND), dimension(:,:), pointer :: theta_m_save real (kind=RKIND), dimension(:,:), pointer :: exner @@ -4874,6 +4876,8 @@ subroutine atm_compute_dyn_tend(tend, tend_physics, state, diag, mesh, configs, call mpas_pool_get_array(diag, 'rho_p_save', rr_save) call mpas_pool_get_array(diag, 'v', v) call 
mpas_pool_get_array(diag, 'kdiff', kdiff) + call mpas_pool_get_array(diag, 'eddy_visc_horz', eddy_visc_horz) + call mpas_pool_get_array(diag, 'eddy_visc_vert', eddy_visc_vert) call mpas_pool_get_array(diag, 'bn2', bn2) call mpas_pool_get_array(diag, 'ru', ru) call mpas_pool_get_array(diag, 'ru_save', ru_save) @@ -4978,7 +4982,8 @@ subroutine atm_compute_dyn_tend(tend, tend_physics, state, diag, mesh, configs, weightsOnEdge, zgrid, rho_edge, rho_zz, ru, u, v, tend_u, & divergence, vorticity, ke, pv_edge, theta_m, rw, tend_rho, & rt_diabatic_tend, tend_theta, tend_w, w, cqw, rb, rr, pp, pressure_b, zz, zxu, cqu, & - h_divergence, kdiff, bn2, edgesOnCell_sign, edgesOnVertex_sign, rw_save, ru_save, & + h_divergence, kdiff, bn2, eddy_visc_horz, eddy_visc_vert, & + edgesOnCell_sign, edgesOnVertex_sign, rw_save, ru_save, & theta_m_save, exner, rr_save, scalars, tend_u_euler, tend_w_euler, tend_theta_euler, deriv_two, & cellsOnEdge, verticesOnEdge, edgesOnCell, edgesOnEdge, cellsOnCell, edgesOnVertex, nEdgesOnCell, nEdgesOnEdge, & latCell, latEdge, angleEdge, u_init, v_init, advCellsForEdge, nAdvCellsForEdge, adv_coefs, adv_coefs_3rd, & @@ -5004,7 +5009,8 @@ subroutine atm_compute_dyn_tend_work(nCells, nEdges, nVertices, nVertLevels_dumm weightsOnEdge, zgrid, rho_edge, rho_zz, ru, u, v, tend_u, & divergence, vorticity, ke, pv_edge, theta_m, rw, tend_rho, & rt_diabatic_tend, tend_theta, tend_w, w, cqw, rb, rr, pp, pressure_b, zz, zxu, cqu, & - h_divergence, kdiff, bn2, edgesOnCell_sign, edgesOnVertex_sign, rw_save, ru_save, & + h_divergence, kdiff, bn2, eddy_visc_horz, eddy_visc_vert, & + edgesOnCell_sign, edgesOnVertex_sign, rw_save, ru_save, & theta_m_save, exner, rr_save, scalars, tend_u_euler, tend_w_euler, tend_theta_euler, deriv_two, & cellsOnEdge, verticesOnEdge, edgesOnCell, edgesOnEdge, cellsOnCell, edgesOnVertex, nEdgesOnCell, nEdgesOnEdge, & latCell, latEdge, angleEdge, u_init, v_init, advCellsForEdge, nAdvCellsForEdge, adv_coefs, adv_coefs_3rd, & @@ -5072,6 
+5078,8 @@ subroutine atm_compute_dyn_tend_work(nCells, nEdges, nVertices, nVertLevels_dumm real (kind=RKIND), dimension(nVertLevels,nEdges+1) :: cqu real (kind=RKIND), dimension(nVertLevels,nCells+1) :: h_divergence real (kind=RKIND), dimension(nVertLevels,nCells+1) :: kdiff + real (kind=RKIND), dimension(nVertLevels,nCells+1) :: eddy_visc_horz + real (kind=RKIND), dimension(nVertLevels,nCells+1) :: eddy_visc_vert real (kind=RKIND), dimension(nVertLevels,nCells+1) :: bn2 real (kind=RKIND), dimension(maxEdges,nCells+1) :: edgesOnCell_sign real (kind=RKIND), dimension(vertexDegree,nVertices+1) :: edgesOnVertex_sign @@ -5136,6 +5144,7 @@ subroutine atm_compute_dyn_tend_work(nCells, nEdges, nVertices, nVertLevels_dumm real (kind=RKIND) :: config_visc4_2dsmag real (kind=RKIND) :: config_len_disp real (kind=RKIND) :: config_h_mom_eddy_visc2, config_v_mom_eddy_visc2, config_h_theta_eddy_visc2, config_v_theta_eddy_visc2 + logical, parameter :: test_dissipation_3d=.true. integer, intent(in) :: rk_step real (kind=RKIND), intent(in) :: dt @@ -5262,6 +5271,19 @@ subroutine atm_compute_dyn_tend_work(nCells, nEdges, nVertices, nVertLevels_dumm cellStart, cellEnd, nEdgesOnCell, edgesOnCell, & nCells, nEdges ) +! testing - 3D smagorinsky computation + call calculate_n2( bn2, theta_m, exner, pressure_b, pp, zgrid, scalars, index_qv, index_qc, qtot, & + cellStart, cellEnd, nCells) + + call smagorinsky_3d( eddy_visc_horz, eddy_visc_vert, u, v, ur_cell, vr_cell, & + w, c_s, bn2, zgrid, config_len_disp, & + deformation_coef_c2, deformation_coef_s2, deformation_coef_cs, & + deformation_coef_c, deformation_coef_s, & + invDt, h_mom_eddy_visc4, config_visc4_2dsmag, h_theta_eddy_visc4, & + cellStart, cellEnd, nEdgesOnCell, edgesOnCell, cellsOnEdge, & + nCells, nEdges, nVertLevels, maxEdges ) +! testing + else if(config_horiz_mixing == "2d_fixed") then ! 
testing @@ -5476,6 +5498,23 @@ subroutine atm_compute_dyn_tend_work(nCells, nEdges, nVertices, nVertLevels_dumm !$OMP BARRIER + if(test_dissipation_3d) then + + call u_dissipation_3d( edgeStart, edgeEnd, edgeSolveStart, edgeSolveEnd, vertexStart, vertexEnd, & + cellStart, cellEnd, nCells, nEdges, nVertices, vertexDegree, & + cellsOnEdge, verticesOnEdge, edgesOnCell, edgesOnVertex, & + nEdgesOnCell, edgesOnCell_sign, edgesOnVertex_sign, & + invAreaCell, invAreaTriangle, invDvEdge, invDcEdge, & + angleEdge, dcEdge, dvEdge, meshScalingDel2, meshScalingDel4, & + config_mix_full, h_mom_eddy_visc4, v_mom_eddy_visc2, & + config_del4u_div_factor, zgrid, & + eddy_visc_horz, eddy_visc_vert, zz, rdzu, rdzw, & + fzm, fzp, config_les_model, & + delsq_u, delsq_vorticity, delsq_divergence, & + u, divergence, vorticity, rho_edge, rho_zz, u_init, v_init, tend_u_euler ) + + else + call u_dissipation( edgeStart, edgeEnd, edgeSolveStart, edgeSolveEnd, vertexStart, vertexEnd, & cellStart, cellEnd, nCells, nEdges, nVertices, vertexDegree, & cellsOnEdge, verticesOnEdge, edgesOnCell, edgesOnVertex, & @@ -5487,6 +5526,8 @@ subroutine atm_compute_dyn_tend_work(nCells, nEdges, nVertices, nVertLevels_dumm delsq_u, delsq_vorticity, delsq_divergence, & u, divergence, vorticity, rho_edge, u_init, v_init, tend_u_euler ) + end if + end if ! 
(rk_step 1 test for computing mixing terms) !$OMP BARRIER @@ -5613,16 +5654,35 @@ subroutine atm_compute_dyn_tend_work(nCells, nEdges, nVertices, nVertLevels_dumm if (rk_step == 1) then - call w_dissipation( cellStart, cellEnd, cellSolveStart, cellSolveEnd, & - nCells, nEdges, & - nEdgesOnCell, edgesOnCell, cellsOnEdge, edgesOnCell_sign, & - invAreaCell, invDcEdge, dvEdge, & - meshScalingDel2, meshScalingDel4, & - rdzw, rdzu, & - v_mom_eddy_visc2, h_mom_eddy_visc4, & - delsq_w, & - w, rho_edge, kdiff, rho_zz, & - tend_w_euler ) + if(test_dissipation_3d) then + + call w_dissipation_3d( cellStart, cellEnd, cellSolveStart, cellSolveEnd, & + nCells, nEdges, & + nEdgesOnCell, edgesOnCell, cellsOnEdge, edgesOnCell_sign, & + invAreaCell, invDcEdge, dvEdge, & + meshScalingDel2, meshScalingDel4, & + rdzw, rdzu, & + v_mom_eddy_visc2, h_mom_eddy_visc4, & + delsq_w, & + w, rho_edge, rho_zz, zz, & + eddy_visc_horz, eddy_visc_vert, & + config_les_model, & + tend_w_euler ) + + else + + call w_dissipation( cellStart, cellEnd, cellSolveStart, cellSolveEnd, & + nCells, nEdges, & + nEdgesOnCell, edgesOnCell, cellsOnEdge, edgesOnCell_sign, & + invAreaCell, invDcEdge, dvEdge, & + meshScalingDel2, meshScalingDel4, & + rdzw, rdzu, & + v_mom_eddy_visc2, h_mom_eddy_visc4, & + delsq_w, & + w, rho_edge, kdiff, rho_zz, & + tend_w_euler ) + + end if end if ! 
horizontal mixing for w computed in first rk_step @@ -5767,17 +5827,37 @@ subroutine atm_compute_dyn_tend_work(nCells, nEdges, nVertices, nVertLevels_dumm if (rk_step == 1) then - call theta_dissipation( cellStart, cellEnd, cellSolveStart, cellSolveEnd, & - nCells, nEdges, & - nEdgesOnCell, edgesOnCell, cellsOnEdge, edgesOnCell_sign, & - invAreaCell, invDcEdge, dvEdge, & - meshScalingDel2, meshScalingDel4, & - config_mix_full, t_init, zgrid, & - rdzw, rdzu, & - v_theta_eddy_visc2, h_theta_eddy_visc4, prandtl_inv, & - delsq_theta, & - theta_m, rho_edge, kdiff, rho_zz, & - tend_theta_euler ) + if(test_dissipation_3d) then + + call theta_dissipation_3d( cellStart, cellEnd, cellSolveStart, cellSolveEnd, & + nCells, nEdges, & + nEdgesOnCell, edgesOnCell, cellsOnEdge, edgesOnCell_sign, & + invAreaCell, invDcEdge, dvEdge, & + meshScalingDel2, meshScalingDel4, & + config_mix_full, t_init, zgrid, & + rdzw, rdzu, fzm, fzp, & + v_theta_eddy_visc2, h_theta_eddy_visc4, prandtl_inv, & + delsq_theta, & + theta_m, rho_edge, rho_zz, zz, & + eddy_visc_horz, eddy_visc_vert, & + config_les_model, & + tend_theta_euler ) + + else + + call theta_dissipation( cellStart, cellEnd, cellSolveStart, cellSolveEnd, & + nCells, nEdges, & + nEdgesOnCell, edgesOnCell, cellsOnEdge, edgesOnCell_sign, & + invAreaCell, invDcEdge, dvEdge, & + meshScalingDel2, meshScalingDel4, & + config_mix_full, t_init, zgrid, & + rdzw, rdzu, & + v_theta_eddy_visc2, h_theta_eddy_visc4, prandtl_inv, & + delsq_theta, & + theta_m, rho_edge, kdiff, rho_zz, & + tend_theta_euler ) + + end if end if ! theta mixing calculated first rk_step From 1eaffdfc68ffcbe06fdfb84757b9a07f7d605ed8 Mon Sep 17 00:00:00 2001 From: Bill Skamarock Date: Mon, 9 Mar 2020 15:56:00 -0600 Subject: [PATCH 029/146] Bug fixes for the supercell test case using the 3D Smagorinsky LES model. Fixes are for both configuration and algorithm errors. 
--- .../dynamics/mpas_atm_dissipation_models.F | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/src/core_atmosphere/dynamics/mpas_atm_dissipation_models.F b/src/core_atmosphere/dynamics/mpas_atm_dissipation_models.F index e42d3551f9..bc1db0d6e7 100644 --- a/src/core_atmosphere/dynamics/mpas_atm_dissipation_models.F +++ b/src/core_atmosphere/dynamics/mpas_atm_dissipation_models.F @@ -166,7 +166,7 @@ subroutine smagorinsky_3d( eddy_visc_horz, eddy_visc_vert, u, v, uCell, vCell, end do do k=2,nVertLevels-1 - rdz = 2./(zgrid(k+2,iCell)+zgrid(k+1,iCell)-zgrid(k,iCell)-zgrid(k-1,iCell)) + rdz = 1./(zgrid(k+2,iCell)+zgrid(k+1,iCell)-zgrid(k,iCell)-zgrid(k-1,iCell)) dudz(k) = (uCell(k+1,iCell)-uCell(k-1,iCell))*rdz dvdz(k) = (vCell(k+1,iCell)-vCell(k-1,iCell))*rdz end do @@ -725,7 +725,8 @@ subroutine u_dissipation_3d( edgeStart, edgeEnd, edgeSolveStart, edgeSolveEnd, v end if ! vertical mixing of horizontal momentum for fixe viscosity - if ( config_les_model == "3d_smagorinsky") then + ! test section - always execute this code + ! if ( config_les_model == "3d_smagorinsky") then do iEdge=edgeSolveStart,edgeSolveEnd @@ -758,7 +759,8 @@ subroutine u_dissipation_3d( edgeStart, edgeEnd, edgeSolveStart, edgeSolveEnd, v end do - end if + ! end if + ! end test section end subroutine u_dissipation_3d @@ -1020,7 +1022,8 @@ subroutine w_dissipation_3d( cellStart, cellEnd, cellSolveStart, cellSolveEnd, end if - if ( config_les_model == "3d_smagorinsky") then + ! test section - always execute this code + ! if ( config_les_model == "3d_smagorinsky") then do iCell = cellSolveStart,cellSolveEnd ! vertical mixing for each column ! compute turbulent fluxes @@ -1034,7 +1037,7 @@ subroutine w_dissipation_3d( cellStart, cellEnd, cellSolveStart, cellSolveEnd, end do end do - end if + ! end if end subroutine w_dissipation_3d @@ -1358,7 +1361,7 @@ subroutine theta_dissipation_3d( cellStart, cellEnd, cellSolveStart, cellSolveEn end if - ! test section + ! 
test section - always execute this code ! if ( config_les_model == "3d_smagorinsky") then do iCell = cellSolveStart,cellSolveEnd ! vertical mixing for each column From eb4b3ef1887469a26a2d778622dab02fd15ecea7 Mon Sep 17 00:00:00 2001 From: Bill Skamarock Date: Mon, 9 Mar 2020 16:38:52 -0600 Subject: [PATCH 030/146] switched signs of the vertical turbulent fluxes and vertical flux divergence in the 3D mixing routines to conform to convention. no change in results. --- .../dynamics/mpas_atm_dissipation_models.F | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/src/core_atmosphere/dynamics/mpas_atm_dissipation_models.F b/src/core_atmosphere/dynamics/mpas_atm_dissipation_models.F index bc1db0d6e7..c9c3f3325f 100644 --- a/src/core_atmosphere/dynamics/mpas_atm_dissipation_models.F +++ b/src/core_atmosphere/dynamics/mpas_atm_dissipation_models.F @@ -745,7 +745,7 @@ subroutine u_dissipation_3d( edgeStart, edgeEnd, edgeSolveStart, edgeSolveEnd, v zz_cell1 = fzm(k)*zz(k,cell1)+fzp(k)*zz(k-1,cell1) zz_cell2 = fzm(k)*zz(k,cell2)+fzp(k)*zz(k-1,cell2) zz_at_w = 0.5*(zz_cell1+zz_cell2) - turb_vflux(k) = rho_k_at_w*zz_at_w*rdzu(k)*(u(k,iEdge)-u(k-1,iEdge)) + turb_vflux(k) = - rho_k_at_w*zz_at_w*rdzu(k)*(u(k,iEdge)-u(k-1,iEdge)) end do ! test conditions for supercell case @@ -754,7 +754,7 @@ subroutine u_dissipation_3d( edgeStart, edgeEnd, edgeSolveStart, edgeSolveEnd, v ! end test conditions do k=1,nVertLevels - tend_u_euler(k,iEdge) = tend_u_euler(k,iEdge) + rdzw(k)*(turb_vflux(k+1)-turb_vflux(k)) + tend_u_euler(k,iEdge) = tend_u_euler(k,iEdge) - rdzw(k)*(turb_vflux(k+1)-turb_vflux(k)) end do end do @@ -1028,12 +1028,12 @@ subroutine w_dissipation_3d( cellStart, cellEnd, cellSolveStart, cellSolveEnd, do iCell = cellSolveStart,cellSolveEnd ! vertical mixing for each column ! 
compute turbulent fluxes do k=1,nVertLevels - turb_vflux(k) = rho_zz(k,iCell)*eddy_visc_vert(k,iCell)*zz(k,iCell)*zz(k,iCell) & - *rdzu(k)*(w(k+1,iCell)-w(k,iCell)) + turb_vflux(k) = - rho_zz(k,iCell)*eddy_visc_vert(k,iCell)*zz(k,iCell)*zz(k,iCell) & + *rdzu(k)*(w(k+1,iCell)-w(k,iCell)) end do do k=2,nVertLevels tend_w_euler(k,iCell) = tend_w_euler(k,iCell) & - + rdzw(k)*(turb_vflux(k+1)-turb_vflux(k)) + - rdzw(k)*(turb_vflux(k+1)-turb_vflux(k)) end do end do @@ -1372,7 +1372,7 @@ subroutine theta_dissipation_3d( cellStart, cellEnd, cellSolveStart, cellSolveEn rho_k_at_w = fzm(k)*rho_zz(k ,iCell)*zz(k ,iCell)*eddy_visc_vert(k ,iCell) & +fzp(k)*rho_zz(k-1,iCell)*zz(k-1,iCell)*eddy_visc_vert(k-1,iCell) zz_at_w = fzm(k)*zz(k,iCell)+fzp(k)*zz(k-1,iCell) - turb_vflux(k) = rho_k_at_w*zz_at_w*rdzu(k)*(theta_m(k,iCell)-theta_m(k-1,iCell)) + turb_vflux(k) = - rho_k_at_w*zz_at_w*rdzu(k)*(theta_m(k,iCell)-theta_m(k-1,iCell)) end do ! test boundary conditions for supercell case @@ -1381,7 +1381,7 @@ subroutine theta_dissipation_3d( cellStart, cellEnd, cellSolveStart, cellSolveEn do k=1,nVertLevels tend_theta_euler(k,iCell) = tend_theta_euler(k,iCell) & - + rdzw(k)*(turb_vflux(k+1)-turb_vflux(k)) + - rdzw(k)*(turb_vflux(k+1)-turb_vflux(k)) end do end do From dbd83ea5686cfdaa081aebb05e30e9c30f212952 Mon Sep 17 00:00:00 2001 From: Michael Duda Date: Thu, 4 Dec 2025 14:20:31 -0700 Subject: [PATCH 031/146] Added new lower boundary conditions for LES simulations: (1) constant heat flux (2) simple surface drag cleaned up the driving layer in subroutine atm_compute_dyn_tend. convective boundary layer test results look OK. 
--- .../dynamics/mpas_atm_dissipation_models.F | 98 +++++++++--- .../dynamics/mpas_atm_time_integration.F | 141 ++++++++---------- 2 files changed, 139 insertions(+), 100 deletions(-) diff --git a/src/core_atmosphere/dynamics/mpas_atm_dissipation_models.F b/src/core_atmosphere/dynamics/mpas_atm_dissipation_models.F index c9c3f3325f..dad19bd059 100644 --- a/src/core_atmosphere/dynamics/mpas_atm_dissipation_models.F +++ b/src/core_atmosphere/dynamics/mpas_atm_dissipation_models.F @@ -11,6 +11,12 @@ module mpas_atm_dissipation_models use mpas_kind_types, only : RKIND use mpas_atmphys_constants use mpas_constants + use mpas_log + + logical, parameter :: debug_dissipation = .true. + logical, parameter :: les_test = .true. + real (kind=RKIND), parameter :: tke_heat_flux = 0.24 + real (kind=RKIND), parameter :: tke_drag_coefficient = 0.0013 contains @@ -42,6 +48,9 @@ subroutine smagorinsky_2d( kdiff, u, v, c_s, config_len_disp, integer :: iCell, iEdge, k real (kind=RKIND), dimension(nVertLevels) :: d_11, d_22, d_12, dudx, dudy, dvdx, dvdy + if(debug_dissipation) call mpas_log_write(' begin smagorinsky_2d ') + + do iCell = cellStart,cellEnd dudx(1:nVertLevels) = 0.0 dudy(1:nVertLevels) = 0.0 @@ -74,6 +83,8 @@ subroutine smagorinsky_2d( kdiff, u, v, c_s, config_len_disp, h_mom_eddy_visc4 = config_visc4_2dsmag * config_len_disp**3 h_theta_eddy_visc4 = h_mom_eddy_visc4 + if(debug_dissipation) call mpas_log_write(' exiting smagorinsky_2d ') + end subroutine smagorinsky_2d !--------------------------------------- @@ -107,7 +118,7 @@ subroutine smagorinsky_3d( eddy_visc_horz, eddy_visc_vert, u, v, uCell, vCell, integer, dimension(2,nEdges+1), intent(in) :: cellsOnEdge real (kind=RKIND), dimension(nVertLevels,nCells+1), intent(out) :: eddy_visc_horz, eddy_visc_vert - real (kind=RKIND), intent(in) :: h_mom_eddy_visc4, h_theta_eddy_visc4 + real (kind=RKIND), intent(out) :: h_mom_eddy_visc4, h_theta_eddy_visc4 ! 
local variables @@ -118,9 +129,9 @@ subroutine smagorinsky_3d( eddy_visc_horz, eddy_visc_vert, u, v, uCell, vCell, real (kind=RKIND), dimension(nVertLevels) :: dudz, dvdz, dwdz real (kind=RKIND) :: rdz, def2, pr_inv, wk + if(debug_dissipation) call mpas_log_write(' begin smagorinsky_3d ') + pr_inv = 1./prandtl - ! testing - pr_inv = 0. do iCell = cellStart,cellEnd @@ -201,6 +212,13 @@ subroutine smagorinsky_3d( eddy_visc_horz, eddy_visc_vert, u, v, uCell, vCell, end do ! loop over all owned cells (columns) + ! set up coefficients for 4th-order horizontal background filter + + h_mom_eddy_visc4 = config_visc4_2dsmag * config_len_disp**3 + h_theta_eddy_visc4 = h_mom_eddy_visc4 + + if(debug_dissipation) call mpas_log_write(' exiting smagorinsky_3d ') + end subroutine smagorinsky_3d !--------------------------------------- @@ -217,11 +235,12 @@ subroutine calculate_n2( bn2, theta_m, exner, pressure_b, pp, zgrid, scalars, in real (kind=RKIND), dimension(nVertLevels+1,nCells+1), intent(in) :: zgrid real (kind=RKIND), dimension(num_scalars,nVertLevels,nCells+1), intent(in) :: scalars ! local - real (kind=RKIND) :: dz, esw, qc_cr + real (kind=RKIND) :: dz, esw + real (kind=RKIND), parameter :: qc_cr = 0.00001 ! in kg/kg real (kind=RKIND), dimension(nVertLevels) :: theta, qvsw, temp, coefa - qc_cr = 0.00001 ! 
in kg/kg - + if(debug_dissipation) call mpas_log_write(' begin BV frequency calculations ') + do iCell = cellStart,cellEnd !DIR$ IVDEP do k=1, nVertLevels @@ -260,6 +279,8 @@ subroutine calculate_n2( bn2, theta_m, exner, pressure_b, pp, zgrid, scalars, in end do + if(debug_dissipation) call mpas_log_write(' exiting BV frequency calculations ') + end subroutine calculate_n2 !--------------------------------------- @@ -500,7 +521,7 @@ subroutine u_dissipation_3d( edgeStart, edgeEnd, edgeSolveStart, edgeSolveEnd, v eddy_visc_horz, eddy_visc_vert, zz, rdzu, rdzw, & fzm, fzp, config_les_model, & delsq_u, delsq_vorticity, delsq_divergence, & - u, divergence, vorticity, rho_edge, rho_zz, u_init, v_init, tend_u_euler ) + u, v, divergence, vorticity, rho_edge, rho_zz, u_init, v_init, tend_u_euler ) use mpas_atm_dimensions ! pull nVertLevels and maxEdges from here @@ -538,6 +559,7 @@ subroutine u_dissipation_3d( edgeStart, edgeEnd, edgeSolveStart, edgeSolveEnd, v real (kind=RKIND), dimension(nVertLevels+1,nCells+1), intent(in) :: zgrid real (kind=RKIND), dimension(nVertLevels,nEdges+1), intent(in) :: u + real (kind=RKIND), dimension(nVertLevels,nEdges+1), intent(in) :: v real (kind=RKIND), dimension(nVertLevels,nCells+1), intent(in) :: divergence real (kind=RKIND), dimension(nVertLevels,nVertices+1), intent(in) :: vorticity real (kind=RKIND), dimension(nVertLevels,nEdges+1), intent(in) :: rho_edge @@ -571,6 +593,15 @@ subroutine u_dissipation_3d( edgeStart, edgeEnd, edgeSolveStart, edgeSolveEnd, v real (kind=RKIND) :: rho_k_cell1, rho_k_cell2, rho_k_at_w real (kind=RKIND) :: zz_cell1, zz_cell2, zz_at_w + real (kind=RKIND) :: velocity_magnitude + + if(debug_dissipation) then + call mpas_log_write(' begin u_dissipation_3d ') + call mpas_log_write(' les model is '//trim(config_les_model)) + call mpas_log_write(' 4th order hyperviscosity is $r ', realArgs=(/h_mom_eddy_visc4/)) + call mpas_log_write(' 4th order divergence factor is $r ', realArgs=(/config_del4u_div_factor/)) + 
end if + !$OMP BARRIER ! del^4 horizontal filter. We compute this as del^2 ( del^2 (u) ). @@ -725,8 +756,7 @@ subroutine u_dissipation_3d( edgeStart, edgeEnd, edgeSolveStart, edgeSolveEnd, v end if ! vertical mixing of horizontal momentum for fixe viscosity - ! test section - always execute this code - ! if ( config_les_model == "3d_smagorinsky") then + if ( config_les_model == "3d_smagorinsky") then do iEdge=edgeSolveStart,edgeSolveEnd @@ -748,10 +778,17 @@ subroutine u_dissipation_3d( edgeStart, edgeEnd, edgeSolveStart, edgeSolveEnd, v turb_vflux(k) = - rho_k_at_w*zz_at_w*rdzu(k)*(u(k,iEdge)-u(k-1,iEdge)) end do + if( les_test ) then + velocity_magnitude = sqrt(u(1,iEdge)**2 + v(1,iEdge)**2) + turb_vflux(1) = -rho_edge(1,iEdge)*tke_drag_coefficient*u(1,iEdge)*velocity_magnitude + turb_vflux(nVertLevels+1) = turb_vflux(nVertLevels) + ! end test conditions + else ! test conditions for supercell case turb_vflux(1) = turb_vflux(2) turb_vflux(nVertLevels+1) = turb_vflux(nVertLevels) ! end test conditions + end if do k=1,nVertLevels tend_u_euler(k,iEdge) = tend_u_euler(k,iEdge) - rdzw(k)*(turb_vflux(k+1)-turb_vflux(k)) @@ -759,9 +796,9 @@ subroutine u_dissipation_3d( edgeStart, edgeEnd, edgeSolveStart, edgeSolveEnd, v end do - ! end if - ! end test section + end if + if(debug_dissipation) call mpas_log_write(' exiting u_dissipation_3d ') end subroutine u_dissipation_3d @@ -960,6 +997,11 @@ subroutine w_dissipation_3d( cellStart, cellEnd, cellSolveStart, cellSolveEnd, ! First, storage to hold the result from the first del^2 computation. ! we copied code from the theta mixing, hence the theta* names. 
+ if(debug_dissipation) then + call mpas_log_write(' begin w_dissipation_3d ') + call mpas_log_write(' les model is '//trim(config_les_model)) + call mpas_log_write(' 4th order hyperviscosity is $r ', realArgs=(/h_mom_eddy_visc4/)) + end if delsq_w(1:nVertLevels,cellStart:cellEnd) = 0.0 @@ -1022,8 +1064,7 @@ subroutine w_dissipation_3d( cellStart, cellEnd, cellSolveStart, cellSolveEnd, end if - ! test section - always execute this code - ! if ( config_les_model == "3d_smagorinsky") then + if ( config_les_model == "3d_smagorinsky") then do iCell = cellSolveStart,cellSolveEnd ! vertical mixing for each column ! compute turbulent fluxes @@ -1037,7 +1078,9 @@ subroutine w_dissipation_3d( cellStart, cellEnd, cellSolveStart, cellSolveEnd, end do end do - ! end if + end if + + if(debug_dissipation) call mpas_log_write(' exiting w_dissipation_3d ') end subroutine w_dissipation_3d @@ -1267,6 +1310,12 @@ subroutine theta_dissipation_3d( cellStart, cellEnd, cellSolveStart, cellSolveEn real (kind=RKIND), dimension(nVertLevels+1) :: turb_vflux real (kind=RKIND) :: rho_k_at_w, zz_at_w + if(debug_dissipation) then + call mpas_log_write(' begin theta_dissipation_3d ') + call mpas_log_write(' les model is '//trim(config_les_model)) + call mpas_log_write(' 4th order hyperviscosity is $r ', realArgs=(/h_theta_eddy_visc4/)) + end if + delsq_theta(1:nVertLevels,cellStart:cellEnd) = 0.0 do iCell=cellStart,cellEnd @@ -1361,8 +1410,7 @@ subroutine theta_dissipation_3d( cellStart, cellEnd, cellSolveStart, cellSolveEn end if - ! test section - always execute this code - ! if ( config_les_model == "3d_smagorinsky") then + if ( config_les_model == "3d_smagorinsky") then do iCell = cellSolveStart,cellSolveEnd ! vertical mixing for each column ! compute turbulent fluxes @@ -1375,9 +1423,16 @@ subroutine theta_dissipation_3d( cellStart, cellEnd, cellSolveStart, cellSolveEn turb_vflux(k) = - rho_k_at_w*zz_at_w*rdzu(k)*(theta_m(k,iCell)-theta_m(k-1,iCell)) end do - ! 
test boundary conditions for supercell case - turb_vflux(1) = turb_vflux(2) - turb_vflux(nVertLevels+1) = turb_vflux(nVertLevels) + ! test boundary conditions for supercell and les test cases + + if( les_test ) then + turb_vflux(1) = tke_heat_flux*rho_zz(1,iCell) ! this is correct for DRY CASE ONLY + turb_vflux(nVertLevels+1) = turb_vflux(nVertLevels) + else + turb_vflux(1) = turb_vflux(2) + turb_vflux(nVertLevels+1) = turb_vflux(nVertLevels) + end if + do k=1,nVertLevels tend_theta_euler(k,iCell) = tend_theta_euler(k,iCell) & @@ -1385,8 +1440,9 @@ subroutine theta_dissipation_3d( cellStart, cellEnd, cellSolveStart, cellSolveEn end do end do - ! end if - ! end test section + end if + + if(debug_dissipation) call mpas_log_write(' exiting theta_dissipation_3d ') end subroutine theta_dissipation_3d diff --git a/src/core_atmosphere/dynamics/mpas_atm_time_integration.F b/src/core_atmosphere/dynamics/mpas_atm_time_integration.F index 9406ac6d2b..a04c9cc97a 100644 --- a/src/core_atmosphere/dynamics/mpas_atm_time_integration.F +++ b/src/core_atmosphere/dynamics/mpas_atm_time_integration.F @@ -5265,13 +5265,22 @@ subroutine atm_compute_dyn_tend_work(nCells, nEdges, nVertices, nVertLevels_dumm if(config_horiz_mixing == "2d_smagorinsky") then - call smagorinsky_2d( kdiff, u, v, c_s, config_len_disp, & + call smagorinsky_2d( eddy_visc_horz, u, v, c_s, config_len_disp, & deformation_coef_c2, deformation_coef_s2, deformation_coef_cs, & invDt, h_mom_eddy_visc4, config_visc4_2dsmag, h_theta_eddy_visc4, & cellStart, cellEnd, nEdgesOnCell, edgesOnCell, & nCells, nEdges ) -! 
testing - 3D smagorinsky computation + else if(config_horiz_mixing == "2d_fixed") then + + eddy_visc_horz(1:nVertLevels,cellStart:cellEnd) = config_h_theta_eddy_visc2 + h_mom_eddy_visc4 = config_h_mom_eddy_visc4 + h_theta_eddy_visc4 = config_h_theta_eddy_visc4 + + end if + + else if (config_les_model == "3d_smagorinsky") then + call calculate_n2( bn2, theta_m, exner, pressure_b, pp, zgrid, scalars, index_qv, index_qc, qtot, & cellStart, cellEnd, nCells) @@ -5282,35 +5291,6 @@ subroutine atm_compute_dyn_tend_work(nCells, nEdges, nVertices, nVertLevels_dumm invDt, h_mom_eddy_visc4, config_visc4_2dsmag, h_theta_eddy_visc4, & cellStart, cellEnd, nEdgesOnCell, edgesOnCell, cellsOnEdge, & nCells, nEdges, nVertLevels, maxEdges ) -! testing - - else if(config_horiz_mixing == "2d_fixed") then - -! testing -! call calculate_n2( bn2, theta_m, exner, pressure_b, pp, zgrid, scalars, index_qv, index_qc, qtot, & -! cellStart, cellEnd, nCells) -! testing - - !$acc parallel default(present) - !$acc loop gang worker - do iCell = cellStart, cellEnd - !$acc loop vector - do k = 1, nVertLevels - kdiff(k,iCell) = config_h_theta_eddy_visc2 - end do - end do - !$acc end parallel - - h_mom_eddy_visc4 = config_h_mom_eddy_visc4 - h_theta_eddy_visc4 = config_h_theta_eddy_visc4 - - end if - - else if (config_les_model == "3d_smagorinsky") then - - ! call to 3D smagorinsky here... 
- call calculate_n2( bn2, theta_m, exner, pressure_b, pp, zgrid, scalars, index_qv, index_qc, qtot, & - cellStart, cellEnd, nCells) end if @@ -5329,6 +5309,9 @@ subroutine atm_compute_dyn_tend_work(nCells, nEdges, nVertices, nVertLevels_dumm visc2cam = visc2cam*(1.0-real(nVertLevels-k)/real(config_number_cam_damping_levels)) kdiff(k ,iCell) = max(kdiff(k ,iCell),visc2cam) end do + eddy_visc_horz(nVertLevels-2,iCell) = max(eddy_visc_horz(nVertLevels-2,iCell), 2.0833*config_len_disp*config_mpas_cam_coef) + eddy_visc_horz(nVertLevels-1,iCell) = max(eddy_visc_horz(nVertLevels-1,iCell),2.0*2.0833*config_len_disp*config_mpas_cam_coef) + eddy_visc_horz(nVertLevels ,iCell) = max(eddy_visc_horz(nVertLevels ,iCell),4.0*2.0833*config_len_disp*config_mpas_cam_coef) end do !$acc end parallel @@ -5498,7 +5481,7 @@ subroutine atm_compute_dyn_tend_work(nCells, nEdges, nVertices, nVertLevels_dumm !$OMP BARRIER - if(test_dissipation_3d) then +! if(test_dissipation_3d) then call u_dissipation_3d( edgeStart, edgeEnd, edgeSolveStart, edgeSolveEnd, vertexStart, vertexEnd, & cellStart, cellEnd, nCells, nEdges, nVertices, vertexDegree, & @@ -5511,22 +5494,22 @@ subroutine atm_compute_dyn_tend_work(nCells, nEdges, nVertices, nVertLevels_dumm eddy_visc_horz, eddy_visc_vert, zz, rdzu, rdzw, & fzm, fzp, config_les_model, & delsq_u, delsq_vorticity, delsq_divergence, & - u, divergence, vorticity, rho_edge, rho_zz, u_init, v_init, tend_u_euler ) + u, v, divergence, vorticity, rho_edge, rho_zz, u_init, v_init, tend_u_euler ) - else - - call u_dissipation( edgeStart, edgeEnd, edgeSolveStart, edgeSolveEnd, vertexStart, vertexEnd, & - cellStart, cellEnd, nCells, nEdges, nVertices, vertexDegree, & - cellsOnEdge, verticesOnEdge, edgesOnCell, edgesOnVertex, & - nEdgesOnCell, edgesOnCell_sign, edgesOnVertex_sign, & - invAreaCell, invAreaTriangle, invDvEdge, invDcEdge, & - angleEdge, dcEdge, dvEdge, meshScalingDel2, meshScalingDel4, & - config_mix_full, h_mom_eddy_visc4, v_mom_eddy_visc2, & - 
config_del4u_div_factor, zgrid, kdiff, & - delsq_u, delsq_vorticity, delsq_divergence, & - u, divergence, vorticity, rho_edge, u_init, v_init, tend_u_euler ) - - end if +! else ! this is the original MPAS dissipation code +! +! call u_dissipation( edgeStart, edgeEnd, edgeSolveStart, edgeSolveEnd, vertexStart, vertexEnd, & +! cellStart, cellEnd, nCells, nEdges, nVertices, vertexDegree, & +! cellsOnEdge, verticesOnEdge, edgesOnCell, edgesOnVertex, & +! nEdgesOnCell, edgesOnCell_sign, edgesOnVertex_sign, & +! invAreaCell, invAreaTriangle, invDvEdge, invDcEdge, & +! angleEdge, dcEdge, dvEdge, meshScalingDel2, meshScalingDel4, & +! config_mix_full, h_mom_eddy_visc4, v_mom_eddy_visc2, & +! config_del4u_div_factor, zgrid, kdiff, & +! delsq_u, delsq_vorticity, delsq_divergence, & +! u, divergence, vorticity, rho_edge, u_init, v_init, tend_u_euler ) +! +! end if end if ! (rk_step 1 test for computing mixing terms) @@ -5654,7 +5637,7 @@ subroutine atm_compute_dyn_tend_work(nCells, nEdges, nVertices, nVertLevels_dumm if (rk_step == 1) then - if(test_dissipation_3d) then +! if(test_dissipation_3d) then call w_dissipation_3d( cellStart, cellEnd, cellSolveStart, cellSolveEnd, & nCells, nEdges, & @@ -5669,22 +5652,22 @@ subroutine atm_compute_dyn_tend_work(nCells, nEdges, nVertices, nVertLevels_dumm config_les_model, & tend_w_euler ) - else - - call w_dissipation( cellStart, cellEnd, cellSolveStart, cellSolveEnd, & - nCells, nEdges, & - nEdgesOnCell, edgesOnCell, cellsOnEdge, edgesOnCell_sign, & - invAreaCell, invDcEdge, dvEdge, & - meshScalingDel2, meshScalingDel4, & - rdzw, rdzu, & - v_mom_eddy_visc2, h_mom_eddy_visc4, & - delsq_w, & - w, rho_edge, kdiff, rho_zz, & - tend_w_euler ) - - end if +! else +! +! call w_dissipation( cellStart, cellEnd, cellSolveStart, cellSolveEnd, & +! nCells, nEdges, & +! nEdgesOnCell, edgesOnCell, cellsOnEdge, edgesOnCell_sign, & +! invAreaCell, invDcEdge, dvEdge, & +! meshScalingDel2, meshScalingDel4, & +! rdzw, rdzu, & +! 
v_mom_eddy_visc2, h_mom_eddy_visc4, & +! delsq_w, & +! w, rho_edge, kdiff, rho_zz, & +! tend_w_euler ) +! +! end if - end if ! horizontal mixing for w computed in first rk_step + end if ! mixing for w computed in first rk_step ! Note for OpenMP parallelization: We could avoid allocating the delsq_w scratch ! array, and just use the delsq_theta array as was previously done; however, @@ -5827,7 +5810,7 @@ subroutine atm_compute_dyn_tend_work(nCells, nEdges, nVertices, nVertLevels_dumm if (rk_step == 1) then - if(test_dissipation_3d) then +! if(test_dissipation_3d) then call theta_dissipation_3d( cellStart, cellEnd, cellSolveStart, cellSolveEnd, & nCells, nEdges, & @@ -5843,21 +5826,21 @@ subroutine atm_compute_dyn_tend_work(nCells, nEdges, nVertices, nVertLevels_dumm config_les_model, & tend_theta_euler ) - else - - call theta_dissipation( cellStart, cellEnd, cellSolveStart, cellSolveEnd, & - nCells, nEdges, & - nEdgesOnCell, edgesOnCell, cellsOnEdge, edgesOnCell_sign, & - invAreaCell, invDcEdge, dvEdge, & - meshScalingDel2, meshScalingDel4, & - config_mix_full, t_init, zgrid, & - rdzw, rdzu, & - v_theta_eddy_visc2, h_theta_eddy_visc4, prandtl_inv, & - delsq_theta, & - theta_m, rho_edge, kdiff, rho_zz, & - tend_theta_euler ) - - end if +! else ! this is the original MPAS dissipation code +! +! call theta_dissipation( cellStart, cellEnd, cellSolveStart, cellSolveEnd, & +! nCells, nEdges, & +! nEdgesOnCell, edgesOnCell, cellsOnEdge, edgesOnCell_sign, & +! invAreaCell, invDcEdge, dvEdge, & +! meshScalingDel2, meshScalingDel4, & +! config_mix_full, t_init, zgrid, & +! rdzw, rdzu, & +! v_theta_eddy_visc2, h_theta_eddy_visc4, prandtl_inv, & +! delsq_theta, & +! theta_m, rho_edge, kdiff, rho_zz, & +! tend_theta_euler ) +! +! end if end if ! theta mixing calculated first rk_step From 59f1348c754f6c41383776b0a03be457193f1076 Mon Sep 17 00:00:00 2001 From: Bill Skamarock Date: Mon, 16 Mar 2020 15:52:47 -0600 Subject: [PATCH 032/146] Added a scalar variable named tke. 
This is the turbulent kinetic energy used in the prognostic 1.5 order TKE (LES) scheme. Initialized the values to 0.1 (i.e. a small number) becuase there must be a seed to grow tke. --- src/core_init_atmosphere/Registry.xml | 2 ++ src/core_init_atmosphere/mpas_init_atm_cases.F | 10 ++++++++++ 2 files changed, 12 insertions(+) diff --git a/src/core_init_atmosphere/Registry.xml b/src/core_init_atmosphere/Registry.xml index d24568c866..b4f4622246 100644 --- a/src/core_init_atmosphere/Registry.xml +++ b/src/core_init_atmosphere/Registry.xml @@ -1196,6 +1196,8 @@ + diff --git a/src/core_init_atmosphere/mpas_init_atm_cases.F b/src/core_init_atmosphere/mpas_init_atm_cases.F index 82b2517e76..0160139fb8 100644 --- a/src/core_init_atmosphere/mpas_init_atm_cases.F +++ b/src/core_init_atmosphere/mpas_init_atm_cases.F @@ -6235,6 +6235,7 @@ subroutine init_atm_case_les(dminfo, mesh, nCells, nVertLevels, state, diag, tes integer :: iCell, iCell1, iCell2 , iEdge, vtx1, vtx2, ivtx, i, k, nz, nz1, itr, cell1, cell2 integer, pointer :: nEdges, nVertices, maxEdges, nCellsSolve integer, pointer :: index_qv + integer, pointer :: index_tke real (kind=RKIND), dimension(nVertLevels + 1 ) :: znu, znw, znwc, znwv real (kind=RKIND), dimension(nVertLevels + 1 ) :: znuc, znuv @@ -6368,6 +6369,7 @@ subroutine init_atm_case_les(dminfo, mesh, nCells, nVertLevels, state, diag, tes call mpas_pool_get_array(state, 'scalars', scalars) call mpas_pool_get_dimension(state, 'index_qv', index_qv) + call mpas_pool_get_dimension(state, 'index_tke', index_tke) scalars(:,:,:) = 0. @@ -6607,6 +6609,14 @@ subroutine init_atm_case_les(dminfo, mesh, nCells, nVertLevels, state, diag, tes t (k,i) = thi(k,i)*(1.+1.61*scalars(index_qv,k,i)) end do end do +! +! initial seed for tke +! 
+ do i=1,nCells + do k = 1,nz1 + scalars(index_tke,k,i) = 0.1 + end do + end do do itr=1,30 From 31f21ff571126a8ea48eb19845ff1991c53f2e9c Mon Sep 17 00:00:00 2001 From: Bill Skamarock Date: Tue, 17 Mar 2020 11:09:52 -0600 Subject: [PATCH 033/146] Added passive test of prognostic tke. Does not yet pass debug step. --- src/core_atmosphere/Registry.xml | 10 ++ .../dynamics/mpas_atm_dissipation_models.F | 111 ++++++++++++++++-- .../dynamics/mpas_atm_time_integration.F | 21 +++- 3 files changed, 124 insertions(+), 18 deletions(-) diff --git a/src/core_atmosphere/Registry.xml b/src/core_atmosphere/Registry.xml index 7d56a042c5..f78ca1b2fb 100644 --- a/src/core_atmosphere/Registry.xml +++ b/src/core_atmosphere/Registry.xml @@ -1696,6 +1696,12 @@ + packages="mp_thompson_in"/> + + + + + packages="mp_thompson_in"/> + + #endif diff --git a/src/core_atmosphere/dynamics/mpas_atm_dissipation_models.F b/src/core_atmosphere/dynamics/mpas_atm_dissipation_models.F index dad19bd059..ea5181a8f7 100644 --- a/src/core_atmosphere/dynamics/mpas_atm_dissipation_models.F +++ b/src/core_atmosphere/dynamics/mpas_atm_dissipation_models.F @@ -89,22 +89,26 @@ end subroutine smagorinsky_2d !--------------------------------------- - subroutine smagorinsky_3d( eddy_visc_horz, eddy_visc_vert, u, v, uCell, vCell, & + subroutine smagorinsky_3d( eddy_visc_horz, eddy_visc_vert, u, v, uCell, vCell, & w, c_s, bv_freq2, zgrid, config_len_disp, & deformation_coef_c2, deformation_coef_s2, deformation_coef_cs, & deformation_coef_c, deformation_coef_s, & invDt, h_mom_eddy_visc4, config_visc4_2dsmag, h_theta_eddy_visc4, & + scalars, tend_scalars, index_tke, rho_zz, meshScalingDel2, & cellStart, cellEnd, nEdgesOnCell, edgesOnCell, cellsOnEdge, & - nCells, nEdges, nVertLevels, maxEdges ) + nCells, nEdges, nVertLevels, maxEdges, num_scalars ) implicit none - integer, intent(in) :: cellStart, cellEnd, nCells, nEdges, nVertLevels, maxEdges + integer, intent(in) :: cellStart, cellEnd, nCells, nEdges, nVertLevels, 
maxEdges, index_tke, num_scalars real (kind=RKIND), dimension(nVertLevels,nEdges+1), intent(in) :: u real (kind=RKIND), dimension(nVertLevels,nEdges+1), intent(in) :: v real (kind=RKIND), dimension(nVertLevels,nCells+1), intent(in) :: uCell real (kind=RKIND), dimension(nVertLevels,nCells+1), intent(in) :: vCell real (kind=RKIND), dimension(nVertLevels,nCells+1), intent(in) :: bv_freq2 + real (kind=RKIND), dimension(nVertLevels,nCells+1), intent(in) :: rho_zz + real (kind=RKIND), dimension(num_scalars,nVertLevels,nCells+1), intent(in) :: scalars + real (kind=RKIND), dimension(num_scalars,nVertLevels,nCells+1) :: tend_scalars real (kind=RKIND), dimension(nVertLevels+1,nCells+1), intent(in) :: w real (kind=RKIND), dimension(nVertLevels+1,nCells+1), intent(in) :: zgrid real (kind=RKIND), dimension(maxEdges,nCells+1), intent(in) :: deformation_coef_c2 @@ -112,6 +116,7 @@ subroutine smagorinsky_3d( eddy_visc_horz, eddy_visc_vert, u, v, uCell, vCell, real (kind=RKIND), dimension(maxEdges,nCells+1), intent(in) :: deformation_coef_cs real (kind=RKIND), dimension(maxEdges,nCells+1), intent(in) :: deformation_coef_c real (kind=RKIND), dimension(maxEdges,nCells+1), intent(in) :: deformation_coef_s + real (kind=RKIND), dimension(nEdges+1), intent(in) :: meshScalingDel2 real (kind=RKIND), intent(in) :: c_s, config_len_disp, invDt, config_visc4_2dsmag integer, dimension(nCells+1), intent(in) :: nEdgesOnCell integer, dimension(maxEdges,nCells+1), intent(in) :: edgesOnCell @@ -128,6 +133,11 @@ subroutine smagorinsky_3d( eddy_visc_horz, eddy_visc_vert, u, v, uCell, vCell, real (kind=RKIND), dimension(nVertLevels+1) :: dwdx, dwdy real (kind=RKIND), dimension(nVertLevels) :: dudz, dvdz, dwdz real (kind=RKIND) :: rdz, def2, pr_inv, wk + real (kind=RKIND) :: shear_production, buoyancy, dissipation, delta_z, delta_s, bv, tke_length, diss_length + real (kind=RKIND) :: l_horizontal, l_vertical, c_k, c_dissipation + real (kind=RKIND) :: prandtl_horizontal_inv, prandtl_vertical_inv + + 
logical, parameter :: test_tke=.true. if(debug_dissipation) call mpas_log_write(' begin smagorinsky_3d ') @@ -210,6 +220,63 @@ subroutine smagorinsky_3d( eddy_visc_horz, eddy_visc_vert, u, v, uCell, vCell, end do +!*************************************************** + + if (test_tke) then + + do k=1,nVertLevels + + shear_production = eddy_visc_horz(k,iCell)*(d_11(k)**2 + d_22(k)**2 + d_12(k)**2) & + +eddy_visc_vert(k,iCell)*(d_33(k)**2 + d_13(k)**2 + d_13(k)**2) + + buoyancy = -eddy_visc_vert(k,iCell)*bv_freq2(k,iCell) + + ! dissipation + + delta_z = zgrid(k+1,iCell)-zgrid(k,iCell) + delta_s = ((config_len_disp**2)*delta_z)**(1./3.) + bv = max( sqrt(abs(bv_freq2(k,iCell))), epsilon ) + tke_length = 0.76*sqrt(scalars(index_tke,k,iCell))/bv + + diss_length = min(delta_s,tke_length) + if(bv_freq2(k,iCell) <= 0) diss_length = delta_s + + c_k = 0.25 + c_dissipation = 1.9*c_k + max( 0.0, 0.93 - 1.9*c_k )*diss_length/delta_s + + dissipation = -c_dissipation*(scalars(index_tke,k,iCell)**(1.5))/diss_length + + ! computing eddy viscosities ********* + + ! non-isotropic mixing + + l_horizontal = config_len_disp + l_vertical = min(delta_z,tke_length) + if(bv_freq2(k,iCell) <= 0) diss_length = delta_z + + ! isotropic mixing + + l_horizontal = min(delta_s,tke_length) + if(bv_freq2(k,iCell) <= 0) diss_length = delta_s + l_vertical = l_horizontal + + prandtl_horizontal_inv = 3. + prandtl_vertical_inv = 1.0+(2.0*l_vertical/delta_z) ! going to need to store this off somewhere + + ! eddy viscocities set here if we are running the 1.5 order prognostic tke scheme + ! eddy_visc_horz(k,iCell) = c_k*l_horizontal*sqrt(scalars(index_tke,k,iCell)) + ! eddy_visc_vert(k,iCell) = c_k*l_vertical*sqrt(scalars(index_tke,k,iCell)) + + ! RHS term for the subgrid ke. + + tend_scalars(index_tke,k,iCell) = rho_zz(k,iCell)*( shear_production + buoyancy + dissipation ) + + end do + + end if ! end of test_tke + +!*************************************************** + end do ! 
loop over all owned cells (columns) ! set up coefficients for 4th-order horizontal background filter @@ -235,9 +302,10 @@ subroutine calculate_n2( bn2, theta_m, exner, pressure_b, pp, zgrid, scalars, in real (kind=RKIND), dimension(nVertLevels+1,nCells+1), intent(in) :: zgrid real (kind=RKIND), dimension(num_scalars,nVertLevels,nCells+1), intent(in) :: scalars ! local - real (kind=RKIND) :: dz, esw + real (kind=RKIND) :: dz, rdz, esw real (kind=RKIND), parameter :: qc_cr = 0.00001 ! in kg/kg real (kind=RKIND), dimension(nVertLevels) :: theta, qvsw, temp, coefa + logical :: dry_bv_frequency if(debug_dissipation) call mpas_log_write(' begin BV frequency calculations ') @@ -261,18 +329,37 @@ subroutine calculate_n2( bn2, theta_m, exner, pressure_b, pp, zgrid, scalars, in do k=2, nVertLevels-1 dz = 0.5 * (zgrid(k+2,iCell)+zgrid(k+1,iCell)) - 0.5 * (zgrid(k,iCell)+zgrid(k-1,iCell)) - - if ( scalars(index_qc,k,iCell) < qc_cr ) then + rdz = 1.0/dz + + ! if ( scalars(index_qc,k,iCell) < qc_cr ) then + ! ! Dry Brunt-Vaisala frequency + ! bn2(k,iCell) = gravity * ((theta(k+1) - theta(k-1) ) / theta(k) / dz & + ! + rvord * (scalars(index_qv,k+1,iCell) - scalars(index_qv,k-1,iCell)) / dz & + ! - ( qtot(k+1, iCell) - qtot(k-1, iCell) ) / dz ) + ! else + ! ! Moist Brunt-Vaisala frequency according to Durran and Klemp (1982) Eq. 36 + ! bn2(k,iCell) = gravity * ( coefa(k) * ((theta(k+1) - theta(k-1) ) / theta(k) / dz & + ! + xlv / cp / temp(k) * ( qvsw(k+1) - qvsw(k-1)) / dz ) & + ! - ( qtot(k+1, iCell) - qtot(k-1, iCell) ) / dz ) + ! endif + + dry_bv_frequency = .true. + if(index_qc .gt. 0) then ! if moist simulation, qc exists + if ( scalars(index_qc,k,iCell) .ge. qc_cr ) dry_bv_frequency = .false. + end if + + if (dry_bv_frequency) then ! 
Dry Brunt-Vaisala frequency - bn2(k,iCell) = gravity * ((theta(k+1) - theta(k-1) ) / theta(k) / dz & - + rvord * (scalars(index_qv,k+1,iCell) - scalars(index_qv,k-1,iCell)) / dz & - - ( qtot(k+1, iCell) - qtot(k-1, iCell) ) / dz ) + bn2(k,iCell) = gravity * ((theta(k+1) - theta(k-1) ) / theta(k) * rdz & + + rvord * (scalars(index_qv,k+1,iCell) - scalars(index_qv,k-1,iCell)) * rdz & + - ( qtot(k+1, iCell) - qtot(k-1, iCell) ) * rdz ) else ! Moist Brunt-Vaisala frequency according to Durran and Klemp (1982) Eq. 36 - bn2(k,iCell) = gravity * ( coefa(k) * ((theta(k+1) - theta(k-1) ) / theta(k) / dz & - + xlv / cp / temp(k) * ( qvsw(k+1) - qvsw(k-1)) / dz ) & - - ( qtot(k+1, iCell) - qtot(k-1, iCell) ) / dz ) + bn2(k,iCell) = gravity * ( coefa(k) * ((theta(k+1) - theta(k-1) ) / theta(k) * rdz & + + xlv / cp / temp(k) * ( qvsw(k+1) - qvsw(k-1)) * rdz ) & + - ( qtot(k+1, iCell) - qtot(k-1, iCell) ) * rdz ) endif + end do bn2(1,iCell) = bn2(2,iCell) bn2(nVertLevels,iCell) = bn2(nVertLevels-1,iCell) diff --git a/src/core_atmosphere/dynamics/mpas_atm_time_integration.F b/src/core_atmosphere/dynamics/mpas_atm_time_integration.F index a04c9cc97a..a3dd869be5 100644 --- a/src/core_atmosphere/dynamics/mpas_atm_time_integration.F +++ b/src/core_atmosphere/dynamics/mpas_atm_time_integration.F @@ -4791,7 +4791,7 @@ subroutine atm_compute_dyn_tend(tend, tend_physics, state, diag, mesh, configs, real (kind=RKIND), dimension(:,:), pointer :: rthdynten - real (kind=RKIND), dimension(:,:,:), pointer :: scalars + real (kind=RKIND), dimension(:,:,:), pointer :: scalars, tend_scalars real (kind=RKIND), dimension(:,:), pointer :: tend_u_euler, tend_w_euler, tend_theta_euler @@ -4835,7 +4835,7 @@ subroutine atm_compute_dyn_tend(tend, tend_physics, state, diag, mesh, configs, real (kind=RKIND), pointer :: config_rayleigh_damp_u_timescale_days integer, pointer :: config_number_rayleigh_damp_u_levels, config_number_cam_damping_levels integer, pointer :: config_number_rayleigh_damp_u_levels - 
integer, pointer :: index_qv, index_qc + integer, pointer :: index_qv, index_qc, index_tke logical :: inactive_rthdynten @@ -4947,6 +4947,8 @@ subroutine atm_compute_dyn_tend(tend, tend_physics, state, diag, mesh, configs, call mpas_pool_get_array(tend, 'w_euler', tend_w_euler) call mpas_pool_get_array(tend, 'w_pgf', tend_w_pgf) call mpas_pool_get_array(tend, 'w_buoy', tend_w_buoy) + call mpas_pool_get_array(tend, 'scalars_tend', tend_scalars) + call mpas_pool_get_array(diag, 'cqw', cqw) call mpas_pool_get_array(diag, 'cqu', cqu) @@ -4965,6 +4967,7 @@ subroutine atm_compute_dyn_tend(tend, tend_physics, state, diag, mesh, configs, call mpas_pool_get_dimension(state, 'moist_end', moist_end) call mpas_pool_get_dimension(state, 'index_qv', index_qv) call mpas_pool_get_dimension(state, 'index_qc', index_qc) + call mpas_pool_get_dimension(state, 'index_tke', index_tke) call mpas_pool_get_array(mesh, 'nEdgesOnCell', nEdgesOnCell) call mpas_pool_get_array(mesh, 'nAdvCellsForEdge', nAdvCellsForEdge) @@ -4978,11 +4981,12 @@ subroutine atm_compute_dyn_tend(tend, tend_physics, state, diag, mesh, configs, call atm_compute_dyn_tend_work(nCells, nEdges, nVertices, nVertLevels, & nCellsSolve, nEdgesSolve, vertexDegree, maxEdges, maxEdges2, num_scalars, index_qv, index_qc, moist_start, moist_end, & + tend_scalars, & fEdge, dvEdge, dcEdge, invDcEdge, invDvEdge, invAreaCell, invAreaTriangle, meshScalingDel2, meshScalingDel4, & weightsOnEdge, zgrid, rho_edge, rho_zz, ru, u, v, tend_u, & divergence, vorticity, ke, pv_edge, theta_m, rw, tend_rho, & rt_diabatic_tend, tend_theta, tend_w, w, cqw, rb, rr, pp, pressure_b, zz, zxu, cqu, & - h_divergence, kdiff, bn2, eddy_visc_horz, eddy_visc_vert, & + h_divergence, kdiff, bn2, eddy_visc_horz, eddy_visc_vert, index_tke, & edgesOnCell_sign, edgesOnVertex_sign, rw_save, ru_save, & theta_m_save, exner, rr_save, scalars, tend_u_euler, tend_w_euler, tend_theta_euler, deriv_two, & cellsOnEdge, verticesOnEdge, edgesOnCell, edgesOnEdge, cellsOnCell, 
edgesOnVertex, nEdgesOnCell, nEdgesOnEdge, & @@ -5005,11 +5009,12 @@ end subroutine atm_compute_dyn_tend subroutine atm_compute_dyn_tend_work(nCells, nEdges, nVertices, nVertLevels_dummy, & nCellsSolve, nEdgesSolve, vertexDegree, maxEdges_dummy, maxEdges2_dummy, num_scalars_dummy, index_qv, index_qc, moist_start, moist_end, & + tend_scalars, & fEdge, dvEdge, dcEdge, invDcEdge, invDvEdge, invAreaCell, invAreaTriangle, meshScalingDel2, meshScalingDel4, & weightsOnEdge, zgrid, rho_edge, rho_zz, ru, u, v, tend_u, & divergence, vorticity, ke, pv_edge, theta_m, rw, tend_rho, & rt_diabatic_tend, tend_theta, tend_w, w, cqw, rb, rr, pp, pressure_b, zz, zxu, cqu, & - h_divergence, kdiff, bn2, eddy_visc_horz, eddy_visc_vert, & + h_divergence, kdiff, bn2, eddy_visc_horz, eddy_visc_vert, index_tke, & edgesOnCell_sign, edgesOnVertex_sign, rw_save, ru_save, & theta_m_save, exner, rr_save, scalars, tend_u_euler, tend_w_euler, tend_theta_euler, deriv_two, & cellsOnEdge, verticesOnEdge, edgesOnCell, edgesOnEdge, cellsOnCell, edgesOnVertex, nEdgesOnCell, nEdgesOnEdge, & @@ -5038,7 +5043,7 @@ subroutine atm_compute_dyn_tend_work(nCells, nEdges, nVertices, nVertLevels_dumm ! Dummy arguments ! 
integer :: nCells, nEdges, nVertices, nVertLevels_dummy, nCellsSolve, nEdgesSolve, vertexDegree, & - maxEdges_dummy, maxEdges2_dummy, num_scalars_dummy, index_qv, index_qc, moist_start, moist_end + maxEdges_dummy, maxEdges2_dummy, num_scalars_dummy, index_qv, index_qc, moist_start, moist_end, index_tke real (kind=RKIND), dimension(nEdges+1) :: fEdge real (kind=RKIND), dimension(nEdges+1) :: dvEdge @@ -5090,6 +5095,7 @@ subroutine atm_compute_dyn_tend_work(nCells, nEdges, nVertices, nVertLevels_dumm real (kind=RKIND), dimension(nVertLevels,nCells+1) :: exner real (kind=RKIND), dimension(nVertLevels,nCells+1) :: rr_save real (kind=RKIND), dimension(num_scalars,nVertLevels,nCells+1) :: scalars + real (kind=RKIND), dimension(num_scalars,nVertLevels,nCells+1) :: tend_scalars real (kind=RKIND), dimension(nVertLevels,nEdges+1) :: tend_u_euler real (kind=RKIND), dimension(nVertLevels+1,nCells+1) :: tend_w_euler real (kind=RKIND), dimension(nVertLevels,nCells+1) :: tend_theta_euler @@ -5281,6 +5287,8 @@ subroutine atm_compute_dyn_tend_work(nCells, nEdges, nVertices, nVertLevels_dumm else if (config_les_model == "3d_smagorinsky") then + ! 
call mpas_log_write(' BV call, index qv, qc, tke $i $i $i ', intArgs=(/index_qv, index_qc, index_tke/)) + call calculate_n2( bn2, theta_m, exner, pressure_b, pp, zgrid, scalars, index_qv, index_qc, qtot, & cellStart, cellEnd, nCells) @@ -5289,8 +5297,9 @@ subroutine atm_compute_dyn_tend_work(nCells, nEdges, nVertices, nVertLevels_dumm deformation_coef_c2, deformation_coef_s2, deformation_coef_cs, & deformation_coef_c, deformation_coef_s, & invDt, h_mom_eddy_visc4, config_visc4_2dsmag, h_theta_eddy_visc4, & + scalars, tend_scalars, index_tke, rho_zz, meshScalingDel2, & cellStart, cellEnd, nEdgesOnCell, edgesOnCell, cellsOnEdge, & - nCells, nEdges, nVertLevels, maxEdges ) + nCells, nEdges, nVertLevels, maxEdges, num_scalars ) end if From 7e1581b745dbbb19e617c44188417318add61a97 Mon Sep 17 00:00:00 2001 From: Bill Skamarock Date: Tue, 17 Mar 2020 19:59:43 -0600 Subject: [PATCH 034/146] 1.5 order prognostic tke mods. The prognostic tke does not feed back on the solution in this formulation. further testing ahead. --- .../dynamics/mpas_atm_dissipation_models.F | 46 +++++++++++-------- 1 file changed, 27 insertions(+), 19 deletions(-) diff --git a/src/core_atmosphere/dynamics/mpas_atm_dissipation_models.F b/src/core_atmosphere/dynamics/mpas_atm_dissipation_models.F index ea5181a8f7..0d76397baf 100644 --- a/src/core_atmosphere/dynamics/mpas_atm_dissipation_models.F +++ b/src/core_atmosphere/dynamics/mpas_atm_dissipation_models.F @@ -136,8 +136,10 @@ subroutine smagorinsky_3d( eddy_visc_horz, eddy_visc_vert, u, v, uCell, vCell, real (kind=RKIND) :: shear_production, buoyancy, dissipation, delta_z, delta_s, bv, tke_length, diss_length real (kind=RKIND) :: l_horizontal, l_vertical, c_k, c_dissipation real (kind=RKIND) :: prandtl_horizontal_inv, prandtl_vertical_inv + real (kind=RKIND) :: eddy_visc_h, eddy_visc_v logical, parameter :: test_tke=.true. 
+ real (kind=RKIND), parameter :: epsilon_bv = 1.e-06 if(debug_dissipation) call mpas_log_write(' begin smagorinsky_3d ') @@ -226,28 +228,13 @@ subroutine smagorinsky_3d( eddy_visc_horz, eddy_visc_vert, u, v, uCell, vCell, do k=1,nVertLevels - shear_production = eddy_visc_horz(k,iCell)*(d_11(k)**2 + d_22(k)**2 + d_12(k)**2) & - +eddy_visc_vert(k,iCell)*(d_33(k)**2 + d_13(k)**2 + d_13(k)**2) - - buoyancy = -eddy_visc_vert(k,iCell)*bv_freq2(k,iCell) - - ! dissipation - delta_z = zgrid(k+1,iCell)-zgrid(k,iCell) delta_s = ((config_len_disp**2)*delta_z)**(1./3.) - bv = max( sqrt(abs(bv_freq2(k,iCell))), epsilon ) + bv = max( sqrt(abs(bv_freq2(k,iCell))), epsilon_bv ) tke_length = 0.76*sqrt(scalars(index_tke,k,iCell))/bv - diss_length = min(delta_s,tke_length) if(bv_freq2(k,iCell) <= 0) diss_length = delta_s - c_k = 0.25 - c_dissipation = 1.9*c_k + max( 0.0, 0.93 - 1.9*c_k )*diss_length/delta_s - - dissipation = -c_dissipation*(scalars(index_tke,k,iCell)**(1.5))/diss_length - - ! computing eddy viscosities ********* - ! non-isotropic mixing l_horizontal = config_len_disp @@ -256,9 +243,30 @@ subroutine smagorinsky_3d( eddy_visc_horz, eddy_visc_vert, u, v, uCell, vCell, ! isotropic mixing - l_horizontal = min(delta_s,tke_length) - if(bv_freq2(k,iCell) <= 0) diss_length = delta_s - l_vertical = l_horizontal + ! l_horizontal = min(delta_s,tke_length) + ! if(bv_freq2(k,iCell) <= 0) diss_length = delta_s + ! l_vertical = l_horizontal + + ! eddy viscocities set here if we are running the 1.5 order prognostic tke scheme + eddy_visc_h = c_k*l_horizontal*sqrt(scalars(index_tke,k,iCell)) + eddy_visc_v = c_k*l_vertical*sqrt(scalars(index_tke,k,iCell)) + + ! terms for the prognostic tke integration + + shear_production = eddy_visc_h*(d_11(k)**2 + d_22(k)**2 + d_12(k)**2) & + +eddy_visc_v*(d_33(k)**2 + d_13(k)**2 + d_13(k)**2) + + buoyancy = -eddy_visc_v*bv_freq2(k,iCell) + + ! 
dissipation + + c_k = 0.25 + c_dissipation = 1.9*c_k + max( 0.0, 0.93 - 1.9*c_k )*diss_length/delta_s + if( (k.eq. 1) .or. (k.eq.nVertLevels) ) c_dissipation = 3.9 + + dissipation = -c_dissipation*(scalars(index_tke,k,iCell)**(1.5))/diss_length + + ! computing eddy viscosities ********* prandtl_horizontal_inv = 3. prandtl_vertical_inv = 1.0+(2.0*l_vertical/delta_z) ! going to need to store this off somewhere From 84ad242fffcee936e513556eb762d90ee7c7c43c Mon Sep 17 00:00:00 2001 From: Bill Skamarock Date: Wed, 18 Mar 2020 16:50:43 -0600 Subject: [PATCH 035/146] Added mixing routines for scalars within the LES models, both diagnostic and prognostic versions. The progostic tke option is still running in a no-feedback mode. --- .../dynamics/mpas_atm_dissipation_models.F | 320 ++++++++++++++++++ .../dynamics/mpas_atm_time_integration.F | 44 ++- 2 files changed, 351 insertions(+), 13 deletions(-) diff --git a/src/core_atmosphere/dynamics/mpas_atm_dissipation_models.F b/src/core_atmosphere/dynamics/mpas_atm_dissipation_models.F index 0d76397baf..510b213184 100644 --- a/src/core_atmosphere/dynamics/mpas_atm_dissipation_models.F +++ b/src/core_atmosphere/dynamics/mpas_atm_dissipation_models.F @@ -1541,4 +1541,324 @@ subroutine theta_dissipation_3d( cellStart, cellEnd, cellSolveStart, cellSolveEn end subroutine theta_dissipation_3d +!----------------------------------------------------- + + subroutine scalar_dissipation_3d_les( cellStart, cellEnd, cellSolveStart, cellSolveEnd, & + nCells, nEdges, & + nEdgesOnCell, edgesOnCell, cellsOnEdge, edgesOnCell_sign, & + invAreaCell, invDcEdge, dvEdge, & + meshScalingDel2, meshScalingDel4, & + config_mix_full, t_init, zgrid, & + rdzw, rdzu, fzm, fzp, & + v_theta_eddy_visc2, h_theta_eddy_visc4, prandtl_inv, & + delsq_theta, & + theta_m, rho_edge, rho_zz, zz, & + eddy_visc_horz, eddy_visc_vert, & + bv_freq2, config_len_disp, scalars, tend_scalars, & + index_tke, index_qv, num_scalars_dummy, mix_scalars, & + config_les_model, & + 
tend_theta_euler ) + + + ! 3D theta_m dissipation using the 3D smagorinsky eddy viscosities. + ! This routine also includes the simpler mixing models, and the 4th-order horizontal filter + + use mpas_atm_dimensions ! pull nVertLevels and maxEdges from here + implicit none + + integer, intent(in) :: cellStart, cellEnd + integer, intent(in) :: cellSolveStart, cellSolveEnd + integer, intent(in) :: nCells, nEdges + integer, intent(in) :: num_scalars_dummy + integer, intent(in) :: index_tke, index_qv + + logical, intent(in) :: config_mix_full, mix_scalars + + character (len=StrKIND) :: config_les_model + + integer, dimension(nCells+1), intent(in) :: nEdgesOnCell + integer, dimension(maxEdges,nCells+1), intent(in) :: EdgesOnCell + + integer, dimension(2,nEdges+1), intent(in) :: cellsOnEdge + + real (kind=RKIND), intent(in) :: h_theta_eddy_visc4 + real (kind=RKIND), intent(in) :: v_theta_eddy_visc2 + real (kind=RKIND), intent(in) :: prandtl_inv + real (kind=RKIND), intent(in) :: config_len_disp + + real (kind=RKIND), dimension(maxEdges,nCells+1), intent(in) :: edgesOnCell_sign + real (kind=RKIND), dimension(nCells+1), intent(in) :: invAreaCell + real (kind=RKIND), dimension(nEdges+1), intent(in) :: dvEdge + real (kind=RKIND), dimension(nEdges+1), intent(in) :: invDcEdge + real (kind=RKIND), dimension(nEdges+1), intent(in) :: meshScalingDel2 + real (kind=RKIND), dimension(nEdges+1), intent(in) :: meshScalingDel4 + real (kind=RKIND), dimension(nVertLevels), intent(in) :: rdzw + real (kind=RKIND), dimension(nVertLevels), intent(in) :: rdzu + real (kind=RKIND), dimension(nVertLevels), intent(in) :: fzm + real (kind=RKIND), dimension(nVertLevels), intent(in) :: fzp + real (kind=RKIND), dimension(nVertLevels+1, nCells+1), intent(in) :: zgrid + real (kind=RKIND), dimension(nVertLevels,nCells+1), intent(in) :: zz + + real (kind=RKIND), dimension(nVertLevels,nCells+1), intent(in) :: t_init + + real (kind=RKIND), dimension(nVertLevels,nCells+1), intent(in) :: bv_freq2 + real 
(kind=RKIND), dimension(num_scalars,nVertLevels,nCells+1), intent(in) :: scalars + real (kind=RKIND), dimension(num_scalars,nVertLevels,nCells+1), intent(inout) :: tend_scalars + real (kind=RKIND), dimension(nVertLevels,nCells+1), intent(in) :: theta_m + real (kind=RKIND), dimension(nVertLevels,nCells+1), intent(in) :: eddy_visc_horz + real (kind=RKIND), dimension(nVertLevels,nCells+1), intent(in) :: eddy_visc_vert + real (kind=RKIND), dimension(nVertLevels,nCells+1), intent(in) :: rho_zz + real (kind=RKIND), dimension(nVertLevels,nEdges+1), intent(in) :: rho_edge + + real (kind=RKIND), dimension(nVertLevels,nCells+1), intent(inout) :: tend_theta_euler + + ! storage passed in from calling routine + real (kind=RKIND), dimension(nVertLevels,nCells+1) :: delsq_theta + + ! local variables + integer :: cell1, cell2, iEdge, iCell, i, k, iScalar + real (kind=RKIND) :: r_areaCell, edge_sign, theta_turb_flux, pr_scale + real (kind=RKIND) :: z1, z2, z3, z4, zm, z0, zp + real (kind=RKIND), dimension(nVertLevels+1) :: turb_vflux + real (kind=RKIND), dimension(num_scalars,nVertLevels+1) :: turb_vflux_scalars + real (kind=RKIND) :: rho_k_at_w, zz_at_w + + logical, parameter :: vmix_scalars = .true. + logical, parameter :: hmix_scalars = .true. + logical, parameter :: hmix4_scalars = .false. 
+ + if(debug_dissipation) then + call mpas_log_write(' begin scalar_dissipation_3d ') + call mpas_log_write(' les model is '//trim(config_les_model)) + call mpas_log_write(' 4th order hyperviscosity is $r ', realArgs=(/h_theta_eddy_visc4/)) + end if + + delsq_theta(1:nVertLevels,cellStart:cellEnd) = 0.0 + + do iCell=cellStart,cellEnd + tend_theta_euler(1:nVertLevels,iCell) = 0.0 + r_areaCell = invAreaCell(iCell) + do i=1,nEdgesOnCell(iCell) + iEdge = edgesOnCell(i,iCell) + edge_sign = r_areaCell*edgesOnCell_sign(i,iCell) * dvEdge(iEdge) * invDcEdge(iEdge) + pr_scale = prandtl_inv * meshScalingDel2(iEdge) + cell1 = cellsOnEdge(1,iEdge) + cell2 = cellsOnEdge(2,iEdge) +!DIR$ IVDEP + do k=1,nVertLevels + +! we are computing the Smagorinsky filter at more points than needed here so as to pick up the delsq_theta for 4th order filter below. +! This is in conservative form. + + theta_turb_flux = edge_sign*(theta_m(k,cell2) - theta_m(k,cell1))*rho_edge(k,iEdge) + delsq_theta(k,iCell) = delsq_theta(k,iCell) + theta_turb_flux + theta_turb_flux = theta_turb_flux*0.5*(eddy_visc_horz(k,cell1)+eddy_visc_horz(k,cell2)) * pr_scale + tend_theta_euler(k,iCell) = tend_theta_euler(k,iCell) + theta_turb_flux + + end do + end do + end do + +!$OMP BARRIER + + if (h_theta_eddy_visc4 > 0.0) then ! 4th order mixing is active + + do iCell=cellSolveStart,cellSolveEnd ! Technically updating fewer cells than before... + r_areaCell = h_theta_eddy_visc4 * prandtl_inv * invAreaCell(iCell) + do i=1,nEdgesOnCell(iCell) + + iEdge = edgesOnCell(i,iCell) + edge_sign = meshScalingDel4(iEdge)*r_areaCell*dvEdge(iEdge)*edgesOnCell_sign(i,iCell)*invDcEdge(iEdge) + + cell1 = cellsOnEdge(1,iEdge) + cell2 = cellsOnEdge(2,iEdge) + + do k=1,nVertLevels + tend_theta_euler(k,iCell) = tend_theta_euler(k,iCell) - edge_sign*(delsq_theta(k,cell2) - delsq_theta(k,cell1)) + end do + end do + end do + + end if ! 4th order mixing is active + + if(mix_scalars .and. hmix_scalars) then ! 
dissipation for scalars, including 4th-order filter. Likely needs optimization + + do iScalar=1,num_scalars + + delsq_theta(1:nVertLevels,cellStart:cellEnd) = 0.0 + + do iCell=cellStart,cellEnd + ! tend_theta_euler(1:nVertLevels,iCell) = 0.0 + r_areaCell = invAreaCell(iCell) + do i=1,nEdgesOnCell(iCell) + iEdge = edgesOnCell(i,iCell) + edge_sign = r_areaCell*edgesOnCell_sign(i,iCell) * dvEdge(iEdge) * invDcEdge(iEdge) + pr_scale = prandtl_inv * meshScalingDel2(iEdge) + cell1 = cellsOnEdge(1,iEdge) + cell2 = cellsOnEdge(2,iEdge) +!DIR$ IVDEP + do k=1,nVertLevels + +! we are computing the Smagorinsky filter at more points than needed here so as to pick up the delsq_theta for 4th order filter below. +! This is in conservative form. + + theta_turb_flux = edge_sign*(scalars(iScalar,k,cell2) - scalars(iScalar,k,cell1))*rho_edge(k,iEdge) + delsq_theta(k,iCell) = delsq_theta(k,iCell) + theta_turb_flux + theta_turb_flux = theta_turb_flux*0.5*(eddy_visc_horz(k,cell1)+eddy_visc_horz(k,cell2)) * pr_scale + tend_scalars(iScalar,k,iCell) = tend_scalars(iScalar,k,iCell) + theta_turb_flux + + end do + end do + end do + +!$OMP BARRIER + + if ((h_theta_eddy_visc4 > 0.0).and. hmix4_scalars) then ! 4th order mixing is active + + do iCell=cellSolveStart,cellSolveEnd ! Technically updating fewer cells than before... + r_areaCell = h_theta_eddy_visc4 * prandtl_inv * invAreaCell(iCell) + do i=1,nEdgesOnCell(iCell) + + iEdge = edgesOnCell(i,iCell) + edge_sign = meshScalingDel4(iEdge)*r_areaCell*dvEdge(iEdge)*edgesOnCell_sign(i,iCell)*invDcEdge(iEdge) + + cell1 = cellsOnEdge(1,iEdge) + cell2 = cellsOnEdge(2,iEdge) + + do k=1,nVertLevels + tend_scalars(iScalar,k,iCell) = tend_scalars(iScalar,k,iCell) - edge_sign*(delsq_theta(k,cell2) - delsq_theta(k,cell1)) + end do + end do + end do + + end if ! 4th order mixing is active + + end do ! loop over scalars for horizontal mixing + + end if ! horizontal scalar mixing + + + ! idealized case vertical mixing. No scalar mixing here. 
+ + if ( v_theta_eddy_visc2 > 0.0 ) then ! vertical mixing for theta_m + + if (config_mix_full) then + + do iCell = cellSolveStart,cellSolveEnd + do k=2,nVertLevels-1 + z1 = zgrid(k-1,iCell) + z2 = zgrid(k ,iCell) + z3 = zgrid(k+1,iCell) + z4 = zgrid(k+2,iCell) + + zm = 0.5*(z1+z2) + z0 = 0.5*(z2+z3) + zp = 0.5*(z3+z4) + + tend_theta_euler(k,iCell) = tend_theta_euler(k,iCell) + v_theta_eddy_visc2*prandtl_inv*rho_zz(k,iCell)*(& + (theta_m(k+1,iCell)-theta_m(k ,iCell))/(zp-z0) & + -(theta_m(k ,iCell)-theta_m(k-1,iCell))/(z0-zm) )/(0.5*(zp-zm)) + end do + end do + + else ! idealized cases where we mix on the perturbation from the initial 1-D state + + do iCell = cellSolveStart,cellSolveEnd + do k=2,nVertLevels-1 + z1 = zgrid(k-1,iCell) + z2 = zgrid(k ,iCell) + z3 = zgrid(k+1,iCell) + z4 = zgrid(k+2,iCell) + + zm = 0.5*(z1+z2) + z0 = 0.5*(z2+z3) + zp = 0.5*(z3+z4) + + tend_theta_euler(k,iCell) = tend_theta_euler(k,iCell) + v_theta_eddy_visc2*prandtl_inv*rho_zz(k,iCell)*(& + ((theta_m(k+1,iCell)-t_init(k+1,iCell))-(theta_m(k ,iCell)-t_init(k,iCell)))/(zp-z0) & + -((theta_m(k ,iCell)-t_init(k,iCell))-(theta_m(k-1,iCell)-t_init(k-1,iCell)))/(z0-zm) )/(0.5*(zp-zm)) + end do + end do + + end if + + end if + + if ( config_les_model /= "none") then + + do iCell = cellSolveStart,cellSolveEnd ! vertical mixing for each column + ! compute turbulent fluxes + turb_vflux(nVertlevels+1) = 0. ! no turbulent flux out of the domain + turb_vflux(1) = 0. ! lower bc flux handled where ??? + do k=2,nVertLevels + rho_k_at_w = fzm(k)*rho_zz(k ,iCell)*zz(k ,iCell)*eddy_visc_vert(k ,iCell) & + +fzp(k)*rho_zz(k-1,iCell)*zz(k-1,iCell)*eddy_visc_vert(k-1,iCell) + zz_at_w = fzm(k)*zz(k,iCell)+fzp(k)*zz(k-1,iCell) + turb_vflux(k) = - rho_k_at_w*zz_at_w*rdzu(k)*(theta_m(k,iCell)-theta_m(k-1,iCell)) + end do + + ! test boundary conditions for supercell and les test cases + + if( les_test ) then + turb_vflux(1) = tke_heat_flux*rho_zz(1,iCell) ! 
this is correct for DRY CASE ONLY + turb_vflux(nVertLevels+1) = turb_vflux(nVertLevels) + else + turb_vflux(1) = turb_vflux(2) + turb_vflux(nVertLevels+1) = turb_vflux(nVertLevels) + end if + + + do k=1,nVertLevels + tend_theta_euler(k,iCell) = tend_theta_euler(k,iCell) & + - rdzw(k)*(turb_vflux(k+1)-turb_vflux(k)) + end do + + end do + + if (mix_scalars .and. vmix_scalars) then + + do iCell = cellSolveStart,cellSolveEnd ! vertical mixing for each column + ! compute turbulent fluxes + turb_vflux_scalars(:,nVertlevels+1) = 0. ! no turbulent flux out of the domain + turb_vflux_scalars(:,1) = 0. ! lower bc flux handled where ??? + do k=2,nVertLevels + rho_k_at_w = fzm(k)*rho_zz(k ,iCell)*zz(k ,iCell)*eddy_visc_vert(k ,iCell) & + +fzp(k)*rho_zz(k-1,iCell)*zz(k-1,iCell)*eddy_visc_vert(k-1,iCell) + zz_at_w = fzm(k)*zz(k,iCell)+fzp(k)*zz(k-1,iCell) + do iScalar=1,num_scalars + turb_vflux_scalars(iScalar,k) = - rho_k_at_w*zz_at_w*rdzu(k)* & + (scalars(iScalar,k,iCell)-scalars(iScalar,k-1,iCell)) + end do + end do + + ! test boundary conditions for supercell and les test cases + + ! need lower bc for qv here... + + ! if( les_test ) then + ! turb_vflux_scalars(1) = tke_heat_flux*rho_zz(1,iCell) ! this is correct for DRY CASE ONLY + ! turb_vflux_scalars(nVertLevels+1) = turb_vflux_scalars(nVertLevels) + ! else + ! turb_vflux_scalars(1) = turb_vflux_scalars(2) + ! turb_vflux_scalars(nVertLevels+1) = turb_vflux_scalars(nVertLevels) + ! end if + + + do k=1,nVertLevels + do iScalar=1,num_scalars + tend_scalars(iScalar,k,iCell) = tend_scalars(iScalar,k,iCell) & + - rdzw(k)*(turb_vflux_scalars(iScalar,k+1)-turb_vflux_scalars(iScalar,k)) + end do + end do + + end do + + end if ! 
mix scalars + + end if + + if(debug_dissipation) call mpas_log_write(' exiting scalar_dissipation_3d ') + + end subroutine scalar_dissipation_3d_les + end module mpas_atm_dissipation_models diff --git a/src/core_atmosphere/dynamics/mpas_atm_time_integration.F b/src/core_atmosphere/dynamics/mpas_atm_time_integration.F index a3dd869be5..f79485b5be 100644 --- a/src/core_atmosphere/dynamics/mpas_atm_time_integration.F +++ b/src/core_atmosphere/dynamics/mpas_atm_time_integration.F @@ -5151,6 +5151,7 @@ subroutine atm_compute_dyn_tend_work(nCells, nEdges, nVertices, nVertLevels_dumm real (kind=RKIND) :: config_len_disp real (kind=RKIND) :: config_h_mom_eddy_visc2, config_v_mom_eddy_visc2, config_h_theta_eddy_visc2, config_v_theta_eddy_visc2 logical, parameter :: test_dissipation_3d=.true. + logical, parameter :: mix_scalars = .true. integer, intent(in) :: rk_step real (kind=RKIND), intent(in) :: dt @@ -5821,19 +5822,36 @@ subroutine atm_compute_dyn_tend_work(nCells, nEdges, nVertices, nVertLevels_dumm ! if(test_dissipation_3d) then - call theta_dissipation_3d( cellStart, cellEnd, cellSolveStart, cellSolveEnd, & - nCells, nEdges, & - nEdgesOnCell, edgesOnCell, cellsOnEdge, edgesOnCell_sign, & - invAreaCell, invDcEdge, dvEdge, & - meshScalingDel2, meshScalingDel4, & - config_mix_full, t_init, zgrid, & - rdzw, rdzu, fzm, fzp, & - v_theta_eddy_visc2, h_theta_eddy_visc4, prandtl_inv, & - delsq_theta, & - theta_m, rho_edge, rho_zz, zz, & - eddy_visc_horz, eddy_visc_vert, & - config_les_model, & - tend_theta_euler ) + !call theta_dissipation_3d( cellStart, cellEnd, cellSolveStart, cellSolveEnd, & + ! nCells, nEdges, & + ! nEdgesOnCell, edgesOnCell, cellsOnEdge, edgesOnCell_sign, & + ! invAreaCell, invDcEdge, dvEdge, & + ! meshScalingDel2, meshScalingDel4, & + ! config_mix_full, t_init, zgrid, & + ! rdzw, rdzu, fzm, fzp, & + ! v_theta_eddy_visc2, h_theta_eddy_visc4, prandtl_inv, & + ! delsq_theta, & + ! theta_m, rho_edge, rho_zz, zz, & + ! eddy_visc_horz, eddy_visc_vert, & + ! 
config_les_model, & + ! tend_theta_euler ) + + call scalar_dissipation_3d_les( cellStart, cellEnd, cellSolveStart, cellSolveEnd, & + nCells, nEdges, & + nEdgesOnCell, edgesOnCell, cellsOnEdge, edgesOnCell_sign, & + invAreaCell, invDcEdge, dvEdge, & + meshScalingDel2, meshScalingDel4, & + config_mix_full, t_init, zgrid, & + rdzw, rdzu, fzm, fzp, & + v_theta_eddy_visc2, h_theta_eddy_visc4, prandtl_inv, & + delsq_theta, & + theta_m, rho_edge, rho_zz, zz, & + eddy_visc_horz, eddy_visc_vert, & + bn2, config_len_disp, scalars, tend_scalars, & + index_tke, index_qv, num_scalars, mix_scalars, & + config_les_model, & + tend_theta_euler ) + ! else ! this is the original MPAS dissipation code ! From bf39525c523e673133b26abea9c0d19a728ae0fc Mon Sep 17 00:00:00 2001 From: Bill Skamarock Date: Fri, 20 Mar 2020 10:59:32 -0600 Subject: [PATCH 036/146] Clean up and changes to run the prognostic tke les model. initial tests of the prognostic scheme with a convective boundary layer appear to be OK. More testing to follow. --- .../dynamics/mpas_atm_dissipation_models.F | 153 +++++++++--------- .../dynamics/mpas_atm_time_integration.F | 21 ++- 2 files changed, 90 insertions(+), 84 deletions(-) diff --git a/src/core_atmosphere/dynamics/mpas_atm_dissipation_models.F b/src/core_atmosphere/dynamics/mpas_atm_dissipation_models.F index 510b213184..9b1d122866 100644 --- a/src/core_atmosphere/dynamics/mpas_atm_dissipation_models.F +++ b/src/core_atmosphere/dynamics/mpas_atm_dissipation_models.F @@ -12,6 +12,7 @@ module mpas_atm_dissipation_models use mpas_atmphys_constants use mpas_constants use mpas_log + use mpas_derived_types, only : MPAS_LOG_CRIT logical, parameter :: debug_dissipation = .true. logical, parameter :: les_test = .true. 
@@ -89,17 +90,20 @@ end subroutine smagorinsky_2d !--------------------------------------- - subroutine smagorinsky_3d( eddy_visc_horz, eddy_visc_vert, u, v, uCell, vCell, & - w, c_s, bv_freq2, zgrid, config_len_disp, & - deformation_coef_c2, deformation_coef_s2, deformation_coef_cs, & - deformation_coef_c, deformation_coef_s, & - invDt, h_mom_eddy_visc4, config_visc4_2dsmag, h_theta_eddy_visc4, & - scalars, tend_scalars, index_tke, rho_zz, meshScalingDel2, & - cellStart, cellEnd, nEdgesOnCell, edgesOnCell, cellsOnEdge, & - nCells, nEdges, nVertLevels, maxEdges, num_scalars ) + subroutine les_models( config_les_model, eddy_visc_horz, eddy_visc_vert, & + u, v, uCell, vCell, & + w, c_s, bv_freq2, zgrid, config_len_disp, & + deformation_coef_c2, deformation_coef_s2, deformation_coef_cs, & + deformation_coef_c, deformation_coef_s, & + invDt, h_mom_eddy_visc4, config_visc4_2dsmag, h_theta_eddy_visc4, & + scalars, tend_scalars, index_tke, rho_zz, meshScalingDel2, & + cellStart, cellEnd, nEdgesOnCell, edgesOnCell, cellsOnEdge, & + nCells, nEdges, nVertLevels, maxEdges, num_scalars ) implicit none + character (len=StrKIND), intent(in) :: config_les_model + integer, intent(in) :: cellStart, cellEnd, nCells, nEdges, nVertLevels, maxEdges, index_tke, num_scalars real (kind=RKIND), dimension(nVertLevels,nEdges+1), intent(in) :: u real (kind=RKIND), dimension(nVertLevels,nEdges+1), intent(in) :: v @@ -107,7 +111,7 @@ subroutine smagorinsky_3d( eddy_visc_horz, eddy_visc_vert, u, v, uCell, vCell, real (kind=RKIND), dimension(nVertLevels,nCells+1), intent(in) :: vCell real (kind=RKIND), dimension(nVertLevels,nCells+1), intent(in) :: bv_freq2 real (kind=RKIND), dimension(nVertLevels,nCells+1), intent(in) :: rho_zz - real (kind=RKIND), dimension(num_scalars,nVertLevels,nCells+1), intent(in) :: scalars + real (kind=RKIND), dimension(num_scalars,nVertLevels,nCells+1), intent(inout) :: scalars real (kind=RKIND), dimension(num_scalars,nVertLevels,nCells+1) :: tend_scalars real 
(kind=RKIND), dimension(nVertLevels+1,nCells+1), intent(in) :: w real (kind=RKIND), dimension(nVertLevels+1,nCells+1), intent(in) :: zgrid @@ -141,10 +145,16 @@ subroutine smagorinsky_3d( eddy_visc_horz, eddy_visc_vert, u, v, uCell, vCell, logical, parameter :: test_tke=.true. real (kind=RKIND), parameter :: epsilon_bv = 1.e-06 - if(debug_dissipation) call mpas_log_write(' begin smagorinsky_3d ') + if(debug_dissipation) call mpas_log_write(' begin les_models ') + if(debug_dissipation) call mpas_log_write(' les scheme is '//trim(config_les_model)) pr_inv = 1./prandtl + ! set up coefficients for 4th-order horizontal background filter + + h_mom_eddy_visc4 = config_visc4_2dsmag * config_len_disp**3 + h_theta_eddy_visc4 = h_mom_eddy_visc4 + do iCell = cellStart,cellEnd dudx(1:nVertLevels) = 0.0 @@ -205,26 +215,28 @@ subroutine smagorinsky_3d( eddy_visc_horz, eddy_visc_vert, u, v, uCell, vCell, dvdz(k+1) = (vCell(k+1,iCell)-vCell(k,iCell))*rdz do k=1, nVertLevels - ! here is the 3D Smagorinsky formulation, - ! 
followed by imposition of an upper bound on the eddy viscosity d_11(k) = 2.*dudx(k) d_22(k) = 2.*dvdy(k) d_33(k) = 2.*dwdz(k) d_12(k) = dudy(k) + dvdx(k) d_13(k) = dwdx(k) + dudz(k) d_23(k) = dwdy(k) + dvdz(k) + end do - def2 = 0.5*(d_11(k)**2 + d_22(k)**2 + d_33(k)**2) + d_12(k)**2 + d_13(k)**2 + d_23(k)**2 - - eddy_visc_horz(k,iCell) = (c_s * config_len_disp)**2 * sqrt(max(0.,def2 - pr_inv*bv_freq2(k,iCell))) - eddy_visc_horz(k,iCell) = min(eddy_visc_horz(k,iCell),(0.01*config_len_disp**2) * invDt) - eddy_visc_vert(k,iCell) = eddy_visc_horz(k,iCell) + if (config_les_model == "3d_smagorinsky") then - end do + do k=1, nVertLevels + def2 = 0.5*(d_11(k)**2 + d_22(k)**2 + d_33(k)**2) + d_12(k)**2 + d_13(k)**2 + d_23(k)**2 + eddy_visc_horz(k,iCell) = (c_s * config_len_disp)**2 * sqrt(max(0.,def2 - pr_inv*bv_freq2(k,iCell))) + eddy_visc_horz(k,iCell) = min(eddy_visc_horz(k,iCell),(0.01*config_len_disp**2) * invDt) + eddy_visc_vert(k,iCell) = eddy_visc_horz(k,iCell) + end do -!*************************************************** + else if (config_les_model == "prognostic_1.5_order") then - if (test_tke) then + do k=1,nVertLevels ! bound the tke here, currently hardwired + scalars(index_tke,k,iCell) = max(0.,min(100.,scalars(index_tke,k,iCell))) + end do do k=1,nVertLevels @@ -232,7 +244,7 @@ subroutine smagorinsky_3d( eddy_visc_horz, eddy_visc_vert, u, v, uCell, vCell, delta_s = ((config_len_disp**2)*delta_z)**(1./3.) bv = max( sqrt(abs(bv_freq2(k,iCell))), epsilon_bv ) tke_length = 0.76*sqrt(scalars(index_tke,k,iCell))/bv - diss_length = min(delta_s,tke_length) + diss_length = min(delta_s,max(tke_length,0.01*delta_s)) if(bv_freq2(k,iCell) <= 0) diss_length = delta_s ! non-isotropic mixing @@ -251,6 +263,10 @@ subroutine smagorinsky_3d( eddy_visc_horz, eddy_visc_vert, u, v, uCell, vCell, eddy_visc_h = c_k*l_horizontal*sqrt(scalars(index_tke,k,iCell)) eddy_visc_v = c_k*l_vertical*sqrt(scalars(index_tke,k,iCell)) + ! 
testing + !eddy_visc_horz(k,iCell) = eddy_visc_h + !eddy_visc_vert(k,iCell) = eddy_visc_v + ! terms for the prognostic tke integration shear_production = eddy_visc_h*(d_11(k)**2 + d_22(k)**2 + d_12(k)**2) & @@ -271,37 +287,30 @@ subroutine smagorinsky_3d( eddy_visc_horz, eddy_visc_vert, u, v, uCell, vCell, prandtl_horizontal_inv = 3. prandtl_vertical_inv = 1.0+(2.0*l_vertical/delta_z) ! going to need to store this off somewhere - ! eddy viscocities set here if we are running the 1.5 order prognostic tke scheme - ! eddy_visc_horz(k,iCell) = c_k*l_horizontal*sqrt(scalars(index_tke,k,iCell)) - ! eddy_visc_vert(k,iCell) = c_k*l_vertical*sqrt(scalars(index_tke,k,iCell)) - ! RHS term for the subgrid ke. tend_scalars(index_tke,k,iCell) = rho_zz(k,iCell)*( shear_production + buoyancy + dissipation ) end do - end if ! end of test_tke + else -!*************************************************** + call mpas_log_write(' in les_models, no les scheme for '//trim(config_les_model), messageType=MPAS_LOG_CRIT) + + end if ! end of config_les_model test end do ! loop over all owned cells (columns) - ! set up coefficients for 4th-order horizontal background filter - - h_mom_eddy_visc4 = config_visc4_2dsmag * config_len_disp**3 - h_theta_eddy_visc4 = h_mom_eddy_visc4 - - if(debug_dissipation) call mpas_log_write(' exiting smagorinsky_3d ') + if(debug_dissipation) call mpas_log_write(' les_models ') - end subroutine smagorinsky_3d + end subroutine les_models -!--------------------------------------- +!--------------------------------------- subroutine calculate_n2( bn2, theta_m, exner, pressure_b, pp, zgrid, scalars, index_qv, index_qc, qtot, & cellStart, cellEnd, nCells) - use mpas_atm_dimensions ! pull nVertLevels and num_scalars from here + use mpas_atm_dimensions ! 
pull nVertLevels and num_scalars from here integer, intent(in) :: cellStart, cellEnd, nCells integer, intent(in) :: index_qv, index_qc @@ -378,7 +387,7 @@ subroutine calculate_n2( bn2, theta_m, exner, pressure_b, pp, zgrid, scalars, in end subroutine calculate_n2 -!--------------------------------------- +!--------------------------------------- subroutine u_dissipation( edgeStart, edgeEnd, edgeSolveStart, edgeSolveEnd, vertexStart, vertexEnd, & cellStart, cellEnd, nCells, nEdges, nVertices, vertexDegree, & @@ -391,7 +400,7 @@ subroutine u_dissipation( edgeStart, edgeEnd, edgeSolveStart, edgeSolveEnd, vert delsq_u, delsq_vorticity, delsq_divergence, & u, divergence, vorticity, rho_edge, u_init, v_init, tend_u_euler ) - use mpas_atm_dimensions ! pull nVertLevels and maxEdges from here + use mpas_atm_dimensions ! pull nVertLevels and maxEdges from here implicit none @@ -440,17 +449,17 @@ subroutine u_dissipation( edgeStart, edgeEnd, edgeSolveStart, edgeSolveEnd, vert real (kind=RKIND), dimension(nVertLevels,nEdges+1), intent(out) :: tend_u_euler - ! local variables + ! local variables integer :: iEdge, cell1, cell2, vertex1, vertex2, iVertex, iCell, i, k real (kind=RKIND) :: r_dc, r_dv, u_diffusion, kdiffu, r, edge_sign, u_mix_scale real (kind=RKIND) :: z1, z2, z3, z4, zm, z0, zp real (kind=RKIND), dimension(nVertLevels) :: u_mix -!$OMP BARRIER +!$OMP BARRIER - ! del^4 horizontal filter. We compute this as del^2 ( del^2 (u) ). - ! First, storage to hold the result from the first del^2 computation. + ! del^4 horizontal filter. We compute this as del^2 ( del^2 (u) ). + ! First, storage to hold the result from the first del^2 computation. delsq_u(1:nVertLevels,edgeStart:edgeEnd) = 0.0 @@ -462,11 +471,11 @@ subroutine u_dissipation( edgeStart, edgeEnd, edgeSolveStart, edgeSolveEnd, vert r_dc = invDcEdge(iEdge) r_dv = min(invDvEdge(iEdge), 4*invDcEdge(iEdge)) -!DIR$ IVDEP +!DIR$ IVDEP do k=1,nVertLevels - ! 
Compute diffusion, computed as \nabla divergence - k \times \nabla vorticity - ! only valid for h_mom_eddy_visc4 == constant + ! Compute diffusion, computed as \nabla divergence - k \times \nabla vorticity + ! only valid for h_mom_eddy_visc4 == constant u_diffusion = ( divergence(k,cell2) - divergence(k,cell1) ) * r_dc & -( vorticity(k,vertex2) - vorticity(k,vertex1) ) * r_dv @@ -474,17 +483,17 @@ subroutine u_dissipation( edgeStart, edgeEnd, edgeSolveStart, edgeSolveEnd, vert kdiffu = 0.5*(kdiff(k,cell1)+kdiff(k,cell2)) - ! include 2nd-orer diffusion here + ! include 2nd-orer diffusion here + tend_u_euler(k,iEdge) = tend_u_euler(k,iEdge) & + rho_edge(k,iEdge)* kdiffu * u_diffusion * meshScalingDel2(iEdge) end do end do - if (h_mom_eddy_visc4 > 0.0) then ! 4th order mixing is active - -!$OMP BARRIER + if (h_mom_eddy_visc4 > 0.0) then ! 4th order mixing is active +!$OMP BARRIER do iVertex=vertexStart,vertexEnd delsq_vorticity(1:nVertLevels,iVertex) = 0.0 do i=1,vertexDegree @@ -508,8 +517,7 @@ subroutine u_dissipation( edgeStart, edgeEnd, edgeSolveStart, edgeSolveEnd, vert end do end do -!$OMP BARRIER - +!$OMP BARRIER do iEdge=edgeSolveStart,edgeSolveEnd cell1 = cellsOnEdge(1,iEdge) cell2 = cellsOnEdge(2,iEdge) @@ -520,16 +528,16 @@ subroutine u_dissipation( edgeStart, edgeEnd, edgeSolveStart, edgeSolveEnd, vert r_dc = u_mix_scale * config_del4u_div_factor * invDcEdge(iEdge) r_dv = u_mix_scale * min(invDvEdge(iEdge), 4*invDcEdge(iEdge)) -!DIR$ IVDEP +!DIR$ IVDEP do k=1,nVertLevels - ! Compute diffusion, computed as \nabla divergence - k \times \nabla vorticity - ! only valid for h_mom_eddy_visc4 == constant - ! - ! Here, we scale the diffusion on the divergence part a factor of config_del4u_div_factor - ! relative to the rotational part. The stability constraint on the divergence component is much less - ! stringent than the rotational part, and this flexibility may be useful. - ! + ! Compute diffusion, computed as \nabla divergence - k \times \nabla vorticity + ! 
only valid for h_mom_eddy_visc4 == constant + ! + ! Here, we scale the diffusion on the divergence part a factor of config_del4u_div_factor + ! relative to the rotational part. The stability constraint on the divergence component is much less + ! stringent than the rotational part, and this flexibility may be useful. + ! u_diffusion = rho_edge(k,iEdge) * ( ( delsq_divergence(k,cell2) - delsq_divergence(k,cell1) ) * r_dc & -( delsq_vorticity(k,vertex2) - delsq_vorticity(k,vertex1) ) * r_dv ) tend_u_euler(k,iEdge) = tend_u_euler(k,iEdge) - u_diffusion @@ -537,14 +545,13 @@ subroutine u_dissipation( edgeStart, edgeEnd, edgeSolveStart, edgeSolveEnd, vert end do end do - end if ! 4th order mixing is active - - ! - ! vertical mixing for u - 2nd order filter in physical (z) space - ! + end if ! 4th order mixing is active + ! + ! vertical mixing for u - 2nd order filter in physical (z) space + ! if ( v_mom_eddy_visc2 > 0.0 ) then - if (config_mix_full) then ! mix full state + if (config_mix_full) then ! mix full state do iEdge=edgeSolveStart,edgeSolveEnd @@ -568,9 +575,9 @@ subroutine u_dissipation( edgeStart, edgeEnd, edgeSolveStart, edgeSolveEnd, vert end do end do - else ! idealized cases where we mix on the perturbation from the initial 1-D state + else ! idealized cases where we mix on the perturbation from the initial 1-D state - do iEdge=edgeSolveStart,edgeSolveEnd + do iEdge=edgeSolveStart,edgeSolveEnd cell1 = cellsOnEdge(1,iEdge) cell2 = cellsOnEdge(2,iEdge) @@ -597,9 +604,9 @@ subroutine u_dissipation( edgeStart, edgeEnd, edgeSolveStart, edgeSolveEnd, vert end do end do - end if ! mix perturbation state + end if ! mix perturbation state - end if ! vertical mixing of horizontal momentum + end if ! vertical mixing of horizontal momentum end subroutine u_dissipation @@ -849,9 +856,9 @@ subroutine u_dissipation_3d( edgeStart, edgeEnd, edgeSolveStart, edgeSolveEnd, v end if ! mix perturbation state - end if ! 
vertical mixing of horizontal momentum for fixe viscosity + end if ! vertical mixing of horizontal momentum for les formulation - if ( config_les_model == "3d_smagorinsky") then + if ( config_les_model /= "none") then do iEdge=edgeSolveStart,edgeSolveEnd @@ -1159,7 +1166,7 @@ subroutine w_dissipation_3d( cellStart, cellEnd, cellSolveStart, cellSolveEnd, end if - if ( config_les_model == "3d_smagorinsky") then + if ( config_les_model /= "none") then do iCell = cellSolveStart,cellSolveEnd ! vertical mixing for each column ! compute turbulent fluxes @@ -1505,7 +1512,7 @@ subroutine theta_dissipation_3d( cellStart, cellEnd, cellSolveStart, cellSolveEn end if - if ( config_les_model == "3d_smagorinsky") then + if ( config_les_model /= "none") then do iCell = cellSolveStart,cellSolveEnd ! vertical mixing for each column ! compute turbulent fluxes diff --git a/src/core_atmosphere/dynamics/mpas_atm_time_integration.F b/src/core_atmosphere/dynamics/mpas_atm_time_integration.F index f79485b5be..961ceaefa3 100644 --- a/src/core_atmosphere/dynamics/mpas_atm_time_integration.F +++ b/src/core_atmosphere/dynamics/mpas_atm_time_integration.F @@ -5286,21 +5286,22 @@ subroutine atm_compute_dyn_tend_work(nCells, nEdges, nVertices, nVertLevels_dumm end if - else if (config_les_model == "3d_smagorinsky") then + else if (config_les_model /= "none") then ! 
call mpas_log_write(' BV call, index qv, qc, tke $i $i $i ', intArgs=(/index_qv, index_qc, index_tke/)) call calculate_n2( bn2, theta_m, exner, pressure_b, pp, zgrid, scalars, index_qv, index_qc, qtot, & cellStart, cellEnd, nCells) - call smagorinsky_3d( eddy_visc_horz, eddy_visc_vert, u, v, ur_cell, vr_cell, & - w, c_s, bn2, zgrid, config_len_disp, & - deformation_coef_c2, deformation_coef_s2, deformation_coef_cs, & - deformation_coef_c, deformation_coef_s, & - invDt, h_mom_eddy_visc4, config_visc4_2dsmag, h_theta_eddy_visc4, & - scalars, tend_scalars, index_tke, rho_zz, meshScalingDel2, & - cellStart, cellEnd, nEdgesOnCell, edgesOnCell, cellsOnEdge, & - nCells, nEdges, nVertLevels, maxEdges, num_scalars ) + call les_models( config_les_model, eddy_visc_horz, eddy_visc_vert, & + u, v, ur_cell, vr_cell, & + w, c_s, bn2, zgrid, config_len_disp, & + deformation_coef_c2, deformation_coef_s2, deformation_coef_cs, & + deformation_coef_c, deformation_coef_s, & + invDt, h_mom_eddy_visc4, config_visc4_2dsmag, h_theta_eddy_visc4, & + scalars, tend_scalars, index_tke, rho_zz, meshScalingDel2, & + cellStart, cellEnd, nEdgesOnCell, edgesOnCell, cellsOnEdge, & + nCells, nEdges, nVertLevels, maxEdges, num_scalars ) end if @@ -5330,8 +5331,6 @@ subroutine atm_compute_dyn_tend_work(nCells, nEdges, nVertices, nVertLevels_dumm end if ! tendency for density. - ! accumulate total water here for later use in w tendency calculation. - ! accumulate horizontal mass-flux !$acc parallel default(present) From 203fdef7c346af77fdcc0702e91e7bdcb10dddc1 Mon Sep 17 00:00:00 2001 From: Bill Skamarock Date: Tue, 24 Mar 2020 09:25:43 -0600 Subject: [PATCH 037/146] bug fixes and the addition of the Prandtl number formulation to the scalar mixing routines for the prognostic tke scheme. more testing to follow. 
--- .../dynamics/mpas_atm_dissipation_models.F | 61 +++++++++++++------ 1 file changed, 44 insertions(+), 17 deletions(-) diff --git a/src/core_atmosphere/dynamics/mpas_atm_dissipation_models.F b/src/core_atmosphere/dynamics/mpas_atm_dissipation_models.F index 9b1d122866..d99eb221d1 100644 --- a/src/core_atmosphere/dynamics/mpas_atm_dissipation_models.F +++ b/src/core_atmosphere/dynamics/mpas_atm_dissipation_models.F @@ -14,10 +14,13 @@ module mpas_atm_dissipation_models use mpas_log use mpas_derived_types, only : MPAS_LOG_CRIT - logical, parameter :: debug_dissipation = .true. + logical, parameter :: debug_dissipation = .false. logical, parameter :: les_test = .true. real (kind=RKIND), parameter :: tke_heat_flux = 0.24 real (kind=RKIND), parameter :: tke_drag_coefficient = 0.0013 + real (kind=RKIND), parameter :: epsilon_bv = 1.e-06 + real (kind=RKIND), parameter :: c_k = 0.25 + contains @@ -138,12 +141,12 @@ subroutine les_models( config_les_model, eddy_visc_horz, eddy_visc_vert, real (kind=RKIND), dimension(nVertLevels) :: dudz, dvdz, dwdz real (kind=RKIND) :: rdz, def2, pr_inv, wk real (kind=RKIND) :: shear_production, buoyancy, dissipation, delta_z, delta_s, bv, tke_length, diss_length - real (kind=RKIND) :: l_horizontal, l_vertical, c_k, c_dissipation + real (kind=RKIND) :: l_horizontal, l_vertical, c_dissipation real (kind=RKIND) :: prandtl_horizontal_inv, prandtl_vertical_inv real (kind=RKIND) :: eddy_visc_h, eddy_visc_v logical, parameter :: test_tke=.true. - real (kind=RKIND), parameter :: epsilon_bv = 1.e-06 + ! real (kind=RKIND), parameter :: epsilon_bv = 1.e-06 if(debug_dissipation) call mpas_log_write(' begin les_models ') if(debug_dissipation) call mpas_log_write(' les scheme is '//trim(config_les_model)) @@ -243,7 +246,9 @@ subroutine les_models( config_les_model, eddy_visc_horz, eddy_visc_vert, delta_z = zgrid(k+1,iCell)-zgrid(k,iCell) delta_s = ((config_len_disp**2)*delta_z)**(1./3.) 
bv = max( sqrt(abs(bv_freq2(k,iCell))), epsilon_bv ) - tke_length = 0.76*sqrt(scalars(index_tke,k,iCell))/bv + tke_length = delta_s + if(bv_freq2(k,iCell) .gt. 1.e-06) & + tke_length = 0.76*sqrt(scalars(index_tke,k,iCell))/bv diss_length = min(delta_s,max(tke_length,0.01*delta_s)) if(bv_freq2(k,iCell) <= 0) diss_length = delta_s @@ -264,8 +269,8 @@ subroutine les_models( config_les_model, eddy_visc_horz, eddy_visc_vert, eddy_visc_v = c_k*l_vertical*sqrt(scalars(index_tke,k,iCell)) ! testing - !eddy_visc_horz(k,iCell) = eddy_visc_h - !eddy_visc_vert(k,iCell) = eddy_visc_v + eddy_visc_horz(k,iCell) = eddy_visc_h + eddy_visc_vert(k,iCell) = eddy_visc_v ! terms for the prognostic tke integration @@ -276,9 +281,8 @@ subroutine les_models( config_les_model, eddy_visc_horz, eddy_visc_vert, ! dissipation - c_k = 0.25 c_dissipation = 1.9*c_k + max( 0.0, 0.93 - 1.9*c_k )*diss_length/delta_s - if( (k.eq. 1) .or. (k.eq.nVertLevels) ) c_dissipation = 3.9 + ! if( (k.eq. 1) .or. (k.eq.nVertLevels) ) c_dissipation = 3.9 dissipation = -c_dissipation*(scalars(index_tke,k,iCell)**(1.5))/diss_length @@ -1626,9 +1630,10 @@ subroutine scalar_dissipation_3d_les( cellStart, cellEnd, cellSolveStart, cellSo integer :: cell1, cell2, iEdge, iCell, i, k, iScalar real (kind=RKIND) :: r_areaCell, edge_sign, theta_turb_flux, pr_scale real (kind=RKIND) :: z1, z2, z3, z4, zm, z0, zp - real (kind=RKIND), dimension(nVertLevels+1) :: turb_vflux + real (kind=RKIND), dimension(nVertLevels+1) :: turb_vflux, prandtl_inverse real (kind=RKIND), dimension(num_scalars,nVertLevels+1) :: turb_vflux_scalars real (kind=RKIND) :: rho_k_at_w, zz_at_w + real (kind=RKIND) :: delta_z, delta_s, tke_length, bv_frequency2 logical, parameter :: vmix_scalars = .true. logical, parameter :: hmix_scalars = .true. @@ -1795,13 +1800,38 @@ subroutine scalar_dissipation_3d_les( cellStart, cellEnd, cellSolveStart, cellSo do iCell = cellSolveStart,cellSolveEnd ! vertical mixing for each column ! 
compute turbulent fluxes + + turb_vflux(nVertlevels+1) = 0. ! no turbulent flux out of the domain turb_vflux(1) = 0. ! lower bc flux handled where ??? + + if ( config_les_model == "3d_smagorinsky") then + do k=2,nVertLevels + prandtl_inverse(k) = prandtl_inv + end do + else ! prognostic_1.5_order + do k=2,nVertLevels + delta_z = 0.5*(zgrid(k+1,iCell)-zgrid(k-1,iCell)) + delta_s = ((config_len_disp**2)*delta_z)**(1./3.) + bv_frequency2 = 0.5*(bv_freq2(k,iCell)+bv_freq2(k-1,iCell)) + tke_length = delta_s + if(bv_frequency2 .gt. 1.e-06) & + tke_length = 0.76*sqrt(scalars(index_tke,k,iCell))/sqrt(bv_frequency2) + prandtl_inverse(k) = 1. + 2.*tke_length/delta_z + end do + end if + do k=2,nVertLevels + + ! delta_z = 0.5*(zgrid(k+1,iCell)-zgrid(k-1,iCell)) + ! delta_s = ((config_len_disp**2)*delta_z)**(1./3.) + ! bv_frequency2 = 0.5*(bv_freq2(k)+bv_freq(k-1)) + ! bv = max( sqrt(abs(bv_frequency2)), epsilon_bv ) + rho_k_at_w = fzm(k)*rho_zz(k ,iCell)*zz(k ,iCell)*eddy_visc_vert(k ,iCell) & +fzp(k)*rho_zz(k-1,iCell)*zz(k-1,iCell)*eddy_visc_vert(k-1,iCell) zz_at_w = fzm(k)*zz(k,iCell)+fzp(k)*zz(k-1,iCell) - turb_vflux(k) = - rho_k_at_w*zz_at_w*rdzu(k)*(theta_m(k,iCell)-theta_m(k-1,iCell)) + turb_vflux(k) = - prandtl_inverse(k)*rho_k_at_w*zz_at_w*rdzu(k)*(theta_m(k,iCell)-theta_m(k-1,iCell)) end do ! test boundary conditions for supercell and les test cases @@ -1820,11 +1850,8 @@ subroutine scalar_dissipation_3d_les( cellStart, cellEnd, cellSolveStart, cellSo - rdzw(k)*(turb_vflux(k+1)-turb_vflux(k)) end do - end do - - if (mix_scalars .and. vmix_scalars) then + if (mix_scalars .and. vmix_scalars) then - do iCell = cellSolveStart,cellSolveEnd ! vertical mixing for each column ! compute turbulent fluxes turb_vflux_scalars(:,nVertlevels+1) = 0. ! no turbulent flux out of the domain turb_vflux_scalars(:,1) = 0. ! lower bc flux handled where ??? 
@@ -1833,7 +1860,7 @@ subroutine scalar_dissipation_3d_les( cellStart, cellEnd, cellSolveStart, cellSo +fzp(k)*rho_zz(k-1,iCell)*zz(k-1,iCell)*eddy_visc_vert(k-1,iCell) zz_at_w = fzm(k)*zz(k,iCell)+fzp(k)*zz(k-1,iCell) do iScalar=1,num_scalars - turb_vflux_scalars(iScalar,k) = - rho_k_at_w*zz_at_w*rdzu(k)* & + turb_vflux_scalars(iScalar,k) = - prandtl_inverse(k)*rho_k_at_w*zz_at_w*rdzu(k)* & (scalars(iScalar,k,iCell)-scalars(iScalar,k-1,iCell)) end do end do @@ -1858,9 +1885,9 @@ subroutine scalar_dissipation_3d_les( cellStart, cellEnd, cellSolveStart, cellSo end do end do - end do - end if ! mix scalars + + end do ! loop over cells (columns) end if From 1f442aecea508b0d492f34dc0651fa1272df0039 Mon Sep 17 00:00:00 2001 From: Bill Skamarock Date: Thu, 2 Apr 2020 11:38:42 -0600 Subject: [PATCH 038/146] changed Prandtl number to 1/3 for LES simulations in the MPAS_constants module. --- src/framework/mpas_constants.F | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/framework/mpas_constants.F b/src/framework/mpas_constants.F index 2c8168510a..5822f9c0e6 100644 --- a/src/framework/mpas_constants.F +++ b/src/framework/mpas_constants.F @@ -53,7 +53,7 @@ module mpas_constants real (kind=RKIND), parameter :: cvpm = -cv / cp ! #endif real (kind=RKIND), parameter :: p0 = 1.0e5_RKIND !< Constant: 100000 Pa - real (kind=RKIND), parameter :: prandtl = 1.0_RKIND !< Constant: Prandtl number + real (kind=RKIND), parameter :: prandtl = 1.0_RKIND/3.0_RKIND !< Constant: Prandtl number contains From 57a88294e26633558ae83e0c40c41439d54e06d2 Mon Sep 17 00:00:00 2001 From: Bill Skamarock Date: Thu, 2 Apr 2020 11:40:56 -0600 Subject: [PATCH 039/146] implemented a perturbation coriolis term option for the LES cases. 
--- .../dynamics/mpas_atm_dissipation_models.F | 38 ++++++++++--------- .../dynamics/mpas_atm_time_integration.F | 15 ++++++++ 2 files changed, 35 insertions(+), 18 deletions(-) diff --git a/src/core_atmosphere/dynamics/mpas_atm_dissipation_models.F b/src/core_atmosphere/dynamics/mpas_atm_dissipation_models.F index d99eb221d1..e0f802eae7 100644 --- a/src/core_atmosphere/dynamics/mpas_atm_dissipation_models.F +++ b/src/core_atmosphere/dynamics/mpas_atm_dissipation_models.F @@ -16,10 +16,12 @@ module mpas_atm_dissipation_models logical, parameter :: debug_dissipation = .false. logical, parameter :: les_test = .true. - real (kind=RKIND), parameter :: tke_heat_flux = 0.24 - real (kind=RKIND), parameter :: tke_drag_coefficient = 0.0013 + !! real (kind=RKIND), parameter :: tke_heat_flux = 0.03 ! shear case from Moeng et al., first hour + real (kind=RKIND), parameter :: tke_heat_flux = 0.0 + !! real (kind=RKIND), parameter :: tke_drag_coefficient = 0.0013 ! ocean roughness length + real (kind=RKIND), parameter :: tke_drag_coefficient = 0.00935 real (kind=RKIND), parameter :: epsilon_bv = 1.e-06 - real (kind=RKIND), parameter :: c_k = 0.25 + real (kind=RKIND), parameter :: c_k = 0.1 contains @@ -588,7 +590,7 @@ subroutine u_dissipation( edgeStart, edgeEnd, edgeSolveStart, edgeSolveEnd, vert do k=1,nVertLevels u_mix(k) = u(k,iEdge) - u_init(k) * cos( angleEdge(iEdge) ) & - - v_init(k) * sin( angleEdge(iEdge) ) + + v_init(k) * sin( angleEdge(iEdge) ) end do do k=2,nVertLevels-1 @@ -838,7 +840,7 @@ subroutine u_dissipation_3d( edgeStart, edgeEnd, edgeSolveStart, edgeSolveEnd, v do k=1,nVertLevels u_mix(k) = u(k,iEdge) - u_init(k) * cos( angleEdge(iEdge) ) & - - v_init(k) * sin( angleEdge(iEdge) ) + + v_init(k) * sin( angleEdge(iEdge) ) end do do k=2,nVertLevels-1 @@ -1568,6 +1570,7 @@ subroutine scalar_dissipation_3d_les( cellStart, cellEnd, cellSolveStart, cellSo bv_freq2, config_len_disp, scalars, tend_scalars, & index_tke, index_qv, num_scalars_dummy, mix_scalars, & 
config_les_model, & + uReconstructZonal, uReconstructMeridional, & tend_theta_euler ) @@ -1609,6 +1612,7 @@ subroutine scalar_dissipation_3d_les( cellStart, cellEnd, cellSolveStart, cellSo real (kind=RKIND), dimension(nVertLevels), intent(in) :: fzp real (kind=RKIND), dimension(nVertLevels+1, nCells+1), intent(in) :: zgrid real (kind=RKIND), dimension(nVertLevels,nCells+1), intent(in) :: zz + real (kind=RKIND), dimension(nVertLevels,nCells+1), intent(in) :: uReconstructZonal, uReconstructMeridional real (kind=RKIND), dimension(nVertLevels,nCells+1), intent(in) :: t_init @@ -1638,6 +1642,7 @@ subroutine scalar_dissipation_3d_les( cellStart, cellEnd, cellSolveStart, cellSo logical, parameter :: vmix_scalars = .true. logical, parameter :: hmix_scalars = .true. logical, parameter :: hmix4_scalars = .false. + real (kind=RKIND) :: moisture_flux, heat_flux, theta_m_flux, c_h, c_q if(debug_dissipation) then call mpas_log_write(' begin scalar_dissipation_3d ') @@ -1837,7 +1842,15 @@ subroutine scalar_dissipation_3d_les( cellStart, cellEnd, cellSolveStart, cellSo ! test boundary conditions for supercell and les test cases if( les_test ) then - turb_vflux(1) = tke_heat_flux*rho_zz(1,iCell) ! this is correct for DRY CASE ONLY + moisture_flux = 0. + heat_flux = tke_heat_flux + + ! bulk formulation will go here. + + theta_m_flux = heat_flux*(1.0+(rv/rgas)*scalars(index_qv,1,iCell)) & + +(rv/rgas)*theta_m(1,iCell)*moisture_flux/rho_zz(k,iCell) + ! turb_vflux(1) = tke_heat_flux*rho_zz(1,iCell) ! this is correct for DRY CASE ONLY + turb_vflux(1) = theta_m_flux*rho_zz(1,iCell) turb_vflux(nVertLevels+1) = turb_vflux(nVertLevels) else turb_vflux(1) = turb_vflux(2) @@ -1865,18 +1878,7 @@ subroutine scalar_dissipation_3d_les( cellStart, cellEnd, cellSolveStart, cellSo end do end do - ! test boundary conditions for supercell and les test cases - - ! need lower bc for qv here... - - ! if( les_test ) then - ! turb_vflux_scalars(1) = tke_heat_flux*rho_zz(1,iCell) ! 
this is correct for DRY CASE ONLY - ! turb_vflux_scalars(nVertLevels+1) = turb_vflux_scalars(nVertLevels) - ! else - ! turb_vflux_scalars(1) = turb_vflux_scalars(2) - ! turb_vflux_scalars(nVertLevels+1) = turb_vflux_scalars(nVertLevels) - ! end if - + if( les_test ) turb_vflux_scalars(index_qv,1) = moisture_flux ! lower b.c. for qv do k=1,nVertLevels do iScalar=1,num_scalars diff --git a/src/core_atmosphere/dynamics/mpas_atm_time_integration.F b/src/core_atmosphere/dynamics/mpas_atm_time_integration.F index 961ceaefa3..5d65db2752 100644 --- a/src/core_atmosphere/dynamics/mpas_atm_time_integration.F +++ b/src/core_atmosphere/dynamics/mpas_atm_time_integration.F @@ -5198,6 +5198,10 @@ subroutine atm_compute_dyn_tend_work(nCells, nEdges, nVertices, nVertLevels_dumm real (kind=RKIND) :: flux3, flux4 real (kind=RKIND) :: q_im2, q_im1, q_i, q_ip1, ua, coef3 + logical, parameter :: perturbation_coriolis = .true. + real (kind=RKIND), parameter :: coriolis_value = 1.e-04 + real (kind=RKIND) :: reference_u + flux4(q_im2, q_im1, q_i, q_ip1, ua) = & ua*( 7.*(q_i + q_im1) - (q_ip1 + q_im2) )/12.0 @@ -5458,6 +5462,16 @@ subroutine atm_compute_dyn_tend_work(nCells, nEdges, nVertices, nVertLevels_dumm end do end do + if (perturbation_coriolis) then ! 
this is correct only for constant f + do j = 1,nEdgesOnEdge(iEdge) + eoe = edgesOnEdge(j,iEdge) + do k=1,nVertLevels + reference_u = u_init(k) * cos(angleEdge(eoe)) - v_init(k) * sin(angleEdge(eoe)) + q(k) = q(k) - weightsOnEdge(j,iEdge) * reference_u * coriolis_value + end do + end do + end if + !DIR$ IVDEP !$acc loop vector do k=1,nVertLevels @@ -5849,6 +5863,7 @@ subroutine atm_compute_dyn_tend_work(nCells, nEdges, nVertices, nVertLevels_dumm bn2, config_len_disp, scalars, tend_scalars, & index_tke, index_qv, num_scalars, mix_scalars, & config_les_model, & + ur_cell, vr_cell, & tend_theta_euler ) From 7ddd21d10bc15436a7975e2d8ecba8920d6c4d7c Mon Sep 17 00:00:00 2001 From: Bill Skamarock Date: Thu, 2 Apr 2020 11:42:04 -0600 Subject: [PATCH 040/146] introduced the sheared PBL test case for the LES implementation, including setting both u_init and v_init properly for the perturbation coriolis terms. --- .../mpas_init_atm_cases.F | 58 ++++++++++++++++--- 1 file changed, 50 insertions(+), 8 deletions(-) diff --git a/src/core_init_atmosphere/mpas_init_atm_cases.F b/src/core_init_atmosphere/mpas_init_atm_cases.F index 0160139fb8..2371016310 100644 --- a/src/core_init_atmosphere/mpas_init_atm_cases.F +++ b/src/core_init_atmosphere/mpas_init_atm_cases.F @@ -6267,7 +6267,7 @@ subroutine init_atm_case_les(dminfo, mesh, nCells, nVertLevels, state, diag, tes real (kind=RKIND), pointer :: config_ztop real (kind=RKIND), dimension(:,:), pointer :: t_init, w, rw, v, rho, theta - real (kind=RKIND), dimension(:), pointer :: u_init, qv_init, angleEdge, fEdge, fVertex + real (kind=RKIND), dimension(:), pointer :: u_init, v_init, qv_init, angleEdge, fEdge, fVertex real (kind=RKIND) :: u_vel, v_vel, randx call mpas_pool_get_array(mesh, 'xCell', xCell) @@ -6336,6 +6336,7 @@ subroutine init_atm_case_les(dminfo, mesh, nCells, nVertLevels, state, diag, tes call mpas_pool_get_array(mesh, 'dss', dss) call mpas_pool_get_array(mesh, 't_init', t_init) call mpas_pool_get_array(mesh, 'u_init', 
u_init) + call mpas_pool_get_array(mesh, 'v_init', v_init) call mpas_pool_get_array(mesh, 'qv_init', qv_init) call mpas_pool_get_array(mesh, 'angleEdge', angleEdge) call mpas_pool_get_array(mesh, 'fEdge', fEdge) @@ -6510,7 +6511,10 @@ subroutine init_atm_case_les(dminfo, mesh, nCells, nVertLevels, state, diag, tes u_vel = atm_get_sounding('u',ztemp) v_vel = atm_get_sounding('v',ztemp) u(k,i) = cos(angleEdge(i))*u_vel - sin(angleEdge(i))*v_vel - if(i == 1 ) u_init(k) = u(k,i) + if(i == 1 ) then + u_init(k) = u_vel + v_init(k) = v_vel + end if end do end if end do @@ -6749,25 +6753,63 @@ real (kind=RKIND) function atm_get_sounding( variable, height ) atm_get_sounding = -999. if(variable == 'u') then - atm_get_sounding = 0. + atm_get_sounding = 15.0 else if (variable == 'v') then atm_get_sounding = 0. else if (variable == 'qv') then - atm_get_sounding = 0. + + atm_get_sounding = 0. ! dry sounding + ! atm_get_sounding = 0.010 + ! if(height .gt. 500.) atm_get_sounding = 0.004 + else if (variable == 'theta') then - if(height .le. 1000.) then + if(height .le. 500.) then atm_get_sounding = 300. - else if(height .le. 1150.) then - atm_get_sounding = 300. + (height-1000.)*8./150. + else if(height .le. 600.) then + atm_get_sounding = 300. + (height-500.)*3./100. else - atm_get_sounding = 308. + (height-1150.)*3./1000. + atm_get_sounding = 303. + (height-600.)*3./1000. end if + ! atm_get_sounding = atm_get_sounding - 10.0 ! for water case end if end function atm_get_sounding + real (kind=RKIND) function atm_get_sounding_1( variable, height ) + + implicit none + real (kind=RKIND), intent(in) :: height + character(len=*), intent(in) :: variable + + atm_get_sounding_1 = -999. + + if(variable == 'u') then + atm_get_sounding_1 = 10.0 + else if (variable == 'v') then + atm_get_sounding_1 = 0. + else if (variable == 'qv') then + + ! atm_get_sounding_1 = 0. ! dry sounding_1 + atm_get_sounding_1 = 0.010 + if(height .gt. 1000.) 
atm_get_sounding_1 = 0.004 + + else if (variable == 'theta') then + + if(height .le. 1000.) then + atm_get_sounding_1 = 300. + else if(height .le. 1150.) then + atm_get_sounding_1 = 300. + (height-1000.)*8./150. + else + atm_get_sounding_1 = 308. + (height-1150.)*3./1000. + end if + atm_get_sounding_1 = atm_get_sounding_1 - 10.0 ! for water case + + end if + + end function atm_get_sounding_1 + !----------- !----------------------------------------------------------------------- From 6212e0e5a61b821a2437f970095c660f55c73888 Mon Sep 17 00:00:00 2001 From: Bill Skamarock Date: Thu, 23 Apr 2020 13:59:49 -0600 Subject: [PATCH 041/146] Implemented SAS case sounding in LES initialization. --- .../mpas_init_atm_cases.F | 92 ++++++++++++++----- 1 file changed, 67 insertions(+), 25 deletions(-) diff --git a/src/core_init_atmosphere/mpas_init_atm_cases.F b/src/core_init_atmosphere/mpas_init_atm_cases.F index 2371016310..e9dc369a29 100644 --- a/src/core_init_atmosphere/mpas_init_atm_cases.F +++ b/src/core_init_atmosphere/mpas_init_atm_cases.F @@ -6248,7 +6248,7 @@ subroutine init_atm_case_les(dminfo, mesh, nCells, nVertLevels, state, diag, tes real (kind=RKIND) :: r, xnutr real (kind=RKIND) :: ztemp, zd, zt, dz, str - real (kind=RKIND), dimension(nVertLevels ) :: qvb + real (kind=RKIND), dimension(nVertLevels ) :: qvb, qvp, zg real (kind=RKIND), dimension(nVertLevels ) :: t_init_1d real (kind=RKIND) :: d1, d2, d3, cof1, cof2 @@ -6580,6 +6580,8 @@ subroutine init_atm_case_les(dminfo, mesh, nCells, nVertLevels, state, diag, tes ! qvb(k) = scalars(index_qv,k,1) ! QVB = 0 PRODUCES DRY REFERENCE STATE qvb(k) = 0. + qvp(k) = scalars(index_qv,k,1)*1000. 
+ zg(k) = .5*(zgrid(k,1)+zgrid(k+1,1)) !********************************************************************* end do @@ -6596,10 +6598,10 @@ subroutine init_atm_case_les(dminfo, mesh, nCells, nVertLevels, state, diag, tes end do !end of iteration loop - call mpas_log_write(' base state sounding ') - call mpas_log_write(' k, pb, rb, tb, rtb, t, rr, p, qvb') + call mpas_log_write(' sounding ') + call mpas_log_write(' k, zg, rb, tb, rtb, t, rr, p, qvp') do k=1,nVertLevels - call mpas_log_write('$i $r $r $r $r $r $r $r $r', intArgs=(/k/), realArgs=(/pb(k,1),rb(k,1),tb(k,1),rtb(k,1),t(k,1),rr(k,1),p(k,1),qvb(k)/)) + call mpas_log_write('$i $r $r $r $r $r $r $r $r', intArgs=(/k/), realArgs=(/zg(k),rb(k,1),tb(k,1),rtb(k,1),t(k,1),rr(k,1),p(k,1),qvp(k)/)) end do ! @@ -6751,32 +6753,39 @@ real (kind=RKIND) function atm_get_sounding( variable, height ) character(len=*), intent(in) :: variable atm_get_sounding = -999. +! SAS case sounding if(variable == 'u') then - atm_get_sounding = 15.0 + atm_get_sounding = 0.0 + else if (variable == 'v') then atm_get_sounding = 0. - else if (variable == 'qv') then - - atm_get_sounding = 0. ! dry sounding - ! atm_get_sounding = 0.010 - ! if(height .gt. 500.) atm_get_sounding = 0.004 else if (variable == 'theta') then - if(height .le. 500.) then - atm_get_sounding = 300. - else if(height .le. 600.) then - atm_get_sounding = 300. + (height-500.)*3./100. + if(height .le. 352.5) then + atm_get_sounding = 296.6 + else if(height .le. 442.5) then + atm_get_sounding = 296.6 + (height-352.5)*1.5/90. + else + atm_get_sounding = 298.1 + (height-442.5)*0.003 + end if + + else if (variable == 'qv') then + + if(height .le. 352.5) then + atm_get_sounding = 11.8/1000. + else if(height .le. 442.5) then + atm_get_sounding = 11.8/1000. - (height-352.5)*4.0/90./1000. else - atm_get_sounding = 303. + (height-600.)*3./1000. + atm_get_sounding = max(7.8/1000. - (height-442.5)*0.004/1000.,0.0) end if - ! atm_get_sounding = atm_get_sounding - 10.0 ! 
for water case end if end function atm_get_sounding + real (kind=RKIND) function atm_get_sounding_1( variable, height ) implicit none @@ -6786,30 +6795,63 @@ real (kind=RKIND) function atm_get_sounding_1( variable, height ) atm_get_sounding_1 = -999. if(variable == 'u') then - atm_get_sounding_1 = 10.0 + atm_get_sounding_1 = 15.0 else if (variable == 'v') then atm_get_sounding_1 = 0. else if (variable == 'qv') then - ! atm_get_sounding_1 = 0. ! dry sounding_1 - atm_get_sounding_1 = 0.010 - if(height .gt. 1000.) atm_get_sounding_1 = 0.004 + atm_get_sounding_1 = 0. ! dry sounding + ! atm_get_sounding_1 = 0.010 + ! if(height .gt. 500.) atm_get_sounding_1 = 0.004 else if (variable == 'theta') then - if(height .le. 1000.) then + if(height .le. 500.) then atm_get_sounding_1 = 300. - else if(height .le. 1150.) then - atm_get_sounding_1 = 300. + (height-1000.)*8./150. + else if(height .le. 600.) then + atm_get_sounding_1 = 300. + (height-500.)*3./100. else - atm_get_sounding_1 = 308. + (height-1150.)*3./1000. + atm_get_sounding_1 = 303. + (height-600.)*3./1000. end if - atm_get_sounding_1 = atm_get_sounding_1 - 10.0 ! for water case + ! atm_get_sounding_1 = atm_get_sounding_1 - 10.0 ! for water case end if end function atm_get_sounding_1 + real (kind=RKIND) function atm_get_sounding_2( variable, height ) + + implicit none + real (kind=RKIND), intent(in) :: height + character(len=*), intent(in) :: variable + + atm_get_sounding_2 = -999. + + if(variable == 'u') then + atm_get_sounding_2 = 10.0 + else if (variable == 'v') then + atm_get_sounding_2 = 0. + else if (variable == 'qv') then + + ! atm_get_sounding_2 = 0. ! dry sounding_2 + atm_get_sounding_2 = 0.010 + if(height .gt. 1000.) atm_get_sounding_2 = 0.004 + + else if (variable == 'theta') then + + if(height .le. 1000.) then + atm_get_sounding_2 = 300. + else if(height .le. 1150.) then + atm_get_sounding_2 = 300. + (height-1000.)*8./150. + else + atm_get_sounding_2 = 308. + (height-1150.)*3./1000. 
+ end if + atm_get_sounding_2 = atm_get_sounding_2 - 10.0 ! for water case + + end if + + end function atm_get_sounding_2 + !----------- !----------------------------------------------------------------------- From 35910083c352652dff17021bbe45533d607376f5 Mon Sep 17 00:00:00 2001 From: Michael Duda Date: Thu, 4 Dec 2025 14:22:38 -0700 Subject: [PATCH 042/146] Added the SAS LES time-varying lower boundary moisture and heat fluxes. --- .../dynamics/mpas_atm_dissipation_models.F | 53 +++++++++++++++++-- .../dynamics/mpas_atm_time_integration.F | 15 +++++- 2 files changed, 63 insertions(+), 5 deletions(-) diff --git a/src/core_atmosphere/dynamics/mpas_atm_dissipation_models.F b/src/core_atmosphere/dynamics/mpas_atm_dissipation_models.F index e0f802eae7..7fdf0e4794 100644 --- a/src/core_atmosphere/dynamics/mpas_atm_dissipation_models.F +++ b/src/core_atmosphere/dynamics/mpas_atm_dissipation_models.F @@ -15,7 +15,7 @@ module mpas_atm_dissipation_models use mpas_derived_types, only : MPAS_LOG_CRIT logical, parameter :: debug_dissipation = .false. - logical, parameter :: les_test = .true. + logical, parameter :: les_test = .false., les_sas_test = .true. !! real (kind=RKIND), parameter :: tke_heat_flux = 0.03 ! shear case from Moeng et al., first hour real (kind=RKIND), parameter :: tke_heat_flux = 0.0 !! real (kind=RKIND), parameter :: tke_drag_coefficient = 0.0013 ! ocean roughness length @@ -1535,6 +1535,8 @@ subroutine theta_dissipation_3d( cellStart, cellEnd, cellSolveStart, cellSolveEn if( les_test ) then turb_vflux(1) = tke_heat_flux*rho_zz(1,iCell) ! this is correct for DRY CASE ONLY + ! 
SAS case lower flux for theta_m - code goes here + turb_vflux(nVertLevels+1) = turb_vflux(nVertLevels) else turb_vflux(1) = turb_vflux(2) @@ -1569,7 +1571,7 @@ subroutine scalar_dissipation_3d_les( cellStart, cellEnd, cellSolveStart, cellSo eddy_visc_horz, eddy_visc_vert, & bv_freq2, config_len_disp, scalars, tend_scalars, & index_tke, index_qv, num_scalars_dummy, mix_scalars, & - config_les_model, & + config_les_model, time_of_day_seconds, & uReconstructZonal, uReconstructMeridional, & tend_theta_euler ) @@ -1586,6 +1588,8 @@ subroutine scalar_dissipation_3d_les( cellStart, cellEnd, cellSolveStart, cellSo integer, intent(in) :: num_scalars_dummy integer, intent(in) :: index_tke, index_qv + real (kind=RKIND), intent(in) :: time_of_day_seconds + logical, intent(in) :: config_mix_full, mix_scalars character (len=StrKIND) :: config_les_model @@ -1643,6 +1647,7 @@ subroutine scalar_dissipation_3d_les( cellStart, cellEnd, cellSolveStart, cellSo logical, parameter :: hmix_scalars = .true. logical, parameter :: hmix4_scalars = .false. real (kind=RKIND) :: moisture_flux, heat_flux, theta_m_flux, c_h, c_q + real (kind=RKIND) :: qv_cell, theta_m_cell, theta_cell if(debug_dissipation) then call mpas_log_write(' begin scalar_dissipation_3d ') @@ -1842,6 +1847,7 @@ subroutine scalar_dissipation_3d_les( cellStart, cellEnd, cellSolveStart, cellSo ! test boundary conditions for supercell and les test cases if( les_test ) then + moisture_flux = 0. heat_flux = tke_heat_flux @@ -1852,9 +1858,27 @@ subroutine scalar_dissipation_3d_les( cellStart, cellEnd, cellSolveStart, cellSo ! turb_vflux(1) = tke_heat_flux*rho_zz(1,iCell) ! this is correct for DRY CASE ONLY turb_vflux(1) = theta_m_flux*rho_zz(1,iCell) turb_vflux(nVertLevels+1) = turb_vflux(nVertLevels) + + else if (les_sas_test) then + + ! 
if(iCell == 1) call mpas_log_write(' les_sas_test for theta and qv surface fluxes ') + qv_cell = scalars(index_qv,1,iCell) + theta_m_cell = theta_m(1,iCell) + theta_cell = theta_m_cell/(1.0+(rv/rgas)*qv_cell) + + call flux_les_sas( heat_flux, moisture_flux, time_of_day_seconds ) + if(iCell == 1) call mpas_log_write(' SAS t and qv fluxes, $r, $r ', realArgs=(/heat_flux, moisture_flux/)) + + theta_m_flux = heat_flux*(1.0+(rv/rgas)*qv_cell)+(rv/rgas)*theta_cell*moisture_flux + turb_vflux(1) = theta_m_flux*rho_zz(1,iCell) + moisture_flux = moisture_flux*rho_zz(1,iCell) + turb_vflux(nVertLevels+1) = turb_vflux(nVertLevels) + else + turb_vflux(1) = turb_vflux(2) turb_vflux(nVertLevels+1) = turb_vflux(nVertLevels) + end if @@ -1878,7 +1902,7 @@ subroutine scalar_dissipation_3d_les( cellStart, cellEnd, cellSolveStart, cellSo end do end do - if( les_test ) turb_vflux_scalars(index_qv,1) = moisture_flux ! lower b.c. for qv + if( les_test .or. les_sas_test ) turb_vflux_scalars(index_qv,1) = moisture_flux ! lower b.c. 
for qv do k=1,nVertLevels do iScalar=1,num_scalars @@ -1897,4 +1921,27 @@ subroutine scalar_dissipation_3d_les( cellStart, cellEnd, cellSolveStart, cellSo end subroutine scalar_dissipation_3d_les +!----------- + + subroutine flux_les_sas(heat_flux, moisture_flux, time_of_day_seconds) + + implicit none + + real (kind=RKIND), intent(in) :: time_of_day_seconds + real (kind=RKIND), intent(out) :: heat_flux, moisture_flux + + real (kind=RKIND), parameter:: t_start_t_flux = 3600.*6.0 + real (kind=RKIND), parameter:: t_end_t_flux = 3600.*19.50 + real (kind=RKIND), parameter:: t_start_q_flux = 3600.*7.0 + real (kind=RKIND), parameter:: t_end_q_flux = 3600.*19.50 + real (kind=RKIND) :: rel_time_t_flux, rel_time_q_flux + + rel_time_t_flux = max(0.,(time_of_day_seconds - t_start_t_flux)/(t_end_t_flux - t_start_t_flux)) + rel_time_q_flux = max(0.,(time_of_day_seconds - t_start_q_flux)/(t_end_q_flux - t_start_q_flux)) + + heat_flux = max(0., 0.1*sin(pii*rel_time_t_flux)) + moisture_flux = max(0., 0.15*sin(pii*rel_time_q_flux)) + + end subroutine flux_les_sas + end module mpas_atm_dissipation_models diff --git a/src/core_atmosphere/dynamics/mpas_atm_time_integration.F b/src/core_atmosphere/dynamics/mpas_atm_time_integration.F index 5d65db2752..f8fee790ff 100644 --- a/src/core_atmosphere/dynamics/mpas_atm_time_integration.F +++ b/src/core_atmosphere/dynamics/mpas_atm_time_integration.F @@ -24,7 +24,8 @@ module atm_time_integration use mpas_vector_reconstruction ! 
Added only clause to keep xlf90 from getting confused from the overloaded abs intrinsic in mpas_timekeeping use mpas_derived_types, only : MPAS_Time_type, MPAS_TimeInterval_type, MPAS_NOW - use mpas_timekeeping, only: mpas_set_time, mpas_set_timeInterval, mpas_get_time, operator(+) + use mpas_timekeeping, only: MPAS_Time_type, MPAS_TimeInterval_type, mpas_get_clock_time, & + mpas_set_time, mpas_set_timeInterval, mpas_get_time, operator(+), add_t_ti use mpas_timer #ifdef DO_PHYSICS @@ -5202,6 +5203,11 @@ subroutine atm_compute_dyn_tend_work(nCells, nEdges, nVertices, nVertLevels_dumm real (kind=RKIND), parameter :: coriolis_value = 1.e-04 real (kind=RKIND) :: reference_u + type (MPAS_Time_Type) :: currTime + integer :: H, M, S, S_n, S_d + integer :: ierr + real(kind=RKIND) :: time_of_day_seconds + flux4(q_im2, q_im1, q_i, q_ip1, ua) = & ua*( 7.*(q_i + q_im1) - (q_ip1 + q_im2) )/12.0 @@ -5297,6 +5303,11 @@ subroutine atm_compute_dyn_tend_work(nCells, nEdges, nVertices, nVertLevels_dumm call calculate_n2( bn2, theta_m, exner, pressure_b, pp, zgrid, scalars, index_qv, index_qc, qtot, & cellStart, cellEnd, nCells) + currTime = mpas_get_clock_time(clock, MPAS_NOW, ierr) + call mpas_get_time(curr_time=currTime, H=H, M=M, S=S, S_n=S_n, S_d=S_d) + time_of_day_seconds = real(H)*3600. + real(M)*60. + real(S) + real(S_n)/real(S_d) + 0.5*dt + ! 
call mpas_log_write(' les integration, timestep midpoint time of day in seconds, $r ', realArgs=(/time_of_day_seconds/)) + call les_models( config_les_model, eddy_visc_horz, eddy_visc_vert, & u, v, ur_cell, vr_cell, & w, c_s, bn2, zgrid, config_len_disp, & @@ -5862,7 +5873,7 @@ subroutine atm_compute_dyn_tend_work(nCells, nEdges, nVertices, nVertLevels_dumm eddy_visc_horz, eddy_visc_vert, & bn2, config_len_disp, scalars, tend_scalars, & index_tke, index_qv, num_scalars, mix_scalars, & - config_les_model, & + config_les_model, time_of_day_seconds, & ur_cell, vr_cell, & tend_theta_euler ) From f77c123d939aa723e4fd266489cf6b7275cd680a Mon Sep 17 00:00:00 2001 From: Bill Skamarock Date: Mon, 4 May 2020 12:25:35 -0600 Subject: [PATCH 043/146] changed the initialization for the random potential temperature perturbation, the Coriolis parameter and the horizontal wind field (u) to match the SAS cases configuration --- .../mpas_init_atm_cases.F | 43 +++++++++++++------ 1 file changed, 30 insertions(+), 13 deletions(-) diff --git a/src/core_init_atmosphere/mpas_init_atm_cases.F b/src/core_init_atmosphere/mpas_init_atm_cases.F index e9dc369a29..12de5af3e9 100644 --- a/src/core_init_atmosphere/mpas_init_atm_cases.F +++ b/src/core_init_atmosphere/mpas_init_atm_cases.F @@ -6608,20 +6608,35 @@ subroutine init_atm_case_les(dminfo, mesh, nCells, nVertLevels, state, diag, tes ! potential temperature perturbation ! - do i=1,nCells - do k = 1,4 ! same as in WRF - call random_number(randx) - thi(k,i) = thi(k,i) + 0.1*(randx-0.5) - t (k,i) = thi(k,i)*(1.+1.61*scalars(index_qv,k,i)) - end do +! do i=1,nCells +! do k = 1,nz1 ! same as in WRF +! call random_number(randx) +! thi(k,i) = thi(k,i) + 0.1*(randx-0.5) +! t (k,i) = thi(k,i)*(1.+1.61*scalars(index_qv,k,i)) +! end do +! end do + + do k = 1,nz1 ! same as in WRF + if(zg(k) .le. 397.0) then !
the SAS initial PBL height + do i=1,nCells + call random_number(randx) + thi(k,i) = thi(k,i) + 1.0*(randx-0.5) + t (k,i) = thi(k,i)*(1.+1.61*scalars(index_qv,k,i)) + end do + end if end do ! ! initial seed for tke ! - do i=1,nCells - do k = 1,nz1 - scalars(index_tke,k,i) = 0.1 - end do + scalars(index_tke,:,:) = 0. + + do k = 1,nz1 + if( zg(k) .le. 255.) then + do i=1,nCells + !! scalars(index_tke,k,i) = 0.1 + scalars(index_tke,k,i) = 0.4*((1.-(zg(k)/255.))**3) + end do + end if end do do itr=1,30 @@ -6708,11 +6723,13 @@ subroutine init_atm_case_les(dminfo, mesh, nCells, nVertLevels, state, diag, tes ! Generate rotated Coriolis field - same settings as in WRF ! do iEdge=1,nEdges - fEdge(iEdge) = 1.e-04 + ! fEdge(iEdge) = 1.e-04 + fEdge(iEdge) = 7.2921e-05 end do do iVtx=1,nVertices - fVertex(iVtx) = 1.e-04 + ! fVertex(iVtx) = 1.e-04 + fVertex(iVtx) = 7.2921e-05 end do ! @@ -6756,7 +6773,7 @@ real (kind=RKIND) function atm_get_sounding( variable, height ) ! SAS case sounding if(variable == 'u') then - atm_get_sounding = 0.0 + atm_get_sounding = 2.0 ! SAS value else if (variable == 'v') then atm_get_sounding = 0. From b6f044a8a45816d55dbd9a1fd7ff9c6ae9828bf2 Mon Sep 17 00:00:00 2001 From: Bill Skamarock Date: Mon, 4 May 2020 12:27:55 -0600 Subject: [PATCH 044/146] Added a 3D diagnostic variable: the inverse Prandtl number used in the scalar mixing formulation in the prognostic LES scheme. This allows for computing it only once and also allows it to be output for use in off-line diagnostic calculation of the sub-grid fluxes. 
--- src/core_atmosphere/Registry.xml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/core_atmosphere/Registry.xml b/src/core_atmosphere/Registry.xml index f78ca1b2fb..9d1d5ac8a7 100644 --- a/src/core_atmosphere/Registry.xml +++ b/src/core_atmosphere/Registry.xml @@ -1971,6 +1971,9 @@ + + From 2a9afcaac60b4626ce2bcc7fccf745198d1a9dde Mon Sep 17 00:00:00 2001 From: Bill Skamarock Date: Mon, 4 May 2020 12:31:42 -0600 Subject: [PATCH 045/146] Threaded the 3D inverse Prandtl number through to the routines that need it in the prognostic LES models, and set its values appropriately. Also switched the hardwired formulation to isotropic mixing. --- .../dynamics/mpas_atm_dissipation_models.F | 48 +++++++++++-------- .../dynamics/mpas_atm_time_integration.F | 12 +++-- 2 files changed, 38 insertions(+), 22 deletions(-) diff --git a/src/core_atmosphere/dynamics/mpas_atm_dissipation_models.F b/src/core_atmosphere/dynamics/mpas_atm_dissipation_models.F index 7fdf0e4794..891cc6152a 100644 --- a/src/core_atmosphere/dynamics/mpas_atm_dissipation_models.F +++ b/src/core_atmosphere/dynamics/mpas_atm_dissipation_models.F @@ -99,7 +99,7 @@ subroutine les_models( config_les_model, eddy_visc_horz, eddy_visc_vert, u, v, uCell, vCell, & w, c_s, bv_freq2, zgrid, config_len_disp, & deformation_coef_c2, deformation_coef_s2, deformation_coef_cs, & - deformation_coef_c, deformation_coef_s, & + deformation_coef_c, deformation_coef_s, prandtl_3d_inv, & invDt, h_mom_eddy_visc4, config_visc4_2dsmag, h_theta_eddy_visc4, & scalars, tend_scalars, index_tke, rho_zz, meshScalingDel2, & cellStart, cellEnd, nEdgesOnCell, edgesOnCell, cellsOnEdge, & @@ -115,6 +115,7 @@ subroutine les_models( config_les_model, eddy_visc_horz, eddy_visc_vert, real (kind=RKIND), dimension(nVertLevels,nCells+1), intent(in) :: uCell real (kind=RKIND), dimension(nVertLevels,nCells+1), intent(in) :: vCell real (kind=RKIND), dimension(nVertLevels,nCells+1), intent(in) :: bv_freq2 + real (kind=RKIND), 
dimension(nVertLevels,nCells+1), intent(out) :: prandtl_3d_inv real (kind=RKIND), dimension(nVertLevels,nCells+1), intent(in) :: rho_zz real (kind=RKIND), dimension(num_scalars,nVertLevels,nCells+1), intent(inout) :: scalars real (kind=RKIND), dimension(num_scalars,nVertLevels,nCells+1) :: tend_scalars @@ -249,22 +250,24 @@ subroutine les_models( config_les_model, eddy_visc_horz, eddy_visc_vert, delta_s = ((config_len_disp**2)*delta_z)**(1./3.) bv = max( sqrt(abs(bv_freq2(k,iCell))), epsilon_bv ) tke_length = delta_s + ! isentropic mixing formulation if(bv_freq2(k,iCell) .gt. 1.e-06) & tke_length = 0.76*sqrt(scalars(index_tke,k,iCell))/bv + tke_length = min(tke_length, delta_z) diss_length = min(delta_s,max(tke_length,0.01*delta_s)) if(bv_freq2(k,iCell) <= 0) diss_length = delta_s ! non-isotropic mixing - l_horizontal = config_len_disp - l_vertical = min(delta_z,tke_length) - if(bv_freq2(k,iCell) <= 0) diss_length = delta_z + ! l_horizontal = config_len_disp + ! l_vertical = min(delta_z,tke_length) + ! if(bv_freq2(k,iCell) <= 0) diss_length = delta_z ! isotropic mixing - ! l_horizontal = min(delta_s,tke_length) - ! if(bv_freq2(k,iCell) <= 0) diss_length = delta_s - ! l_vertical = l_horizontal + l_horizontal = min(delta_s,tke_length) + if(bv_freq2(k,iCell) <= 0) diss_length = delta_s + l_vertical = l_horizontal ! eddy viscocities set here if we are running the 1.5 order prognostic tke scheme eddy_visc_h = c_k*l_horizontal*sqrt(scalars(index_tke,k,iCell)) @@ -291,7 +294,8 @@ subroutine les_models( config_les_model, eddy_visc_horz, eddy_visc_vert, ! computing eddy viscosities ********* prandtl_horizontal_inv = 3. - prandtl_vertical_inv = 1.0+(2.0*l_vertical/delta_z) ! going to need to store this off somewhere + prandtl_3d_inv(k,iCell) = 1.0+(2.0*l_vertical/delta_z) + ! RHS term for the subgrid ke. 
@@ -1566,6 +1570,7 @@ subroutine scalar_dissipation_3d_les( cellStart, cellEnd, cellSolveStart, cellSo config_mix_full, t_init, zgrid, & rdzw, rdzu, fzm, fzp, & v_theta_eddy_visc2, h_theta_eddy_visc4, prandtl_inv, & + prandtl_3d_inv, & delsq_theta, & theta_m, rho_edge, rho_zz, zz, & eddy_visc_horz, eddy_visc_vert, & @@ -1621,6 +1626,7 @@ subroutine scalar_dissipation_3d_les( cellStart, cellEnd, cellSolveStart, cellSo real (kind=RKIND), dimension(nVertLevels,nCells+1), intent(in) :: t_init real (kind=RKIND), dimension(nVertLevels,nCells+1), intent(in) :: bv_freq2 + real (kind=RKIND), dimension(nVertLevels,nCells+1), intent(in) :: prandtl_3d_inv real (kind=RKIND), dimension(num_scalars,nVertLevels,nCells+1), intent(in) :: scalars real (kind=RKIND), dimension(num_scalars,nVertLevels,nCells+1), intent(inout) :: tend_scalars real (kind=RKIND), dimension(nVertLevels,nCells+1), intent(in) :: theta_m @@ -1819,16 +1825,20 @@ subroutine scalar_dissipation_3d_les( cellStart, cellEnd, cellSolveStart, cellSo do k=2,nVertLevels prandtl_inverse(k) = prandtl_inv end do - else ! prognostic_1.5_order - do k=2,nVertLevels - delta_z = 0.5*(zgrid(k+1,iCell)-zgrid(k-1,iCell)) - delta_s = ((config_len_disp**2)*delta_z)**(1./3.) - bv_frequency2 = 0.5*(bv_freq2(k,iCell)+bv_freq2(k-1,iCell)) - tke_length = delta_s - if(bv_frequency2 .gt. 1.e-06) & - tke_length = 0.76*sqrt(scalars(index_tke,k,iCell))/sqrt(bv_frequency2) - prandtl_inverse(k) = 1. + 2.*tke_length/delta_z - end do + else ! prognostic_1.5_order, isentropic mixing length + ! do k=2,nVertLevels + ! delta_z = 0.5*(zgrid(k+1,iCell)-zgrid(k-1,iCell)) + ! delta_s = ((config_len_disp**2)*delta_z)**(1./3.) + ! bv_frequency2 = 0.5*(bv_freq2(k,iCell)+bv_freq2(k-1,iCell)) + ! tke_length = delta_s + ! if(bv_frequency2 .gt. 1.e-06) & + ! tke_length = 0.76*sqrt(scalars(index_tke,k,iCell))/sqrt(bv_frequency2) + ! tke_length = min(delta_z,tke_length) + ! prandtl_inverse(k) = 1. + 2.*tke_length/delta_z + ! 
end do + + prandtl_inverse(k) = 0.5*(prandtl_3d_inv(k,iCell)+prandtl_3d_inv(k-1,iCell)) + end if do k=2,nVertLevels @@ -1940,7 +1950,7 @@ subroutine flux_les_sas(heat_flux, moisture_flux, time_of_day_seconds) rel_time_q_flux = max(0.,(time_of_day_seconds - t_start_q_flux)/(t_end_q_flux - t_start_q_flux)) heat_flux = max(0., 0.1*sin(pii*rel_time_t_flux)) - moisture_flux = max(0., 0.15*sin(pii*rel_time_q_flux)) + moisture_flux = max(0., 0.15*sin(pii*rel_time_q_flux))/1000. end subroutine flux_les_sas diff --git a/src/core_atmosphere/dynamics/mpas_atm_time_integration.F b/src/core_atmosphere/dynamics/mpas_atm_time_integration.F index f8fee790ff..58ee52ffbd 100644 --- a/src/core_atmosphere/dynamics/mpas_atm_time_integration.F +++ b/src/core_atmosphere/dynamics/mpas_atm_time_integration.F @@ -4816,6 +4816,8 @@ subroutine atm_compute_dyn_tend(tend, tend_physics, state, diag, mesh, configs, real (kind=RKIND), dimension(:,:), pointer :: defc_a, defc_b real (kind=RKIND), dimension(:,:), pointer :: deformation_coef_c2, deformation_coef_s2, deformation_coef_cs real (kind=RKIND), dimension(:,:), pointer :: deformation_coef_c, deformation_coef_s + real (kind=RKIND), dimension(:,:), pointer :: prandtl_3d_inv + real(kind=RKIND), dimension(:,:), pointer :: tend_w_pgf, tend_w_buoy @@ -4892,6 +4894,8 @@ subroutine atm_compute_dyn_tend(tend, tend_physics, state, diag, mesh, configs, call mpas_pool_get_array(diag, 'pressure_base', pressure_b) call mpas_pool_get_array(diag, 'h_divergence', h_divergence) call mpas_pool_get_array(diag, 'exner', exner) + call mpas_pool_get_array(diag, 'prandtl_3d_inv', prandtl_3d_inv) + call mpas_pool_get_array(tend_physics, 'rthdynten', rthdynten) @@ -4995,7 +4999,7 @@ subroutine atm_compute_dyn_tend(tend, tend_physics, state, diag, mesh, configs, rdzu, rdzw, fzm, fzp, qv_init, t_init, cf1, cf2, cf3, r_earth, ur_cell, vr_cell, defc_a, defc_b, & deformation_coef_c2,deformation_coef_s2,deformation_coef_cs,deformation_coef_c,deformation_coef_s, & 
tend_w_pgf, tend_w_buoy, coef_3rd_order, c_s, config_mix_full, config_horiz_mixing, config_les_model, & - config_del4u_div_factor, & + prandtl_3d_inv, config_del4u_div_factor, & config_h_mom_eddy_visc2, config_v_mom_eddy_visc2, config_h_theta_eddy_visc2, config_v_theta_eddy_visc2, & config_h_theta_eddy_visc4, config_h_mom_eddy_visc4, config_visc4_2dsmag, config_len_disp, rk_step, dt, & config_mpas_cam_coef, & @@ -5023,7 +5027,7 @@ subroutine atm_compute_dyn_tend_work(nCells, nEdges, nVertices, nVertLevels_dumm rdzu, rdzw, fzm, fzp, qv_init, t_init, cf1, cf2, cf3, r_earth, ur_cell, vr_cell, defc_a, defc_b, & deformation_coef_c2,deformation_coef_s2,deformation_coef_cs,deformation_coef_c,deformation_coef_s, & tend_w_pgf, tend_w_buoy, coef_3rd_order, c_s, config_mix_full, config_horiz_mixing, config_les_model, & - config_del4u_div_factor, & + prandtl_3d_inv, config_del4u_div_factor, & config_h_mom_eddy_visc2, config_v_mom_eddy_visc2, config_h_theta_eddy_visc2, config_v_theta_eddy_visc2, & config_h_theta_eddy_visc4, config_h_mom_eddy_visc4, config_visc4_2dsmag, config_len_disp, rk_step, dt, & config_mpas_cam_coef, & @@ -5087,6 +5091,7 @@ subroutine atm_compute_dyn_tend_work(nCells, nEdges, nVertices, nVertLevels_dumm real (kind=RKIND), dimension(nVertLevels,nCells+1) :: eddy_visc_horz real (kind=RKIND), dimension(nVertLevels,nCells+1) :: eddy_visc_vert real (kind=RKIND), dimension(nVertLevels,nCells+1) :: bn2 + real (kind=RKIND), dimension(nVertLevels,nCells+1) :: prandtl_3d_inv real (kind=RKIND), dimension(maxEdges,nCells+1) :: edgesOnCell_sign real (kind=RKIND), dimension(vertexDegree,nVertices+1) :: edgesOnVertex_sign real (kind=RKIND), dimension(nVertLevels+1,nCells+1) :: rw_save @@ -5312,7 +5317,7 @@ subroutine atm_compute_dyn_tend_work(nCells, nEdges, nVertices, nVertLevels_dumm u, v, ur_cell, vr_cell, & w, c_s, bn2, zgrid, config_len_disp, & deformation_coef_c2, deformation_coef_s2, deformation_coef_cs, & - deformation_coef_c, deformation_coef_s, & + 
deformation_coef_c, deformation_coef_s, prandtl_3d_inv, & invDt, h_mom_eddy_visc4, config_visc4_2dsmag, h_theta_eddy_visc4, & scalars, tend_scalars, index_tke, rho_zz, meshScalingDel2, & cellStart, cellEnd, nEdgesOnCell, edgesOnCell, cellsOnEdge, & @@ -5868,6 +5873,7 @@ subroutine atm_compute_dyn_tend_work(nCells, nEdges, nVertices, nVertLevels_dumm config_mix_full, t_init, zgrid, & rdzw, rdzu, fzm, fzp, & v_theta_eddy_visc2, h_theta_eddy_visc4, prandtl_inv, & + prandtl_3d_inv, & delsq_theta, & theta_m, rho_edge, rho_zz, zz, & eddy_visc_horz, eddy_visc_vert, & From 7bef48fb31cdd8c178be5a328fd87cf2e3e6315e Mon Sep 17 00:00:00 2001 From: Jimy Dudhia Date: Wed, 13 May 2020 16:00:22 -0600 Subject: [PATCH 046/146] ustm passed to u_dissipation_3d, diag_physics pool accessed --- .../dynamics/mpas_atm_dissipation_models.F | 3 +- .../dynamics/mpas_atm_time_integration.F | 39 ++++++++++++------- 2 files changed, 28 insertions(+), 14 deletions(-) diff --git a/src/core_atmosphere/dynamics/mpas_atm_dissipation_models.F b/src/core_atmosphere/dynamics/mpas_atm_dissipation_models.F index 891cc6152a..b44e1ebbab 100644 --- a/src/core_atmosphere/dynamics/mpas_atm_dissipation_models.F +++ b/src/core_atmosphere/dynamics/mpas_atm_dissipation_models.F @@ -633,7 +633,7 @@ subroutine u_dissipation_3d( edgeStart, edgeEnd, edgeSolveStart, edgeSolveEnd, v eddy_visc_horz, eddy_visc_vert, zz, rdzu, rdzw, & fzm, fzp, config_les_model, & delsq_u, delsq_vorticity, delsq_divergence, & - u, v, divergence, vorticity, rho_edge, rho_zz, u_init, v_init, tend_u_euler ) + u, v, divergence, vorticity, rho_edge, rho_zz, u_init, v_init, ustm, tend_u_euler ) use mpas_atm_dimensions ! 
pull nVertLevels and maxEdges from here @@ -691,6 +691,7 @@ subroutine u_dissipation_3d( edgeStart, edgeEnd, edgeSolveStart, edgeSolveEnd, v real (kind=RKIND), dimension(nVertLevels,nCells+1) :: delsq_divergence real (kind=RKIND), dimension(nVertLevels), intent(in) :: u_init, v_init + real (kind=RKIND), dimension(nCells+1), intent(in) :: ustm real (kind=RKIND), dimension(nVertLevels,nEdges+1), intent(out) :: tend_u_euler diff --git a/src/core_atmosphere/dynamics/mpas_atm_time_integration.F b/src/core_atmosphere/dynamics/mpas_atm_time_integration.F index 58ee52ffbd..c32c7a5801 100644 --- a/src/core_atmosphere/dynamics/mpas_atm_time_integration.F +++ b/src/core_atmosphere/dynamics/mpas_atm_time_integration.F @@ -1149,6 +1149,14 @@ subroutine atm_srk3(domain, dt, itimestep, exchange_halo_group) end if call mpas_timer_start('atm_compute_dyn_tend') + block => domain % blocklist + do while (associated(block)) + call mpas_pool_get_subpool(block % structs, 'state', state) + call mpas_pool_get_subpool(block % structs, 'diag', diag) + call mpas_pool_get_subpool(block % structs, 'mesh', mesh) + call mpas_pool_get_subpool(block % structs, 'tend', tend) + call mpas_pool_get_subpool(block % structs, 'diag_physics', diag_physics) + call mpas_pool_get_subpool(block % structs, 'tend_physics', tend_physics) allocate(delsq_theta(nVertLevels,nCells+1)) allocate(delsq_w(nVertLevels,nCells+1)) @@ -1172,15 +1180,16 @@ subroutine atm_srk3(domain, dt, itimestep, exchange_halo_group) !$acc end parallel !$OMP PARALLEL DO - do thread=1,nThreads - call atm_compute_dyn_tend( tend, tend_physics, state, diag, mesh, block % configs, nVertLevels, rk_step, dt, & - cellThreadStart(thread), cellThreadEnd(thread), & - vertexThreadStart(thread), vertexThreadEnd(thread), & - edgeThreadStart(thread), edgeThreadEnd(thread), & - cellSolveThreadStart(thread), cellSolveThreadEnd(thread), & - vertexSolveThreadStart(thread), vertexSolveThreadEnd(thread), & - edgeSolveThreadStart(thread), 
edgeSolveThreadEnd(thread)) - end do + do thread=1,nThreads + call atm_compute_dyn_tend( tend, tend_physics, state, diag, mesh, diag_physics, & + block % configs, nVertLevels, rk_step, dt, & + cellThreadStart(thread), cellThreadEnd(thread), & + vertexThreadStart(thread), vertexThreadEnd(thread), & + edgeThreadStart(thread), edgeThreadEnd(thread), & + cellSolveThreadStart(thread), cellSolveThreadEnd(thread), & + vertexSolveThreadStart(thread), vertexSolveThreadEnd(thread), & + edgeSolveThreadStart(thread), edgeSolveThreadEnd(thread)) + end do !$OMP END PARALLEL DO deallocate(delsq_theta) @@ -4736,7 +4745,7 @@ subroutine atm_advance_scalars_mono_work(field_name, block, state, nCells, nEdge end subroutine atm_advance_scalars_mono_work - subroutine atm_compute_dyn_tend(tend, tend_physics, state, diag, mesh, configs, nVertLevels, rk_step, dt, & + subroutine atm_compute_dyn_tend(tend, tend_physics, state, diag, mesh, diag_physics, configs, nVertLevels, rk_step, dt, & cellStart, cellEnd, vertexStart, vertexEnd, edgeStart, edgeEnd, & cellSolveStart, cellSolveEnd, vertexSolveStart, vertexSolveEnd, edgeSolveStart, edgeSolveEnd) !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! @@ -4762,6 +4771,7 @@ subroutine atm_compute_dyn_tend(tend, tend_physics, state, diag, mesh, configs, type (mpas_pool_type), intent(in) :: state type (mpas_pool_type), intent(in) :: diag type (mpas_pool_type), intent(in) :: mesh + type (mpas_pool_type), intent(in) :: diag_physics type (mpas_pool_type), intent(in) :: configs integer, intent(in) :: nVertLevels ! 
for allocating stack variables integer, intent(in) :: rk_step @@ -4899,6 +4909,8 @@ subroutine atm_compute_dyn_tend(tend, tend_physics, state, diag, mesh, configs, call mpas_pool_get_array(tend_physics, 'rthdynten', rthdynten) + call mpas_pool_get_array(diag_physics,'ustm'  ,ustm  ) + call mpas_pool_get_array(mesh, 'weightsOnEdge', weightsOnEdge) call mpas_pool_get_array(mesh, 'cellsOnEdge', cellsOnEdge) call mpas_pool_get_array(mesh, 'cellsOnCell', cellsOnCell) @@ -5005,7 +5017,7 @@ subroutine atm_compute_dyn_tend(tend, tend_physics, state, diag, mesh, configs, config_mpas_cam_coef, & config_rayleigh_damp_u, config_rayleigh_damp_u_timescale_days, & config_number_rayleigh_damp_u_levels, config_number_cam_damping_levels, & - rthdynten, & + tend_rtheta_adv, rthdynten, ustm, & cellStart, cellEnd, vertexStart, vertexEnd, edgeStart, edgeEnd, & cellSolveStart, cellSolveEnd, vertexSolveStart, vertexSolveEnd, edgeSolveStart, edgeSolveEnd) @@ -5033,7 +5045,7 @@ subroutine atm_compute_dyn_tend_work(nCells, nEdges, nVertices, nVertLevels_dumm config_mpas_cam_coef, & config_rayleigh_damp_u, config_rayleigh_damp_u_timescale_days, & config_number_rayleigh_damp_u_levels, config_number_cam_damping_levels, & - rthdynten, & + tend_rtheta_adv, rthdynten, ustm, & cellStart, cellEnd, vertexStart, vertexEnd, edgeStart, edgeEnd, & cellSolveStart, cellSolveEnd, vertexSolveStart, vertexSolveEnd, edgeSolveStart, edgeSolveEnd) @@ -5091,6 +5103,7 @@ subroutine atm_compute_dyn_tend_work(nCells, nEdges, nVertices, nVertLevels_dumm real (kind=RKIND), dimension(nVertLevels,nCells+1) :: eddy_visc_horz real (kind=RKIND), dimension(nVertLevels,nCells+1) :: eddy_visc_vert real (kind=RKIND), dimension(nVertLevels,nCells+1) :: bn2 + real (kind=RKIND), dimension(nCells+1) :: ustm real (kind=RKIND), dimension(nVertLevels,nCells+1) :: prandtl_3d_inv real (kind=RKIND), dimension(maxEdges,nCells+1) :: edgesOnCell_sign real (kind=RKIND), dimension(vertexDegree,nVertices+1) :: edgesOnVertex_sign @@ -5533,7 
+5546,7 @@ subroutine atm_compute_dyn_tend_work(nCells, nEdges, nVertices, nVertLevels_dumm eddy_visc_horz, eddy_visc_vert, zz, rdzu, rdzw, & fzm, fzp, config_les_model, & delsq_u, delsq_vorticity, delsq_divergence, & - u, v, divergence, vorticity, rho_edge, rho_zz, u_init, v_init, tend_u_euler ) + u, v, divergence, vorticity, rho_edge, rho_zz, u_init, v_init, ustm, tend_u_euler ) ! else ! this is the original MPAS dissipation code ! From f2ae5beb59154911fa6f98348b82ea7156878fa5 Mon Sep 17 00:00:00 2001 From: Jimy Dudhia Date: Fri, 15 May 2020 14:08:27 -0600 Subject: [PATCH 047/146] ustm add pointer remove blanks in pool statement --- src/core_atmosphere/dynamics/mpas_atm_time_integration.F | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/core_atmosphere/dynamics/mpas_atm_time_integration.F b/src/core_atmosphere/dynamics/mpas_atm_time_integration.F index c32c7a5801..96e74f7202 100644 --- a/src/core_atmosphere/dynamics/mpas_atm_time_integration.F +++ b/src/core_atmosphere/dynamics/mpas_atm_time_integration.F @@ -4817,6 +4817,7 @@ subroutine atm_compute_dyn_tend(tend, tend_physics, state, diag, mesh, diag_phys real (kind=RKIND), dimension(:), pointer :: rdzu, rdzw, fzm, fzp, qv_init real (kind=RKIND), dimension(:,:), pointer :: t_init + real (kind=RKIND), dimension(:), pointer:: ustm real (kind=RKIND), pointer :: cf1, cf2, cf3 @@ -4909,7 +4910,7 @@ subroutine atm_compute_dyn_tend(tend, tend_physics, state, diag, mesh, diag_phys call mpas_pool_get_array(tend_physics, 'rthdynten', rthdynten) - call mpas_pool_get_array(diag_physics,'ustm'  ,ustm  ) + call mpas_pool_get_array(diag_physics,'ustm',ustm) call mpas_pool_get_array(mesh, 'weightsOnEdge', weightsOnEdge) call mpas_pool_get_array(mesh, 'cellsOnEdge', cellsOnEdge) From 1ab69a02c4c0a1773e636c8090abe78f52c935fb Mon Sep 17 00:00:00 2001 From: Jimy Dudhia Date: Wed, 3 Jun 2020 15:09:15 -0600 Subject: [PATCH 048/146] add les_model check to bypass pbl calls --- 
src/core_atmosphere/physics/mpas_atmphys_driver_pbl.F | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/core_atmosphere/physics/mpas_atmphys_driver_pbl.F b/src/core_atmosphere/physics/mpas_atmphys_driver_pbl.F index 72a411aeba..43837ee27c 100644 --- a/src/core_atmosphere/physics/mpas_atmphys_driver_pbl.F +++ b/src/core_atmosphere/physics/mpas_atmphys_driver_pbl.F @@ -775,6 +775,7 @@ subroutine driver_pbl(itimestep,configs,mesh,sfc_input,diag_physics,tend_physics bl_mynn_tkeadvect character(len=StrKIND),pointer:: pbl_scheme + character(len=StrKIND),pointer:: config_les_model integer,pointer:: bl_mynn_cloudpdf, & bl_mynn_mixlength, & @@ -813,6 +814,7 @@ subroutine driver_pbl(itimestep,configs,mesh,sfc_input,diag_physics,tend_physics call mpas_pool_get_config(configs,'config_do_DAcycling',config_do_DAcycling) call mpas_pool_get_config(configs,'config_do_restart' ,config_do_restart ) call mpas_pool_get_config(configs,'config_pbl_scheme' ,pbl_scheme ) + call mpas_pool_get_config(configs, 'config_les_model', config_les_model) !copy MPAS arrays to local arrays: call pbl_from_MPAS(configs,mesh,sfc_input,diag_physics,tend_physics,its,ite) @@ -820,6 +822,7 @@ subroutine driver_pbl(itimestep,configs,mesh,sfc_input,diag_physics,tend_physics initflag = 1 if(config_do_restart .or. itimestep > 1) initflag = 0 + if(config_les_model == "none") then pbl_select: select case (trim(pbl_scheme)) case("bl_ysu") @@ -964,6 +967,8 @@ subroutine driver_pbl(itimestep,configs,mesh,sfc_input,diag_physics,tend_physics case default end select pbl_select + + endif ! 
les skip pbl !copy local arrays to MPAS grid: call pbl_to_MPAS(configs,diag_physics,tend_physics,its,ite) From b6f5cba0ddda660c118f99e333304262535880e0 Mon Sep 17 00:00:00 2001 From: Jimy Dudhia Date: Wed, 17 Jun 2020 15:56:42 -0600 Subject: [PATCH 049/146] add time_of_day_seconds calculation (compiles) --- .../physics/mpas_atmphys_driver_sfclayer.F | 42 +++++++++++++++++++ 1 file changed, 42 insertions(+) diff --git a/src/core_atmosphere/physics/mpas_atmphys_driver_sfclayer.F b/src/core_atmosphere/physics/mpas_atmphys_driver_sfclayer.F index afde4fa523..3435ecbe60 100644 --- a/src/core_atmosphere/physics/mpas_atmphys_driver_sfclayer.F +++ b/src/core_atmosphere/physics/mpas_atmphys_driver_sfclayer.F @@ -8,7 +8,11 @@ !================================================================================================================= module mpas_atmphys_driver_sfclayer use mpas_kind_types + use mpas_derived_types use mpas_pool_routines +! use mpas_timekeeping, only: MPAS_Time_type, MPAS_TimeInterval_type, mpas_get_clock_time, & +! mpas_set_time, mpas_set_timeInterval, mpas_get_time, operator(+), add_t_ti + use mpas_timekeeping, only: MPAS_Time_type, mpas_get_clock_time, mpas_get_time use mpas_timer, only : mpas_timer_start, mpas_timer_stop use mpas_atmphys_constants @@ -32,6 +36,7 @@ module mpas_atmphys_driver_sfclayer integer,parameter,private:: iz0tlnd = 0 !=0,(Carlson-Boland). integer,parameter,private:: scm_force_flux = 0 !SCM surface forcing by surface fluxes. !0=no 1=yes (WRF single column model option only). + type (MPAS_Clock_type), pointer, private :: clock !MPAS driver for parameterization of the surface layer. !Laura D. Fowler (send comments to laura@ucar.edu). 
@@ -854,6 +859,7 @@ subroutine driver_sfclayer(itimestep,configs,mesh,diag_physics,sfc_input,its,ite logical,pointer:: config_do_restart,config_frac_seaice character(len=StrKIND),pointer:: sfclayer_scheme real(kind=RKIND),dimension(:),pointer:: areaCell + real(kind=RKIND),pointer:: config_dt !local variables: integer:: initflag @@ -863,6 +869,11 @@ subroutine driver_sfclayer(itimestep,configs,mesh,diag_physics,sfc_input,its,ite character(len=StrKIND):: errmsg integer:: errflg + type (MPAS_Time_Type) :: currTime + integer :: H, M, S, S_n, S_d + integer :: ierr + real(kind=RKIND) :: time_of_day_seconds + !----------------------------------------------------------------------------------------------------------------- !call mpas_log_write('') !call mpas_log_write('--- enter subroutine driver_sfclayer:') @@ -874,6 +885,7 @@ subroutine driver_sfclayer(itimestep,configs,mesh,diag_physics,sfc_input,its,ite call mpas_pool_get_config(configs,'config_do_restart' ,config_do_restart ) call mpas_pool_get_config(configs,'config_frac_seaice' ,config_frac_seaice) call mpas_pool_get_config(configs,'config_sfclayer_scheme',sfclayer_scheme ) + call mpas_pool_get_config(configs,'config_dt',config_dt) call mpas_pool_get_array(mesh,'areaCell',areaCell) @@ -889,6 +901,12 @@ subroutine driver_sfclayer(itimestep,configs,mesh,diag_physics,sfc_input,its,ite case("sf_monin_obukhov") call mpas_timer_start('sf_monin_obukhov') + + currTime = mpas_get_clock_time(clock, MPAS_NOW, ierr) + call mpas_get_time(curr_time=currTime, H=H, M=M, S=S, S_n=S_n, S_d=S_d) + time_of_day_seconds = real(H)*3600. + real(M)*60. 
+ real(S) + real(S_n)/real(S_d) + 0.5*config_dt + call mpas_log_write(' sfclay, time of day in seconds, $r ', realArgs=(/time_of_day_seconds/)) + call sfclay( & p3d = pres_hyd_p , psfc = psfc_p , t3d = t_p , & u3d = u_p , v3d = v_p , qv3d = qv_p , & @@ -1087,6 +1105,30 @@ subroutine driver_sfclayer(itimestep,configs,mesh,diag_physics,sfc_input,its,ite end subroutine driver_sfclayer + subroutine flux_les_sas(heat_flux, moisture_flux, time_of_day_seconds) + + implicit none + + real (kind=RKIND), intent(in) :: time_of_day_seconds + real (kind=RKIND), intent(out) :: heat_flux, moisture_flux + + real (kind=RKIND), parameter:: t_start_t_flux = 3600.*6.0 + real (kind=RKIND), parameter:: t_end_t_flux = 3600.*19.50 + real (kind=RKIND), parameter:: t_start_q_flux = 3600.*7.0 + real (kind=RKIND), parameter:: t_end_q_flux = 3600.*19.50 + real (kind=RKIND) :: rel_time_t_flux, rel_time_q_flux + + rel_time_t_flux = max(0.,(time_of_day_seconds - t_start_t_flux)/(t_end_t_flux - t_start_t_flux)) + rel_time_q_flux = max(0.,(time_of_day_seconds - t_start_q_flux)/(t_end_q_flux - t_start_q_flux)) + + heat_flux = max(0., 0.1*sin(pii*rel_time_t_flux)) + moisture_flux = max(0., 0.15*sin(pii*rel_time_q_flux))/1000. 
+ + end subroutine flux_les_sas + + + + !================================================================================================================= end module mpas_atmphys_driver_sfclayer !================================================================================================================= From d6d42f0de61cdef68009bdc4827f5ef4f346996c Mon Sep 17 00:00:00 2001 From: Jimy Dudhia Date: Tue, 23 Jun 2020 14:55:07 -0600 Subject: [PATCH 050/146] add flux_les_sas call in driver_sfclayer routine --- src/core_atmosphere/physics/mpas_atmphys_driver_sfclayer.F | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/core_atmosphere/physics/mpas_atmphys_driver_sfclayer.F b/src/core_atmosphere/physics/mpas_atmphys_driver_sfclayer.F index 3435ecbe60..bc0007eb7a 100644 --- a/src/core_atmosphere/physics/mpas_atmphys_driver_sfclayer.F +++ b/src/core_atmosphere/physics/mpas_atmphys_driver_sfclayer.F @@ -873,6 +873,7 @@ subroutine driver_sfclayer(itimestep,configs,mesh,diag_physics,sfc_input,its,ite integer :: H, M, S, S_n, S_d integer :: ierr real(kind=RKIND) :: time_of_day_seconds + real(kind=RKIND) :: heat_flux, moisture_flux !----------------------------------------------------------------------------------------------------------------- !call mpas_log_write('') @@ -906,6 +907,7 @@ subroutine driver_sfclayer(itimestep,configs,mesh,diag_physics,sfc_input,its,ite call mpas_get_time(curr_time=currTime, H=H, M=M, S=S, S_n=S_n, S_d=S_d) time_of_day_seconds = real(H)*3600. + real(M)*60. 
+ real(S) + real(S_n)/real(S_d) + 0.5*config_dt call mpas_log_write(' sfclay, time of day in seconds, $r ', realArgs=(/time_of_day_seconds/)) + call flux_les_sas( heat_flux, moisture_flux, time_of_day_seconds ) call sfclay( & p3d = pres_hyd_p , psfc = psfc_p , t3d = t_p , & From a7a1dc58ee7988b367cf21a7e1aefe805e438446 Mon Sep 17 00:00:00 2001 From: Jimy Dudhia Date: Tue, 23 Jun 2020 15:18:51 -0600 Subject: [PATCH 051/146] add fluxes to sfclay arguments and pass in --- src/core_atmosphere/physics/mpas_atmphys_driver_sfclayer.F | 6 ++++++ src/core_atmosphere/physics/physics_wrf/module_sf_sfclay.F | 5 ++++- 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/src/core_atmosphere/physics/mpas_atmphys_driver_sfclayer.F b/src/core_atmosphere/physics/mpas_atmphys_driver_sfclayer.F index bc0007eb7a..bc379217b9 100644 --- a/src/core_atmosphere/physics/mpas_atmphys_driver_sfclayer.F +++ b/src/core_atmosphere/physics/mpas_atmphys_driver_sfclayer.F @@ -874,6 +874,7 @@ subroutine driver_sfclayer(itimestep,configs,mesh,diag_physics,sfc_input,its,ite integer :: ierr real(kind=RKIND) :: time_of_day_seconds real(kind=RKIND) :: heat_flux, moisture_flux + logical :: specified_flux !----------------------------------------------------------------------------------------------------------------- !call mpas_log_write('') @@ -908,6 +909,7 @@ subroutine driver_sfclayer(itimestep,configs,mesh,diag_physics,sfc_input,its,ite time_of_day_seconds = real(H)*3600. + real(M)*60. + real(S) + real(S_n)/real(S_d) + 0.5*config_dt call mpas_log_write(' sfclay, time of day in seconds, $r ', realArgs=(/time_of_day_seconds/)) call flux_les_sas( heat_flux, moisture_flux, time_of_day_seconds ) + specified_flux = .true. 
call sfclay( & p3d = pres_hyd_p , psfc = psfc_p , t3d = t_p , & @@ -932,6 +934,8 @@ subroutine driver_sfclayer(itimestep,configs,mesh,diag_physics,sfc_input,its,ite ustm = ustm_p , ck = ck_p , cka = cka_p , & cd = cd_p , cda = cda_p , isftcflx = isftcflx , & iz0tlnd = iz0tlnd , scm_force_flux = scm_force_flux , & + heat_flux = heat_flux , moisture_flux = moisture_flux , & + specified_flux = specified_flux , & ids = ids , ide = ide , jds = jds , jde = jde , kds = kds , kde = kde , & ims = ims , ime = ime , jms = jms , jme = jme , kms = kms , kme = kme , & its = its , ite = ite , jts = jts , jte = jte , kts = kts , kte = kte & @@ -961,6 +965,8 @@ subroutine driver_sfclayer(itimestep,configs,mesh,diag_physics,sfc_input,its,ite ustm = ustm_sea , ck = ck_sea , cka = cka_sea , & cd = cd_sea , cda = cda_sea , isftcflx = isftcflx , & iz0tlnd = iz0tlnd , scm_force_flux = scm_force_flux , & + heat_flux = heat_flux , moisture_flux = moisture_flux , & + specified_flux = specified_flux , & ids = ids , ide = ide , jds = jds , jde = jde , kds = kds , kde = kde , & ims = ims , ime = ime , jms = jms , jme = jme , kms = kms , kme = kme , & its = its , ite = ite , jts = jts , jte = jte , kts = kts , kte = kte & diff --git a/src/core_atmosphere/physics/physics_wrf/module_sf_sfclay.F b/src/core_atmosphere/physics/physics_wrf/module_sf_sfclay.F index 2b3ba578f0..d9709a5547 100644 --- a/src/core_atmosphere/physics/physics_wrf/module_sf_sfclay.F +++ b/src/core_atmosphere/physics/physics_wrf/module_sf_sfclay.F @@ -25,7 +25,8 @@ SUBROUTINE SFCLAY(U3D,V3D,T3D,QV3D,P3D,dz8w, & ims,ime, jms,jme, kms,kme, & its,ite, jts,jte, kts,kte, & ustm,ck,cka,cd,cda, & - isftcflx,iz0tlnd,scm_force_flux) + isftcflx,iz0tlnd,scm_force_flux, & + heat_flux, moisture_flux, specified_flux ) !------------------------------------------------------------------- IMPLICIT NONE !------------------------------------------------------------------- @@ -184,6 +185,8 @@ SUBROUTINE SFCLAY(U3D,V3D,T3D,QV3D,P3D,dz8w, & 
INTEGER, OPTIONAL, INTENT(IN ) :: ISFTCFLX, IZ0TLND INTEGER, OPTIONAL, INTENT(IN ) :: SCM_FORCE_FLUX + REAL, INTENT(IN ) :: heat_flux, moisture_flux + LOGICAL, INTENT(IN ) :: specified_flux REAL, DIMENSION( ims:ime, jms:jme ) , & INTENT(INOUT ) :: QSFC From e6d26f2bf42aebc84f98d14de2f94aa1cd4848f8 Mon Sep 17 00:00:00 2001 From: Jimy Dudhia Date: Thu, 30 Jul 2020 10:44:57 -0600 Subject: [PATCH 052/146] pass heat flux args to sfclay1d --- src/core_atmosphere/physics/physics_wrf/module_sf_sfclay.F | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/core_atmosphere/physics/physics_wrf/module_sf_sfclay.F b/src/core_atmosphere/physics/physics_wrf/module_sf_sfclay.F index d9709a5547..bd614cb359 100644 --- a/src/core_atmosphere/physics/physics_wrf/module_sf_sfclay.F +++ b/src/core_atmosphere/physics/physics_wrf/module_sf_sfclay.F @@ -246,6 +246,7 @@ SUBROUTINE SFCLAY(U3D,V3D,T3D,QV3D,P3D,dz8w, & GZ1OZ0(ims,j),WSPD(ims,j),BR(ims,j),ISFFLX,DX2D, & SVP1,SVP2,SVP3,SVPT0,EP1,EP2,KARMAN,EOMEG,STBOLT, & P1000mb, & + heat_flux, moisture_flux, specified_flux, & ids,ide, jds,jde, kds,kde, & ims,ime, jms,jme, kms,kme, & its,ite, jts,jte, kts,kte & @@ -271,6 +272,7 @@ SUBROUTINE SFCLAY1D(J,UX,VX,T1D,QV1D,P1D,dz8w1d, & SVP1,SVP2,SVP3,SVPT0,EP1,EP2, & KARMAN,EOMEG,STBOLT, & P1000mb, & + heat_flux, moisture_flux, specified_flux, & ids,ide, jds,jde, kds,kde, & ims,ime, jms,jme, kms,kme, & its,ite, jts,jte, kts,kte, & @@ -292,6 +294,8 @@ SUBROUTINE SFCLAY1D(J,UX,VX,T1D,QV1D,P1D,dz8w1d, & REAL, INTENT(IN ) :: EP1,EP2,KARMAN,EOMEG,STBOLT REAL, INTENT(IN ) :: P1000mb + REAL, INTENT(IN ) :: heat_flux, moisture_flux + LOGICAL, INTENT(IN ) :: specified_flux ! 
REAL, DIMENSION( ims:ime ) , & INTENT(IN ) :: MAVAIL, & From 460c8b5dc2113e9bfd72696c0d653e03ff11603c Mon Sep 17 00:00:00 2001 From: Jimy Dudhia Date: Thu, 30 Jul 2020 11:11:56 -0600 Subject: [PATCH 053/146] copy specified fluxes to hfx and qfx before sfclay1d --- .../physics/physics_wrf/module_sf_sfclay.F | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/src/core_atmosphere/physics/physics_wrf/module_sf_sfclay.F b/src/core_atmosphere/physics/physics_wrf/module_sf_sfclay.F index bd614cb359..948f6e52ce 100644 --- a/src/core_atmosphere/physics/physics_wrf/module_sf_sfclay.F +++ b/src/core_atmosphere/physics/physics_wrf/module_sf_sfclay.F @@ -230,6 +230,13 @@ SUBROUTINE SFCLAY(U3D,V3D,T3D,QV3D,P3D,dz8w, & T1D(i) =T3D(i,1,j) ENDDO + IF(specified_flux)THEN + DO i=its,ite + HFX(i,j)=heat_flux + QFX(i,j)=moisture_flux + ENDDO + ENDIF + ! Sending array starting locations of optional variables may cause ! troubles, so we explicitly change the call. @@ -246,7 +253,7 @@ SUBROUTINE SFCLAY(U3D,V3D,T3D,QV3D,P3D,dz8w, & GZ1OZ0(ims,j),WSPD(ims,j),BR(ims,j),ISFFLX,DX2D, & SVP1,SVP2,SVP3,SVPT0,EP1,EP2,KARMAN,EOMEG,STBOLT, & P1000mb, & - heat_flux, moisture_flux, specified_flux, & + specified_flux, & ids,ide, jds,jde, kds,kde, & ims,ime, jms,jme, kms,kme, & its,ite, jts,jte, kts,kte & @@ -272,7 +279,7 @@ SUBROUTINE SFCLAY1D(J,UX,VX,T1D,QV1D,P1D,dz8w1d, & SVP1,SVP2,SVP3,SVPT0,EP1,EP2, & KARMAN,EOMEG,STBOLT, & P1000mb, & - heat_flux, moisture_flux, specified_flux, & + specified_flux, & ids,ide, jds,jde, kds,kde, & ims,ime, jms,jme, kms,kme, & its,ite, jts,jte, kts,kte, & @@ -294,7 +301,6 @@ SUBROUTINE SFCLAY1D(J,UX,VX,T1D,QV1D,P1D,dz8w1d, & REAL, INTENT(IN ) :: EP1,EP2,KARMAN,EOMEG,STBOLT REAL, INTENT(IN ) :: P1000mb - REAL, INTENT(IN ) :: heat_flux, moisture_flux LOGICAL, INTENT(IN ) :: specified_flux ! 
REAL, DIMENSION( ims:ime ) , & From fb5e94f36e79b1ae266cde5afc14df7e76e17b27 Mon Sep 17 00:00:00 2001 From: Jimy Dudhia Date: Thu, 30 Jul 2020 15:51:56 -0600 Subject: [PATCH 054/146] add MOL and BR calculations to sfclay1d --- .../physics/physics_wrf/module_sf_sfclay.F | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/src/core_atmosphere/physics/physics_wrf/module_sf_sfclay.F b/src/core_atmosphere/physics/physics_wrf/module_sf_sfclay.F index 948f6e52ce..7d726e40ae 100644 --- a/src/core_atmosphere/physics/physics_wrf/module_sf_sfclay.F +++ b/src/core_atmosphere/physics/physics_wrf/module_sf_sfclay.F @@ -398,6 +398,7 @@ SUBROUTINE SFCLAY1D(J,UX,VX,T1D,QV1D,P1D,dz8w1d, & REAL :: FLUXC,VSGD,Z0Q,VISC,RESTAR,CZIL,GZ0OZQ,GZ0OZT REAL :: ZW, ZN1, ZN2 REAL :: Z0T, CZC + REAL :: USTI !------------------------------------------------------------------- KL=kte @@ -544,6 +545,15 @@ SUBROUTINE SFCLAY1D(J,UX,VX,T1D,QV1D,P1D,dz8w1d, & 260 CONTINUE + IF(specified_flux)THEN +! Need to recalculate MOL and BR when HFX is given and TSK is not used + DO I=ITS,ITE +! MOL is THETA* + USTI = MAX(UST(I),0.01) + MOL(I) = HFX(I)/(RHOX(I)*CP*USTI) + BR(I) = KARMAN*GOVRTH(I)*ZA(I)*MOL(I)/(USTI*USTI)/GZ1OZ0(I) + ENDDO + ENDIF ! !-----DIAGNOSE BASIC PARAMETERS FOR THE APPROPRIATED STABILITY CLASS: ! 
From 88df14a5bf98a19db023dfb58954379ed3874659 Mon Sep 17 00:00:00 2001 From: Jimy Dudhia Date: Wed, 12 Aug 2020 14:48:22 -0600 Subject: [PATCH 055/146] add ustm drag in u_dissipation_3d les_sas_test --- src/core_atmosphere/dynamics/mpas_atm_dissipation_models.F | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/src/core_atmosphere/dynamics/mpas_atm_dissipation_models.F b/src/core_atmosphere/dynamics/mpas_atm_dissipation_models.F index b44e1ebbab..90b843168b 100644 --- a/src/core_atmosphere/dynamics/mpas_atm_dissipation_models.F +++ b/src/core_atmosphere/dynamics/mpas_atm_dissipation_models.F @@ -705,6 +705,7 @@ subroutine u_dissipation_3d( edgeStart, edgeEnd, edgeSolveStart, edgeSolveEnd, v real (kind=RKIND), dimension(nVertLevels+1) :: turb_vflux real (kind=RKIND) :: rho_k_cell1, rho_k_cell2, rho_k_at_w real (kind=RKIND) :: zz_cell1, zz_cell2, zz_at_w + real (kind=RKIND) :: ust_edge real (kind=RKIND) :: velocity_magnitude @@ -895,6 +896,11 @@ subroutine u_dissipation_3d( edgeStart, edgeEnd, edgeSolveStart, edgeSolveEnd, v velocity_magnitude = sqrt(u(1,iEdge)**2 + v(1,iEdge)**2) turb_vflux(1) = -rho_edge(1,iEdge)*tke_drag_coefficient*u(1,iEdge)*velocity_magnitude turb_vflux(nVertLevels+1) = turb_vflux(nVertLevels) + else if (les_sas_test) then + ust_edge = 0.5*(ustm(cell1) + ustm(cell2)) + velocity_magnitude = max(sqrt(u(1,iEdge)**2 + v(1,iEdge)**2),0.1) + turb_vflux(1) = -rho_edge(1,iEdge)*ust_edge*ust_edge*(u(1,iEdge)/velocity_magnitude) + turb_vflux(nVertLevels+1) = turb_vflux(nVertLevels) ! end test conditions else ! 
test conditions for supercell case From 5c9feb4e8a7fa6a7a5e5f53b2ca5d876ca8e96fa Mon Sep 17 00:00:00 2001 From: Jimy Dudhia Date: Thu, 13 Aug 2020 10:07:27 -0600 Subject: [PATCH 056/146] skip hfx and qfx calcs in sfclay for specified_flux --- src/core_atmosphere/physics/physics_wrf/module_sf_sfclay.F | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/core_atmosphere/physics/physics_wrf/module_sf_sfclay.F b/src/core_atmosphere/physics/physics_wrf/module_sf_sfclay.F index 7d726e40ae..77a115231d 100644 --- a/src/core_atmosphere/physics/physics_wrf/module_sf_sfclay.F +++ b/src/core_atmosphere/physics/physics_wrf/module_sf_sfclay.F @@ -844,6 +844,8 @@ SUBROUTINE SFCLAY1D(J,UX,VX,T1D,QV1D,P1D,dz8w1d, & 330 CONTINUE ! 335 CONTINUE + + IF(specified_flux) GOTO 410 !-----COMPUTE THE SURFACE SENSIBLE AND LATENT HEAT FLUXES: IF ( PRESENT(SCM_FORCE_FLUX) ) THEN From d0d418d2bcdc914015df02031bfa0b6e83f42e36 Mon Sep 17 00:00:00 2001 From: Jimy Dudhia Date: Thu, 13 Aug 2020 16:02:52 -0600 Subject: [PATCH 057/146] pass hfx and qfx to dissipation and use for les_sas --- .../dynamics/mpas_atm_dissipation_models.F | 6 +++++- .../dynamics/mpas_atm_time_integration.F | 11 +++++++---- 2 files changed, 12 insertions(+), 5 deletions(-) diff --git a/src/core_atmosphere/dynamics/mpas_atm_dissipation_models.F b/src/core_atmosphere/dynamics/mpas_atm_dissipation_models.F index 90b843168b..b19fd9551c 100644 --- a/src/core_atmosphere/dynamics/mpas_atm_dissipation_models.F +++ b/src/core_atmosphere/dynamics/mpas_atm_dissipation_models.F @@ -1585,6 +1585,7 @@ subroutine scalar_dissipation_3d_les( cellStart, cellEnd, cellSolveStart, cellSo index_tke, index_qv, num_scalars_dummy, mix_scalars, & config_les_model, time_of_day_seconds, & uReconstructZonal, uReconstructMeridional, & + hfx, qfx, & tend_theta_euler ) @@ -1641,6 +1642,7 @@ subroutine scalar_dissipation_3d_les( cellStart, cellEnd, cellSolveStart, cellSo real (kind=RKIND), dimension(nVertLevels,nCells+1), intent(in) :: eddy_visc_vert 
real (kind=RKIND), dimension(nVertLevels,nCells+1), intent(in) :: rho_zz real (kind=RKIND), dimension(nVertLevels,nEdges+1), intent(in) :: rho_edge + real (kind=RKIND), dimension(nCells+1), intent(in) :: hfx, qfx real (kind=RKIND), dimension(nVertLevels,nCells+1), intent(inout) :: tend_theta_euler @@ -1883,7 +1885,9 @@ subroutine scalar_dissipation_3d_les( cellStart, cellEnd, cellSolveStart, cellSo theta_m_cell = theta_m(1,iCell) theta_cell = theta_m_cell/(1.0+(rv/rgas)*qv_cell) - call flux_les_sas( heat_flux, moisture_flux, time_of_day_seconds ) +! call flux_les_sas( heat_flux, moisture_flux, time_of_day_seconds ) + heat_flux = hfx(iCell)/rho_zz(1,iCell)/cp + moisture_flux = qfx(iCell) if(iCell == 1) call mpas_log_write(' SAS t and qv fluxes, $r, $r ', realArgs=(/heat_flux, moisture_flux/)) theta_m_flux = heat_flux*(1.0+(rv/rgas)*qv_cell)+(rv/rgas)*theta_cell*moisture_flux diff --git a/src/core_atmosphere/dynamics/mpas_atm_time_integration.F b/src/core_atmosphere/dynamics/mpas_atm_time_integration.F index 96e74f7202..2c78768b84 100644 --- a/src/core_atmosphere/dynamics/mpas_atm_time_integration.F +++ b/src/core_atmosphere/dynamics/mpas_atm_time_integration.F @@ -4817,7 +4817,7 @@ subroutine atm_compute_dyn_tend(tend, tend_physics, state, diag, mesh, diag_phys real (kind=RKIND), dimension(:), pointer :: rdzu, rdzw, fzm, fzp, qv_init real (kind=RKIND), dimension(:,:), pointer :: t_init - real (kind=RKIND), dimension(:), pointer:: ustm + real (kind=RKIND), dimension(:), pointer:: ustm, hfx, qfx real (kind=RKIND), pointer :: cf1, cf2, cf3 @@ -4911,6 +4911,8 @@ subroutine atm_compute_dyn_tend(tend, tend_physics, state, diag, mesh, diag_phys call mpas_pool_get_array(tend_physics, 'rthdynten', rthdynten) call mpas_pool_get_array(diag_physics,'ustm',ustm) + call mpas_pool_get_array(diag_physics,'hfx',hfx) + call mpas_pool_get_array(diag_physics,'qfx',qfx) call mpas_pool_get_array(mesh, 'weightsOnEdge', weightsOnEdge) call mpas_pool_get_array(mesh, 'cellsOnEdge', 
cellsOnEdge) @@ -5018,7 +5020,7 @@ subroutine atm_compute_dyn_tend(tend, tend_physics, state, diag, mesh, diag_phys config_mpas_cam_coef, & config_rayleigh_damp_u, config_rayleigh_damp_u_timescale_days, & config_number_rayleigh_damp_u_levels, config_number_cam_damping_levels, & - tend_rtheta_adv, rthdynten, ustm, & + tend_rtheta_adv, rthdynten, ustm, hfx, qfx, & cellStart, cellEnd, vertexStart, vertexEnd, edgeStart, edgeEnd, & cellSolveStart, cellSolveEnd, vertexSolveStart, vertexSolveEnd, edgeSolveStart, edgeSolveEnd) @@ -5046,7 +5048,7 @@ subroutine atm_compute_dyn_tend_work(nCells, nEdges, nVertices, nVertLevels_dumm config_mpas_cam_coef, & config_rayleigh_damp_u, config_rayleigh_damp_u_timescale_days, & config_number_rayleigh_damp_u_levels, config_number_cam_damping_levels, & - tend_rtheta_adv, rthdynten, ustm, & + tend_rtheta_adv, rthdynten, ustm, hfx, qfx, & cellStart, cellEnd, vertexStart, vertexEnd, edgeStart, edgeEnd, & cellSolveStart, cellSolveEnd, vertexSolveStart, vertexSolveEnd, edgeSolveStart, edgeSolveEnd) @@ -5104,7 +5106,7 @@ subroutine atm_compute_dyn_tend_work(nCells, nEdges, nVertices, nVertLevels_dumm real (kind=RKIND), dimension(nVertLevels,nCells+1) :: eddy_visc_horz real (kind=RKIND), dimension(nVertLevels,nCells+1) :: eddy_visc_vert real (kind=RKIND), dimension(nVertLevels,nCells+1) :: bn2 - real (kind=RKIND), dimension(nCells+1) :: ustm + real (kind=RKIND), dimension(nCells+1) :: ustm, hfx, qfx real (kind=RKIND), dimension(nVertLevels,nCells+1) :: prandtl_3d_inv real (kind=RKIND), dimension(maxEdges,nCells+1) :: edgesOnCell_sign real (kind=RKIND), dimension(vertexDegree,nVertices+1) :: edgesOnVertex_sign @@ -5895,6 +5897,7 @@ subroutine atm_compute_dyn_tend_work(nCells, nEdges, nVertices, nVertLevels_dumm index_tke, index_qv, num_scalars, mix_scalars, & config_les_model, time_of_day_seconds, & ur_cell, vr_cell, & + hfx, qfx, & tend_theta_euler ) From 3121ce5a425b484da045cd97b25bf23268f644d7 Mon Sep 17 00:00:00 2001 From: Jimy Dudhia Date: 
Tue, 18 Aug 2020 11:44:13 -0600 Subject: [PATCH 058/146] comment out duplicate flux_les_sas in dissipiation module --- .../dynamics/mpas_atm_dissipation_models.F | 28 +++++++++---------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/src/core_atmosphere/dynamics/mpas_atm_dissipation_models.F b/src/core_atmosphere/dynamics/mpas_atm_dissipation_models.F index b19fd9551c..501efd8df9 100644 --- a/src/core_atmosphere/dynamics/mpas_atm_dissipation_models.F +++ b/src/core_atmosphere/dynamics/mpas_atm_dissipation_models.F @@ -1944,25 +1944,25 @@ end subroutine scalar_dissipation_3d_les !----------- - subroutine flux_les_sas(heat_flux, moisture_flux, time_of_day_seconds) +! subroutine flux_les_sas(heat_flux, moisture_flux, time_of_day_seconds) - implicit none +! implicit none - real (kind=RKIND), intent(in) :: time_of_day_seconds - real (kind=RKIND), intent(out) :: heat_flux, moisture_flux +! real (kind=RKIND), intent(in) :: time_of_day_seconds +! real (kind=RKIND), intent(out) :: heat_flux, moisture_flux - real (kind=RKIND), parameter:: t_start_t_flux = 3600.*6.0 - real (kind=RKIND), parameter:: t_end_t_flux = 3600.*19.50 - real (kind=RKIND), parameter:: t_start_q_flux = 3600.*7.0 - real (kind=RKIND), parameter:: t_end_q_flux = 3600.*19.50 - real (kind=RKIND) :: rel_time_t_flux, rel_time_q_flux +! real (kind=RKIND), parameter:: t_start_t_flux = 3600.*6.0 +! real (kind=RKIND), parameter:: t_end_t_flux = 3600.*19.50 +! real (kind=RKIND), parameter:: t_start_q_flux = 3600.*7.0 +! real (kind=RKIND), parameter:: t_end_q_flux = 3600.*19.50 +! real (kind=RKIND) :: rel_time_t_flux, rel_time_q_flux - rel_time_t_flux = max(0.,(time_of_day_seconds - t_start_t_flux)/(t_end_t_flux - t_start_t_flux)) - rel_time_q_flux = max(0.,(time_of_day_seconds - t_start_q_flux)/(t_end_q_flux - t_start_q_flux)) +! rel_time_t_flux = max(0.,(time_of_day_seconds - t_start_t_flux)/(t_end_t_flux - t_start_t_flux)) +! 
rel_time_q_flux = max(0.,(time_of_day_seconds - t_start_q_flux)/(t_end_q_flux - t_start_q_flux)) - heat_flux = max(0., 0.1*sin(pii*rel_time_t_flux)) - moisture_flux = max(0., 0.15*sin(pii*rel_time_q_flux))/1000. +! heat_flux = max(0., 0.1*sin(pii*rel_time_t_flux)) +! moisture_flux = max(0., 0.15*sin(pii*rel_time_q_flux))/1000. - end subroutine flux_les_sas +! end subroutine flux_les_sas end module mpas_atm_dissipation_models From af3233d053950096aeb8d3fa4d5df40582722c58 Mon Sep 17 00:00:00 2001 From: Jimy Dudhia Date: Tue, 18 Aug 2020 11:51:02 -0600 Subject: [PATCH 059/146] Set dummy TSK and UST=USTM (no VCONV) --- .../physics/physics_wrf/module_sf_sfclay.F | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/src/core_atmosphere/physics/physics_wrf/module_sf_sfclay.F b/src/core_atmosphere/physics/physics_wrf/module_sf_sfclay.F index 77a115231d..9739b08786 100644 --- a/src/core_atmosphere/physics/physics_wrf/module_sf_sfclay.F +++ b/src/core_atmosphere/physics/physics_wrf/module_sf_sfclay.F @@ -410,10 +410,16 @@ SUBROUTINE SFCLAY1D(J,UX,VX,T1D,QV1D,P1D,dz8w1d, & !----CONVERT GROUND TEMPERATURE TO POTENTIAL TEMPERATURE: ! DO 5 I=its,ite - TGDSA(I)=TSK(I) + IF(.not.specified_flux)THEN + TGDSA(I)=TSK(I) ! PSFC cb -! THGB(I)=TSK(I)*(100./PSFC(I))**ROVCP - THGB(I)=TSK(I)*(P1000mb/PSFCPA(I))**ROVCP +! THGB(I)=TSK(I)*(100./PSFC(I))**ROVCP + THGB(I)=TSK(I)*(P1000mb/PSFCPA(I))**ROVCP + ELSE +! with specified_flux TSK value is not input and given value will not affect USTM + TGDSA(I)=300. + THGB(I)=300.*(P1000mb/PSFCPA(I))**ROVCP + ENDIF 5 CONTINUE ! !-----DECOUPLE FLUX-FORM VARIABLES TO GIVE U,V,T,THETA,THETA-VIR., @@ -819,6 +825,8 @@ SUBROUTINE SFCLAY1D(J,UX,VX,T1D,QV1D,P1D,dz8w1d, & WSPDI(I)=SQRT(UX(I)*UX(I)+VX(I)*VX(I)) IF ( PRESENT(USTM) ) THEN USTM(I)=0.5*USTM(I)+0.5*KARMAN*WSPDI(I)/PSIX +! 
For specified_flux VCONV effect not added to UST that will be used for ZOL + IF(specified_flux)UST(I)=USTM(I) ENDIF U10(I)=UX(I)*PSIX10/PSIX V10(I)=VX(I)*PSIX10/PSIX From 16637caa1cf3f3a2369f84980976b88a6ff3b1dd Mon Sep 17 00:00:00 2001 From: Jimy Dudhia Date: Tue, 18 Aug 2020 14:38:11 -0600 Subject: [PATCH 060/146] sign correction for MOL --- src/core_atmosphere/physics/physics_wrf/module_sf_sfclay.F | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/core_atmosphere/physics/physics_wrf/module_sf_sfclay.F b/src/core_atmosphere/physics/physics_wrf/module_sf_sfclay.F index 9739b08786..36e53d11f0 100644 --- a/src/core_atmosphere/physics/physics_wrf/module_sf_sfclay.F +++ b/src/core_atmosphere/physics/physics_wrf/module_sf_sfclay.F @@ -556,7 +556,7 @@ SUBROUTINE SFCLAY1D(J,UX,VX,T1D,QV1D,P1D,dz8w1d, & DO I=ITS,ITE ! MOL is THETA* USTI = MAX(UST(I),0.01) - MOL(I) = HFX(I)/(RHOX(I)*CP*USTI) + MOL(I) = - HFX(I)/(RHOX(I)*CP*USTI) BR(I) = KARMAN*GOVRTH(I)*ZA(I)*MOL(I)/(USTI*USTI)/GZ1OZ0(I) ENDDO ENDIF From 210df8e127ebe3363fb8a5aa7627ea1abb00d0c2 Mon Sep 17 00:00:00 2001 From: Jimy Dudhia Date: Wed, 19 Aug 2020 10:59:52 -0600 Subject: [PATCH 061/146] pass clock atm_do_timestep to physics_driver to driver_sfclayer --- src/core_atmosphere/mpas_atm_core.F | 2 +- src/core_atmosphere/physics/mpas_atmphys_driver.F | 7 ++++--- src/core_atmosphere/physics/mpas_atmphys_driver_sfclayer.F | 4 +++- 3 files changed, 8 insertions(+), 5 deletions(-) diff --git a/src/core_atmosphere/mpas_atm_core.F b/src/core_atmosphere/mpas_atm_core.F index f7d04a1f0c..ce9875cc89 100644 --- a/src/core_atmosphere/mpas_atm_core.F +++ b/src/core_atmosphere/mpas_atm_core.F @@ -1015,7 +1015,7 @@ subroutine atm_do_timestep(domain, dt, itimestep) !proceed with physics if moist_physics is set to true: if(moist_physics) then call physics_timetracker(domain,dt,clock,itimestep,xtime_s) - call physics_driver(domain,itimestep,xtime_s) + call physics_driver(domain,clock,itimestep,xtime_s) endif 
#endif diff --git a/src/core_atmosphere/physics/mpas_atmphys_driver.F b/src/core_atmosphere/physics/mpas_atmphys_driver.F index 8e31672657..5d9d5d092c 100644 --- a/src/core_atmosphere/physics/mpas_atmphys_driver.F +++ b/src/core_atmosphere/physics/mpas_atmphys_driver.F @@ -108,12 +108,13 @@ module mpas_atmphys_driver !================================================================================================================= - subroutine physics_driver(domain,itimestep,xtime_s) + subroutine physics_driver(domain,clock,itimestep,xtime_s) !================================================================================================================= !input arguments: integer,intent(in):: itimestep real(kind=RKIND),intent(in):: xtime_s + type(MPAS_Clock_type),intent(in):: clock !inout arguments: type(domain_type),intent(inout):: domain @@ -269,8 +270,8 @@ subroutine physics_driver(domain,itimestep,xtime_s) call allocate_sfclayer(block%configs) !$OMP PARALLEL DO do thread=1,nThreads - call driver_sfclayer(itimestep,block%configs,mesh,diag_physics,sfc_input, & - cellSolveThreadStart(thread),cellSolveThreadEnd(thread)) + call driver_sfclayer(clock,itimestep,block%configs,mesh,diag_physics,sfc_input, & + cellSolveThreadStart(thread), cellSolveThreadEnd(thread)) end do !$OMP END PARALLEL DO call deallocate_sfclayer(block%configs) diff --git a/src/core_atmosphere/physics/mpas_atmphys_driver_sfclayer.F b/src/core_atmosphere/physics/mpas_atmphys_driver_sfclayer.F index bc379217b9..791d933405 100644 --- a/src/core_atmosphere/physics/mpas_atmphys_driver_sfclayer.F +++ b/src/core_atmosphere/physics/mpas_atmphys_driver_sfclayer.F @@ -841,10 +841,12 @@ subroutine init_sfclayer(configs) end subroutine init_sfclayer !================================================================================================================= - subroutine driver_sfclayer(itimestep,configs,mesh,diag_physics,sfc_input,its,ite) + subroutine 
driver_sfclayer(clock,itimestep,configs,mesh,diag_physics,sfc_input,its,ite) !================================================================================================================= !input and inout arguments: + type(MPAS_Clock_type),intent(in):: clock + type(mpas_pool_type),intent(in):: mesh type(mpas_pool_type),intent(in):: configs type(mpas_pool_type),intent(in):: sfc_input From eba2aeec2a673c0a64a19e5de1f2f43f8b758589 Mon Sep 17 00:00:00 2001 From: Jimy Dudhia Date: Wed, 19 Aug 2020 15:29:14 -0600 Subject: [PATCH 062/146] put k loop around prandtl_inverse calc in dissipation --- src/core_atmosphere/dynamics/mpas_atm_dissipation_models.F | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/core_atmosphere/dynamics/mpas_atm_dissipation_models.F b/src/core_atmosphere/dynamics/mpas_atm_dissipation_models.F index 501efd8df9..71c694c9a4 100644 --- a/src/core_atmosphere/dynamics/mpas_atm_dissipation_models.F +++ b/src/core_atmosphere/dynamics/mpas_atm_dissipation_models.F @@ -1846,7 +1846,9 @@ subroutine scalar_dissipation_3d_les( cellStart, cellEnd, cellSolveStart, cellSo ! prandtl_inverse(k) = 1. + 2.*tke_length/delta_z ! end do - prandtl_inverse(k) = 0.5*(prandtl_3d_inv(k,iCell)+prandtl_3d_inv(k-1,iCell)) + do k=2,nVertLevels + prandtl_inverse(k) = 0.5*(prandtl_3d_inv(k,iCell)+prandtl_3d_inv(k-1,iCell)) + end do end if From 9895ad556b19bccbc56353f7e3580338a04ca012 Mon Sep 17 00:00:00 2001 From: Bill Skamarock Date: Fri, 25 Sep 2020 14:31:17 -0600 Subject: [PATCH 063/146] bug fixes - fixed an uninitialized value in the w turbulent flux at the model top, and fixed a problem with the variable prandtl_inv in the scalar 3d mixing routine. 
--- .../dynamics/mpas_atm_dissipation_models.F | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/src/core_atmosphere/dynamics/mpas_atm_dissipation_models.F b/src/core_atmosphere/dynamics/mpas_atm_dissipation_models.F index 71c694c9a4..7d170c8bd3 100644 --- a/src/core_atmosphere/dynamics/mpas_atm_dissipation_models.F +++ b/src/core_atmosphere/dynamics/mpas_atm_dissipation_models.F @@ -1191,6 +1191,7 @@ subroutine w_dissipation_3d( cellStart, cellEnd, cellSolveStart, cellSolveEnd, turb_vflux(k) = - rho_zz(k,iCell)*eddy_visc_vert(k,iCell)*zz(k,iCell)*zz(k,iCell) & *rdzu(k)*(w(k+1,iCell)-w(k,iCell)) end do + turb_vflux(nVertLevels+1) = 0. do k=2,nVertLevels tend_w_euler(k,iCell) = tend_w_euler(k,iCell) & - rdzw(k)*(turb_vflux(k+1)-turb_vflux(k)) @@ -1653,7 +1654,7 @@ subroutine scalar_dissipation_3d_les( cellStart, cellEnd, cellSolveStart, cellSo integer :: cell1, cell2, iEdge, iCell, i, k, iScalar real (kind=RKIND) :: r_areaCell, edge_sign, theta_turb_flux, pr_scale real (kind=RKIND) :: z1, z2, z3, z4, zm, z0, zp - real (kind=RKIND), dimension(nVertLevels+1) :: turb_vflux, prandtl_inverse + real (kind=RKIND), dimension(nVertLevels+1) :: turb_vflux, prandtl_1d_inverse real (kind=RKIND), dimension(num_scalars,nVertLevels+1) :: turb_vflux_scalars real (kind=RKIND) :: rho_k_at_w, zz_at_w real (kind=RKIND) :: delta_z, delta_s, tke_length, bv_frequency2 @@ -1832,7 +1833,7 @@ subroutine scalar_dissipation_3d_les( cellStart, cellEnd, cellSolveStart, cellSo if ( config_les_model == "3d_smagorinsky") then do k=2,nVertLevels - prandtl_inverse(k) = prandtl_inv + prandtl_1d_inverse(k) = prandtl_inv end do else ! prognostic_1.5_order, isentropic mixing length ! do k=2,nVertLevels @@ -1847,7 +1848,7 @@ subroutine scalar_dissipation_3d_les( cellStart, cellEnd, cellSolveStart, cellSo ! 
end do do k=2,nVertLevels - prandtl_inverse(k) = 0.5*(prandtl_3d_inv(k,iCell)+prandtl_3d_inv(k-1,iCell)) + prandtl_1d_inverse(k) = 0.5*(prandtl_3d_inv(k,iCell)+prandtl_3d_inv(k-1,iCell)) end do end if @@ -1862,7 +1863,7 @@ subroutine scalar_dissipation_3d_les( cellStart, cellEnd, cellSolveStart, cellSo rho_k_at_w = fzm(k)*rho_zz(k ,iCell)*zz(k ,iCell)*eddy_visc_vert(k ,iCell) & +fzp(k)*rho_zz(k-1,iCell)*zz(k-1,iCell)*eddy_visc_vert(k-1,iCell) zz_at_w = fzm(k)*zz(k,iCell)+fzp(k)*zz(k-1,iCell) - turb_vflux(k) = - prandtl_inverse(k)*rho_k_at_w*zz_at_w*rdzu(k)*(theta_m(k,iCell)-theta_m(k-1,iCell)) + turb_vflux(k) = - prandtl_1d_inverse(k)*rho_k_at_w*zz_at_w*rdzu(k)*(theta_m(k,iCell)-theta_m(k-1,iCell)) end do ! test boundary conditions for supercell and les test cases @@ -1920,7 +1921,7 @@ subroutine scalar_dissipation_3d_les( cellStart, cellEnd, cellSolveStart, cellSo +fzp(k)*rho_zz(k-1,iCell)*zz(k-1,iCell)*eddy_visc_vert(k-1,iCell) zz_at_w = fzm(k)*zz(k,iCell)+fzp(k)*zz(k-1,iCell) do iScalar=1,num_scalars - turb_vflux_scalars(iScalar,k) = - prandtl_inverse(k)*rho_k_at_w*zz_at_w*rdzu(k)* & + turb_vflux_scalars(iScalar,k) = - prandtl_1d_inverse(k)*rho_k_at_w*zz_at_w*rdzu(k)* & (scalars(iScalar,k,iCell)-scalars(iScalar,k-1,iCell)) end do end do From 82a1a8cc2780d5d9b4c9d143250d89d87ef98e12 Mon Sep 17 00:00:00 2001 From: Jimy Dudhia Date: Tue, 27 Oct 2020 11:36:32 -0600 Subject: [PATCH 064/146] hfx = heat_flux*rho*cp (factor added) --- src/core_atmosphere/physics/physics_wrf/module_sf_sfclay.F | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/core_atmosphere/physics/physics_wrf/module_sf_sfclay.F b/src/core_atmosphere/physics/physics_wrf/module_sf_sfclay.F index 36e53d11f0..73852e6220 100644 --- a/src/core_atmosphere/physics/physics_wrf/module_sf_sfclay.F +++ b/src/core_atmosphere/physics/physics_wrf/module_sf_sfclay.F @@ -211,6 +211,7 @@ SUBROUTINE SFCLAY(U3D,V3D,T3D,QV3D,P3D,dz8w, & REAL, DIMENSION( its:ite ) :: DX2D INTEGER :: I,J + REAL :: RHO 
DO J=jts,jte @@ -232,7 +233,8 @@ SUBROUTINE SFCLAY(U3D,V3D,T3D,QV3D,P3D,dz8w, & IF(specified_flux)THEN DO i=its,ite - HFX(i,j)=heat_flux + RHO = P1D(i)/R/(T1D(i)*(1.+EP1*QV1D(i))) + HFX(i,j)=heat_flux*RHO*CP QFX(i,j)=moisture_flux ENDDO ENDIF From a7828afda7380955fd30179fb6cc1ff2ce95ed7b Mon Sep 17 00:00:00 2001 From: Jimy Dudhia Date: Tue, 10 Nov 2020 14:05:56 -0700 Subject: [PATCH 065/146] add print for ust_edge --- src/core_atmosphere/dynamics/mpas_atm_dissipation_models.F | 1 + 1 file changed, 1 insertion(+) diff --git a/src/core_atmosphere/dynamics/mpas_atm_dissipation_models.F b/src/core_atmosphere/dynamics/mpas_atm_dissipation_models.F index 7d170c8bd3..3430571a2f 100644 --- a/src/core_atmosphere/dynamics/mpas_atm_dissipation_models.F +++ b/src/core_atmosphere/dynamics/mpas_atm_dissipation_models.F @@ -898,6 +898,7 @@ subroutine u_dissipation_3d( edgeStart, edgeEnd, edgeSolveStart, edgeSolveEnd, v turb_vflux(nVertLevels+1) = turb_vflux(nVertLevels) else if (les_sas_test) then ust_edge = 0.5*(ustm(cell1) + ustm(cell2)) + if(iEdge == 1) call mpas_log_write(' SAS ust_edge, $r ', realArgs=(/ust_edge/)) velocity_magnitude = max(sqrt(u(1,iEdge)**2 + v(1,iEdge)**2),0.1) turb_vflux(1) = -rho_edge(1,iEdge)*ust_edge*ust_edge*(u(1,iEdge)/velocity_magnitude) turb_vflux(nVertLevels+1) = turb_vflux(nVertLevels) From 5e34d468f13bbe6e9a90334ba0c94fe70b83b145 Mon Sep 17 00:00:00 2001 From: Jimy Dudhia Date: Tue, 10 Nov 2020 16:00:01 -0700 Subject: [PATCH 066/146] initialize landmask, lu_index(ivgtyp), xland = 1 --- .../mpas_init_atm_cases.F | 35 ++++++++++++++----- 1 file changed, 26 insertions(+), 9 deletions(-) diff --git a/src/core_init_atmosphere/mpas_init_atm_cases.F b/src/core_init_atmosphere/mpas_init_atm_cases.F index 12de5af3e9..785f7a4b68 100644 --- a/src/core_init_atmosphere/mpas_init_atm_cases.F +++ b/src/core_init_atmosphere/mpas_init_atm_cases.F @@ -403,11 +403,12 @@ subroutine init_atm_setup_case(domain, stream_manager) call mpas_pool_get_dimension(block_ptr % 
dimensions, 'nVertLevels', nVertLevels) call mpas_pool_get_subpool(block_ptr % structs, 'mesh', mesh) + call mpas_pool_get_subpool(block_ptr % structs, 'fg', fg) call mpas_pool_get_subpool(block_ptr % structs, 'state', state) call mpas_pool_get_subpool(block_ptr % structs, 'diag', diag) call mpas_log_write(' calling test case setup ') - call init_atm_case_les(domain % dminfo, mesh, nCells, nVertLevels, state, diag, config_init_case, block_ptr % configs) + call init_atm_case_les(domain % dminfo, mesh, fg, nCells, nVertLevels, state, diag, config_init_case, block_ptr % configs) call decouple_variables(mesh, nCells, nVertLevels, state, diag) call mpas_log_write(' returned from test case setup ') block_ptr => block_ptr % next @@ -6203,7 +6204,7 @@ end subroutine init_atm_case_lbc !--------------------- - subroutine init_atm_case_les(dminfo, mesh, nCells, nVertLevels, state, diag, test_case, configs) + subroutine init_atm_case_les(dminfo, mesh, fg, nCells, nVertLevels, state, diag, test_case, configs) !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! ! Large Eddy Simulation (les) test case setup !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! @@ -6212,6 +6213,7 @@ subroutine init_atm_case_les(dminfo, mesh, nCells, nVertLevels, state, diag, tes type (dm_info), intent(in) :: dminfo type (mpas_pool_type), intent(inout) :: mesh + type (mpas_pool_type), intent(inout) :: fg integer, intent(in) :: nCells integer, intent(in) :: nVertLevels type (mpas_pool_type), intent(inout) :: state @@ -6226,6 +6228,9 @@ subroutine init_atm_case_les(dminfo, mesh, nCells, nVertLevels, state, diag, tes real (kind=RKIND), dimension(:,:,:), pointer :: scalars real (kind=RKIND), dimension(:,:,:), pointer :: zb, zb3 + real (kind=RKIND), dimension(:), pointer :: xland + integer, dimension(:), pointer :: landmask, lu_index + !This is temporary variable here. It just need when calculate tangential velocity v. 
integer :: eoe, j integer, dimension(:), pointer :: nEdgesOnEdge @@ -6372,6 +6377,10 @@ subroutine init_atm_case_les(dminfo, mesh, nCells, nVertLevels, state, diag, tes call mpas_pool_get_dimension(state, 'index_qv', index_qv) call mpas_pool_get_dimension(state, 'index_tke', index_tke) + call mpas_pool_get_array(fg, 'xland', xland) + call mpas_pool_get_array(mesh, 'landmask', landmask) + call mpas_pool_get_array(mesh, 'lu_index', lu_index) + scalars(:,:,:) = 0. call atm_initialize_advection_rk(mesh, nCells, nEdges, maxEdges, on_a_sphere, sphere_radius ) @@ -6543,7 +6552,7 @@ subroutine init_atm_case_les(dminfo, mesh, nCells, nVertLevels, state, diag, tes call mpas_dmpar_bcast_real(dminfo, pibtop) ptopb = p0*pibtop**(1./rcp) - call mpas_log_write('ptopb = $r', realArgs=(/0.01_RKIND*ptopb/)) +! call mpas_log_write('ptopb = $r', realArgs=(/0.01_RKIND*ptopb/)) do i=1, nCells pb(nz1,i) = pibtop+.5*dzw(nz1)*gravity*(1.+qvb(nz1))/(cp*tb(nz1,i)*zz(nz1,i)) @@ -6648,7 +6657,7 @@ subroutine init_atm_case_les(dminfo, mesh, nCells, nVertLevels, state, diag, tes end do pitop = pitop - .5*dzw(nz1)*gravity*(1.+scalars(index_qv,nz1,1))/(cp*t(nz1,1)*zz(nz1,1)) ptop = p0*pitop**(1./rcp) - call mpas_log_write('ptop = $r $r', realArgs=(/0.01_RKIND*ptop, 0.01_RKIND*ptopb/)) +! call mpas_log_write('ptop = $r $r', realArgs=(/0.01_RKIND*ptop, 0.01_RKIND*ptopb/)) call mpas_dmpar_bcast_real(dminfo, ptop) @@ -6666,11 +6675,11 @@ subroutine init_atm_case_les(dminfo, mesh, nCells, nVertLevels, state, diag, tes +fzp(k+1)*(rb(k ,i)*(scalars(index_qv,k ,i)-qvb(k)) & +rr(k ,i)*(1.+scalars(index_qv,k ,i)))) end do - if (itr==1.and.i==1) then - do k=1,nz1 - call mpas_log_write('pp-check $r', realArgs=(/pp(k,i)/)) - end do - end if +! if (itr==1.and.i==1) then +! do k=1,nz1 +! call mpas_log_write('pp-check $r', realArgs=(/pp(k,i)/)) +! end do +! 
end if do k=1,nz1 rt(k,i) = (pp(k,i)/(rgas*zz(k,i)) & -rtb(k,i)*(p(k,i)-pb(k,i)))/p(k,i) @@ -6760,6 +6769,14 @@ subroutine init_atm_case_les(dminfo, mesh, nCells, nVertLevels, state, diag, tes end do end do + do iCell = 1, nCells + !land category + landmask(iCell) = 1 + lu_index(iCell) = 1 + xland(iCell) = 1.0 + if (iCell == 1) call mpas_log_write(' landmask, lu_index, xland $i $i $r', intArgs=(/landmask(iCell),lu_index(iCell)/), realArgs=(/xland(iCell)/)) + end do + end subroutine init_atm_case_les From 451424455359c0f24903f800d614bd9446960a36 Mon Sep 17 00:00:00 2001 From: Jimy Dudhia Date: Tue, 2 Nov 2021 15:33:00 -0600 Subject: [PATCH 067/146] change hardwired value of f - need to generize to use fEdge --- src/core_atmosphere/dynamics/mpas_atm_time_integration.F | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/core_atmosphere/dynamics/mpas_atm_time_integration.F b/src/core_atmosphere/dynamics/mpas_atm_time_integration.F index 2c78768b84..f69f0847c8 100644 --- a/src/core_atmosphere/dynamics/mpas_atm_time_integration.F +++ b/src/core_atmosphere/dynamics/mpas_atm_time_integration.F @@ -5221,7 +5221,7 @@ subroutine atm_compute_dyn_tend_work(nCells, nEdges, nVertices, nVertLevels_dumm real (kind=RKIND) :: q_im2, q_im1, q_i, q_ip1, ua, coef3 logical, parameter :: perturbation_coriolis = .true. 
- real (kind=RKIND), parameter :: coriolis_value = 1.e-04 + real (kind=RKIND), parameter :: coriolis_value = 7.2921e-05 real (kind=RKIND) :: reference_u type (MPAS_Time_Type) :: currTime From cdb3a2e1b2cc37f074ae1e42e4178c13ef8f6abe Mon Sep 17 00:00:00 2001 From: Jimy Dudhia Date: Wed, 3 Nov 2021 12:59:51 -0600 Subject: [PATCH 068/146] fix rdzw/rdzu for w dissipation, use fEdge instead of coriolis_value --- src/core_atmosphere/dynamics/mpas_atm_dissipation_models.F | 4 ++-- src/core_atmosphere/dynamics/mpas_atm_time_integration.F | 3 +-- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/src/core_atmosphere/dynamics/mpas_atm_dissipation_models.F b/src/core_atmosphere/dynamics/mpas_atm_dissipation_models.F index 3430571a2f..63d558dfb8 100644 --- a/src/core_atmosphere/dynamics/mpas_atm_dissipation_models.F +++ b/src/core_atmosphere/dynamics/mpas_atm_dissipation_models.F @@ -1190,12 +1190,12 @@ subroutine w_dissipation_3d( cellStart, cellEnd, cellSolveStart, cellSolveEnd, ! compute turbulent fluxes do k=1,nVertLevels turb_vflux(k) = - rho_zz(k,iCell)*eddy_visc_vert(k,iCell)*zz(k,iCell)*zz(k,iCell) & - *rdzu(k)*(w(k+1,iCell)-w(k,iCell)) + *rdzw(k)*(w(k+1,iCell)-w(k,iCell)) end do turb_vflux(nVertLevels+1) = 0. do k=2,nVertLevels tend_w_euler(k,iCell) = tend_w_euler(k,iCell) & - - rdzw(k)*(turb_vflux(k+1)-turb_vflux(k)) + - rdzu(k)*(turb_vflux(k)-turb_vflux(k-1)) end do end do diff --git a/src/core_atmosphere/dynamics/mpas_atm_time_integration.F b/src/core_atmosphere/dynamics/mpas_atm_time_integration.F index f69f0847c8..064b8d4db3 100644 --- a/src/core_atmosphere/dynamics/mpas_atm_time_integration.F +++ b/src/core_atmosphere/dynamics/mpas_atm_time_integration.F @@ -5221,7 +5221,6 @@ subroutine atm_compute_dyn_tend_work(nCells, nEdges, nVertices, nVertLevels_dumm real (kind=RKIND) :: q_im2, q_im1, q_i, q_ip1, ua, coef3 logical, parameter :: perturbation_coriolis = .true. 
- real (kind=RKIND), parameter :: coriolis_value = 7.2921e-05 real (kind=RKIND) :: reference_u type (MPAS_Time_Type) :: currTime @@ -5499,7 +5498,7 @@ subroutine atm_compute_dyn_tend_work(nCells, nEdges, nVertices, nVertLevels_dumm eoe = edgesOnEdge(j,iEdge) do k=1,nVertLevels reference_u = u_init(k) * cos(angleEdge(eoe)) - v_init(k) * sin(angleEdge(eoe)) - q(k) = q(k) - weightsOnEdge(j,iEdge) * reference_u * coriolis_value + q(k) = q(k) - weightsOnEdge(j,iEdge) * reference_u * fEdge(iEdge) end do end do end if From dcaa6205a88d97d4a0a984c82f43dedf4c287c5f Mon Sep 17 00:00:00 2001 From: Jimy Dudhia Date: Wed, 3 Nov 2021 15:07:25 -0600 Subject: [PATCH 069/146] revert to hardwired f until fEdge can be used --- src/core_atmosphere/dynamics/mpas_atm_time_integration.F | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/core_atmosphere/dynamics/mpas_atm_time_integration.F b/src/core_atmosphere/dynamics/mpas_atm_time_integration.F index 064b8d4db3..948b50b7d7 100644 --- a/src/core_atmosphere/dynamics/mpas_atm_time_integration.F +++ b/src/core_atmosphere/dynamics/mpas_atm_time_integration.F @@ -5498,7 +5498,7 @@ subroutine atm_compute_dyn_tend_work(nCells, nEdges, nVertices, nVertLevels_dumm eoe = edgesOnEdge(j,iEdge) do k=1,nVertLevels reference_u = u_init(k) * cos(angleEdge(eoe)) - v_init(k) * sin(angleEdge(eoe)) - q(k) = q(k) - weightsOnEdge(j,iEdge) * reference_u * fEdge(iEdge) + q(k) = q(k) - weightsOnEdge(j,iEdge) * reference_u * 0.729210E-04 end do end do end if From 19e94ab31d43ed255be9c2e3b0704a43277e9879 Mon Sep 17 00:00:00 2001 From: Jimy Dudhia Date: Tue, 30 Nov 2021 14:08:23 -0700 Subject: [PATCH 070/146] convert moisture_flux w'q' to surface flux qfx --- src/core_atmosphere/physics/physics_wrf/module_sf_sfclay.F | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/core_atmosphere/physics/physics_wrf/module_sf_sfclay.F b/src/core_atmosphere/physics/physics_wrf/module_sf_sfclay.F index 73852e6220..f7399e4b40 100644 --- 
a/src/core_atmosphere/physics/physics_wrf/module_sf_sfclay.F +++ b/src/core_atmosphere/physics/physics_wrf/module_sf_sfclay.F @@ -235,7 +235,7 @@ SUBROUTINE SFCLAY(U3D,V3D,T3D,QV3D,P3D,dz8w, & DO i=its,ite RHO = P1D(i)/R/(T1D(i)*(1.+EP1*QV1D(i))) HFX(i,j)=heat_flux*RHO*CP - QFX(i,j)=moisture_flux + QFX(i,j)=moisture_flux*RHO ENDDO ENDIF From 2bea0489d118f0efef4acc55c9024c0e5ad7c5c9 Mon Sep 17 00:00:00 2001 From: Jimy Dudhia Date: Wed, 8 Dec 2021 11:58:20 -0700 Subject: [PATCH 071/146] moisture_flux in dynamics should be w'q' = qfx/rho --- src/core_atmosphere/dynamics/mpas_atm_dissipation_models.F | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/core_atmosphere/dynamics/mpas_atm_dissipation_models.F b/src/core_atmosphere/dynamics/mpas_atm_dissipation_models.F index 63d558dfb8..99d899462e 100644 --- a/src/core_atmosphere/dynamics/mpas_atm_dissipation_models.F +++ b/src/core_atmosphere/dynamics/mpas_atm_dissipation_models.F @@ -1891,7 +1891,7 @@ subroutine scalar_dissipation_3d_les( cellStart, cellEnd, cellSolveStart, cellSo ! 
call flux_les_sas( heat_flux, moisture_flux, time_of_day_seconds ) heat_flux = hfx(iCell)/rho_zz(1,iCell)/cp - moisture_flux = qfx(iCell) + moisture_flux = qfx(iCell)/rho_zz(1,iCell) if(iCell == 1) call mpas_log_write(' SAS t and qv fluxes, $r, $r ', realArgs=(/heat_flux, moisture_flux/)) theta_m_flux = heat_flux*(1.0+(rv/rgas)*qv_cell)+(rv/rgas)*theta_cell*moisture_flux From 99e890589fce090dd5f454fd45b1cd818e01baa0 Mon Sep 17 00:00:00 2001 From: Jimy Dudhia Date: Thu, 24 Mar 2022 11:34:03 -0600 Subject: [PATCH 072/146] add commented fEdge line while still using hardwired f --- src/core_atmosphere/dynamics/mpas_atm_time_integration.F | 1 + 1 file changed, 1 insertion(+) diff --git a/src/core_atmosphere/dynamics/mpas_atm_time_integration.F b/src/core_atmosphere/dynamics/mpas_atm_time_integration.F index 948b50b7d7..73d9731e52 100644 --- a/src/core_atmosphere/dynamics/mpas_atm_time_integration.F +++ b/src/core_atmosphere/dynamics/mpas_atm_time_integration.F @@ -5498,6 +5498,7 @@ subroutine atm_compute_dyn_tend_work(nCells, nEdges, nVertices, nVertLevels_dumm eoe = edgesOnEdge(j,iEdge) do k=1,nVertLevels reference_u = u_init(k) * cos(angleEdge(eoe)) - v_init(k) * sin(angleEdge(eoe)) +! 
q(k) = q(k) - weightsOnEdge(j,iEdge) * reference_u * fEdge(iEdge) q(k) = q(k) - weightsOnEdge(j,iEdge) * reference_u * 0.729210E-04 end do end do From 684cc611e820186d6fa0bf304f29f16991140b16 Mon Sep 17 00:00:00 2001 From: Jimy Dudhia Date: Wed, 22 Jun 2022 15:53:09 -0600 Subject: [PATCH 073/146] initial prep to use les_test option --- .../dynamics/mpas_atm_dissipation_models.F | 10 +++++++--- .../dynamics/mpas_atm_time_integration.F | 4 ++-- 2 files changed, 9 insertions(+), 5 deletions(-) diff --git a/src/core_atmosphere/dynamics/mpas_atm_dissipation_models.F b/src/core_atmosphere/dynamics/mpas_atm_dissipation_models.F index 99d899462e..39ab3c9ad4 100644 --- a/src/core_atmosphere/dynamics/mpas_atm_dissipation_models.F +++ b/src/core_atmosphere/dynamics/mpas_atm_dissipation_models.F @@ -15,7 +15,7 @@ module mpas_atm_dissipation_models use mpas_derived_types, only : MPAS_LOG_CRIT logical, parameter :: debug_dissipation = .false. - logical, parameter :: les_test = .false., les_sas_test = .true. + logical, parameter :: les_test = .true., les_sas_test = .false. !! real (kind=RKIND), parameter :: tke_heat_flux = 0.03 ! shear case from Moeng et al., first hour real (kind=RKIND), parameter :: tke_heat_flux = 0.0 !! real (kind=RKIND), parameter :: tke_drag_coefficient = 0.0013 ! ocean roughness length @@ -1874,10 +1874,14 @@ subroutine scalar_dissipation_3d_les( cellStart, cellEnd, cellSolveStart, cellSo moisture_flux = 0. heat_flux = tke_heat_flux + qv_cell = scalars(index_qv,1,iCell) + theta_m_cell = theta_m(1,iCell) + theta_cell = theta_m_cell/(1.0+(rv/rgas)*qv_cell) ! bulk formulation will go here. - theta_m_flux = heat_flux*(1.0+(rv/rgas)*scalars(index_qv,1,iCell)) & - +(rv/rgas)*theta_m(1,iCell)*moisture_flux/rho_zz(k,iCell) +! theta_m_flux = heat_flux*(1.0+(rv/rgas)*scalars(index_qv,1,iCell)) & +! +(rv/rgas)*theta_m(1,iCell)*moisture_flux/rho_zz(k,iCell) + theta_m_flux = heat_flux*(1.0+(rv/rgas)*qv_cell)+(rv/rgas)*theta_cell*moisture_flux ! 
turb_vflux(1) = tke_heat_flux*rho_zz(1,iCell) ! this is correct for DRY CASE ONLY turb_vflux(1) = theta_m_flux*rho_zz(1,iCell) turb_vflux(nVertLevels+1) = turb_vflux(nVertLevels) diff --git a/src/core_atmosphere/dynamics/mpas_atm_time_integration.F b/src/core_atmosphere/dynamics/mpas_atm_time_integration.F index 73d9731e52..48dce63ab7 100644 --- a/src/core_atmosphere/dynamics/mpas_atm_time_integration.F +++ b/src/core_atmosphere/dynamics/mpas_atm_time_integration.F @@ -5498,8 +5498,8 @@ subroutine atm_compute_dyn_tend_work(nCells, nEdges, nVertices, nVertLevels_dumm eoe = edgesOnEdge(j,iEdge) do k=1,nVertLevels reference_u = u_init(k) * cos(angleEdge(eoe)) - v_init(k) * sin(angleEdge(eoe)) -! q(k) = q(k) - weightsOnEdge(j,iEdge) * reference_u * fEdge(iEdge) - q(k) = q(k) - weightsOnEdge(j,iEdge) * reference_u * 0.729210E-04 + q(k) = q(k) - weightsOnEdge(j,iEdge) * reference_u * fEdge(iEdge) +! q(k) = q(k) - weightsOnEdge(j,iEdge) * reference_u * 0.729210E-04 end do end do end if From 2410a5143d9a13a9666c5ed88d05e1517988a598 Mon Sep 17 00:00:00 2001 From: Jimy Dudhia Date: Wed, 30 Oct 2024 09:57:52 -0600 Subject: [PATCH 074/146] fix for compile --- src/core_init_atmosphere/mpas_atm_advection.F | 4 ---- 1 file changed, 4 deletions(-) diff --git a/src/core_init_atmosphere/mpas_atm_advection.F b/src/core_init_atmosphere/mpas_atm_advection.F index 3b487f3797..891224a5e3 100644 --- a/src/core_init_atmosphere/mpas_atm_advection.F +++ b/src/core_init_atmosphere/mpas_atm_advection.F @@ -785,11 +785,7 @@ subroutine atm_initialize_deformation_weights( mesh, nCells, on_a_sphere, sphere integer :: iv, ie logical :: do_the_cell -<<<<<<< HEAD real (kind=RKIND) :: area_cell, sint2, cost2, sint_cost, dx, dy -======= - real (kind=RKIND) :: area_cell, sint2, cost2, sint_cost ->>>>>>> 94ba9031b (Fixed error in w_x, w_y deformation coefficients.) 
logical, pointer :: is_periodic real(kind=RKIND), pointer :: x_period, y_period From f0ee5ebaa6efebbcf985a81c39e143025f1eb7d0 Mon Sep 17 00:00:00 2001 From: Jimy Dudhia Date: Wed, 30 Oct 2024 13:21:19 -0600 Subject: [PATCH 075/146] fixes to compile init_atmosphere --- src/core_init_atmosphere/mpas_atm_advection.F | 1 - .../mpas_init_atm_cases.F | 39 ++++++++++--------- 2 files changed, 21 insertions(+), 19 deletions(-) diff --git a/src/core_init_atmosphere/mpas_atm_advection.F b/src/core_init_atmosphere/mpas_atm_advection.F index 891224a5e3..bff8843fbc 100644 --- a/src/core_init_atmosphere/mpas_atm_advection.F +++ b/src/core_init_atmosphere/mpas_atm_advection.F @@ -1023,7 +1023,6 @@ subroutine atm_initialize_deformation_weights( mesh, nCells, on_a_sphere, sphere end subroutine atm_initialize_deformation_weights - end module atm_advection !----------------------- subroutine atm_init_test_coefs( deformation_coef_c2, deformation_coef_s2, & diff --git a/src/core_init_atmosphere/mpas_init_atm_cases.F b/src/core_init_atmosphere/mpas_init_atm_cases.F index 785f7a4b68..cdfb9324f3 100644 --- a/src/core_init_atmosphere/mpas_init_atm_cases.F +++ b/src/core_init_atmosphere/mpas_init_atm_cases.F @@ -367,6 +367,27 @@ subroutine init_atm_setup_case(domain, stream_manager) ! 
call mpas_stream_mgr_reset_alarms(stream_manager, streamID='lbc', direction=MPAS_STREAM_OUTPUT, ierr=ierr) + else if (config_init_case == 10) then + + call mpas_log_write(' les test case ') + block_ptr => domain % blocklist + do while (associated(block_ptr)) + + call mpas_pool_get_dimension(block_ptr % dimensions, 'nCells', nCells) + call mpas_pool_get_dimension(block_ptr % dimensions, 'nVertLevels', nVertLevels) + + call mpas_pool_get_subpool(block_ptr % structs, 'mesh', mesh) + call mpas_pool_get_subpool(block_ptr % structs, 'fg', fg) + call mpas_pool_get_subpool(block_ptr % structs, 'state', state) + call mpas_pool_get_subpool(block_ptr % structs, 'diag', diag) + + call mpas_log_write(' calling test case setup ') + call init_atm_case_les(domain % dminfo, mesh, fg, nCells, nVertLevels, state, diag, config_init_case, block_ptr % configs) + call decouple_variables(mesh, nCells, nVertLevels, state, diag) + call mpas_log_write(' returned from test case setup ') + block_ptr => block_ptr % next + end do + else if (config_init_case == 13 ) then call mpas_log_write(' CAM-MPAS grid ') @@ -393,24 +414,6 @@ subroutine init_atm_setup_case(domain, stream_manager) call init_atm_case_cam_mpas(stream_manager, domain % dminfo, block_ptr, & mesh, block_ptr % dimensions, block_ptr % configs, nVertLevels) - else if (config_init_case == 10) then - - call mpas_log_write(' les test case ') - block_ptr => domain % blocklist - do while (associated(block_ptr)) - - call mpas_pool_get_dimension(block_ptr % dimensions, 'nCells', nCells) - call mpas_pool_get_dimension(block_ptr % dimensions, 'nVertLevels', nVertLevels) - - call mpas_pool_get_subpool(block_ptr % structs, 'mesh', mesh) - call mpas_pool_get_subpool(block_ptr % structs, 'fg', fg) - call mpas_pool_get_subpool(block_ptr % structs, 'state', state) - call mpas_pool_get_subpool(block_ptr % structs, 'diag', diag) - - call mpas_log_write(' calling test case setup ') - call init_atm_case_les(domain % dminfo, mesh, fg, nCells, 
nVertLevels, state, diag, config_init_case, block_ptr % configs) - call decouple_variables(mesh, nCells, nVertLevels, state, diag) - call mpas_log_write(' returned from test case setup ') block_ptr => block_ptr % next end do From af5807a6d4266d0e399e8980abdf349bc672c141 Mon Sep 17 00:00:00 2001 From: Jimy Dudhia Date: Fri, 1 Nov 2024 10:19:39 -0600 Subject: [PATCH 076/146] to compile atmosphere --- src/core_atmosphere/Registry.xml | 4 ---- .../dynamics/mpas_atm_time_integration.F | 16 +++------------- 2 files changed, 3 insertions(+), 17 deletions(-) diff --git a/src/core_atmosphere/Registry.xml b/src/core_atmosphere/Registry.xml index 9d1d5ac8a7..2f159f3958 100644 --- a/src/core_atmosphere/Registry.xml +++ b/src/core_atmosphere/Registry.xml @@ -1696,13 +1696,10 @@ - packages="mp_thompson_in"/> - - @@ -2080,7 +2077,6 @@ - packages="mp_thompson_in"/> diff --git a/src/core_atmosphere/dynamics/mpas_atm_time_integration.F b/src/core_atmosphere/dynamics/mpas_atm_time_integration.F index 48dce63ab7..b687fdc0a3 100644 --- a/src/core_atmosphere/dynamics/mpas_atm_time_integration.F +++ b/src/core_atmosphere/dynamics/mpas_atm_time_integration.F @@ -16,14 +16,13 @@ module atm_time_integration - use mpas_derived_types use mpas_pool_routines use mpas_kind_types use mpas_constants use mpas_dmpar use mpas_vector_reconstruction ! 
Added only clause to keep xlf90 from getting confused from the overloaded abs intrinsic in mpas_timekeeping - use mpas_derived_types, only : MPAS_Time_type, MPAS_TimeInterval_type, MPAS_NOW + use mpas_derived_types, only : MPAS_NOW use mpas_timekeeping, only: MPAS_Time_type, MPAS_TimeInterval_type, mpas_get_clock_time, & mpas_set_time, mpas_set_timeInterval, mpas_get_time, operator(+), add_t_ti use mpas_timer @@ -1149,14 +1148,6 @@ subroutine atm_srk3(domain, dt, itimestep, exchange_halo_group) end if call mpas_timer_start('atm_compute_dyn_tend') - block => domain % blocklist - do while (associated(block)) - call mpas_pool_get_subpool(block % structs, 'state', state) - call mpas_pool_get_subpool(block % structs, 'diag', diag) - call mpas_pool_get_subpool(block % structs, 'mesh', mesh) - call mpas_pool_get_subpool(block % structs, 'tend', tend) - call mpas_pool_get_subpool(block % structs, 'diag_physics', diag_physics) - call mpas_pool_get_subpool(block % structs, 'tend_physics', tend_physics) allocate(delsq_theta(nVertLevels,nCells+1)) allocate(delsq_w(nVertLevels,nCells+1)) @@ -4848,7 +4839,6 @@ subroutine atm_compute_dyn_tend(tend, tend_physics, state, diag, mesh, diag_phys logical, pointer :: config_rayleigh_damp_u real (kind=RKIND), pointer :: config_rayleigh_damp_u_timescale_days integer, pointer :: config_number_rayleigh_damp_u_levels, config_number_cam_damping_levels - integer, pointer :: config_number_rayleigh_damp_u_levels integer, pointer :: index_qv, index_qc, index_tke logical :: inactive_rthdynten @@ -5020,7 +5010,7 @@ subroutine atm_compute_dyn_tend(tend, tend_physics, state, diag, mesh, diag_phys config_mpas_cam_coef, & config_rayleigh_damp_u, config_rayleigh_damp_u_timescale_days, & config_number_rayleigh_damp_u_levels, config_number_cam_damping_levels, & - tend_rtheta_adv, rthdynten, ustm, hfx, qfx, & + rthdynten, ustm, hfx, qfx, & cellStart, cellEnd, vertexStart, vertexEnd, edgeStart, edgeEnd, & cellSolveStart, cellSolveEnd, vertexSolveStart, 
vertexSolveEnd, edgeSolveStart, edgeSolveEnd) @@ -5048,7 +5038,7 @@ subroutine atm_compute_dyn_tend_work(nCells, nEdges, nVertices, nVertLevels_dumm config_mpas_cam_coef, & config_rayleigh_damp_u, config_rayleigh_damp_u_timescale_days, & config_number_rayleigh_damp_u_levels, config_number_cam_damping_levels, & - tend_rtheta_adv, rthdynten, ustm, hfx, qfx, & + rthdynten, ustm, hfx, qfx, & cellStart, cellEnd, vertexStart, vertexEnd, edgeStart, edgeEnd, & cellSolveStart, cellSolveEnd, vertexSolveStart, vertexSolveEnd, edgeSolveStart, edgeSolveEnd) From a28454222ffd25e4a60d2d542917c2ce58c5a240 Mon Sep 17 00:00:00 2001 From: Jimy Dudhia Date: Thu, 2 Jan 2025 14:20:53 -0700 Subject: [PATCH 077/146] fix small error in tke shear production and add surface heat flux --- src/core_atmosphere/dynamics/mpas_atm_dissipation_models.F | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/core_atmosphere/dynamics/mpas_atm_dissipation_models.F b/src/core_atmosphere/dynamics/mpas_atm_dissipation_models.F index 39ab3c9ad4..7933a777bc 100644 --- a/src/core_atmosphere/dynamics/mpas_atm_dissipation_models.F +++ b/src/core_atmosphere/dynamics/mpas_atm_dissipation_models.F @@ -17,9 +17,9 @@ module mpas_atm_dissipation_models logical, parameter :: debug_dissipation = .false. logical, parameter :: les_test = .true., les_sas_test = .false. !! real (kind=RKIND), parameter :: tke_heat_flux = 0.03 ! shear case from Moeng et al., first hour - real (kind=RKIND), parameter :: tke_heat_flux = 0.0 + real (kind=RKIND), parameter :: tke_heat_flux = 0.03 !! real (kind=RKIND), parameter :: tke_drag_coefficient = 0.0013 ! ocean roughness length - real (kind=RKIND), parameter :: tke_drag_coefficient = 0.00935 + real (kind=RKIND), parameter :: tke_drag_coefficient = 0.006 real (kind=RKIND), parameter :: epsilon_bv = 1.e-06 real (kind=RKIND), parameter :: c_k = 0.1 @@ -280,7 +280,7 @@ subroutine les_models( config_les_model, eddy_visc_horz, eddy_visc_vert, ! 
terms for the prognostic tke integration shear_production = eddy_visc_h*(d_11(k)**2 + d_22(k)**2 + d_12(k)**2) & - +eddy_visc_v*(d_33(k)**2 + d_13(k)**2 + d_13(k)**2) + +eddy_visc_v*(d_33(k)**2 + d_13(k)**2 + d_23(k)**2) buoyancy = -eddy_visc_v*bv_freq2(k,iCell) From c3f694c42250a4a9a4e116f139488c4968a7b65d Mon Sep 17 00:00:00 2001 From: Jimy Dudhia Date: Wed, 19 Mar 2025 11:53:12 -0600 Subject: [PATCH 078/146] remove specialized physics code to leave dynonly les --- src/core_atmosphere/mpas_atm_core.F | 2 +- .../physics/mpas_atmphys_driver.F | 7 ++- .../physics/mpas_atmphys_driver_pbl.F | 5 -- .../physics/mpas_atmphys_driver_sfclayer.F | 54 +------------------ .../physics/physics_wrf/module_sf_sfclay.F | 43 ++------------- 5 files changed, 9 insertions(+), 102 deletions(-) diff --git a/src/core_atmosphere/mpas_atm_core.F b/src/core_atmosphere/mpas_atm_core.F index ce9875cc89..f7d04a1f0c 100644 --- a/src/core_atmosphere/mpas_atm_core.F +++ b/src/core_atmosphere/mpas_atm_core.F @@ -1015,7 +1015,7 @@ subroutine atm_do_timestep(domain, dt, itimestep) !proceed with physics if moist_physics is set to true: if(moist_physics) then call physics_timetracker(domain,dt,clock,itimestep,xtime_s) - call physics_driver(domain,clock,itimestep,xtime_s) + call physics_driver(domain,itimestep,xtime_s) endif #endif diff --git a/src/core_atmosphere/physics/mpas_atmphys_driver.F b/src/core_atmosphere/physics/mpas_atmphys_driver.F index 5d9d5d092c..8e31672657 100644 --- a/src/core_atmosphere/physics/mpas_atmphys_driver.F +++ b/src/core_atmosphere/physics/mpas_atmphys_driver.F @@ -108,13 +108,12 @@ module mpas_atmphys_driver !================================================================================================================= - subroutine physics_driver(domain,clock,itimestep,xtime_s) + subroutine physics_driver(domain,itimestep,xtime_s) !================================================================================================================= !input arguments: 
integer,intent(in):: itimestep real(kind=RKIND),intent(in):: xtime_s - type(MPAS_Clock_type),intent(in):: clock !inout arguments: type(domain_type),intent(inout):: domain @@ -270,8 +269,8 @@ subroutine physics_driver(domain,clock,itimestep,xtime_s) call allocate_sfclayer(block%configs) !$OMP PARALLEL DO do thread=1,nThreads - call driver_sfclayer(clock,itimestep,block%configs,mesh,diag_physics,sfc_input, & - cellSolveThreadStart(thread), cellSolveThreadEnd(thread)) + call driver_sfclayer(itimestep,block%configs,mesh,diag_physics,sfc_input, & + cellSolveThreadStart(thread),cellSolveThreadEnd(thread)) end do !$OMP END PARALLEL DO call deallocate_sfclayer(block%configs) diff --git a/src/core_atmosphere/physics/mpas_atmphys_driver_pbl.F b/src/core_atmosphere/physics/mpas_atmphys_driver_pbl.F index 43837ee27c..72a411aeba 100644 --- a/src/core_atmosphere/physics/mpas_atmphys_driver_pbl.F +++ b/src/core_atmosphere/physics/mpas_atmphys_driver_pbl.F @@ -775,7 +775,6 @@ subroutine driver_pbl(itimestep,configs,mesh,sfc_input,diag_physics,tend_physics bl_mynn_tkeadvect character(len=StrKIND),pointer:: pbl_scheme - character(len=StrKIND),pointer:: config_les_model integer,pointer:: bl_mynn_cloudpdf, & bl_mynn_mixlength, & @@ -814,7 +813,6 @@ subroutine driver_pbl(itimestep,configs,mesh,sfc_input,diag_physics,tend_physics call mpas_pool_get_config(configs,'config_do_DAcycling',config_do_DAcycling) call mpas_pool_get_config(configs,'config_do_restart' ,config_do_restart ) call mpas_pool_get_config(configs,'config_pbl_scheme' ,pbl_scheme ) - call mpas_pool_get_config(configs, 'config_les_model', config_les_model) !copy MPAS arrays to local arrays: call pbl_from_MPAS(configs,mesh,sfc_input,diag_physics,tend_physics,its,ite) @@ -822,7 +820,6 @@ subroutine driver_pbl(itimestep,configs,mesh,sfc_input,diag_physics,tend_physics initflag = 1 if(config_do_restart .or. 
itimestep > 1) initflag = 0 - if(config_les_model == "none") then pbl_select: select case (trim(pbl_scheme)) case("bl_ysu") @@ -967,8 +964,6 @@ subroutine driver_pbl(itimestep,configs,mesh,sfc_input,diag_physics,tend_physics case default end select pbl_select - - endif ! les skip pbl !copy local arrays to MPAS grid: call pbl_to_MPAS(configs,diag_physics,tend_physics,its,ite) diff --git a/src/core_atmosphere/physics/mpas_atmphys_driver_sfclayer.F b/src/core_atmosphere/physics/mpas_atmphys_driver_sfclayer.F index 791d933405..afde4fa523 100644 --- a/src/core_atmosphere/physics/mpas_atmphys_driver_sfclayer.F +++ b/src/core_atmosphere/physics/mpas_atmphys_driver_sfclayer.F @@ -8,11 +8,7 @@ !================================================================================================================= module mpas_atmphys_driver_sfclayer use mpas_kind_types - use mpas_derived_types use mpas_pool_routines -! use mpas_timekeeping, only: MPAS_Time_type, MPAS_TimeInterval_type, mpas_get_clock_time, & -! mpas_set_time, mpas_set_timeInterval, mpas_get_time, operator(+), add_t_ti - use mpas_timekeeping, only: MPAS_Time_type, mpas_get_clock_time, mpas_get_time use mpas_timer, only : mpas_timer_start, mpas_timer_stop use mpas_atmphys_constants @@ -36,7 +32,6 @@ module mpas_atmphys_driver_sfclayer integer,parameter,private:: iz0tlnd = 0 !=0,(Carlson-Boland). integer,parameter,private:: scm_force_flux = 0 !SCM surface forcing by surface fluxes. !0=no 1=yes (WRF single column model option only). - type (MPAS_Clock_type), pointer, private :: clock !MPAS driver for parameterization of the surface layer. !Laura D. Fowler (send comments to laura@ucar.edu). 
@@ -841,12 +836,10 @@ subroutine init_sfclayer(configs) end subroutine init_sfclayer !================================================================================================================= - subroutine driver_sfclayer(clock,itimestep,configs,mesh,diag_physics,sfc_input,its,ite) + subroutine driver_sfclayer(itimestep,configs,mesh,diag_physics,sfc_input,its,ite) !================================================================================================================= !input and inout arguments: - type(MPAS_Clock_type),intent(in):: clock - type(mpas_pool_type),intent(in):: mesh type(mpas_pool_type),intent(in):: configs type(mpas_pool_type),intent(in):: sfc_input @@ -861,7 +854,6 @@ subroutine driver_sfclayer(clock,itimestep,configs,mesh,diag_physics,sfc_input,i logical,pointer:: config_do_restart,config_frac_seaice character(len=StrKIND),pointer:: sfclayer_scheme real(kind=RKIND),dimension(:),pointer:: areaCell - real(kind=RKIND),pointer:: config_dt !local variables: integer:: initflag @@ -871,13 +863,6 @@ subroutine driver_sfclayer(clock,itimestep,configs,mesh,diag_physics,sfc_input,i character(len=StrKIND):: errmsg integer:: errflg - type (MPAS_Time_Type) :: currTime - integer :: H, M, S, S_n, S_d - integer :: ierr - real(kind=RKIND) :: time_of_day_seconds - real(kind=RKIND) :: heat_flux, moisture_flux - logical :: specified_flux - !----------------------------------------------------------------------------------------------------------------- !call mpas_log_write('') !call mpas_log_write('--- enter subroutine driver_sfclayer:') @@ -889,7 +874,6 @@ subroutine driver_sfclayer(clock,itimestep,configs,mesh,diag_physics,sfc_input,i call mpas_pool_get_config(configs,'config_do_restart' ,config_do_restart ) call mpas_pool_get_config(configs,'config_frac_seaice' ,config_frac_seaice) call mpas_pool_get_config(configs,'config_sfclayer_scheme',sfclayer_scheme ) - call mpas_pool_get_config(configs,'config_dt',config_dt) call 
mpas_pool_get_array(mesh,'areaCell',areaCell) @@ -905,14 +889,6 @@ subroutine driver_sfclayer(clock,itimestep,configs,mesh,diag_physics,sfc_input,i case("sf_monin_obukhov") call mpas_timer_start('sf_monin_obukhov') - - currTime = mpas_get_clock_time(clock, MPAS_NOW, ierr) - call mpas_get_time(curr_time=currTime, H=H, M=M, S=S, S_n=S_n, S_d=S_d) - time_of_day_seconds = real(H)*3600. + real(M)*60. + real(S) + real(S_n)/real(S_d) + 0.5*config_dt - call mpas_log_write(' sfclay, time of day in seconds, $r ', realArgs=(/time_of_day_seconds/)) - call flux_les_sas( heat_flux, moisture_flux, time_of_day_seconds ) - specified_flux = .true. - call sfclay( & p3d = pres_hyd_p , psfc = psfc_p , t3d = t_p , & u3d = u_p , v3d = v_p , qv3d = qv_p , & @@ -936,8 +912,6 @@ subroutine driver_sfclayer(clock,itimestep,configs,mesh,diag_physics,sfc_input,i ustm = ustm_p , ck = ck_p , cka = cka_p , & cd = cd_p , cda = cda_p , isftcflx = isftcflx , & iz0tlnd = iz0tlnd , scm_force_flux = scm_force_flux , & - heat_flux = heat_flux , moisture_flux = moisture_flux , & - specified_flux = specified_flux , & ids = ids , ide = ide , jds = jds , jde = jde , kds = kds , kde = kde , & ims = ims , ime = ime , jms = jms , jme = jme , kms = kms , kme = kme , & its = its , ite = ite , jts = jts , jte = jte , kts = kts , kte = kte & @@ -967,8 +941,6 @@ subroutine driver_sfclayer(clock,itimestep,configs,mesh,diag_physics,sfc_input,i ustm = ustm_sea , ck = ck_sea , cka = cka_sea , & cd = cd_sea , cda = cda_sea , isftcflx = isftcflx , & iz0tlnd = iz0tlnd , scm_force_flux = scm_force_flux , & - heat_flux = heat_flux , moisture_flux = moisture_flux , & - specified_flux = specified_flux , & ids = ids , ide = ide , jds = jds , jde = jde , kds = kds , kde = kde , & ims = ims , ime = ime , jms = jms , jme = jme , kms = kms , kme = kme , & its = its , ite = ite , jts = jts , jte = jte , kts = kts , kte = kte & @@ -1115,30 +1087,6 @@ subroutine driver_sfclayer(clock,itimestep,configs,mesh,diag_physics,sfc_input,i end 
subroutine driver_sfclayer - subroutine flux_les_sas(heat_flux, moisture_flux, time_of_day_seconds) - - implicit none - - real (kind=RKIND), intent(in) :: time_of_day_seconds - real (kind=RKIND), intent(out) :: heat_flux, moisture_flux - - real (kind=RKIND), parameter:: t_start_t_flux = 3600.*6.0 - real (kind=RKIND), parameter:: t_end_t_flux = 3600.*19.50 - real (kind=RKIND), parameter:: t_start_q_flux = 3600.*7.0 - real (kind=RKIND), parameter:: t_end_q_flux = 3600.*19.50 - real (kind=RKIND) :: rel_time_t_flux, rel_time_q_flux - - rel_time_t_flux = max(0.,(time_of_day_seconds - t_start_t_flux)/(t_end_t_flux - t_start_t_flux)) - rel_time_q_flux = max(0.,(time_of_day_seconds - t_start_q_flux)/(t_end_q_flux - t_start_q_flux)) - - heat_flux = max(0., 0.1*sin(pii*rel_time_t_flux)) - moisture_flux = max(0., 0.15*sin(pii*rel_time_q_flux))/1000. - - end subroutine flux_les_sas - - - - !================================================================================================================= end module mpas_atmphys_driver_sfclayer !================================================================================================================= diff --git a/src/core_atmosphere/physics/physics_wrf/module_sf_sfclay.F b/src/core_atmosphere/physics/physics_wrf/module_sf_sfclay.F index f7399e4b40..2b3ba578f0 100644 --- a/src/core_atmosphere/physics/physics_wrf/module_sf_sfclay.F +++ b/src/core_atmosphere/physics/physics_wrf/module_sf_sfclay.F @@ -25,8 +25,7 @@ SUBROUTINE SFCLAY(U3D,V3D,T3D,QV3D,P3D,dz8w, & ims,ime, jms,jme, kms,kme, & its,ite, jts,jte, kts,kte, & ustm,ck,cka,cd,cda, & - isftcflx,iz0tlnd,scm_force_flux, & - heat_flux, moisture_flux, specified_flux ) + isftcflx,iz0tlnd,scm_force_flux) !------------------------------------------------------------------- IMPLICIT NONE !------------------------------------------------------------------- @@ -185,8 +184,6 @@ SUBROUTINE SFCLAY(U3D,V3D,T3D,QV3D,P3D,dz8w, & INTEGER, OPTIONAL, INTENT(IN ) :: ISFTCFLX, IZ0TLND 
INTEGER, OPTIONAL, INTENT(IN ) :: SCM_FORCE_FLUX - REAL, INTENT(IN ) :: heat_flux, moisture_flux - LOGICAL, INTENT(IN ) :: specified_flux REAL, DIMENSION( ims:ime, jms:jme ) , & INTENT(INOUT ) :: QSFC @@ -211,7 +208,6 @@ SUBROUTINE SFCLAY(U3D,V3D,T3D,QV3D,P3D,dz8w, & REAL, DIMENSION( its:ite ) :: DX2D INTEGER :: I,J - REAL :: RHO DO J=jts,jte @@ -231,14 +227,6 @@ SUBROUTINE SFCLAY(U3D,V3D,T3D,QV3D,P3D,dz8w, & T1D(i) =T3D(i,1,j) ENDDO - IF(specified_flux)THEN - DO i=its,ite - RHO = P1D(i)/R/(T1D(i)*(1.+EP1*QV1D(i))) - HFX(i,j)=heat_flux*RHO*CP - QFX(i,j)=moisture_flux*RHO - ENDDO - ENDIF - ! Sending array starting locations of optional variables may cause ! troubles, so we explicitly change the call. @@ -255,7 +243,6 @@ SUBROUTINE SFCLAY(U3D,V3D,T3D,QV3D,P3D,dz8w, & GZ1OZ0(ims,j),WSPD(ims,j),BR(ims,j),ISFFLX,DX2D, & SVP1,SVP2,SVP3,SVPT0,EP1,EP2,KARMAN,EOMEG,STBOLT, & P1000mb, & - specified_flux, & ids,ide, jds,jde, kds,kde, & ims,ime, jms,jme, kms,kme, & its,ite, jts,jte, kts,kte & @@ -281,7 +268,6 @@ SUBROUTINE SFCLAY1D(J,UX,VX,T1D,QV1D,P1D,dz8w1d, & SVP1,SVP2,SVP3,SVPT0,EP1,EP2, & KARMAN,EOMEG,STBOLT, & P1000mb, & - specified_flux, & ids,ide, jds,jde, kds,kde, & ims,ime, jms,jme, kms,kme, & its,ite, jts,jte, kts,kte, & @@ -303,7 +289,6 @@ SUBROUTINE SFCLAY1D(J,UX,VX,T1D,QV1D,P1D,dz8w1d, & REAL, INTENT(IN ) :: EP1,EP2,KARMAN,EOMEG,STBOLT REAL, INTENT(IN ) :: P1000mb - LOGICAL, INTENT(IN ) :: specified_flux ! REAL, DIMENSION( ims:ime ) , & INTENT(IN ) :: MAVAIL, & @@ -400,7 +385,6 @@ SUBROUTINE SFCLAY1D(J,UX,VX,T1D,QV1D,P1D,dz8w1d, & REAL :: FLUXC,VSGD,Z0Q,VISC,RESTAR,CZIL,GZ0OZQ,GZ0OZT REAL :: ZW, ZN1, ZN2 REAL :: Z0T, CZC - REAL :: USTI !------------------------------------------------------------------- KL=kte @@ -412,16 +396,10 @@ SUBROUTINE SFCLAY1D(J,UX,VX,T1D,QV1D,P1D,dz8w1d, & !----CONVERT GROUND TEMPERATURE TO POTENTIAL TEMPERATURE: ! DO 5 I=its,ite - IF(.not.specified_flux)THEN - TGDSA(I)=TSK(I) + TGDSA(I)=TSK(I) ! PSFC cb -! 
THGB(I)=TSK(I)*(100./PSFC(I))**ROVCP - THGB(I)=TSK(I)*(P1000mb/PSFCPA(I))**ROVCP - ELSE -! with specified_flux TSK value is not input and given value will not affect USTM - TGDSA(I)=300. - THGB(I)=300.*(P1000mb/PSFCPA(I))**ROVCP - ENDIF +! THGB(I)=TSK(I)*(100./PSFC(I))**ROVCP + THGB(I)=TSK(I)*(P1000mb/PSFCPA(I))**ROVCP 5 CONTINUE ! !-----DECOUPLE FLUX-FORM VARIABLES TO GIVE U,V,T,THETA,THETA-VIR., @@ -553,15 +531,6 @@ SUBROUTINE SFCLAY1D(J,UX,VX,T1D,QV1D,P1D,dz8w1d, & 260 CONTINUE - IF(specified_flux)THEN -! Need to recalculate MOL and BR when HFX is given and TSK is not used - DO I=ITS,ITE -! MOL is THETA* - USTI = MAX(UST(I),0.01) - MOL(I) = - HFX(I)/(RHOX(I)*CP*USTI) - BR(I) = KARMAN*GOVRTH(I)*ZA(I)*MOL(I)/(USTI*USTI)/GZ1OZ0(I) - ENDDO - ENDIF ! !-----DIAGNOSE BASIC PARAMETERS FOR THE APPROPRIATED STABILITY CLASS: ! @@ -827,8 +796,6 @@ SUBROUTINE SFCLAY1D(J,UX,VX,T1D,QV1D,P1D,dz8w1d, & WSPDI(I)=SQRT(UX(I)*UX(I)+VX(I)*VX(I)) IF ( PRESENT(USTM) ) THEN USTM(I)=0.5*USTM(I)+0.5*KARMAN*WSPDI(I)/PSIX -! For specified_flux VCONV effect not added to UST that will be used for ZOL - IF(specified_flux)UST(I)=USTM(I) ENDIF U10(I)=UX(I)*PSIX10/PSIX V10(I)=VX(I)*PSIX10/PSIX @@ -854,8 +821,6 @@ SUBROUTINE SFCLAY1D(J,UX,VX,T1D,QV1D,P1D,dz8w1d, & 330 CONTINUE ! 
335 CONTINUE - - IF(specified_flux) GOTO 410 !-----COMPUTE THE SURFACE SENSIBLE AND LATENT HEAT FLUXES: IF ( PRESENT(SCM_FORCE_FLUX) ) THEN From d916283e4b28d6ba6ed9a3892775e5d767e488e6 Mon Sep 17 00:00:00 2001 From: Jimy Dudhia Date: Wed, 21 May 2025 10:14:59 -0600 Subject: [PATCH 079/146] mods to scale les-supercell case and remove surface fluxes from les_test option --- src/core_atmosphere/dynamics/mpas_atm_dissipation_models.F | 6 ++++-- src/core_init_atmosphere/mpas_init_atm_cases.F | 7 +++++-- 2 files changed, 9 insertions(+), 4 deletions(-) diff --git a/src/core_atmosphere/dynamics/mpas_atm_dissipation_models.F b/src/core_atmosphere/dynamics/mpas_atm_dissipation_models.F index 7933a777bc..c07df5669a 100644 --- a/src/core_atmosphere/dynamics/mpas_atm_dissipation_models.F +++ b/src/core_atmosphere/dynamics/mpas_atm_dissipation_models.F @@ -17,9 +17,11 @@ module mpas_atm_dissipation_models logical, parameter :: debug_dissipation = .false. logical, parameter :: les_test = .true., les_sas_test = .false. !! real (kind=RKIND), parameter :: tke_heat_flux = 0.03 ! shear case from Moeng et al., first hour - real (kind=RKIND), parameter :: tke_heat_flux = 0.03 + ! real (kind=RKIND), parameter :: tke_heat_flux = 0.03 + real (kind=RKIND), parameter :: tke_heat_flux = 0.0 !! real (kind=RKIND), parameter :: tke_drag_coefficient = 0.0013 ! ocean roughness length - real (kind=RKIND), parameter :: tke_drag_coefficient = 0.006 + ! 
real (kind=RKIND), parameter :: tke_drag_coefficient = 0.006 + real (kind=RKIND), parameter :: tke_drag_coefficient = 0.0 real (kind=RKIND), parameter :: epsilon_bv = 1.e-06 real (kind=RKIND), parameter :: c_k = 0.1 diff --git a/src/core_init_atmosphere/mpas_init_atm_cases.F b/src/core_init_atmosphere/mpas_init_atm_cases.F index cdfb9324f3..f1b747eb78 100644 --- a/src/core_init_atmosphere/mpas_init_atm_cases.F +++ b/src/core_init_atmosphere/mpas_init_atm_cases.F @@ -1459,6 +1459,7 @@ subroutine init_atm_case_squall_line(dminfo, mesh, nCells, nVertLevels, state, d real (kind=RKIND), pointer :: nominalMinDc logical, pointer :: on_a_sphere real (kind=RKIND), pointer :: sphere_radius + real (kind=RKIND), pointer :: config_ztop real (kind=RKIND), dimension(:,:), pointer :: t_init, w, rw, v, rho, theta real (kind=RKIND), dimension(:), pointer :: u_init, qv_init, angleEdge, fEdge, fVertex @@ -1484,12 +1485,13 @@ subroutine init_atm_case_squall_line(dminfo, mesh, nCells, nVertLevels, state, d call mpas_pool_get_config(mesh, 'on_a_sphere', on_a_sphere) call mpas_pool_get_config(mesh, 'sphere_radius', sphere_radius) call mpas_pool_get_config(configs, 'config_interface_projection', config_interface_projection) + call mpas_pool_get_config(configs, 'config_ztop', config_ztop) ! ! Scale all distances ! - a_scale = 1.0 + a_scale = 11.25 xCell(:) = xCell(:) * a_scale yCell(:) = yCell(:) * a_scale @@ -1593,7 +1595,8 @@ subroutine init_atm_case_squall_line(dminfo, mesh, nCells, nVertLevels, state, d ! metrics for hybrid coordinate and vertical stretching str = 1.0 - zt = 20000. +! zt = 20000. + zt = config_ztop dz = zt/float(nz1) ! 
write(0,*) ' dz = ',dz From 97e73ed9fd85567f807bc55b85961b17bff584b1 Mon Sep 17 00:00:00 2001 From: Jimy Dudhia Date: Fri, 23 May 2025 15:15:45 -0600 Subject: [PATCH 080/146] add seed tke = 0.1 at all points --- src/core_init_atmosphere/mpas_init_atm_cases.F | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/src/core_init_atmosphere/mpas_init_atm_cases.F b/src/core_init_atmosphere/mpas_init_atm_cases.F index f1b747eb78..0f2d92d42c 100644 --- a/src/core_init_atmosphere/mpas_init_atm_cases.F +++ b/src/core_init_atmosphere/mpas_init_atm_cases.F @@ -1433,6 +1433,7 @@ subroutine init_atm_case_squall_line(dminfo, mesh, nCells, nVertLevels, state, d integer :: iCell, iCell1, iCell2 , iEdge, ivtx, i, k, nz, nz1, itr, cell1, cell2 integer, pointer :: nEdges, nVertices, maxEdges, nCellsSolve integer, pointer :: index_qv + integer, pointer :: index_tke real (kind=RKIND), dimension(nVertLevels + 1 ) :: zc, zw, ah real (kind=RKIND), dimension(nVertLevels ) :: zu, dzw, rdzwp, rdzwm @@ -1568,6 +1569,7 @@ subroutine init_atm_case_squall_line(dminfo, mesh, nCells, nVertLevels, state, d call mpas_pool_get_array(state, 'scalars', scalars) call mpas_pool_get_dimension(state, 'index_qv', index_qv) + call mpas_pool_get_dimension(state, 'index_tke', index_tke) scalars(:,:,:) = 0. @@ -1883,6 +1885,18 @@ subroutine init_atm_case_squall_line(dminfo, mesh, nCells, nVertLevels, state, d t (k,i) = thi(k,i)*(1.+1.61*scalars(index_qv,k,i)) end do end do +! +! initial seed for tke +! +! call mpas_log_write('index_qv $i ', intArgs=(/index_qv/)) +! call mpas_log_write('index_tke $i ', intArgs=(/index_tke/)) + scalars(index_tke,:,:) = 0. 
+ + do k = 1,nz1 + do i=1,nCells + scalars(index_tke,k,i) = 0.1 + end do + end do do itr=1,30 From ac2f96cf9fe415dc50e748c0e8e26f7debfa9929 Mon Sep 17 00:00:00 2001 From: Jimy Dudhia Date: Tue, 12 Aug 2025 09:27:31 -0600 Subject: [PATCH 081/146] fix scaling for x_period and y_period for cases --- src/core_init_atmosphere/mpas_init_atm_cases.F | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/src/core_init_atmosphere/mpas_init_atm_cases.F b/src/core_init_atmosphere/mpas_init_atm_cases.F index 0f2d92d42c..48cff2c52e 100644 --- a/src/core_init_atmosphere/mpas_init_atm_cases.F +++ b/src/core_init_atmosphere/mpas_init_atm_cases.F @@ -1429,7 +1429,7 @@ subroutine init_atm_case_squall_line(dminfo, mesh, nCells, nVertLevels, state, d integer, dimension(:), pointer :: nEdgesOnEdge integer, dimension(:,:), pointer :: edgesOnEdge, cellsOnEdge real (kind=RKIND), dimension(:,:), pointer :: weightsOnEdge - + real (kind=RKIND), pointer :: x_period, y_period integer :: iCell, iCell1, iCell2 , iEdge, ivtx, i, k, nz, nz1, itr, cell1, cell2 integer, pointer :: nEdges, nVertices, maxEdges, nCellsSolve integer, pointer :: index_qv @@ -1482,7 +1482,9 @@ subroutine init_atm_case_squall_line(dminfo, mesh, nCells, nVertLevels, state, d call mpas_pool_get_array(mesh, 'areaTriangle', areaTriangle) call mpas_pool_get_array(mesh, 'kiteAreasOnVertex', kiteAreasOnVertex) call mpas_pool_get_array(mesh, 'nominalMinDc', nominalMinDc) - + + call mpas_pool_get_config(mesh, 'x_period', x_period) + call mpas_pool_get_config(mesh, 'y_period', y_period) call mpas_pool_get_config(mesh, 'on_a_sphere', on_a_sphere) call mpas_pool_get_config(mesh, 'sphere_radius', sphere_radius) call mpas_pool_get_config(configs, 'config_interface_projection', config_interface_projection) @@ -1509,6 +1511,8 @@ subroutine init_atm_case_squall_line(dminfo, mesh, nCells, nVertLevels, state, d areaTriangle(:) = areaTriangle(:) * a_scale**2.0 kiteAreasOnVertex(:,:) = kiteAreasOnVertex(:,:) * 
a_scale**2.0 nominalMinDc = nominalMinDc * a_scale + x_period = x_period * a_scale + y_period = y_period * a_scale call mpas_pool_get_array(mesh, 'weightsOnEdge', weightsOnEdge) call mpas_pool_get_array(mesh, 'nEdgesOnEdge', nEdgesOnEdge) @@ -2051,6 +2055,7 @@ subroutine init_atm_case_mtn_wave(dminfo, mesh, nCells, nVertLevels, state, diag integer, dimension(:), pointer :: nEdgesOnEdge, nEdgesOnCell integer, dimension(:,:), pointer :: edgesOnEdge, cellsOnEdge, cellsOnCell real (kind=RKIND), dimension(:,:), pointer :: weightsOnEdge + real (kind=RKIND), pointer :: x_period, y_period integer :: iCell, iCell1, iCell2 , iEdge, ivtx, i, k, nz, itr, cell1, cell2, nz1 integer, pointer :: nEdges, maxEdges, nCellsSolve, nVertices @@ -2109,6 +2114,8 @@ subroutine init_atm_case_mtn_wave(dminfo, mesh, nCells, nVertLevels, state, diag call mpas_pool_get_array(mesh, 'kiteAreasOnVertex', kiteAreasOnVertex) call mpas_pool_get_array(mesh, 'nominalMinDc', nominalMinDc) + call mpas_pool_get_config(mesh, 'x_period', x_period) + call mpas_pool_get_config(mesh, 'y_period', y_period) call mpas_pool_get_config(mesh, 'on_a_sphere', on_a_sphere) call mpas_pool_get_config(mesh, 'sphere_radius', sphere_radius) @@ -2150,6 +2157,8 @@ subroutine init_atm_case_mtn_wave(dminfo, mesh, nCells, nVertLevels, state, diag areaTriangle(:) = areaTriangle(:) * a_scale**2.0 kiteAreasOnVertex(:,:) = kiteAreasOnVertex(:,:) * a_scale**2.0 nominalMinDc = nominalMinDc * a_scale + x_period = x_period * a_scale + y_period = y_period * a_scale call mpas_pool_get_dimension(mesh, 'nEdges', nEdges) @@ -6256,6 +6265,7 @@ subroutine init_atm_case_les(dminfo, mesh, fg, nCells, nVertLevels, state, diag, integer, dimension(:), pointer :: nEdgesOnEdge integer, dimension(:,:), pointer :: edgesOnEdge, cellsOnEdge real (kind=RKIND), dimension(:,:), pointer :: weightsOnEdge + real (kind=RKIND), pointer :: x_period, y_period integer :: iCell, iCell1, iCell2 , iEdge, vtx1, vtx2, ivtx, i, k, nz, nz1, itr, cell1, cell2 integer, 
pointer :: nEdges, nVertices, maxEdges, nCellsSolve @@ -6310,6 +6320,8 @@ subroutine init_atm_case_les(dminfo, mesh, fg, nCells, nVertLevels, state, diag, call mpas_pool_get_array(mesh, 'areaTriangle', areaTriangle) call mpas_pool_get_array(mesh, 'kiteAreasOnVertex', kiteAreasOnVertex) + call mpas_pool_get_config(mesh, 'x_period', x_period) + call mpas_pool_get_config(mesh, 'y_period', y_period) call mpas_pool_get_config(mesh, 'on_a_sphere', on_a_sphere) call mpas_pool_get_config(mesh, 'sphere_radius', sphere_radius) call mpas_pool_get_config(configs, 'config_ztop', config_ztop) @@ -6334,6 +6346,8 @@ subroutine init_atm_case_les(dminfo, mesh, fg, nCells, nVertLevels, state, diag, areaCell(:) = areaCell(:) * a_scale**2.0 areaTriangle(:) = areaTriangle(:) * a_scale**2.0 kiteAreasOnVertex(:,:) = kiteAreasOnVertex(:,:) * a_scale**2.0 + x_period = x_period * a_scale + y_period = y_period * a_scale call mpas_pool_get_array(mesh, 'weightsOnEdge', weightsOnEdge) call mpas_pool_get_array(mesh, 'nEdgesOnEdge', nEdgesOnEdge) From 250db000bd9be8d1c022b6e42fc0e0c59a8da28f Mon Sep 17 00:00:00 2001 From: Jimy Dudhia Date: Wed, 27 Aug 2025 10:57:45 -0600 Subject: [PATCH 082/146] revert supercell a_scale to 1.0 --- src/core_init_atmosphere/mpas_init_atm_cases.F | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/core_init_atmosphere/mpas_init_atm_cases.F b/src/core_init_atmosphere/mpas_init_atm_cases.F index 48cff2c52e..9f936dcf93 100644 --- a/src/core_init_atmosphere/mpas_init_atm_cases.F +++ b/src/core_init_atmosphere/mpas_init_atm_cases.F @@ -1494,7 +1494,7 @@ subroutine init_atm_case_squall_line(dminfo, mesh, nCells, nVertLevels, state, d ! Scale all distances ! 
- a_scale = 11.25 + a_scale = 1.0 xCell(:) = xCell(:) * a_scale yCell(:) = yCell(:) * a_scale From e446ec8f17d242275070c74645aca1d7cc2a5a66 Mon Sep 17 00:00:00 2001 From: Jimy Dudhia Date: Mon, 29 Sep 2025 14:55:36 -0600 Subject: [PATCH 083/146] add config_les_surface to registry and routines (not used yet) --- src/core_atmosphere/Registry.xml | 7 ++++++- .../dynamics/mpas_atm_dissipation_models.F | 16 ++++++++++++---- .../dynamics/mpas_atm_time_integration.F | 15 +++++++++------ 3 files changed, 27 insertions(+), 11 deletions(-) diff --git a/src/core_atmosphere/Registry.xml b/src/core_atmosphere/Registry.xml index 2f159f3958..5c0950aa10 100644 --- a/src/core_atmosphere/Registry.xml +++ b/src/core_atmosphere/Registry.xml @@ -149,7 +149,12 @@ + possible_values="`none', `3d_smagorinsky', 'prognostic_1.5_order'"/> + + Date: Wed, 8 Oct 2025 10:47:54 -0600 Subject: [PATCH 084/146] clean up and reduce duplication --- src/core_atmosphere/Registry.xml | 2 +- .../dynamics/mpas_atm_dissipation_models.F | 39 +++++++------------ 2 files changed, 14 insertions(+), 27 deletions(-) diff --git a/src/core_atmosphere/Registry.xml b/src/core_atmosphere/Registry.xml index 5c0950aa10..86cf2e749a 100644 --- a/src/core_atmosphere/Registry.xml +++ b/src/core_atmosphere/Registry.xml @@ -154,7 +154,7 @@ + possible_values="`free', 'specified', `varying'"/> Date: Wed, 8 Oct 2025 15:07:24 -0600 Subject: [PATCH 085/146] remove redundant dissipation routines --- .../dynamics/mpas_atm_dissipation_models.F | 801 +----------------- 1 file changed, 45 insertions(+), 756 deletions(-) diff --git a/src/core_atmosphere/dynamics/mpas_atm_dissipation_models.F b/src/core_atmosphere/dynamics/mpas_atm_dissipation_models.F index 8515f6b7a7..d2b6245e13 100644 --- a/src/core_atmosphere/dynamics/mpas_atm_dissipation_models.F +++ b/src/core_atmosphere/dynamics/mpas_atm_dissipation_models.F @@ -403,229 +403,6 @@ end subroutine calculate_n2 !--------------------------------------- - subroutine u_dissipation( 
edgeStart, edgeEnd, edgeSolveStart, edgeSolveEnd, vertexStart, vertexEnd, & - cellStart, cellEnd, nCells, nEdges, nVertices, vertexDegree, & - cellsOnEdge, verticesOnEdge, edgesOnCell, edgesOnVertex, & - nEdgesOnCell, edgesOnCell_sign, edgesOnVertex_sign, & - invAreaCell, invAreaTriangle, invDvEdge, invDcEdge, & - angleEdge, dcEdge, dvEdge, meshScalingDel2, meshScalingDel4, & - config_mix_full, h_mom_eddy_visc4, v_mom_eddy_visc2, & - config_del4u_div_factor, zgrid, kdiff, & - delsq_u, delsq_vorticity, delsq_divergence, & - u, divergence, vorticity, rho_edge, u_init, v_init, tend_u_euler ) - - use mpas_atm_dimensions ! pull nVertLevels and maxEdges from here - - implicit none - - integer, intent(in) :: edgeStart, edgeEnd, edgeSolveStart, edgeSolveEnd - integer, intent(in) :: vertexStart, vertexEnd, vertexDegree - integer, intent(in) :: cellStart, cellEnd - integer, intent(in) :: nCells, nEdges, nVertices - logical, intent(in) :: config_mix_full - - integer, dimension(2,nEdges+1), intent(in) :: cellsOnEdge - integer, dimension(2,nEdges+1), intent(in) :: verticesOnEdge - integer, dimension(maxEdges,nCells+1), intent(in) :: edgesOnCell - integer, dimension(nCells+1), intent(in) :: nEdgesOnCell - integer, dimension(vertexDegree,nVertices+1), intent(in) :: edgesOnVertex - - real (kind=RKIND), intent(in) :: h_mom_eddy_visc4 - real (kind=RKIND), intent(in) :: v_mom_eddy_visc2 - real (kind=RKIND), intent(in) :: config_del4u_div_factor - - real (kind=RKIND), dimension(maxEdges,nCells+1), intent(in) :: edgesOnCell_sign - real (kind=RKIND), dimension(vertexDegree,nVertices+1), intent(in) :: edgesOnVertex_sign - real (kind=RKIND), dimension(nVertices+1), intent(in) :: invAreaTriangle - real (kind=RKIND), dimension(nCells+1), intent(in) :: invAreaCell - real (kind=RKIND), dimension(nEdges+1), intent(in) :: invDcEdge - real (kind=RKIND), dimension(nEdges+1), intent(in) :: invDvEdge - real (kind=RKIND), dimension(nEdges+1), intent(in) :: angleEdge - real (kind=RKIND), 
dimension(nEdges+1), intent(in) :: dcEdge - real (kind=RKIND), dimension(nEdges+1), intent(in) :: dvEdge - real (kind=RKIND), dimension(nEdges+1), intent(in) :: meshScalingDel2 - real (kind=RKIND), dimension(nEdges+1), intent(in) :: meshScalingDel4 - real (kind=RKIND), dimension(nVertLevels+1,nCells+1), intent(in) :: zgrid - - real (kind=RKIND), dimension(nVertLevels,nEdges+1), intent(in) :: u - real (kind=RKIND), dimension(nVertLevels,nCells+1), intent(in) :: divergence - real (kind=RKIND), dimension(nVertLevels,nVertices+1), intent(in) :: vorticity - real (kind=RKIND), dimension(nVertLevels,nEdges+1), intent(in) :: rho_edge - real (kind=RKIND), dimension(nVertLevels,nCells+1), intent(in) :: kdiff - - - ! scratch space from calling routine - real (kind=RKIND), dimension(nVertLevels,nEdges+1) :: delsq_u - real (kind=RKIND), dimension(nVertLevels,nVertices+1) :: delsq_vorticity - real (kind=RKIND), dimension(nVertLevels,nCells+1) :: delsq_divergence - - real (kind=RKIND), dimension(nVertLevels), intent(in) :: u_init, v_init - - real (kind=RKIND), dimension(nVertLevels,nEdges+1), intent(out) :: tend_u_euler - - ! local variables - - integer :: iEdge, cell1, cell2, vertex1, vertex2, iVertex, iCell, i, k - real (kind=RKIND) :: r_dc, r_dv, u_diffusion, kdiffu, r, edge_sign, u_mix_scale - real (kind=RKIND) :: z1, z2, z3, z4, zm, z0, zp - real (kind=RKIND), dimension(nVertLevels) :: u_mix - -!$OMP BARRIER - - ! del^4 horizontal filter. We compute this as del^2 ( del^2 (u) ). - ! First, storage to hold the result from the first del^2 computation. - - delsq_u(1:nVertLevels,edgeStart:edgeEnd) = 0.0 - - do iEdge=edgeStart,edgeEnd - cell1 = cellsOnEdge(1,iEdge) - cell2 = cellsOnEdge(2,iEdge) - vertex1 = verticesOnEdge(1,iEdge) - vertex2 = verticesOnEdge(2,iEdge) - r_dc = invDcEdge(iEdge) - r_dv = min(invDvEdge(iEdge), 4*invDcEdge(iEdge)) - -!DIR$ IVDEP - do k=1,nVertLevels - - ! Compute diffusion, computed as \nabla divergence - k \times \nabla vorticity - ! 
only valid for h_mom_eddy_visc4 == constant - u_diffusion = ( divergence(k,cell2) - divergence(k,cell1) ) * r_dc & - -( vorticity(k,vertex2) - vorticity(k,vertex1) ) * r_dv - - delsq_u(k,iEdge) = delsq_u(k,iEdge) + u_diffusion - - kdiffu = 0.5*(kdiff(k,cell1)+kdiff(k,cell2)) - - ! include 2nd-orer diffusion here - - tend_u_euler(k,iEdge) = tend_u_euler(k,iEdge) & - + rho_edge(k,iEdge)* kdiffu * u_diffusion * meshScalingDel2(iEdge) - - end do - end do - - if (h_mom_eddy_visc4 > 0.0) then ! 4th order mixing is active - -!$OMP BARRIER - do iVertex=vertexStart,vertexEnd - delsq_vorticity(1:nVertLevels,iVertex) = 0.0 - do i=1,vertexDegree - iEdge = edgesOnVertex(i,iVertex) - edge_sign = invAreaTriangle(iVertex) * dcEdge(iEdge) * edgesOnVertex_sign(i,iVertex) - do k=1,nVertLevels - delsq_vorticity(k,iVertex) = delsq_vorticity(k,iVertex) + edge_sign * delsq_u(k,iEdge) - end do - end do - end do - - do iCell=cellStart,cellEnd - delsq_divergence(1:nVertLevels,iCell) = 0.0 - r = invAreaCell(iCell) - do i=1,nEdgesOnCell(iCell) - iEdge = edgesOnCell(i,iCell) - edge_sign = r * dvEdge(iEdge) * edgesOnCell_sign(i,iCell) - do k=1,nVertLevels - delsq_divergence(k,iCell) = delsq_divergence(k,iCell) + edge_sign * delsq_u(k,iEdge) - end do - end do - end do - -!$OMP BARRIER - do iEdge=edgeSolveStart,edgeSolveEnd - cell1 = cellsOnEdge(1,iEdge) - cell2 = cellsOnEdge(2,iEdge) - vertex1 = verticesOnEdge(1,iEdge) - vertex2 = verticesOnEdge(2,iEdge) - - u_mix_scale = meshScalingDel4(iEdge)*h_mom_eddy_visc4 - r_dc = u_mix_scale * config_del4u_div_factor * invDcEdge(iEdge) - r_dv = u_mix_scale * min(invDvEdge(iEdge), 4*invDcEdge(iEdge)) - -!DIR$ IVDEP - do k=1,nVertLevels - - ! Compute diffusion, computed as \nabla divergence - k \times \nabla vorticity - ! only valid for h_mom_eddy_visc4 == constant - ! - ! Here, we scale the diffusion on the divergence part a factor of config_del4u_div_factor - ! relative to the rotational part. 
The stability constraint on the divergence component is much less - ! stringent than the rotational part, and this flexibility may be useful. - ! - u_diffusion = rho_edge(k,iEdge) * ( ( delsq_divergence(k,cell2) - delsq_divergence(k,cell1) ) * r_dc & - -( delsq_vorticity(k,vertex2) - delsq_vorticity(k,vertex1) ) * r_dv ) - tend_u_euler(k,iEdge) = tend_u_euler(k,iEdge) - u_diffusion - - end do - end do - - end if ! 4th order mixing is active - ! - ! vertical mixing for u - 2nd order filter in physical (z) space - ! - if ( v_mom_eddy_visc2 > 0.0 ) then - - if (config_mix_full) then ! mix full state - - do iEdge=edgeSolveStart,edgeSolveEnd - - cell1 = cellsOnEdge(1,iEdge) - cell2 = cellsOnEdge(2,iEdge) - - do k=2,nVertLevels-1 - - z1 = 0.5*(zgrid(k-1,cell1)+zgrid(k-1,cell2)) - z2 = 0.5*(zgrid(k ,cell1)+zgrid(k ,cell2)) - z3 = 0.5*(zgrid(k+1,cell1)+zgrid(k+1,cell2)) - z4 = 0.5*(zgrid(k+2,cell1)+zgrid(k+2,cell2)) - - zm = 0.5*(z1+z2) - z0 = 0.5*(z2+z3) - zp = 0.5*(z3+z4) - - tend_u_euler(k,iEdge) = tend_u_euler(k,iEdge) + rho_edge(k,iEdge) * v_mom_eddy_visc2*( & - (u(k+1,iEdge)-u(k ,iEdge))/(zp-z0) & - -(u(k ,iEdge)-u(k-1,iEdge))/(z0-zm) )/(0.5*(zp-zm)) - end do - end do - - else ! idealized cases where we mix on the perturbation from the initial 1-D state - - do iEdge=edgeSolveStart,edgeSolveEnd - - cell1 = cellsOnEdge(1,iEdge) - cell2 = cellsOnEdge(2,iEdge) - - do k=1,nVertLevels - u_mix(k) = u(k,iEdge) - u_init(k) * cos( angleEdge(iEdge) ) & - + v_init(k) * sin( angleEdge(iEdge) ) - end do - - do k=2,nVertLevels-1 - - z1 = 0.5*(zgrid(k-1,cell1)+zgrid(k-1,cell2)) - z2 = 0.5*(zgrid(k ,cell1)+zgrid(k ,cell2)) - z3 = 0.5*(zgrid(k+1,cell1)+zgrid(k+1,cell2)) - z4 = 0.5*(zgrid(k+2,cell1)+zgrid(k+2,cell2)) - - zm = 0.5*(z1+z2) - z0 = 0.5*(z2+z3) - zp = 0.5*(z3+z4) - - tend_u_euler(k,iEdge) = tend_u_euler(k,iEdge) + rho_edge(k,iEdge) * v_mom_eddy_visc2*( & - (u_mix(k+1)-u_mix(k ))/(zp-z0) & - -(u_mix(k )-u_mix(k-1))/(z0-zm) )/(0.5*(zp-zm)) - end do - end do - - end if ! 
mix perturbation state - - end if ! vertical mixing of horizontal momentum - - end subroutine u_dissipation - -!------------------------ - subroutine u_dissipation_3d( edgeStart, edgeEnd, edgeSolveStart, edgeSolveEnd, vertexStart, vertexEnd, & cellStart, cellEnd, nCells, nEdges, nVertices, vertexDegree, & cellsOnEdge, verticesOnEdge, edgesOnCell, edgesOnVertex, & @@ -929,17 +706,22 @@ end subroutine u_dissipation_3d !------------------------ - subroutine w_dissipation( cellStart, cellEnd, cellSolveStart, cellSolveEnd, & - nCells, nEdges, & - nEdgesOnCell, edgesOnCell, cellsOnEdge, edgesOnCell_sign, & - invAreaCell, invDcEdge, dvEdge, & - meshScalingDel2, meshScalingDel4, & - rdzw, rdzu, & - v_mom_eddy_visc2, h_mom_eddy_visc4, & - delsq_w, & - w, rho_edge, kdiff, rho_zz, & - tend_w_euler ) + subroutine w_dissipation_3d( cellStart, cellEnd, cellSolveStart, cellSolveEnd, & + nCells, nEdges, & + nEdgesOnCell, edgesOnCell, cellsOnEdge, edgesOnCell_sign, & + invAreaCell, invDcEdge, dvEdge, & + meshScalingDel2, meshScalingDel4, & + rdzw, rdzu, & + v_mom_eddy_visc2, h_mom_eddy_visc4, & + delsq_w, & + w, rho_edge, rho_zz, zz, & + eddy_visc_horz, eddy_visc_vert, & + config_les_model, config_les_surface, & + tend_w_euler ) + + ! 3D w dissipation using the 3D smagorinsky eddy viscosities. + ! This routine also includes the simpler mixing models, and the 4th-order horizontal filter use mpas_atm_dimensions ! 
pull nVertLevels and maxEdges from here @@ -954,6 +736,9 @@ subroutine w_dissipation( cellStart, cellEnd, cellSolveStart, cellSolveEnd, integer, dimension(2,nEdges+1), intent(in) :: cellsOnEdge + character (len=StrKIND) :: config_les_model + character (len=StrKIND) :: config_les_surface + real (kind=RKIND), intent(in) :: h_mom_eddy_visc4 real (kind=RKIND), intent(in) :: v_mom_eddy_visc2 @@ -967,14 +752,17 @@ subroutine w_dissipation( cellStart, cellEnd, cellSolveStart, cellSolveEnd, real (kind=RKIND), dimension(nVertLevels), intent(in) :: rdzu real (kind=RKIND), dimension(nVertLevels+1,nCells+1), intent(in) :: w - real (kind=RKIND), dimension(nVertLevels,nCells+1), intent(in) :: kdiff + real (kind=RKIND), dimension(nVertLevels,nCells+1), intent(in) :: eddy_visc_horz + real (kind=RKIND), dimension(nVertLevels,nCells+1), intent(in) :: eddy_visc_vert real (kind=RKIND), dimension(nVertLevels,nCells+1), intent(in) :: rho_zz + real (kind=RKIND), dimension(nVertLevels,nCells+1), intent(in) :: zz real (kind=RKIND), dimension(nVertLevels,nEdges+1), intent(in) :: rho_edge real (kind=RKIND), dimension(nVertLevels+1,nCells+1), intent(inout) :: tend_w_euler ! storage passed in from calling routine real (kind=RKIND), dimension(nVertLevels,nCells+1) :: delsq_w + real (kind=RKIND), dimension(nVertLevels+1) :: turb_vflux ! local variables @@ -988,6 +776,12 @@ subroutine w_dissipation( cellStart, cellEnd, cellSolveStart, cellSolveEnd, ! First, storage to hold the result from the first del^2 computation. ! we copied code from the theta mixing, hence the theta* names. 
+ if(debug_dissipation) then + call mpas_log_write(' begin w_dissipation_3d ') + call mpas_log_write(' les model is '//trim(config_les_model)) + call mpas_log_write(' les surface is '//trim(config_les_surface)) + call mpas_log_write(' 4th order hyperviscosity is $r ', realArgs=(/h_mom_eddy_visc4/)) + end if delsq_w(1:nVertLevels,cellStart:cellEnd) = 0.0 @@ -1008,7 +802,8 @@ subroutine w_dissipation( cellStart, cellEnd, cellSolveStart, cellSolveEnd, w_turb_flux = edge_sign*(rho_edge(k,iEdge)+rho_edge(k-1,iEdge))*(w(k,cell2) - w(k,cell1)) delsq_w(k,iCell) = delsq_w(k,iCell) + w_turb_flux w_turb_flux = w_turb_flux * meshScalingDel2(iEdge) * 0.25 * & - (kdiff(k,cell1)+kdiff(k,cell2)+kdiff(k-1,cell1)+kdiff(k-1,cell2)) + ( eddy_visc_horz(k ,cell1)+eddy_visc_horz(k ,cell2) & + +eddy_visc_horz(k-1,cell1)+eddy_visc_horz(k-1,cell2) ) tend_w_euler(k,iCell) = tend_w_euler(k,iCell) + w_turb_flux end do end do @@ -1049,533 +844,27 @@ subroutine w_dissipation( cellStart, cellEnd, cellSolveStart, cellSolveEnd, end if - end subroutine w_dissipation - -!------------------------ - - subroutine w_dissipation_3d( cellStart, cellEnd, cellSolveStart, cellSolveEnd, & - nCells, nEdges, & - nEdgesOnCell, edgesOnCell, cellsOnEdge, edgesOnCell_sign, & - invAreaCell, invDcEdge, dvEdge, & - meshScalingDel2, meshScalingDel4, & - rdzw, rdzu, & - v_mom_eddy_visc2, h_mom_eddy_visc4, & - delsq_w, & - w, rho_edge, rho_zz, zz, & - eddy_visc_horz, eddy_visc_vert, & - config_les_model, config_les_surface, & - tend_w_euler ) - - - ! 3D w dissipation using the 3D smagorinsky eddy viscosities. - ! This routine also includes the simpler mixing models, and the 4th-order horizontal filter + if ( config_les_model /= "none") then - use mpas_atm_dimensions ! pull nVertLevels and maxEdges from here + do iCell = cellSolveStart,cellSolveEnd ! vertical mixing for each column + ! 
compute turbulent fluxes + do k=1,nVertLevels + turb_vflux(k) = - rho_zz(k,iCell)*eddy_visc_vert(k,iCell)*zz(k,iCell)*zz(k,iCell) & + *rdzw(k)*(w(k+1,iCell)-w(k,iCell)) + end do + turb_vflux(nVertLevels+1) = 0. + do k=2,nVertLevels + tend_w_euler(k,iCell) = tend_w_euler(k,iCell) & + - rdzu(k)*(turb_vflux(k)-turb_vflux(k-1)) + end do + end do - implicit none + end if - integer, intent(in) :: cellStart, cellEnd - integer, intent(in) :: cellSolveStart, cellSolveEnd - integer, intent(in) :: nCells, nEdges - - integer, dimension(nCells+1), intent(in) :: nEdgesOnCell - integer, dimension(maxEdges,nCells+1), intent(in) :: EdgesOnCell - - integer, dimension(2,nEdges+1), intent(in) :: cellsOnEdge - - character (len=StrKIND) :: config_les_model - character (len=StrKIND) :: config_les_surface - - real (kind=RKIND), intent(in) :: h_mom_eddy_visc4 - real (kind=RKIND), intent(in) :: v_mom_eddy_visc2 - - real (kind=RKIND), dimension(maxEdges,nCells+1), intent(in) :: edgesOnCell_sign - real (kind=RKIND), dimension(nCells+1), intent(in) :: invAreaCell - real (kind=RKIND), dimension(nEdges+1), intent(in) :: dvEdge - real (kind=RKIND), dimension(nEdges+1), intent(in) :: invDcEdge - real (kind=RKIND), dimension(nEdges+1), intent(in) :: meshScalingDel2 - real (kind=RKIND), dimension(nEdges+1), intent(in) :: meshScalingDel4 - real (kind=RKIND), dimension(nVertLevels), intent(in) :: rdzw - real (kind=RKIND), dimension(nVertLevels), intent(in) :: rdzu - - real (kind=RKIND), dimension(nVertLevels+1,nCells+1), intent(in) :: w - real (kind=RKIND), dimension(nVertLevels,nCells+1), intent(in) :: eddy_visc_horz - real (kind=RKIND), dimension(nVertLevels,nCells+1), intent(in) :: eddy_visc_vert - real (kind=RKIND), dimension(nVertLevels,nCells+1), intent(in) :: rho_zz - real (kind=RKIND), dimension(nVertLevels,nCells+1), intent(in) :: zz - real (kind=RKIND), dimension(nVertLevels,nEdges+1), intent(in) :: rho_edge - - real (kind=RKIND), dimension(nVertLevels+1,nCells+1), intent(inout) :: 
tend_w_euler - - ! storage passed in from calling routine - real (kind=RKIND), dimension(nVertLevels,nCells+1) :: delsq_w - real (kind=RKIND), dimension(nVertLevels+1) :: turb_vflux - - ! local variables - - integer :: cell1, cell2, iEdge, iCell, i, k - real (kind=RKIND) :: r_areaCell, edge_sign, w_turb_flux - -! !OMP BARRIER why is this openmp barrier here??? - - ! del^4 horizontal filter. We compute this as del^2 ( del^2 (w) ). - ! - ! First, storage to hold the result from the first del^2 computation. - ! we copied code from the theta mixing, hence the theta* names. - - if(debug_dissipation) then - call mpas_log_write(' begin w_dissipation_3d ') - call mpas_log_write(' les model is '//trim(config_les_model)) - call mpas_log_write(' les surface is '//trim(config_les_surface)) - call mpas_log_write(' 4th order hyperviscosity is $r ', realArgs=(/h_mom_eddy_visc4/)) - end if - - delsq_w(1:nVertLevels,cellStart:cellEnd) = 0.0 - - do iCell=cellStart,cellEnd - tend_w_euler(1:nVertLevels+1,iCell) = 0.0 - r_areaCell = invAreaCell(iCell) - do i=1,nEdgesOnCell(iCell) - iEdge = edgesOnCell(i,iCell) - - edge_sign = 0.5 * r_areaCell*edgesOnCell_sign(i,iCell) * dvEdge(iEdge) * invDcEdge(iEdge) - - cell1 = cellsOnEdge(1,iEdge) - cell2 = cellsOnEdge(2,iEdge) - -!DIR$ IVDEP - do k=2,nVertLevels - - w_turb_flux = edge_sign*(rho_edge(k,iEdge)+rho_edge(k-1,iEdge))*(w(k,cell2) - w(k,cell1)) - delsq_w(k,iCell) = delsq_w(k,iCell) + w_turb_flux - w_turb_flux = w_turb_flux * meshScalingDel2(iEdge) * 0.25 * & - ( eddy_visc_horz(k ,cell1)+eddy_visc_horz(k ,cell2) & - +eddy_visc_horz(k-1,cell1)+eddy_visc_horz(k-1,cell2) ) - tend_w_euler(k,iCell) = tend_w_euler(k,iCell) + w_turb_flux - end do - end do - end do - -!$OMP BARRIER - - if (h_mom_eddy_visc4 > 0.0) then ! 4th order mixing is active - - do iCell=cellSolveStart,cellSolveEnd ! Technically updating fewer cells than before... 
- r_areaCell = h_mom_eddy_visc4 * invAreaCell(iCell) - do i=1,nEdgesOnCell(iCell) - iEdge = edgesOnCell(i,iCell) - cell1 = cellsOnEdge(1,iEdge) - cell2 = cellsOnEdge(2,iEdge) - - edge_sign = meshScalingDel4(iEdge)*r_areaCell*dvEdge(iEdge)*edgesOnCell_sign(i,iCell) * invDcEdge(iEdge) - - do k=2,nVertLevels - tend_w_euler(k,iCell) = tend_w_euler(k,iCell) - edge_sign * (delsq_w(k,cell2) - delsq_w(k,cell1)) - end do - - end do - end do - - end if ! 4th order mixing is active - - if ( v_mom_eddy_visc2 > 0.0 ) then ! vertical mixing - - do iCell=cellSolveStart,cellSolveEnd -!DIR$ IVDEP - do k=2,nVertLevels - tend_w_euler(k,iCell) = tend_w_euler(k,iCell) + v_mom_eddy_visc2*0.5*(rho_zz(k,iCell)+rho_zz(k-1,iCell))*( & - (w(k+1,iCell)-w(k ,iCell))*rdzw(k) & - -(w(k ,iCell)-w(k-1,iCell))*rdzw(k-1) )*rdzu(k) - end do - end do - - end if - - if ( config_les_model /= "none") then - - do iCell = cellSolveStart,cellSolveEnd ! vertical mixing for each column - ! compute turbulent fluxes - do k=1,nVertLevels - turb_vflux(k) = - rho_zz(k,iCell)*eddy_visc_vert(k,iCell)*zz(k,iCell)*zz(k,iCell) & - *rdzw(k)*(w(k+1,iCell)-w(k,iCell)) - end do - turb_vflux(nVertLevels+1) = 0. 
- do k=2,nVertLevels - tend_w_euler(k,iCell) = tend_w_euler(k,iCell) & - - rdzu(k)*(turb_vflux(k)-turb_vflux(k-1)) - end do - end do - - end if - - if(debug_dissipation) call mpas_log_write(' exiting w_dissipation_3d ') + if(debug_dissipation) call mpas_log_write(' exiting w_dissipation_3d ') end subroutine w_dissipation_3d -!----------------------------------------------------- - - subroutine theta_dissipation( cellStart, cellEnd, cellSolveStart, cellSolveEnd, & - nCells, nEdges, & - nEdgesOnCell, edgesOnCell, cellsOnEdge, edgesOnCell_sign, & - invAreaCell, invDcEdge, dvEdge, & - meshScalingDel2, meshScalingDel4, & - config_mix_full, t_init, zgrid, & - rdzw, rdzu, & - v_theta_eddy_visc2, h_theta_eddy_visc4, prandtl_inv, & - delsq_theta, & - theta_m, rho_edge, kdiff, rho_zz, & - tend_theta_euler ) - - - use mpas_atm_dimensions ! pull nVertLevels and maxEdges from here - implicit none - - integer, intent(in) :: cellStart, cellEnd - integer, intent(in) :: cellSolveStart, cellSolveEnd - integer, intent(in) :: nCells, nEdges - - logical, intent(in) :: config_mix_full - - integer, dimension(nCells+1), intent(in) :: nEdgesOnCell - integer, dimension(maxEdges,nCells+1), intent(in) :: EdgesOnCell - - integer, dimension(2,nEdges+1), intent(in) :: cellsOnEdge - - real (kind=RKIND), intent(in) :: h_theta_eddy_visc4 - real (kind=RKIND), intent(in) :: v_theta_eddy_visc2 - real (kind=RKIND), intent(in) :: prandtl_inv - - real (kind=RKIND), dimension(maxEdges,nCells+1), intent(in) :: edgesOnCell_sign - real (kind=RKIND), dimension(nCells+1), intent(in) :: invAreaCell - real (kind=RKIND), dimension(nEdges+1), intent(in) :: dvEdge - real (kind=RKIND), dimension(nEdges+1), intent(in) :: invDcEdge - real (kind=RKIND), dimension(nEdges+1), intent(in) :: meshScalingDel2 - real (kind=RKIND), dimension(nEdges+1), intent(in) :: meshScalingDel4 - real (kind=RKIND), dimension(nVertLevels), intent(in) :: rdzw - real (kind=RKIND), dimension(nVertLevels), intent(in) :: rdzu - real 
(kind=RKIND), dimension(nVertLevels+1, nCells+1), intent(in) :: zgrid - - real (kind=RKIND), dimension(nVertLevels,nCells+1), intent(in) :: t_init - - real (kind=RKIND), dimension(nVertLevels,nCells+1), intent(in) :: theta_m - real (kind=RKIND), dimension(nVertLevels,nCells+1), intent(in) :: kdiff - real (kind=RKIND), dimension(nVertLevels,nCells+1), intent(in) :: rho_zz - real (kind=RKIND), dimension(nVertLevels,nEdges+1), intent(in) :: rho_edge - - real (kind=RKIND), dimension(nVertLevels,nCells+1), intent(inout) :: tend_theta_euler - - ! storage passed in from calling routine - real (kind=RKIND), dimension(nVertLevels,nCells+1) :: delsq_theta - - ! local variables - integer :: cell1, cell2, iEdge, iCell, i, k - real (kind=RKIND) :: r_areaCell, edge_sign, theta_turb_flux, pr_scale - real (kind=RKIND) :: z1, z2, z3, z4, zm, z0, zp - - delsq_theta(1:nVertLevels,cellStart:cellEnd) = 0.0 - - do iCell=cellStart,cellEnd - tend_theta_euler(1:nVertLevels,iCell) = 0.0 - r_areaCell = invAreaCell(iCell) - do i=1,nEdgesOnCell(iCell) - iEdge = edgesOnCell(i,iCell) - edge_sign = r_areaCell*edgesOnCell_sign(i,iCell) * dvEdge(iEdge) * invDcEdge(iEdge) - pr_scale = prandtl_inv * meshScalingDel2(iEdge) - cell1 = cellsOnEdge(1,iEdge) - cell2 = cellsOnEdge(2,iEdge) -!DIR$ IVDEP - do k=1,nVertLevels - -! we are computing the Smagorinsky filter at more points than needed here so as to pick up the delsq_theta for 4th order filter below - - theta_turb_flux = edge_sign*(theta_m(k,cell2) - theta_m(k,cell1))*rho_edge(k,iEdge) - delsq_theta(k,iCell) = delsq_theta(k,iCell) + theta_turb_flux - theta_turb_flux = theta_turb_flux*0.5*(kdiff(k,cell1)+kdiff(k,cell2)) * pr_scale - tend_theta_euler(k,iCell) = tend_theta_euler(k,iCell) + theta_turb_flux - - end do - end do - end do - -!$OMP BARRIER - - if (h_theta_eddy_visc4 > 0.0) then ! 4th order mixing is active - - do iCell=cellSolveStart,cellSolveEnd ! Technically updating fewer cells than before... 
- r_areaCell = h_theta_eddy_visc4 * prandtl_inv * invAreaCell(iCell) - do i=1,nEdgesOnCell(iCell) - - iEdge = edgesOnCell(i,iCell) - edge_sign = meshScalingDel4(iEdge)*r_areaCell*dvEdge(iEdge)*edgesOnCell_sign(i,iCell)*invDcEdge(iEdge) - - cell1 = cellsOnEdge(1,iEdge) - cell2 = cellsOnEdge(2,iEdge) - - do k=1,nVertLevels - tend_theta_euler(k,iCell) = tend_theta_euler(k,iCell) - edge_sign*(delsq_theta(k,cell2) - delsq_theta(k,cell1)) - end do - end do - end do - - end if ! 4th order mixing is active - - if ( v_theta_eddy_visc2 > 0.0 ) then ! vertical mixing for theta_m - - if (config_mix_full) then - - do iCell = cellSolveStart,cellSolveEnd - do k=2,nVertLevels-1 - z1 = zgrid(k-1,iCell) - z2 = zgrid(k ,iCell) - z3 = zgrid(k+1,iCell) - z4 = zgrid(k+2,iCell) - - zm = 0.5*(z1+z2) - z0 = 0.5*(z2+z3) - zp = 0.5*(z3+z4) - - tend_theta_euler(k,iCell) = tend_theta_euler(k,iCell) + v_theta_eddy_visc2*prandtl_inv*rho_zz(k,iCell)*(& - (theta_m(k+1,iCell)-theta_m(k ,iCell))/(zp-z0) & - -(theta_m(k ,iCell)-theta_m(k-1,iCell))/(z0-zm) )/(0.5*(zp-zm)) - end do - end do - - else ! 
idealized cases where we mix on the perturbation from the initial 1-D state - - do iCell = cellSolveStart,cellSolveEnd - do k=2,nVertLevels-1 - z1 = zgrid(k-1,iCell) - z2 = zgrid(k ,iCell) - z3 = zgrid(k+1,iCell) - z4 = zgrid(k+2,iCell) - - zm = 0.5*(z1+z2) - z0 = 0.5*(z2+z3) - zp = 0.5*(z3+z4) - - tend_theta_euler(k,iCell) = tend_theta_euler(k,iCell) + v_theta_eddy_visc2*prandtl_inv*rho_zz(k,iCell)*(& - ((theta_m(k+1,iCell)-t_init(k+1,iCell))-(theta_m(k ,iCell)-t_init(k,iCell)))/(zp-z0) & - -((theta_m(k ,iCell)-t_init(k,iCell))-(theta_m(k-1,iCell)-t_init(k-1,iCell)))/(z0-zm) )/(0.5*(zp-zm)) - end do - end do - - end if - - end if - - end subroutine theta_dissipation - -!----------------------------------------------------- - - subroutine theta_dissipation_3d( cellStart, cellEnd, cellSolveStart, cellSolveEnd, & - nCells, nEdges, & - nEdgesOnCell, edgesOnCell, cellsOnEdge, edgesOnCell_sign, & - invAreaCell, invDcEdge, dvEdge, & - meshScalingDel2, meshScalingDel4, & - config_mix_full, t_init, zgrid, & - rdzw, rdzu, fzm, fzp, & - v_theta_eddy_visc2, h_theta_eddy_visc4, prandtl_inv, & - delsq_theta, & - theta_m, rho_edge, rho_zz, zz, & - eddy_visc_horz, eddy_visc_vert, & - config_les_model, & - tend_theta_euler ) - - - ! 3D theta_m dissipation using the 3D smagorinsky eddy viscosities. - ! This routine also includes the simpler mixing models, and the 4th-order horizontal filter - - use mpas_atm_dimensions ! 
pull nVertLevels and maxEdges from here - implicit none - - integer, intent(in) :: cellStart, cellEnd - integer, intent(in) :: cellSolveStart, cellSolveEnd - integer, intent(in) :: nCells, nEdges - - logical, intent(in) :: config_mix_full - - character (len=StrKIND) :: config_les_model - - integer, dimension(nCells+1), intent(in) :: nEdgesOnCell - integer, dimension(maxEdges,nCells+1), intent(in) :: EdgesOnCell - - integer, dimension(2,nEdges+1), intent(in) :: cellsOnEdge - - real (kind=RKIND), intent(in) :: h_theta_eddy_visc4 - real (kind=RKIND), intent(in) :: v_theta_eddy_visc2 - real (kind=RKIND), intent(in) :: prandtl_inv - - real (kind=RKIND), dimension(maxEdges,nCells+1), intent(in) :: edgesOnCell_sign - real (kind=RKIND), dimension(nCells+1), intent(in) :: invAreaCell - real (kind=RKIND), dimension(nEdges+1), intent(in) :: dvEdge - real (kind=RKIND), dimension(nEdges+1), intent(in) :: invDcEdge - real (kind=RKIND), dimension(nEdges+1), intent(in) :: meshScalingDel2 - real (kind=RKIND), dimension(nEdges+1), intent(in) :: meshScalingDel4 - real (kind=RKIND), dimension(nVertLevels), intent(in) :: rdzw - real (kind=RKIND), dimension(nVertLevels), intent(in) :: rdzu - real (kind=RKIND), dimension(nVertLevels), intent(in) :: fzm - real (kind=RKIND), dimension(nVertLevels), intent(in) :: fzp - real (kind=RKIND), dimension(nVertLevels+1, nCells+1), intent(in) :: zgrid - real (kind=RKIND), dimension(nVertLevels,nCells+1), intent(in) :: zz - - real (kind=RKIND), dimension(nVertLevels,nCells+1), intent(in) :: t_init - - real (kind=RKIND), dimension(nVertLevels,nCells+1), intent(in) :: theta_m - real (kind=RKIND), dimension(nVertLevels,nCells+1), intent(in) :: eddy_visc_horz - real (kind=RKIND), dimension(nVertLevels,nCells+1), intent(in) :: eddy_visc_vert - real (kind=RKIND), dimension(nVertLevels,nCells+1), intent(in) :: rho_zz - real (kind=RKIND), dimension(nVertLevels,nEdges+1), intent(in) :: rho_edge - - real (kind=RKIND), dimension(nVertLevels,nCells+1), 
intent(inout) :: tend_theta_euler - - ! storage passed in from calling routine - real (kind=RKIND), dimension(nVertLevels,nCells+1) :: delsq_theta - - ! local variables - integer :: cell1, cell2, iEdge, iCell, i, k - real (kind=RKIND) :: r_areaCell, edge_sign, theta_turb_flux, pr_scale - real (kind=RKIND) :: z1, z2, z3, z4, zm, z0, zp - real (kind=RKIND), dimension(nVertLevels+1) :: turb_vflux - real (kind=RKIND) :: rho_k_at_w, zz_at_w - - if(debug_dissipation) then - call mpas_log_write(' begin theta_dissipation_3d ') - call mpas_log_write(' les model is '//trim(config_les_model)) - call mpas_log_write(' 4th order hyperviscosity is $r ', realArgs=(/h_theta_eddy_visc4/)) - end if - - delsq_theta(1:nVertLevels,cellStart:cellEnd) = 0.0 - - do iCell=cellStart,cellEnd - tend_theta_euler(1:nVertLevels,iCell) = 0.0 - r_areaCell = invAreaCell(iCell) - do i=1,nEdgesOnCell(iCell) - iEdge = edgesOnCell(i,iCell) - edge_sign = r_areaCell*edgesOnCell_sign(i,iCell) * dvEdge(iEdge) * invDcEdge(iEdge) - pr_scale = prandtl_inv * meshScalingDel2(iEdge) - cell1 = cellsOnEdge(1,iEdge) - cell2 = cellsOnEdge(2,iEdge) -!DIR$ IVDEP - do k=1,nVertLevels - -! we are computing the Smagorinsky filter at more points than needed here so as to pick up the delsq_theta for 4th order filter below. -! This is in conservative form. - - theta_turb_flux = edge_sign*(theta_m(k,cell2) - theta_m(k,cell1))*rho_edge(k,iEdge) - delsq_theta(k,iCell) = delsq_theta(k,iCell) + theta_turb_flux - theta_turb_flux = theta_turb_flux*0.5*(eddy_visc_horz(k,cell1)+eddy_visc_horz(k,cell2)) * pr_scale - tend_theta_euler(k,iCell) = tend_theta_euler(k,iCell) + theta_turb_flux - - end do - end do - end do - -!$OMP BARRIER - - if (h_theta_eddy_visc4 > 0.0) then ! 4th order mixing is active - - do iCell=cellSolveStart,cellSolveEnd ! Technically updating fewer cells than before... 
- r_areaCell = h_theta_eddy_visc4 * prandtl_inv * invAreaCell(iCell) - do i=1,nEdgesOnCell(iCell) - - iEdge = edgesOnCell(i,iCell) - edge_sign = meshScalingDel4(iEdge)*r_areaCell*dvEdge(iEdge)*edgesOnCell_sign(i,iCell)*invDcEdge(iEdge) - - cell1 = cellsOnEdge(1,iEdge) - cell2 = cellsOnEdge(2,iEdge) - - do k=1,nVertLevels - tend_theta_euler(k,iCell) = tend_theta_euler(k,iCell) - edge_sign*(delsq_theta(k,cell2) - delsq_theta(k,cell1)) - end do - end do - end do - - end if ! 4th order mixing is active - - ! idealized case vertical mixing - - if ( v_theta_eddy_visc2 > 0.0 ) then ! vertical mixing for theta_m - - if (config_mix_full) then - - do iCell = cellSolveStart,cellSolveEnd - do k=2,nVertLevels-1 - z1 = zgrid(k-1,iCell) - z2 = zgrid(k ,iCell) - z3 = zgrid(k+1,iCell) - z4 = zgrid(k+2,iCell) - - zm = 0.5*(z1+z2) - z0 = 0.5*(z2+z3) - zp = 0.5*(z3+z4) - - tend_theta_euler(k,iCell) = tend_theta_euler(k,iCell) + v_theta_eddy_visc2*prandtl_inv*rho_zz(k,iCell)*(& - (theta_m(k+1,iCell)-theta_m(k ,iCell))/(zp-z0) & - -(theta_m(k ,iCell)-theta_m(k-1,iCell))/(z0-zm) )/(0.5*(zp-zm)) - end do - end do - - else ! idealized cases where we mix on the perturbation from the initial 1-D state - - do iCell = cellSolveStart,cellSolveEnd - do k=2,nVertLevels-1 - z1 = zgrid(k-1,iCell) - z2 = zgrid(k ,iCell) - z3 = zgrid(k+1,iCell) - z4 = zgrid(k+2,iCell) - - zm = 0.5*(z1+z2) - z0 = 0.5*(z2+z3) - zp = 0.5*(z3+z4) - - tend_theta_euler(k,iCell) = tend_theta_euler(k,iCell) + v_theta_eddy_visc2*prandtl_inv*rho_zz(k,iCell)*(& - ((theta_m(k+1,iCell)-t_init(k+1,iCell))-(theta_m(k ,iCell)-t_init(k,iCell)))/(zp-z0) & - -((theta_m(k ,iCell)-t_init(k,iCell))-(theta_m(k-1,iCell)-t_init(k-1,iCell)))/(z0-zm) )/(0.5*(zp-zm)) - end do - end do - - end if - - end if - - if ( config_les_model /= "none") then - - do iCell = cellSolveStart,cellSolveEnd ! vertical mixing for each column - ! compute turbulent fluxes - turb_vflux(nVertlevels+1) = 0. ! no turbulent flux out of the domain - turb_vflux(1) = 0. ! 
lower bc flux handled where ??? - do k=2,nVertLevels - rho_k_at_w = fzm(k)*rho_zz(k ,iCell)*zz(k ,iCell)*eddy_visc_vert(k ,iCell) & - +fzp(k)*rho_zz(k-1,iCell)*zz(k-1,iCell)*eddy_visc_vert(k-1,iCell) - zz_at_w = fzm(k)*zz(k,iCell)+fzp(k)*zz(k-1,iCell) - turb_vflux(k) = - rho_k_at_w*zz_at_w*rdzu(k)*(theta_m(k,iCell)-theta_m(k-1,iCell)) - end do - - ! test boundary conditions for supercell and les test cases - - if( les_test ) then - turb_vflux(1) = tke_heat_flux*rho_zz(1,iCell) ! this is correct for DRY CASE ONLY - ! SAS case lower flux for theta_m - code goes here - - turb_vflux(nVertLevels+1) = turb_vflux(nVertLevels) - else - turb_vflux(1) = turb_vflux(2) - turb_vflux(nVertLevels+1) = turb_vflux(nVertLevels) - end if - - - do k=1,nVertLevels - tend_theta_euler(k,iCell) = tend_theta_euler(k,iCell) & - - rdzw(k)*(turb_vflux(k+1)-turb_vflux(k)) - end do - end do - - end if - - if(debug_dissipation) call mpas_log_write(' exiting theta_dissipation_3d ') - - end subroutine theta_dissipation_3d - !----------------------------------------------------- subroutine scalar_dissipation_3d_les( cellStart, cellEnd, cellSolveStart, cellSolveEnd, & From 49372ce4707e6fde3b27fac13b89cf9b8645e418 Mon Sep 17 00:00:00 2001 From: Jimy Dudhia Date: Thu, 9 Oct 2025 13:01:58 -0600 Subject: [PATCH 086/146] Merge with Bill Skamarock updated LES modules --- src/core_atmosphere/Registry.xml | 5 ++ .../dynamics/mpas_atm_dissipation_models.F | 83 +++++++++++-------- .../dynamics/mpas_atm_time_integration.F | 29 +++---- 3 files changed, 69 insertions(+), 48 deletions(-) diff --git a/src/core_atmosphere/Registry.xml b/src/core_atmosphere/Registry.xml index 86cf2e749a..7cfd643b5e 100644 --- a/src/core_atmosphere/Registry.xml +++ b/src/core_atmosphere/Registry.xml @@ -166,6 +166,11 @@ description="Scaling coefficient of $\delta x^3$ to obtain $\nabla^4$ diffusion coefficient" possible_values="Non-negative real values"/> + + 0.0).and. hmix4_scalars) then ! 
4th order mixing is active + if (h_theta_eddy_visc4 > 0.0) then ! 4th order mixing is active do iCell=cellSolveStart,cellSolveEnd ! Technically updating fewer cells than before... r_areaCell = h_theta_eddy_visc4 * prandtl_inv * invAreaCell(iCell) @@ -1147,7 +1162,8 @@ subroutine scalar_dissipation_3d_les( cellStart, cellEnd, cellSolveStart, cellSo ! end do do k=2,nVertLevels - prandtl_1d_inverse(k) = 0.5*(prandtl_3d_inv(k,iCell)+prandtl_3d_inv(k-1,iCell)) + ! prandtl_1d_inverse(k) = 0.5*(prandtl_3d_inv(k,iCell)+prandtl_3d_inv(k-1,iCell)) + prandtl_1d_inverse(k) = fzm(k)*prandtl_3d_inv(k,iCell)+fzp(k)*prandtl_3d_inv(k-1,iCell) end do end if @@ -1158,9 +1174,8 @@ subroutine scalar_dissipation_3d_les( cellStart, cellEnd, cellSolveStart, cellSo ! delta_s = ((config_len_disp**2)*delta_z)**(1./3.) ! bv_frequency2 = 0.5*(bv_freq2(k)+bv_freq(k-1)) ! bv = max( sqrt(abs(bv_frequency2)), epsilon_bv ) - - rho_k_at_w = fzm(k)*rho_zz(k ,iCell)*zz(k ,iCell)*eddy_visc_vert(k ,iCell) & - +fzp(k)*rho_zz(k-1,iCell)*zz(k-1,iCell)*eddy_visc_vert(k-1,iCell) + rho_k_at_w = fzm(k)*rho_zz(k ,iCell)*zz(k ,iCell)*zz(k ,iCell)*eddy_visc_vert(k ,iCell) & + +fzp(k)*rho_zz(k-1,iCell)*zz(k-1,iCell)*zz(k-1,iCell)*eddy_visc_vert(k-1,iCell) zz_at_w = fzm(k)*zz(k,iCell)+fzp(k)*zz(k-1,iCell) turb_vflux(k) = - prandtl_1d_inverse(k)*rho_k_at_w*zz_at_w*rdzu(k)*(theta_m(k,iCell)-theta_m(k-1,iCell)) end do @@ -1202,14 +1217,14 @@ subroutine scalar_dissipation_3d_les( cellStart, cellEnd, cellSolveStart, cellSo - rdzw(k)*(turb_vflux(k+1)-turb_vflux(k)) end do - if (mix_scalars .and. vmix_scalars) then + if (mix_scalars ) then ! compute turbulent fluxes turb_vflux_scalars(:,nVertlevels+1) = 0. ! no turbulent flux out of the domain turb_vflux_scalars(:,1) = 0. ! lower bc flux handled where ??? 
do k=2,nVertLevels - rho_k_at_w = fzm(k)*rho_zz(k ,iCell)*zz(k ,iCell)*eddy_visc_vert(k ,iCell) & - +fzp(k)*rho_zz(k-1,iCell)*zz(k-1,iCell)*eddy_visc_vert(k-1,iCell) + rho_k_at_w = fzm(k)*rho_zz(k ,iCell)*zz(k ,iCell)*zz(k ,iCell)*eddy_visc_vert(k ,iCell) & + +fzp(k)*rho_zz(k-1,iCell)*zz(k-1,iCell)*zz(k-1,iCell)*eddy_visc_vert(k-1,iCell) zz_at_w = fzm(k)*zz(k,iCell)+fzp(k)*zz(k-1,iCell) do iScalar=1,num_scalars turb_vflux_scalars(iScalar,k) = - prandtl_1d_inverse(k)*rho_k_at_w*zz_at_w*rdzu(k)* & diff --git a/src/core_atmosphere/dynamics/mpas_atm_time_integration.F b/src/core_atmosphere/dynamics/mpas_atm_time_integration.F index dde697a3e1..05dd4feb36 100644 --- a/src/core_atmosphere/dynamics/mpas_atm_time_integration.F +++ b/src/core_atmosphere/dynamics/mpas_atm_time_integration.F @@ -1173,7 +1173,7 @@ subroutine atm_srk3(domain, dt, itimestep, exchange_halo_group) !$OMP PARALLEL DO do thread=1,nThreads call atm_compute_dyn_tend( tend, tend_physics, state, diag, mesh, diag_physics, & - block % configs, nVertLevels, rk_step, dt, & + block % configs, nVertLevels, rk_step, dynamics_substep, dt, & cellThreadStart(thread), cellThreadEnd(thread), & vertexThreadStart(thread), vertexThreadEnd(thread), & edgeThreadStart(thread), edgeThreadEnd(thread), & @@ -4736,7 +4736,7 @@ subroutine atm_advance_scalars_mono_work(field_name, block, state, nCells, nEdge end subroutine atm_advance_scalars_mono_work - subroutine atm_compute_dyn_tend(tend, tend_physics, state, diag, mesh, diag_physics, configs, nVertLevels, rk_step, dt, & + subroutine atm_compute_dyn_tend(tend, tend_physics, state, diag, mesh, diag_physics, configs, nVertLevels, rk_step, dynamics_substep, dt, & cellStart, cellEnd, vertexStart, vertexEnd, edgeStart, edgeEnd, & cellSolveStart, cellSolveEnd, vertexSolveStart, vertexSolveEnd, edgeSolveStart, edgeSolveEnd) !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! 
@@ -4765,7 +4765,7 @@ subroutine atm_compute_dyn_tend(tend, tend_physics, state, diag, mesh, diag_phys type (mpas_pool_type), intent(in) :: diag_physics type (mpas_pool_type), intent(in) :: configs integer, intent(in) :: nVertLevels ! for allocating stack variables - integer, intent(in) :: rk_step + integer, intent(in) :: rk_step, dynamics_substep real (kind=RKIND), intent(in) :: dt integer, intent(in) :: cellStart, cellEnd, vertexStart, vertexEnd, edgeStart, edgeEnd integer, intent(in) :: cellSolveStart, cellSolveEnd, vertexSolveStart, vertexSolveEnd, edgeSolveStart, edgeSolveEnd @@ -4825,6 +4825,7 @@ subroutine atm_compute_dyn_tend(tend, tend_physics, state, diag, mesh, diag_phys real (kind=RKIND), pointer :: coef_3rd_order, c_s logical, pointer :: config_mix_full + logical, pointer :: config_mix_scalars character (len=StrKIND), pointer :: config_horiz_mixing character (len=StrKIND), pointer :: config_les_model character (len=StrKIND), pointer :: config_les_surface @@ -4848,6 +4849,7 @@ subroutine atm_compute_dyn_tend(tend, tend_physics, state, diag, mesh, diag_phys call mpas_pool_get_config(mesh, 'sphere_radius', r_earth) call mpas_pool_get_config(configs, 'config_coef_3rd_order', coef_3rd_order) call mpas_pool_get_config(configs, 'config_mix_full', config_mix_full) + call mpas_pool_get_config(configs, 'config_mix_scalars', config_mix_scalars) call mpas_pool_get_config(configs, 'config_horiz_mixing', config_horiz_mixing) call mpas_pool_get_config(configs, 'config_les_model', config_les_model) call mpas_pool_get_config(configs, 'config_les_surface', config_les_surface) @@ -5005,10 +5007,10 @@ subroutine atm_compute_dyn_tend(tend, tend_physics, state, diag, mesh, diag_phys latCell, latEdge, angleEdge, u_init, v_init, advCellsForEdge, nAdvCellsForEdge, adv_coefs, adv_coefs_3rd, & rdzu, rdzw, fzm, fzp, qv_init, t_init, cf1, cf2, cf3, r_earth, ur_cell, vr_cell, defc_a, defc_b, & 
deformation_coef_c2,deformation_coef_s2,deformation_coef_cs,deformation_coef_c,deformation_coef_s, & - tend_w_pgf, tend_w_buoy, coef_3rd_order, c_s, config_mix_full, config_horiz_mixing, config_les_model, & + tend_w_pgf, tend_w_buoy, coef_3rd_order, c_s, config_mix_full, config_mix_scalars, config_horiz_mixing, config_les_model, & config_les_surface, prandtl_3d_inv, config_del4u_div_factor, & config_h_mom_eddy_visc2, config_v_mom_eddy_visc2, config_h_theta_eddy_visc2, config_v_theta_eddy_visc2, & - config_h_theta_eddy_visc4, config_h_mom_eddy_visc4, config_visc4_2dsmag, config_len_disp, rk_step, dt, & + config_h_theta_eddy_visc4, config_h_mom_eddy_visc4, config_visc4_2dsmag, config_len_disp, rk_step, dynamics_substep, dt, & config_mpas_cam_coef, & config_rayleigh_damp_u, config_rayleigh_damp_u_timescale_days, & config_number_rayleigh_damp_u_levels, config_number_cam_damping_levels, & @@ -5033,10 +5035,10 @@ subroutine atm_compute_dyn_tend_work(nCells, nEdges, nVertices, nVertLevels_dumm latCell, latEdge, angleEdge, u_init, v_init, advCellsForEdge, nAdvCellsForEdge, adv_coefs, adv_coefs_3rd, & rdzu, rdzw, fzm, fzp, qv_init, t_init, cf1, cf2, cf3, r_earth, ur_cell, vr_cell, defc_a, defc_b, & deformation_coef_c2,deformation_coef_s2,deformation_coef_cs,deformation_coef_c,deformation_coef_s, & - tend_w_pgf, tend_w_buoy, coef_3rd_order, c_s, config_mix_full, config_horiz_mixing, config_les_model, & + tend_w_pgf, tend_w_buoy, coef_3rd_order, c_s, config_mix_full, config_mix_scalars, config_horiz_mixing, config_les_model, & config_les_surface, prandtl_3d_inv, config_del4u_div_factor, & config_h_mom_eddy_visc2, config_v_mom_eddy_visc2, config_h_theta_eddy_visc2, config_v_theta_eddy_visc2, & - config_h_theta_eddy_visc4, config_h_mom_eddy_visc4, config_visc4_2dsmag, config_len_disp, rk_step, dt, & + config_h_theta_eddy_visc4, config_h_mom_eddy_visc4, config_visc4_2dsmag, config_len_disp, rk_step, dynamics_substep, dt, & config_mpas_cam_coef, & config_rayleigh_damp_u, 
config_rayleigh_damp_u_timescale_days, & config_number_rayleigh_damp_u_levels, config_number_cam_damping_levels, & @@ -5155,7 +5157,7 @@ subroutine atm_compute_dyn_tend_work(nCells, nEdges, nVertices, nVertLevels_dumm real (kind=RKIND), dimension(nVertLevels+1,nCells+1) :: tend_w_buoy real (kind=RKIND) :: coef_3rd_order, c_s - logical :: config_mix_full + logical :: config_mix_full, config_mix_scalars character (len=StrKIND) :: config_horiz_mixing character (len=StrKIND) :: config_les_model character (len=StrKIND) :: config_les_surface @@ -5166,9 +5168,8 @@ subroutine atm_compute_dyn_tend_work(nCells, nEdges, nVertices, nVertLevels_dumm real (kind=RKIND) :: config_len_disp real (kind=RKIND) :: config_h_mom_eddy_visc2, config_v_mom_eddy_visc2, config_h_theta_eddy_visc2, config_v_theta_eddy_visc2 logical, parameter :: test_dissipation_3d=.true. - logical, parameter :: mix_scalars = .true. - integer, intent(in) :: rk_step + integer, intent(in) :: rk_step, dynamics_substep real (kind=RKIND), intent(in) :: dt real (kind=RKIND) :: config_mpas_cam_coef @@ -5321,7 +5322,7 @@ subroutine atm_compute_dyn_tend_work(nCells, nEdges, nVertices, nVertLevels_dumm time_of_day_seconds = real(H)*3600. + real(M)*60. + real(S) + real(S_n)/real(S_d) + 0.5*dt ! 
call mpas_log_write(' les integration, timestep midpoint time of day in seconds, $r ', realArgs=(/time_of_day_seconds/)) - call les_models( config_les_model, config_les_surface, eddy_visc_horz, eddy_visc_vert, & + call les_models( config_les_model, config_les_surface, dynamics_substep, eddy_visc_horz, eddy_visc_vert, & u, v, ur_cell, vr_cell, & w, c_s, bn2, zgrid, config_len_disp, & deformation_coef_c2, deformation_coef_s2, deformation_coef_cs, & @@ -5695,7 +5696,7 @@ subroutine atm_compute_dyn_tend_work(nCells, nEdges, nVertices, nVertLevels_dumm rdzw, rdzu, & v_mom_eddy_visc2, h_mom_eddy_visc4, & delsq_w, & - w, rho_edge, rho_zz, zz, & + w, rho_edge, rho_zz, divergence, zz, & eddy_visc_horz, eddy_visc_vert, & config_les_model, config_les_surface, & tend_w_euler ) @@ -5887,11 +5888,11 @@ subroutine atm_compute_dyn_tend_work(nCells, nEdges, nVertices, nVertLevels_dumm theta_m, rho_edge, rho_zz, zz, & eddy_visc_horz, eddy_visc_vert, & bn2, config_len_disp, scalars, tend_scalars, & - index_tke, index_qv, num_scalars, mix_scalars, & + index_tke, index_qv, num_scalars, config_mix_scalars, & config_les_model, config_les_surface, time_of_day_seconds,& ur_cell, vr_cell, & hfx, qfx, & - tend_theta_euler ) + tend_theta_euler, dynamics_substep ) ! else ! 
this is the original MPAS dissipation code From 6345dc1640d1cd79c1d68ae035769e883d15daab Mon Sep 17 00:00:00 2001 From: Jimy Dudhia Date: Tue, 21 Oct 2025 13:24:09 -0600 Subject: [PATCH 087/146] add surface flux namelist options --- src/core_atmosphere/Registry.xml | 17 ++++++++++++++++- .../dynamics/mpas_atm_dissipation_models.F | 15 ++++++++++----- .../dynamics/mpas_atm_time_integration.F | 13 +++++++++++++ 3 files changed, 39 insertions(+), 6 deletions(-) diff --git a/src/core_atmosphere/Registry.xml b/src/core_atmosphere/Registry.xml index 7cfd643b5e..2264cf1fd5 100644 --- a/src/core_atmosphere/Registry.xml +++ b/src/core_atmosphere/Registry.xml @@ -154,7 +154,22 @@ + possible_values="'specified', `varying'"/> + + + + + + Date: Thu, 4 Dec 2025 15:32:48 -0700 Subject: [PATCH 088/146] Add explicit declarations of iCell, k, and p in the calculate_n2 routine By ensuring that all variables are explicitly declared in the calculate_n2 routine, the code can be built with compiler flags (e.g., -fimplicit-none) that forbid implicitly declared variables. --- src/core_atmosphere/dynamics/mpas_atm_dissipation_models.F | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/core_atmosphere/dynamics/mpas_atm_dissipation_models.F b/src/core_atmosphere/dynamics/mpas_atm_dissipation_models.F index 50c0619962..7753e692c5 100644 --- a/src/core_atmosphere/dynamics/mpas_atm_dissipation_models.F +++ b/src/core_atmosphere/dynamics/mpas_atm_dissipation_models.F @@ -340,7 +340,8 @@ subroutine calculate_n2( bn2, theta_m, exner, pressure_b, pp, zgrid, scalars, in real (kind=RKIND), dimension(nVertLevels+1,nCells+1), intent(in) :: zgrid real (kind=RKIND), dimension(num_scalars,nVertLevels,nCells+1), intent(in) :: scalars ! local - real (kind=RKIND) :: dz, rdz, esw + integer :: iCell, k + real (kind=RKIND) :: dz, rdz, esw, p real (kind=RKIND), parameter :: qc_cr = 0.00001 ! 
in kg/kg real (kind=RKIND), dimension(nVertLevels) :: theta, qvsw, temp, coefa logical :: dry_bv_frequency From a6eec0e0794771092ef43480c2a478fd75d3f90e Mon Sep 17 00:00:00 2001 From: Michael Duda Date: Thu, 4 Dec 2025 17:01:35 -0700 Subject: [PATCH 089/146] Avoid passing null pointers for ustm, hfx, and qfx to atm_compute_dyn_tend_work The dummy arguments for ustm, hfx, and qfx in the atm_compute_dyn_tend_work routine are not pointers, and so passing unassociated pointers for the actual arguments is invalid and will trigger a runtime error when debugging options are enabled with the GNU compilers. The workaround adopted in this commit is to allocate the ustm, hfx, and qfx array pointers with a trivial size if these fields are not available (due to packages associated with PBL schemes), and to deallocate them after the call to atm_compute_dyn_tend_work. The dummy arguments for ustm, hfx, and qfx in atm_compute_dyn_tend_work (as well as in u_dissipation_3d and scalar_dissipation_3d_les) are now assumed-shape arrays, as the actual arguments may not have (nCells+1) elements. 
--- .../dynamics/mpas_atm_dissipation_models.F | 4 ++-- .../dynamics/mpas_atm_time_integration.F | 23 ++++++++++++++++++- 2 files changed, 24 insertions(+), 3 deletions(-) diff --git a/src/core_atmosphere/dynamics/mpas_atm_dissipation_models.F b/src/core_atmosphere/dynamics/mpas_atm_dissipation_models.F index 7753e692c5..1b35cedd3a 100644 --- a/src/core_atmosphere/dynamics/mpas_atm_dissipation_models.F +++ b/src/core_atmosphere/dynamics/mpas_atm_dissipation_models.F @@ -484,7 +484,7 @@ subroutine u_dissipation_3d( edgeStart, edgeEnd, edgeSolveStart, edgeSolveEnd, v real (kind=RKIND), dimension(nVertLevels,nCells+1) :: delsq_divergence real (kind=RKIND), dimension(nVertLevels), intent(in) :: u_init, v_init - real (kind=RKIND), dimension(nCells+1), intent(in) :: ustm + real (kind=RKIND), dimension(:), intent(in) :: ustm real (kind=RKIND), dimension(nVertLevels,nEdges+1), intent(out) :: tend_u_euler @@ -964,7 +964,7 @@ subroutine scalar_dissipation_3d_les( cellStart, cellEnd, cellSolveStart, cellSo real (kind=RKIND), dimension(nVertLevels,nCells+1), intent(in) :: eddy_visc_vert real (kind=RKIND), dimension(nVertLevels,nCells+1), intent(in) :: rho_zz real (kind=RKIND), dimension(nVertLevels,nEdges+1), intent(in) :: rho_edge - real (kind=RKIND), dimension(nCells+1), intent(in) :: hfx, qfx + real (kind=RKIND), dimension(:), intent(in) :: hfx, qfx real (kind=RKIND), dimension(nVertLevels,nCells+1), intent(inout) :: tend_theta_euler diff --git a/src/core_atmosphere/dynamics/mpas_atm_time_integration.F b/src/core_atmosphere/dynamics/mpas_atm_time_integration.F index e5b9c98733..f54804cc93 100644 --- a/src/core_atmosphere/dynamics/mpas_atm_time_integration.F +++ b/src/core_atmosphere/dynamics/mpas_atm_time_integration.F @@ -4847,6 +4847,7 @@ subroutine atm_compute_dyn_tend(tend, tend_physics, state, diag, mesh, diag_phys integer, pointer :: index_qv, index_qc, index_tke logical :: inactive_rthdynten + logical :: nopbl call mpas_pool_get_config(mesh, 'sphere_radius', r_earth) 
@@ -4910,10 +4911,24 @@ subroutine atm_compute_dyn_tend(tend, tend_physics, state, diag, mesh, diag_phys call mpas_pool_get_array(tend_physics, 'rthdynten', rthdynten) + nullify(ustm) call mpas_pool_get_array(diag_physics,'ustm',ustm) + nullify(hfx) call mpas_pool_get_array(diag_physics,'hfx',hfx) + nullify(qfx) call mpas_pool_get_array(diag_physics,'qfx',qfx) + nopbl = .false. + if (.not. associated(ustm) & + .or. .not. associated(hfx) & + .or. .not. associated(qfx)) then + + allocate(ustm(1)) + allocate(hfx(1)) + allocate(qfx(1)) + nopbl = .true. + end if + call mpas_pool_get_array(mesh, 'weightsOnEdge', weightsOnEdge) call mpas_pool_get_array(mesh, 'cellsOnEdge', cellsOnEdge) call mpas_pool_get_array(mesh, 'cellsOnCell', cellsOnCell) @@ -5025,6 +5040,12 @@ subroutine atm_compute_dyn_tend(tend, tend_physics, state, diag, mesh, diag_phys cellStart, cellEnd, vertexStart, vertexEnd, edgeStart, edgeEnd, & cellSolveStart, cellSolveEnd, vertexSolveStart, vertexSolveEnd, edgeSolveStart, edgeSolveEnd) + if (nopbl) then + deallocate(ustm) + deallocate(hfx) + deallocate(qfx) + end if + end subroutine atm_compute_dyn_tend @@ -5108,7 +5129,7 @@ subroutine atm_compute_dyn_tend_work(nCells, nEdges, nVertices, nVertLevels_dumm real (kind=RKIND), dimension(nVertLevels,nCells+1) :: eddy_visc_horz real (kind=RKIND), dimension(nVertLevels,nCells+1) :: eddy_visc_vert real (kind=RKIND), dimension(nVertLevels,nCells+1) :: bn2 - real (kind=RKIND), dimension(nCells+1) :: ustm, hfx, qfx + real (kind=RKIND), dimension(:) :: ustm, hfx, qfx real (kind=RKIND), dimension(nVertLevels,nCells+1) :: prandtl_3d_inv real (kind=RKIND), dimension(maxEdges,nCells+1) :: edgesOnCell_sign real (kind=RKIND), dimension(vertexDegree,nVertices+1) :: edgesOnVertex_sign From 08e681bcb71739b7c447837a92db404039e36fda Mon Sep 17 00:00:00 2001 From: Michael Duda Date: Thu, 4 Dec 2025 19:15:30 -0700 Subject: [PATCH 090/146] Remove unused variables from new LES code for init and mixing --- 
.../dynamics/mpas_atm_dissipation_models.F | 5 ++--- src/core_init_atmosphere/mpas_init_atm_cases.F | 18 +++++++----------- 2 files changed, 9 insertions(+), 14 deletions(-) diff --git a/src/core_atmosphere/dynamics/mpas_atm_dissipation_models.F b/src/core_atmosphere/dynamics/mpas_atm_dissipation_models.F index 1b35cedd3a..5328f0e4a0 100644 --- a/src/core_atmosphere/dynamics/mpas_atm_dissipation_models.F +++ b/src/core_atmosphere/dynamics/mpas_atm_dissipation_models.F @@ -150,7 +150,7 @@ subroutine les_models( config_les_model, config_les_surface, dynamics_substep, e real (kind=RKIND) :: rdz, def2, pr_inv, wk real (kind=RKIND) :: shear_production, buoyancy, dissipation, delta_z, delta_s, bv, tke_length, diss_length real (kind=RKIND) :: l_horizontal, l_vertical, c_dissipation - real (kind=RKIND) :: prandtl_horizontal_inv, prandtl_vertical_inv + real (kind=RKIND) :: prandtl_horizontal_inv real (kind=RKIND) :: eddy_visc_h, eddy_visc_v logical, parameter :: test_tke=.true. @@ -978,9 +978,8 @@ subroutine scalar_dissipation_3d_les( cellStart, cellEnd, cellSolveStart, cellSo real (kind=RKIND), dimension(nVertLevels+1) :: turb_vflux, prandtl_1d_inverse real (kind=RKIND), dimension(num_scalars,nVertLevels+1) :: turb_vflux_scalars real (kind=RKIND) :: rho_k_at_w, zz_at_w - real (kind=RKIND) :: delta_z, delta_s, tke_length, bv_frequency2 - real (kind=RKIND) :: moisture_flux, heat_flux, theta_m_flux, c_h, c_q + real (kind=RKIND) :: moisture_flux, heat_flux, theta_m_flux real (kind=RKIND) :: qv_cell, theta_m_cell, theta_cell if(debug_dissipation) then diff --git a/src/core_init_atmosphere/mpas_init_atm_cases.F b/src/core_init_atmosphere/mpas_init_atm_cases.F index 9f936dcf93..9c372cb163 100644 --- a/src/core_init_atmosphere/mpas_init_atm_cases.F +++ b/src/core_init_atmosphere/mpas_init_atm_cases.F @@ -6261,36 +6261,32 @@ subroutine init_atm_case_les(dminfo, mesh, fg, nCells, nVertLevels, state, diag, integer, dimension(:), pointer :: landmask, lu_index !This is temporary 
variable here. It just need when calculate tangential velocity v. - integer :: eoe, j + integer :: eoe integer, dimension(:), pointer :: nEdgesOnEdge integer, dimension(:,:), pointer :: edgesOnEdge, cellsOnEdge real (kind=RKIND), dimension(:,:), pointer :: weightsOnEdge real (kind=RKIND), pointer :: x_period, y_period - integer :: iCell, iCell1, iCell2 , iEdge, vtx1, vtx2, ivtx, i, k, nz, nz1, itr, cell1, cell2 + integer :: iCell, iCell1, iCell2 , iEdge, ivtx, i, k, nz, nz1, itr, cell1, cell2 integer, pointer :: nEdges, nVertices, maxEdges, nCellsSolve integer, pointer :: index_qv integer, pointer :: index_tke - real (kind=RKIND), dimension(nVertLevels + 1 ) :: znu, znw, znwc, znwv - real (kind=RKIND), dimension(nVertLevels + 1 ) :: znuc, znuv - real (kind=RKIND), dimension(nVertLevels + 1 ) :: zc, zw, ah real (kind=RKIND), dimension(nVertLevels ) :: zu, dzw, rdzwp, rdzwm - real (kind=RKIND), dimension(nVertLevels, nCells) :: relhum, thi, tbi, cqwb + real (kind=RKIND), dimension(nVertLevels, nCells) :: thi, tbi, cqwb - real (kind=RKIND) :: r, xnutr + real (kind=RKIND) :: xnutr real (kind=RKIND) :: ztemp, zd, zt, dz, str real (kind=RKIND), dimension(nVertLevels ) :: qvb, qvp, zg real (kind=RKIND), dimension(nVertLevels ) :: t_init_1d - real (kind=RKIND) :: d1, d2, d3, cof1, cof2 + real (kind=RKIND) :: cof1, cof2 real (kind=RKIND), pointer :: cf1, cf2, cf3 - real (kind=RKIND) :: ztr, thetar, ttr, thetas, um, us, zts, pitop, pibtop, ptopb, ptop, rcp, rcv, p0 - real (kind=RKIND) :: radx, radz, zcent, xmid, delt, xloc, rad, yloc, ymid, a_scale - real (kind=RKIND) :: pres, temp, es, qvs + real (kind=RKIND) :: pitop, pibtop, ptopb, ptop, rcp, rcv, p0 + real (kind=RKIND) :: a_scale real (kind=RKIND), dimension(:), pointer :: xCell, yCell, zCell real (kind=RKIND), dimension(:), pointer :: xEdge, yEdge, zEdge From ec50d82b2e78a72e49990ad4bf91befa7acee0a2 Mon Sep 17 00:00:00 2001 From: Michael Duda Date: Fri, 5 Dec 2025 12:13:38 -0700 Subject: [PATCH 091/146] Remove trailing 
whitespace from mpas_atm_dissipation_models.F --- .../dynamics/mpas_atm_dissipation_models.F | 150 +++++++++--------- 1 file changed, 75 insertions(+), 75 deletions(-) diff --git a/src/core_atmosphere/dynamics/mpas_atm_dissipation_models.F b/src/core_atmosphere/dynamics/mpas_atm_dissipation_models.F index 5328f0e4a0..c8ef75d1ab 100644 --- a/src/core_atmosphere/dynamics/mpas_atm_dissipation_models.F +++ b/src/core_atmosphere/dynamics/mpas_atm_dissipation_models.F @@ -38,7 +38,7 @@ subroutine smagorinsky_2d( kdiff, u, v, c_s, config_len_disp, use mpas_atm_dimensions ! pull nVertLevels and maxEdges from here implicit none - + integer, intent(in) :: cellStart, cellEnd, nCells, nEdges real (kind=RKIND), dimension(nVertLevels,nEdges+1), intent(in) :: u real (kind=RKIND), dimension(nVertLevels,nEdges+1), intent(in) :: v @@ -79,7 +79,7 @@ subroutine smagorinsky_2d( kdiff, u, v, c_s, config_len_disp, end do !DIR$ IVDEP do k=1, nVertLevels - ! here is the Smagorinsky formulation, + ! here is the Smagorinsky formulation, ! followed by imposition of an upper bound on the eddy viscosity d_11(k) = 2*dudx(k) d_22(k) = 2*dvdy(k) @@ -109,7 +109,7 @@ subroutine les_models( config_les_model, config_les_surface, dynamics_substep, e nCells, nEdges, nVertLevels, maxEdges, num_scalars ) implicit none - + character (len=StrKIND), intent(in) :: config_les_model character (len=StrKIND), intent(in) :: config_les_surface @@ -143,7 +143,7 @@ subroutine les_models( config_les_model, config_les_surface, dynamics_substep, e ! 
local variables integer :: iCell, iEdge, k, ie, cell1, cell2 - real (kind=RKIND), dimension(nVertLevels) :: d_11, d_22, d_33, d_12, d_13, d_23 + real (kind=RKIND), dimension(nVertLevels) :: d_11, d_22, d_33, d_12, d_13, d_23 real (kind=RKIND), dimension(nVertLevels) :: dudx, dudy, dvdx, dvdy real (kind=RKIND), dimension(nVertLevels+1) :: dwdx, dwdy real (kind=RKIND), dimension(nVertLevels) :: dudz, dvdz, dwdz @@ -179,7 +179,7 @@ subroutine les_models( config_les_model, config_les_surface, dynamics_substep, e dudz(1:nVertLevels) = 0.0 dvdz(1:nVertLevels) = 0.0 dwdz(1:nVertLevels) = 0.0 - + do iEdge=1,nEdgesOnCell(iCell) ie = EdgesOnCell(iEdge,iCell) @@ -242,7 +242,7 @@ subroutine les_models( config_les_model, config_les_surface, dynamics_substep, e eddy_visc_horz(k,iCell) = (c_s * config_len_disp)**2 * sqrt(max(0.,def2 - pr_inv*bv_freq2(k,iCell))) eddy_visc_horz(k,iCell) = min(eddy_visc_horz(k,iCell),(0.01*config_len_disp**2) * invDt) delta_z = zgrid(k+1,iCell)-zgrid(k,iCell) - eddy_visc_vert(k,iCell) = (c_s * delta_z)**2 * sqrt(max(0.,def2 - pr_inv*bv_freq2(k,iCell))) + eddy_visc_vert(k,iCell) = (c_s * delta_z)**2 * sqrt(max(0.,def2 - pr_inv*bv_freq2(k,iCell))) ! eddy_visc_vert(k,iCell) = eddy_visc_horz(k,iCell) end do @@ -266,7 +266,7 @@ subroutine les_models( config_les_model, config_les_surface, dynamics_substep, e diss_length = min(delta_s,max(tke_length,0.01*delta_s)) if(bv_freq2(k,iCell) <= 0) diss_length = delta_s - ! non-isotropic mixing + ! non-isotropic mixing l_horizontal = config_len_disp l_vertical = min(delta_z,tke_length) @@ -294,30 +294,30 @@ subroutine les_models( config_les_model, config_les_surface, dynamics_substep, e buoyancy = -eddy_visc_v*bv_freq2(k,iCell) - ! dissipation + ! dissipation c_dissipation = 1.9*c_k + max( 0.0, 0.93 - 1.9*c_k )*diss_length/delta_s ! if( (k.eq. 1) .or. (k.eq.nVertLevels) ) c_dissipation = 3.9 dissipation = -c_dissipation*(scalars(index_tke,k,iCell)**(1.5))/diss_length - ! computing eddy viscosities ********* + ! 
computing eddy viscosities ********* prandtl_horizontal_inv = 3. - prandtl_3d_inv(k,iCell) = 1.0+(2.0*l_vertical/delta_z) - + prandtl_3d_inv(k,iCell) = 1.0+(2.0*l_vertical/delta_z) + - ! RHS term for the subgrid ke. + ! RHS term for the subgrid ke. if(dynamics_substep == 1) & - tend_scalars(index_tke,k,iCell) = rho_zz(k,iCell)*( shear_production + buoyancy + dissipation ) + tend_scalars(index_tke,k,iCell) = rho_zz(k,iCell)*( shear_production + buoyancy + dissipation ) end do else call mpas_log_write(' in les_models, no les scheme for '//trim(config_les_model), messageType=MPAS_LOG_CRIT) - + end if ! end of config_les_model test end do ! loop over all owned cells (columns) @@ -362,7 +362,7 @@ subroutine calculate_n2( bn2, theta_m, exner, pressure_b, pp, zgrid, scalars, in qvsw(k) = ep_2 * esw / (p - esw) coefa(k) = ( 1.0 + xlv * qvsw(k)/ R_d / temp(k) ) / & - ( 1.0 + xlv * xlv *qvsw(k) / Cp / R_v / temp(k) / temp(k) ) + ( 1.0 + xlv * xlv *qvsw(k) / Cp / R_v / temp(k) / temp(k) ) end do @@ -377,7 +377,7 @@ subroutine calculate_n2( bn2, theta_m, exner, pressure_b, pp, zgrid, scalars, in ! - ( qtot(k+1, iCell) - qtot(k-1, iCell) ) / dz ) ! else ! ! Moist Brunt-Vaisala frequency according to Durran and Klemp (1982) Eq. 36 - ! bn2(k,iCell) = gravity * ( coefa(k) * ((theta(k+1) - theta(k-1) ) / theta(k) / dz & + ! bn2(k,iCell) = gravity * ( coefa(k) * ((theta(k+1) - theta(k-1) ) / theta(k) / dz & ! + xlv / cp / temp(k) * ( qvsw(k+1) - qvsw(k-1)) / dz ) & ! - ( qtot(k+1, iCell) - qtot(k-1, iCell) ) / dz ) ! endif @@ -394,7 +394,7 @@ subroutine calculate_n2( bn2, theta_m, exner, pressure_b, pp, zgrid, scalars, in - ( qtot(k+1, iCell) - qtot(k-1, iCell) ) * rdz ) else ! Moist Brunt-Vaisala frequency according to Durran and Klemp (1982) Eq. 
36 - bn2(k,iCell) = gravity * ( coefa(k) * ((theta(k+1) - theta(k-1) ) / theta(k) * rdz & + bn2(k,iCell) = gravity * ( coefa(k) * ((theta(k+1) - theta(k-1) ) / theta(k) * rdz & + xlv / cp / temp(k) * ( qvsw(k+1) - qvsw(k-1)) * rdz ) & - ( qtot(k+1, iCell) - qtot(k-1, iCell) ) * rdz ) endif @@ -426,7 +426,7 @@ subroutine u_dissipation_3d( edgeStart, edgeEnd, edgeSolveStart, edgeSolveEnd, v u, v, divergence, vorticity, rho_edge, rho_zz, u_init, v_init, ustm, & tend_u_euler ) - use mpas_atm_dimensions ! pull nVertLevels and maxEdges from here + use mpas_atm_dimensions ! pull nVertLevels and maxEdges from here implicit none @@ -488,7 +488,7 @@ subroutine u_dissipation_3d( edgeStart, edgeEnd, edgeSolveStart, edgeSolveEnd, v real (kind=RKIND), dimension(nVertLevels,nEdges+1), intent(out) :: tend_u_euler - ! local variables + ! local variables integer :: iEdge, cell1, cell2, vertex1, vertex2, iVertex, iCell, i, k real (kind=RKIND) :: r_dc, r_dv, u_diffusion, u_diffusion_les, kdiffu, r, edge_sign, u_mix_scale @@ -511,10 +511,10 @@ subroutine u_dissipation_3d( edgeStart, edgeEnd, edgeSolveStart, edgeSolveEnd, v call mpas_log_write(' 4th order divergence factor is $r ', realArgs=(/config_del4u_div_factor/)) end if -!$OMP BARRIER +!$OMP BARRIER - ! del^4 horizontal filter. We compute this as del^2 ( del^2 (u) ). - ! First, storage to hold the result from the first del^2 computation. + ! del^4 horizontal filter. We compute this as del^2 ( del^2 (u) ). + ! First, storage to hold the result from the first del^2 computation. delsq_u(1:nVertLevels,edgeStart:edgeEnd) = 0.0 tau_12_factor = 0.0 @@ -528,11 +528,11 @@ subroutine u_dissipation_3d( edgeStart, edgeEnd, edgeSolveStart, edgeSolveEnd, v r_dc = invDcEdge(iEdge) r_dv = min(invDvEdge(iEdge), 4*invDcEdge(iEdge)) -!DIR$ IVDEP +!DIR$ IVDEP do k=1,nVertLevels - ! Compute diffusion, computed as \nabla divergence - k \times \nabla vorticity - ! only valid for h_mom_eddy_visc4 == constant + ! 
Compute diffusion, computed as \nabla divergence - k \times \nabla vorticity + ! only valid for h_mom_eddy_visc4 == constant u_diffusion = ( divergence(k,cell2) - divergence(k,cell1) ) * r_dc & -( vorticity(k,vertex2) - vorticity(k,vertex1) ) * r_dv ! for LES models we need 2 times the gradient of divergence, in contrast to what is @@ -543,16 +543,16 @@ subroutine u_dissipation_3d( edgeStart, edgeEnd, edgeSolveStart, edgeSolveEnd, v kdiffu = 0.5*(eddy_visc_horz(k,cell1)+eddy_visc_horz(k,cell2)) - ! include 2nd-order diffusion here + ! include 2nd-order diffusion here tend_u_euler(k,iEdge) = tend_u_euler(k,iEdge) & + rho_edge(k,iEdge)* kdiffu * u_diffusion_les * meshScalingDel2(iEdge) end do end do - if (h_mom_eddy_visc4 > 0.0) then ! 4th order mixing is active + if (h_mom_eddy_visc4 > 0.0) then ! 4th order mixing is active -!$OMP BARRIER +!$OMP BARRIER do iVertex=vertexStart,vertexEnd delsq_vorticity(1:nVertLevels,iVertex) = 0.0 @@ -577,7 +577,7 @@ subroutine u_dissipation_3d( edgeStart, edgeEnd, edgeSolveStart, edgeSolveEnd, v end do end do -!$OMP BARRIER +!$OMP BARRIER do iEdge=edgeSolveStart,edgeSolveEnd cell1 = cellsOnEdge(1,iEdge) @@ -589,16 +589,16 @@ subroutine u_dissipation_3d( edgeStart, edgeEnd, edgeSolveStart, edgeSolveEnd, v r_dc = u_mix_scale * config_del4u_div_factor * invDcEdge(iEdge) r_dv = u_mix_scale * min(invDvEdge(iEdge), 4*invDcEdge(iEdge)) -!DIR$ IVDEP +!DIR$ IVDEP do k=1,nVertLevels - ! Compute diffusion, computed as \nabla divergence - k \times \nabla vorticity - ! only valid for h_mom_eddy_visc4 == constant - ! - ! Here, we scale the diffusion on the divergence part a factor of config_del4u_div_factor - ! relative to the rotational part. The stability constraint on the divergence component is much less - ! stringent than the rotational part, and this flexibility may be useful. - ! + ! Compute diffusion, computed as \nabla divergence - k \times \nabla vorticity + ! only valid for h_mom_eddy_visc4 == constant + ! + ! 
Here, we scale the diffusion on the divergence part a factor of config_del4u_div_factor + ! relative to the rotational part. The stability constraint on the divergence component is much less + ! stringent than the rotational part, and this flexibility may be useful. + ! u_diffusion = rho_edge(k,iEdge) * ( ( delsq_divergence(k,cell2) - delsq_divergence(k,cell1) ) * r_dc & -( delsq_vorticity(k,vertex2) - delsq_vorticity(k,vertex1) ) * r_dv ) tend_u_euler(k,iEdge) = tend_u_euler(k,iEdge) - u_diffusion @@ -606,14 +606,14 @@ subroutine u_dissipation_3d( edgeStart, edgeEnd, edgeSolveStart, edgeSolveEnd, v end do end do - end if ! 4th order mixing is active + end if ! 4th order mixing is active - ! - ! vertical mixing for u - 2nd order filter in physical (z) space - ! + ! + ! vertical mixing for u - 2nd order filter in physical (z) space + ! if ( v_mom_eddy_visc2 > 0.0 ) then - if (config_mix_full) then ! mix full state + if (config_mix_full) then ! mix full state do iEdge=edgeSolveStart,edgeSolveEnd @@ -637,7 +637,7 @@ subroutine u_dissipation_3d( edgeStart, edgeEnd, edgeSolveStart, edgeSolveEnd, v end do end do - else ! idealized cases where we mix on the perturbation from the initial 1-D state + else ! idealized cases where we mix on the perturbation from the initial 1-D state do iEdge=edgeSolveStart,edgeSolveEnd @@ -666,7 +666,7 @@ subroutine u_dissipation_3d( edgeStart, edgeEnd, edgeSolveStart, edgeSolveEnd, v end do end do - end if ! mix perturbation state + end if ! mix perturbation state end if ! vertical mixing of horizontal momentum for les formulation @@ -715,7 +715,7 @@ subroutine u_dissipation_3d( edgeStart, edgeEnd, edgeSolveStart, edgeSolveEnd, v end do - end if + end if if(debug_dissipation) call mpas_log_write(' exiting u_dissipation_3d ') @@ -737,10 +737,10 @@ subroutine w_dissipation_3d( cellStart, cellEnd, cellSolveStart, cellSolveEnd, tend_w_euler ) - ! 3D w dissipation using the 3D smagorinsky eddy viscosities. + ! 
3D w dissipation using the 3D smagorinsky eddy viscosities. ! This routine also includes the simpler mixing models, and the 4th-order horizontal filter - use mpas_atm_dimensions ! pull nVertLevels and maxEdges from here + use mpas_atm_dimensions ! pull nVertLevels and maxEdges from here implicit none @@ -778,21 +778,21 @@ subroutine w_dissipation_3d( cellStart, cellEnd, cellSolveStart, cellSolveEnd, real (kind=RKIND), dimension(nVertLevels+1,nCells+1), intent(inout) :: tend_w_euler - ! storage passed in from calling routine + ! storage passed in from calling routine real (kind=RKIND), dimension(nVertLevels,nCells+1) :: delsq_w real (kind=RKIND), dimension(nVertLevels+1) :: turb_vflux - ! local variables + ! local variables integer :: cell1, cell2, iEdge, iCell, i, k real (kind=RKIND) :: r_areaCell, edge_sign, w_turb_flux -! !OMP BARRIER why is this openmp barrier here??? +! !OMP BARRIER why is this openmp barrier here??? - ! del^4 horizontal filter. We compute this as del^2 ( del^2 (w) ). - ! - ! First, storage to hold the result from the first del^2 computation. - ! we copied code from the theta mixing, hence the theta* names. + ! del^4 horizontal filter. We compute this as del^2 ( del^2 (w) ). + ! + ! First, storage to hold the result from the first del^2 computation. + ! we copied code from the theta mixing, hence the theta* names. if(debug_dissipation) then call mpas_log_write(' begin w_dissipation_3d ') @@ -814,7 +814,7 @@ subroutine w_dissipation_3d( cellStart, cellEnd, cellSolveStart, cellSolveEnd, cell1 = cellsOnEdge(1,iEdge) cell2 = cellsOnEdge(2,iEdge) -!DIR$ IVDEP +!DIR$ IVDEP do k=2,nVertLevels w_turb_flux = edge_sign*(rho_edge(k,iEdge)+rho_edge(k-1,iEdge))*(w(k,cell2) - w(k,cell1)) @@ -827,11 +827,11 @@ subroutine w_dissipation_3d( cellStart, cellEnd, cellSolveStart, cellSolveEnd, end do end do -!$OMP BARRIER +!$OMP BARRIER - if (h_mom_eddy_visc4 > 0.0) then ! 4th order mixing is active + if (h_mom_eddy_visc4 > 0.0) then ! 
4th order mixing is active - do iCell=cellSolveStart,cellSolveEnd ! Technically updating fewer cells than before... + do iCell=cellSolveStart,cellSolveEnd ! Technically updating fewer cells than before... r_areaCell = h_mom_eddy_visc4 * invAreaCell(iCell) do i=1,nEdgesOnCell(iCell) iEdge = edgesOnCell(i,iCell) @@ -847,12 +847,12 @@ subroutine w_dissipation_3d( cellStart, cellEnd, cellSolveStart, cellSolveEnd, end do end do - end if ! 4th order mixing is active + end if ! 4th order mixing is active if ( v_mom_eddy_visc2 > 0.0 ) then ! vertical mixing do iCell=cellSolveStart,cellSolveEnd -!DIR$ IVDEP +!DIR$ IVDEP do k=2,nVertLevels tend_w_euler(k,iCell) = tend_w_euler(k,iCell) + v_mom_eddy_visc2*0.5*(rho_zz(k,iCell)+rho_zz(k-1,iCell))*( & (w(k+1,iCell)-w(k ,iCell))*rdzw(k) & @@ -907,10 +907,10 @@ subroutine scalar_dissipation_3d_les( cellStart, cellEnd, cellSolveStart, cellSo tend_theta_euler, dynamics_substep ) - ! 3D theta_m dissipation using the 3D smagorinsky eddy viscosities. + ! 3D theta_m dissipation using the 3D smagorinsky eddy viscosities. ! This routine also includes the simpler mixing models, and the 4th-order horizontal filter - use mpas_atm_dimensions ! pull nVertLevels and maxEdges from here + use mpas_atm_dimensions ! pull nVertLevels and maxEdges from here implicit none integer, intent(in) :: cellStart, cellEnd @@ -968,10 +968,10 @@ subroutine scalar_dissipation_3d_les( cellStart, cellEnd, cellSolveStart, cellSo real (kind=RKIND), dimension(nVertLevels,nCells+1), intent(inout) :: tend_theta_euler - ! storage passed in from calling routine + ! storage passed in from calling routine real (kind=RKIND), dimension(nVertLevels,nCells+1) :: delsq_theta - ! local variables + ! 
local variables integer :: cell1, cell2, iEdge, iCell, i, k, iScalar real (kind=RKIND) :: r_areaCell, edge_sign, theta_turb_flux, pr_scale real (kind=RKIND) :: z1, z2, z3, z4, zm, z0, zp @@ -1017,7 +1017,7 @@ subroutine scalar_dissipation_3d_les( cellStart, cellEnd, cellSolveStart, cellSo end do !$OMP BARRIER - + if (h_theta_eddy_visc4 > 0.0) then ! 4th order mixing is active do iCell=cellSolveStart,cellSolveEnd ! Technically updating fewer cells than before... @@ -1036,12 +1036,12 @@ subroutine scalar_dissipation_3d_les( cellStart, cellEnd, cellSolveStart, cellSo end do end do - end if ! 4th order mixing is active + end if ! 4th order mixing is active + + if(mix_scalars .and. (dynamics_substep == 1)) then ! dissipation for scalars, including 4th-order filter. Likely needs optimization - if(mix_scalars .and. (dynamics_substep == 1)) then ! dissipation for scalars, including 4th-order filter. Likely needs optimization - do iScalar=1,num_scalars - + delsq_theta(1:nVertLevels,cellStart:cellEnd) = 0.0 do iCell=cellStart,cellEnd @@ -1069,7 +1069,7 @@ subroutine scalar_dissipation_3d_les( cellStart, cellEnd, cellSolveStart, cellSo end do !$OMP BARRIER - + if (h_theta_eddy_visc4 > 0.0) then ! 4th order mixing is active do iCell=cellSolveStart,cellSolveEnd ! Technically updating fewer cells than before... @@ -1088,8 +1088,8 @@ subroutine scalar_dissipation_3d_les( cellStart, cellEnd, cellSolveStart, cellSo end do end do - end if ! 4th order mixing is active - + end if ! 4th order mixing is active + end do ! loop over scalars for horizontal mixing end if ! horizontal scalar mixing @@ -1192,7 +1192,7 @@ subroutine scalar_dissipation_3d_les( cellStart, cellEnd, cellSolveStart, cellSo if( config_les_surface == "specified" ) then moisture_flux = config_surface_moisture_flux heat_flux = config_surface_heat_flux -! place holder routine for time-varying specified +! place holder routine for time-varying specified ! 
call flux_les_sas( heat_flux, moisture_flux, time_of_day_seconds ) else if ( config_les_surface == "varying" ) then @@ -1206,7 +1206,7 @@ subroutine scalar_dissipation_3d_les( cellStart, cellEnd, cellSolveStart, cellSo theta_m_flux = heat_flux*(1.0+(rv/rgas)*qv_cell)+(rv/rgas)*theta_cell*moisture_flux turb_vflux(1) = theta_m_flux*rho_zz(1,iCell) - moisture_flux = moisture_flux*rho_zz(1,iCell) + moisture_flux = moisture_flux*rho_zz(1,iCell) turb_vflux(nVertLevels+1) = turb_vflux(nVertLevels) else @@ -1249,7 +1249,7 @@ subroutine scalar_dissipation_3d_les( cellStart, cellEnd, cellSolveStart, cellSo end if ! mix scalars end do ! loop over cells (columns) - + end if if(debug_dissipation) call mpas_log_write(' exiting scalar_dissipation_3d ') From 384c5ba35bb0a2d06150d4412826c6ad5a6c7244 Mon Sep 17 00:00:00 2001 From: Michael Duda Date: Fri, 5 Dec 2025 14:29:13 -0700 Subject: [PATCH 092/146] Clean up indentation in mpas_atm_dissipation_models.F --- .../dynamics/mpas_atm_dissipation_models.F | 1352 +++++++++-------- 1 file changed, 680 insertions(+), 672 deletions(-) diff --git a/src/core_atmosphere/dynamics/mpas_atm_dissipation_models.F b/src/core_atmosphere/dynamics/mpas_atm_dissipation_models.F index c8ef75d1ab..70cf9c24b3 100644 --- a/src/core_atmosphere/dynamics/mpas_atm_dissipation_models.F +++ b/src/core_atmosphere/dynamics/mpas_atm_dissipation_models.F @@ -8,32 +8,33 @@ module mpas_atm_dissipation_models - use mpas_kind_types, only : RKIND - use mpas_atmphys_constants - use mpas_constants - use mpas_log - use mpas_derived_types, only : MPAS_LOG_CRIT - - logical, parameter :: debug_dissipation = .false. - logical, parameter :: les_test = .true., les_sas_test = .false. - !! real (kind=RKIND), parameter :: tke_heat_flux = 0.03 ! shear case from Moeng et al., first hour - ! real (kind=RKIND), parameter :: tke_heat_flux = 0.03 - ! real (kind=RKIND), parameter :: tke_heat_flux = 0.0 - !! real (kind=RKIND), parameter :: tke_drag_coefficient = 0.0013 ! 
ocean roughness length - ! real (kind=RKIND), parameter :: tke_drag_coefficient = 0.006 - ! real (kind=RKIND), parameter :: tke_drag_coefficient = 0.0 - real (kind=RKIND), parameter :: epsilon_bv = 1.e-06 - ! real (kind=RKIND), parameter :: c_k = 0.1 - real (kind=RKIND), parameter :: c_k = 0.25 - - - contains - - subroutine smagorinsky_2d( kdiff, u, v, c_s, config_len_disp, & - deformation_coef_c2, deformation_coef_s2, deformation_coef_cs, & - invDt, h_mom_eddy_visc4, config_visc4_2dsmag, h_theta_eddy_visc4, & - cellStart, cellEnd, nEdgesOnCell, edgesOnCell, & - nCells, nEdges ) + use mpas_kind_types, only : RKIND + use mpas_atmphys_constants + use mpas_constants + use mpas_log + use mpas_derived_types, only : MPAS_LOG_CRIT + + logical, parameter :: debug_dissipation = .false. + logical, parameter :: les_test = .true., les_sas_test = .false. + !! real (kind=RKIND), parameter :: tke_heat_flux = 0.03 ! shear case from Moeng et al., first hour + ! real (kind=RKIND), parameter :: tke_heat_flux = 0.03 + ! real (kind=RKIND), parameter :: tke_heat_flux = 0.0 + !! real (kind=RKIND), parameter :: tke_drag_coefficient = 0.0013 ! ocean roughness length + ! real (kind=RKIND), parameter :: tke_drag_coefficient = 0.006 + ! real (kind=RKIND), parameter :: tke_drag_coefficient = 0.0 + real (kind=RKIND), parameter :: epsilon_bv = 1.e-06 + ! real (kind=RKIND), parameter :: c_k = 0.1 + real (kind=RKIND), parameter :: c_k = 0.25 + + +contains + + + subroutine smagorinsky_2d( kdiff, u, v, c_s, config_len_disp, & + deformation_coef_c2, deformation_coef_s2, deformation_coef_cs, & + invDt, h_mom_eddy_visc4, config_visc4_2dsmag, h_theta_eddy_visc4, & + cellStart, cellEnd, nEdgesOnCell, edgesOnCell, & + nCells, nEdges ) use mpas_atm_dimensions ! 
pull nVertLevels and maxEdges from here @@ -57,56 +58,57 @@ subroutine smagorinsky_2d( kdiff, u, v, c_s, config_len_disp, integer :: iCell, iEdge, k real (kind=RKIND), dimension(nVertLevels) :: d_11, d_22, d_12, dudx, dudy, dvdx, dvdy - if(debug_dissipation) call mpas_log_write(' begin smagorinsky_2d ') + if(debug_dissipation) call mpas_log_write(' begin smagorinsky_2d ') - do iCell = cellStart,cellEnd - dudx(1:nVertLevels) = 0.0 - dudy(1:nVertLevels) = 0.0 - dvdx(1:nVertLevels) = 0.0 - dvdy(1:nVertLevels) = 0.0 - do iEdge=1,nEdgesOnCell(iCell) - do k=1,nVertLevels - dudx(k) = dudx(k) + deformation_coef_c2(iEdge,iCell)*u(k,EdgesOnCell(iEdge,iCell)) & - - deformation_coef_cs(iEdge,iCell)*v(k,EdgesOnCell(iEdge,iCell)) - dudy(k) = dudy(k) + deformation_coef_cs(iEdge,iCell)*u(k,EdgesOnCell(iEdge,iCell)) & - - deformation_coef_s2(iEdge,iCell)*v(k,EdgesOnCell(iEdge,iCell)) - dvdx(k) = dvdx(k) + deformation_coef_cs(iEdge,iCell)*u(k,EdgesOnCell(iEdge,iCell)) & - + deformation_coef_c2(iEdge,iCell)*v(k,EdgesOnCell(iEdge,iCell)) - dvdy(k) = dvdy(k) + deformation_coef_s2(iEdge,iCell)*u(k,EdgesOnCell(iEdge,iCell)) & - + deformation_coef_cs(iEdge,iCell)*v(k,EdgesOnCell(iEdge,iCell)) - end do - end do -!DIR$ IVDEP - do k=1, nVertLevels - ! here is the Smagorinsky formulation, - ! 
followed by imposition of an upper bound on the eddy viscosity - d_11(k) = 2*dudx(k) - d_22(k) = 2*dvdy(k) - d_12(k) = dudy(k) + dvdx(k) - kdiff(k,iCell) = (c_s * config_len_disp)**2 * sqrt(0.25*(d_11(k)-d_22(k))**2 + d_12(k)**2) - kdiff(k,iCell) = min(kdiff(k,iCell),(0.01*config_len_disp**2) * invDt) - end do + do iCell = cellStart,cellEnd + dudx(1:nVertLevels) = 0.0 + dudy(1:nVertLevels) = 0.0 + dvdx(1:nVertLevels) = 0.0 + dvdy(1:nVertLevels) = 0.0 + do iEdge=1,nEdgesOnCell(iCell) + do k=1,nVertLevels + dudx(k) = dudx(k) + deformation_coef_c2(iEdge,iCell)*u(k,EdgesOnCell(iEdge,iCell)) & + - deformation_coef_cs(iEdge,iCell)*v(k,EdgesOnCell(iEdge,iCell)) + dudy(k) = dudy(k) + deformation_coef_cs(iEdge,iCell)*u(k,EdgesOnCell(iEdge,iCell)) & + - deformation_coef_s2(iEdge,iCell)*v(k,EdgesOnCell(iEdge,iCell)) + dvdx(k) = dvdx(k) + deformation_coef_cs(iEdge,iCell)*u(k,EdgesOnCell(iEdge,iCell)) & + + deformation_coef_c2(iEdge,iCell)*v(k,EdgesOnCell(iEdge,iCell)) + dvdy(k) = dvdy(k) + deformation_coef_s2(iEdge,iCell)*u(k,EdgesOnCell(iEdge,iCell)) & + + deformation_coef_cs(iEdge,iCell)*v(k,EdgesOnCell(iEdge,iCell)) end do + end do - h_mom_eddy_visc4 = config_visc4_2dsmag * config_len_disp**3 - h_theta_eddy_visc4 = h_mom_eddy_visc4 +!DIR$ IVDEP + do k=1, nVertLevels + ! here is the Smagorinsky formulation, + ! 
followed by imposition of an upper bound on the eddy viscosity + d_11(k) = 2*dudx(k) + d_22(k) = 2*dvdy(k) + d_12(k) = dudy(k) + dvdx(k) + kdiff(k,iCell) = (c_s * config_len_disp)**2 * sqrt(0.25*(d_11(k)-d_22(k))**2 + d_12(k)**2) + kdiff(k,iCell) = min(kdiff(k,iCell),(0.01*config_len_disp**2) * invDt) + end do + end do - if(debug_dissipation) call mpas_log_write(' exiting smagorinsky_2d ') + h_mom_eddy_visc4 = config_visc4_2dsmag * config_len_disp**3 + h_theta_eddy_visc4 = h_mom_eddy_visc4 + + if(debug_dissipation) call mpas_log_write(' exiting smagorinsky_2d ') - end subroutine smagorinsky_2d + end subroutine smagorinsky_2d !--------------------------------------- - subroutine les_models( config_les_model, config_les_surface, dynamics_substep, eddy_visc_horz, eddy_visc_vert, & - u, v, uCell, vCell, & - w, c_s, bv_freq2, zgrid, config_len_disp, & - deformation_coef_c2, deformation_coef_s2, deformation_coef_cs, & - deformation_coef_c, deformation_coef_s, prandtl_3d_inv, & - invDt, h_mom_eddy_visc4, config_visc4_2dsmag, h_theta_eddy_visc4, & - scalars, tend_scalars, index_tke, rho_zz, meshScalingDel2, & - cellStart, cellEnd, nEdgesOnCell, edgesOnCell, cellsOnEdge, & - nCells, nEdges, nVertLevels, maxEdges, num_scalars ) + subroutine les_models( config_les_model, config_les_surface, dynamics_substep, eddy_visc_horz, eddy_visc_vert, & + u, v, uCell, vCell, & + w, c_s, bv_freq2, zgrid, config_len_disp, & + deformation_coef_c2, deformation_coef_s2, deformation_coef_cs, & + deformation_coef_c, deformation_coef_s, prandtl_3d_inv, & + invDt, h_mom_eddy_visc4, config_visc4_2dsmag, h_theta_eddy_visc4, & + scalars, tend_scalars, index_tke, rho_zz, meshScalingDel2, & + cellStart, cellEnd, nEdgesOnCell, edgesOnCell, cellsOnEdge, & + nCells, nEdges, nVertLevels, maxEdges, num_scalars ) implicit none @@ -169,167 +171,167 @@ subroutine les_models( config_les_model, config_les_surface, dynamics_substep, e do iCell = cellStart,cellEnd - dudx(1:nVertLevels) = 0.0 - dudy(1:nVertLevels) 
= 0.0 - dvdx(1:nVertLevels) = 0.0 - dvdy(1:nVertLevels) = 0.0 - dwdx(1:nVertLevels+1) = 0.0 - dwdy(1:nVertLevels+1) = 0.0 - - dudz(1:nVertLevels) = 0.0 - dvdz(1:nVertLevels) = 0.0 - dwdz(1:nVertLevels) = 0.0 - - do iEdge=1,nEdgesOnCell(iCell) - - ie = EdgesOnCell(iEdge,iCell) - cell1 = cellsOnEdge(1,ie) - cell2 = cellsOnEdge(2,ie) - - do k=1,nVertLevels - dudx(k) = dudx(k) + deformation_coef_c2(iEdge,iCell)*u(k,ie) & - - deformation_coef_cs(iEdge,iCell)*v(k,ie) - dudy(k) = dudy(k) + deformation_coef_cs(iEdge,iCell)*u(k,ie) & - - deformation_coef_s2(iEdge,iCell)*v(k,ie) - dvdx(k) = dvdx(k) + deformation_coef_cs(iEdge,iCell)*u(k,ie) & - + deformation_coef_c2(iEdge,iCell)*v(k,ie) - dvdy(k) = dvdy(k) + deformation_coef_s2(iEdge,iCell)*u(k,ie) & - + deformation_coef_cs(iEdge,iCell)*v(k,ie) - end do - - do k=1,nVertLevels+1 - wk = 0.5*(w(k,cell1)+w(k,cell2)) - dwdx(k) = dwdx(k) + deformation_coef_c(iEdge,iCell)*wk - dwdy(k) = dwdy(k) + deformation_coef_s(iEdge,iCell)*wk - end do - - end do - - do k=1,nVertLevels - rdz = 1./(zgrid(k+1,iCell)-zgrid(k,iCell)) - dwdz(k) = (w(k+1,iCell)-w(k,iCell))*rdz - end do + dudx(1:nVertLevels) = 0.0 + dudy(1:nVertLevels) = 0.0 + dvdx(1:nVertLevels) = 0.0 + dvdy(1:nVertLevels) = 0.0 + dwdx(1:nVertLevels+1) = 0.0 + dwdy(1:nVertLevels+1) = 0.0 + + dudz(1:nVertLevels) = 0.0 + dvdz(1:nVertLevels) = 0.0 + dwdz(1:nVertLevels) = 0.0 - do k=2,nVertLevels-1 - rdz = 1./(zgrid(k+2,iCell)+zgrid(k+1,iCell)-zgrid(k,iCell)-zgrid(k-1,iCell)) - dudz(k) = (uCell(k+1,iCell)-uCell(k-1,iCell))*rdz - dvdz(k) = (vCell(k+1,iCell)-vCell(k-1,iCell))*rdz - end do + do iEdge=1,nEdgesOnCell(iCell) - k = 1 - rdz = 1./(zgrid(k+1,iCell)-zgrid(k,iCell)) - dudz(k) = (uCell(k+1,iCell)-uCell(k,iCell))*rdz - dvdz(k) = (vCell(k+1,iCell)-vCell(k,iCell))*rdz + ie = EdgesOnCell(iEdge,iCell) + cell1 = cellsOnEdge(1,ie) + cell2 = cellsOnEdge(2,ie) - k = nVertLevels-1 - rdz = 1./(zgrid(k+1,iCell)-zgrid(k,iCell)) - dudz(k+1) = (uCell(k+1,iCell)-uCell(k,iCell))*rdz - dvdz(k+1) = 
(vCell(k+1,iCell)-vCell(k,iCell))*rdz + do k=1,nVertLevels + dudx(k) = dudx(k) + deformation_coef_c2(iEdge,iCell)*u(k,ie) & + - deformation_coef_cs(iEdge,iCell)*v(k,ie) + dudy(k) = dudy(k) + deformation_coef_cs(iEdge,iCell)*u(k,ie) & + - deformation_coef_s2(iEdge,iCell)*v(k,ie) + dvdx(k) = dvdx(k) + deformation_coef_cs(iEdge,iCell)*u(k,ie) & + + deformation_coef_c2(iEdge,iCell)*v(k,ie) + dvdy(k) = dvdy(k) + deformation_coef_s2(iEdge,iCell)*u(k,ie) & + + deformation_coef_cs(iEdge,iCell)*v(k,ie) + end do + + do k=1,nVertLevels+1 + wk = 0.5*(w(k,cell1)+w(k,cell2)) + dwdx(k) = dwdx(k) + deformation_coef_c(iEdge,iCell)*wk + dwdy(k) = dwdy(k) + deformation_coef_s(iEdge,iCell)*wk + end do + + end do + + do k=1,nVertLevels + rdz = 1./(zgrid(k+1,iCell)-zgrid(k,iCell)) + dwdz(k) = (w(k+1,iCell)-w(k,iCell))*rdz + end do - do k=1, nVertLevels - d_11(k) = 2.*dudx(k) - d_22(k) = 2.*dvdy(k) - d_33(k) = 2.*dwdz(k) - d_12(k) = dudy(k) + dvdx(k) - d_13(k) = dwdx(k) + dudz(k) - d_23(k) = dwdy(k) + dvdz(k) - end do + do k=2,nVertLevels-1 + rdz = 1./(zgrid(k+2,iCell)+zgrid(k+1,iCell)-zgrid(k,iCell)-zgrid(k-1,iCell)) + dudz(k) = (uCell(k+1,iCell)-uCell(k-1,iCell))*rdz + dvdz(k) = (vCell(k+1,iCell)-vCell(k-1,iCell))*rdz + end do - if (config_les_model == "3d_smagorinsky") then + k = 1 + rdz = 1./(zgrid(k+1,iCell)-zgrid(k,iCell)) + dudz(k) = (uCell(k+1,iCell)-uCell(k,iCell))*rdz + dvdz(k) = (vCell(k+1,iCell)-vCell(k,iCell))*rdz + + k = nVertLevels-1 + rdz = 1./(zgrid(k+1,iCell)-zgrid(k,iCell)) + dudz(k+1) = (uCell(k+1,iCell)-uCell(k,iCell))*rdz + dvdz(k+1) = (vCell(k+1,iCell)-vCell(k,iCell))*rdz + + do k=1, nVertLevels + d_11(k) = 2.*dudx(k) + d_22(k) = 2.*dvdy(k) + d_33(k) = 2.*dwdz(k) + d_12(k) = dudy(k) + dvdx(k) + d_13(k) = dwdx(k) + dudz(k) + d_23(k) = dwdy(k) + dvdz(k) + end do - do k=1, nVertLevels - def2 = 0.5*(d_11(k)**2 + d_22(k)**2 + d_33(k)**2) + d_12(k)**2 + d_13(k)**2 + d_23(k)**2 - eddy_visc_horz(k,iCell) = (c_s * config_len_disp)**2 * sqrt(max(0.,def2 - 
pr_inv*bv_freq2(k,iCell))) - eddy_visc_horz(k,iCell) = min(eddy_visc_horz(k,iCell),(0.01*config_len_disp**2) * invDt) - delta_z = zgrid(k+1,iCell)-zgrid(k,iCell) - eddy_visc_vert(k,iCell) = (c_s * delta_z)**2 * sqrt(max(0.,def2 - pr_inv*bv_freq2(k,iCell))) - ! eddy_visc_vert(k,iCell) = eddy_visc_horz(k,iCell) - end do + if (config_les_model == "3d_smagorinsky") then - else if (config_les_model == "prognostic_1.5_order") then + do k=1, nVertLevels + def2 = 0.5*(d_11(k)**2 + d_22(k)**2 + d_33(k)**2) + d_12(k)**2 + d_13(k)**2 + d_23(k)**2 + eddy_visc_horz(k,iCell) = (c_s * config_len_disp)**2 * sqrt(max(0.,def2 - pr_inv*bv_freq2(k,iCell))) + eddy_visc_horz(k,iCell) = min(eddy_visc_horz(k,iCell),(0.01*config_len_disp**2) * invDt) + delta_z = zgrid(k+1,iCell)-zgrid(k,iCell) + eddy_visc_vert(k,iCell) = (c_s * delta_z)**2 * sqrt(max(0.,def2 - pr_inv*bv_freq2(k,iCell))) + ! eddy_visc_vert(k,iCell) = eddy_visc_horz(k,iCell) + end do - do k=1,nVertLevels ! bound the tke here, currently hardwired - ! scalars(index_tke,k,iCell) = max(0.,min(100.,scalars(index_tke,k,iCell))) - scalars(index_tke,k,iCell) = max(0.,scalars(index_tke,k,iCell)) - end do + else if (config_les_model == "prognostic_1.5_order") then - do k=1,nVertLevels + do k=1,nVertLevels ! bound the tke here, currently hardwired + ! scalars(index_tke,k,iCell) = max(0.,min(100.,scalars(index_tke,k,iCell))) + scalars(index_tke,k,iCell) = max(0.,scalars(index_tke,k,iCell)) + end do - delta_z = zgrid(k+1,iCell)-zgrid(k,iCell) - delta_s = ((config_len_disp**2)*delta_z)**(1./3.) - bv = max( sqrt(abs(bv_freq2(k,iCell))), epsilon_bv ) - tke_length = delta_s - ! isentropic mixing formulation - if(bv_freq2(k,iCell) .gt. 1.e-06) & - tke_length = 0.76*sqrt(scalars(index_tke,k,iCell))/bv - tke_length = min(tke_length, delta_z) - diss_length = min(delta_s,max(tke_length,0.01*delta_s)) - if(bv_freq2(k,iCell) <= 0) diss_length = delta_s + do k=1,nVertLevels - ! 
non-isotropic mixing + delta_z = zgrid(k+1,iCell)-zgrid(k,iCell) + delta_s = ((config_len_disp**2)*delta_z)**(1./3.) + bv = max( sqrt(abs(bv_freq2(k,iCell))), epsilon_bv ) + tke_length = delta_s + ! isentropic mixing formulation + if(bv_freq2(k,iCell) .gt. 1.e-06) & + tke_length = 0.76*sqrt(scalars(index_tke,k,iCell))/bv + tke_length = min(tke_length, delta_z) + diss_length = min(delta_s,max(tke_length,0.01*delta_s)) + if(bv_freq2(k,iCell) <= 0) diss_length = delta_s - l_horizontal = config_len_disp - l_vertical = min(delta_z,tke_length) - if(bv_freq2(k,iCell) <= 0) diss_length = delta_z + ! non-isotropic mixing - ! isotropic mixing + l_horizontal = config_len_disp + l_vertical = min(delta_z,tke_length) + if(bv_freq2(k,iCell) <= 0) diss_length = delta_z - ! l_horizontal = min(delta_s,tke_length) - ! if(bv_freq2(k,iCell) <= 0) diss_length = delta_s - ! l_vertical = l_horizontal + ! isotropic mixing - ! eddy viscocities set here if we are running the 1.5 order prognostic tke scheme - eddy_visc_h = c_k*l_horizontal*sqrt(scalars(index_tke,k,iCell)) - eddy_visc_h = min(eddy_visc_h,(0.01*config_len_disp**2) * invDt) - eddy_visc_v = c_k*l_vertical*sqrt(scalars(index_tke,k,iCell)) - eddy_visc_v = min(eddy_visc_v,(0.01*delta_z**2) * invDt) + ! l_horizontal = min(delta_s,tke_length) + ! if(bv_freq2(k,iCell) <= 0) diss_length = delta_s + ! l_vertical = l_horizontal - eddy_visc_horz(k,iCell) = eddy_visc_h - eddy_visc_vert(k,iCell) = eddy_visc_v + ! eddy viscocities set here if we are running the 1.5 order prognostic tke scheme + eddy_visc_h = c_k*l_horizontal*sqrt(scalars(index_tke,k,iCell)) + eddy_visc_h = min(eddy_visc_h,(0.01*config_len_disp**2) * invDt) + eddy_visc_v = c_k*l_vertical*sqrt(scalars(index_tke,k,iCell)) + eddy_visc_v = min(eddy_visc_v,(0.01*delta_z**2) * invDt) - ! 
terms for the prognostic tke integration + eddy_visc_horz(k,iCell) = eddy_visc_h + eddy_visc_vert(k,iCell) = eddy_visc_v - shear_production = eddy_visc_h*(d_11(k)**2 + d_22(k)**2 + d_12(k)**2) & - +eddy_visc_v*(d_33(k)**2 + d_13(k)**2 + d_23(k)**2) + ! terms for the prognostic tke integration - buoyancy = -eddy_visc_v*bv_freq2(k,iCell) + shear_production = eddy_visc_h*(d_11(k)**2 + d_22(k)**2 + d_12(k)**2) & + +eddy_visc_v*(d_33(k)**2 + d_13(k)**2 + d_23(k)**2) - ! dissipation + buoyancy = -eddy_visc_v*bv_freq2(k,iCell) - c_dissipation = 1.9*c_k + max( 0.0, 0.93 - 1.9*c_k )*diss_length/delta_s - ! if( (k.eq. 1) .or. (k.eq.nVertLevels) ) c_dissipation = 3.9 + ! dissipation - dissipation = -c_dissipation*(scalars(index_tke,k,iCell)**(1.5))/diss_length + c_dissipation = 1.9*c_k + max( 0.0, 0.93 - 1.9*c_k )*diss_length/delta_s + ! if( (k.eq. 1) .or. (k.eq.nVertLevels) ) c_dissipation = 3.9 - ! computing eddy viscosities ********* + dissipation = -c_dissipation*(scalars(index_tke,k,iCell)**(1.5))/diss_length - prandtl_horizontal_inv = 3. - prandtl_3d_inv(k,iCell) = 1.0+(2.0*l_vertical/delta_z) + ! computing eddy viscosities ********* + prandtl_horizontal_inv = 3. + prandtl_3d_inv(k,iCell) = 1.0+(2.0*l_vertical/delta_z) - ! RHS term for the subgrid ke. - if(dynamics_substep == 1) & - tend_scalars(index_tke,k,iCell) = rho_zz(k,iCell)*( shear_production + buoyancy + dissipation ) + ! RHS term for the subgrid ke. - end do + if(dynamics_substep == 1) & + tend_scalars(index_tke,k,iCell) = rho_zz(k,iCell)*( shear_production + buoyancy + dissipation ) - else + end do - call mpas_log_write(' in les_models, no les scheme for '//trim(config_les_model), messageType=MPAS_LOG_CRIT) + else - end if ! end of config_les_model test + call mpas_log_write(' in les_models, no les scheme for '//trim(config_les_model), messageType=MPAS_LOG_CRIT) + + end if ! end of config_les_model test end do ! 
loop over all owned cells (columns) if(debug_dissipation) call mpas_log_write(' les_models ') - end subroutine les_models + end subroutine les_models !--------------------------------------- - subroutine calculate_n2( bn2, theta_m, exner, pressure_b, pp, zgrid, scalars, index_qv, index_qc, qtot, & - cellStart, cellEnd, nCells) + subroutine calculate_n2( bn2, theta_m, exner, pressure_b, pp, zgrid, scalars, index_qv, index_qc, qtot, & + cellStart, cellEnd, nCells) use mpas_atm_dimensions ! pull nVertLevels and num_scalars from here @@ -346,85 +348,86 @@ subroutine calculate_n2( bn2, theta_m, exner, pressure_b, pp, zgrid, scalars, in real (kind=RKIND), dimension(nVertLevels) :: theta, qvsw, temp, coefa logical :: dry_bv_frequency - if(debug_dissipation) call mpas_log_write(' begin BV frequency calculations ') + if(debug_dissipation) call mpas_log_write(' begin BV frequency calculations ') - do iCell = cellStart,cellEnd + do iCell = cellStart,cellEnd !DIR$ IVDEP - do k=1, nVertLevels + do k=1, nVertLevels - theta(k) = theta_m(k,iCell) / (1._RKIND + rvord * scalars(index_qv,k,iCell)) + theta(k) = theta_m(k,iCell) / (1._RKIND + rvord * scalars(index_qv,k,iCell)) - temp(k) = exner(k,iCell) * theta(k) + temp(k) = exner(k,iCell) * theta(k) - p = pressure_b(k,iCell) + pp(k,iCell) - esw = 1000. * svp1 * exp(svp2 * (temp(k) - svpt0) / (temp(k) - svp3)) - if (p < esw) esw = p * 0.99 ! fix for pressure < esw - qvsw(k) = ep_2 * esw / (p - esw) + p = pressure_b(k,iCell) + pp(k,iCell) + esw = 1000. * svp1 * exp(svp2 * (temp(k) - svpt0) / (temp(k) - svp3)) + if (p < esw) esw = p * 0.99 ! 
fix for pressure < esw + qvsw(k) = ep_2 * esw / (p - esw) - coefa(k) = ( 1.0 + xlv * qvsw(k)/ R_d / temp(k) ) / & - ( 1.0 + xlv * xlv *qvsw(k) / Cp / R_v / temp(k) / temp(k) ) + coefa(k) = ( 1.0 + xlv * qvsw(k)/ R_d / temp(k) ) / & + ( 1.0 + xlv * xlv *qvsw(k) / Cp / R_v / temp(k) / temp(k) ) - end do + end do - do k=2, nVertLevels-1 - dz = 0.5 * (zgrid(k+2,iCell)+zgrid(k+1,iCell)) - 0.5 * (zgrid(k,iCell)+zgrid(k-1,iCell)) - rdz = 1.0/dz - - ! if ( scalars(index_qc,k,iCell) < qc_cr ) then - ! ! Dry Brunt-Vaisala frequency - ! bn2(k,iCell) = gravity * ((theta(k+1) - theta(k-1) ) / theta(k) / dz & - ! + rvord * (scalars(index_qv,k+1,iCell) - scalars(index_qv,k-1,iCell)) / dz & - ! - ( qtot(k+1, iCell) - qtot(k-1, iCell) ) / dz ) - ! else - ! ! Moist Brunt-Vaisala frequency according to Durran and Klemp (1982) Eq. 36 - ! bn2(k,iCell) = gravity * ( coefa(k) * ((theta(k+1) - theta(k-1) ) / theta(k) / dz & - ! + xlv / cp / temp(k) * ( qvsw(k+1) - qvsw(k-1)) / dz ) & - ! - ( qtot(k+1, iCell) - qtot(k-1, iCell) ) / dz ) - ! endif - - dry_bv_frequency = .true. - if(index_qc .gt. 0) then ! if moist simulation, qc exists - if ( scalars(index_qc,k,iCell) .ge. qc_cr ) dry_bv_frequency = .false. - end if - - if (dry_bv_frequency) then - ! Dry Brunt-Vaisala frequency - bn2(k,iCell) = gravity * ((theta(k+1) - theta(k-1) ) / theta(k) * rdz & - + rvord * (scalars(index_qv,k+1,iCell) - scalars(index_qv,k-1,iCell)) * rdz & - - ( qtot(k+1, iCell) - qtot(k-1, iCell) ) * rdz ) - else - ! Moist Brunt-Vaisala frequency according to Durran and Klemp (1982) Eq. 36 - bn2(k,iCell) = gravity * ( coefa(k) * ((theta(k+1) - theta(k-1) ) / theta(k) * rdz & - + xlv / cp / temp(k) * ( qvsw(k+1) - qvsw(k-1)) * rdz ) & - - ( qtot(k+1, iCell) - qtot(k-1, iCell) ) * rdz ) - endif + do k=2, nVertLevels-1 + dz = 0.5 * (zgrid(k+2,iCell)+zgrid(k+1,iCell)) - 0.5 * (zgrid(k,iCell)+zgrid(k-1,iCell)) + rdz = 1.0/dz + + ! if ( scalars(index_qc,k,iCell) < qc_cr ) then + ! ! Dry Brunt-Vaisala frequency + ! 
bn2(k,iCell) = gravity * ((theta(k+1) - theta(k-1) ) / theta(k) / dz & + ! + rvord * (scalars(index_qv,k+1,iCell) - scalars(index_qv,k-1,iCell)) / dz & + ! - ( qtot(k+1, iCell) - qtot(k-1, iCell) ) / dz ) + ! else + ! ! Moist Brunt-Vaisala frequency according to Durran and Klemp (1982) Eq. 36 + ! bn2(k,iCell) = gravity * ( coefa(k) * ((theta(k+1) - theta(k-1) ) / theta(k) / dz & + ! + xlv / cp / temp(k) * ( qvsw(k+1) - qvsw(k-1)) / dz ) & + ! - ( qtot(k+1, iCell) - qtot(k-1, iCell) ) / dz ) + ! endif + + dry_bv_frequency = .true. + if(index_qc .gt. 0) then ! if moist simulation, qc exists + if ( scalars(index_qc,k,iCell) .ge. qc_cr ) dry_bv_frequency = .false. + end if - end do - bn2(1,iCell) = bn2(2,iCell) - bn2(nVertLevels,iCell) = bn2(nVertLevels-1,iCell) + if (dry_bv_frequency) then + ! Dry Brunt-Vaisala frequency + bn2(k,iCell) = gravity * ((theta(k+1) - theta(k-1) ) / theta(k) * rdz & + + rvord * (scalars(index_qv,k+1,iCell) - scalars(index_qv,k-1,iCell)) * rdz & + - ( qtot(k+1, iCell) - qtot(k-1, iCell) ) * rdz ) + else + ! Moist Brunt-Vaisala frequency according to Durran and Klemp (1982) Eq. 
36 + bn2(k,iCell) = gravity * ( coefa(k) * ((theta(k+1) - theta(k-1) ) / theta(k) * rdz & + + xlv / cp / temp(k) * ( qvsw(k+1) - qvsw(k-1)) * rdz ) & + - ( qtot(k+1, iCell) - qtot(k-1, iCell) ) * rdz ) + endif - end do + end do + + bn2(1,iCell) = bn2(2,iCell) + bn2(nVertLevels,iCell) = bn2(nVertLevels-1,iCell) - if(debug_dissipation) call mpas_log_write(' exiting BV frequency calculations ') + end do - end subroutine calculate_n2 + if(debug_dissipation) call mpas_log_write(' exiting BV frequency calculations ') + + end subroutine calculate_n2 !--------------------------------------- - subroutine u_dissipation_3d( edgeStart, edgeEnd, edgeSolveStart, edgeSolveEnd, vertexStart, vertexEnd, & - cellStart, cellEnd, nCells, nEdges, nVertices, vertexDegree, & - cellsOnEdge, verticesOnEdge, edgesOnCell, edgesOnVertex, & - nEdgesOnCell, edgesOnCell_sign, edgesOnVertex_sign, & - invAreaCell, invAreaTriangle, invDvEdge, invDcEdge, & - angleEdge, dcEdge, dvEdge, meshScalingDel2, meshScalingDel4, & - config_mix_full, h_mom_eddy_visc4, v_mom_eddy_visc2, & - config_del4u_div_factor, zgrid, & - eddy_visc_horz, eddy_visc_vert, zz, rdzu, rdzw, & - fzm, fzp, config_les_model, config_les_surface, & - config_surface_drag_coefficient, & - delsq_u, delsq_vorticity, delsq_divergence, & - u, v, divergence, vorticity, rho_edge, rho_zz, u_init, v_init, ustm, & - tend_u_euler ) + subroutine u_dissipation_3d( edgeStart, edgeEnd, edgeSolveStart, edgeSolveEnd, vertexStart, vertexEnd, & + cellStart, cellEnd, nCells, nEdges, nVertices, vertexDegree, & + cellsOnEdge, verticesOnEdge, edgesOnCell, edgesOnVertex, & + nEdgesOnCell, edgesOnCell_sign, edgesOnVertex_sign, & + invAreaCell, invAreaTriangle, invDvEdge, invDcEdge, & + angleEdge, dcEdge, dvEdge, meshScalingDel2, meshScalingDel4, & + config_mix_full, h_mom_eddy_visc4, v_mom_eddy_visc2, & + config_del4u_div_factor, zgrid, & + eddy_visc_horz, eddy_visc_vert, zz, rdzu, rdzw, & + fzm, fzp, config_les_model, config_les_surface, & + 
config_surface_drag_coefficient, & + delsq_u, delsq_vorticity, delsq_divergence, & + u, v, divergence, vorticity, rho_edge, rho_zz, u_init, v_init, ustm, & + tend_u_euler ) use mpas_atm_dimensions ! pull nVertLevels and maxEdges from here @@ -503,238 +506,238 @@ subroutine u_dissipation_3d( edgeStart, edgeEnd, edgeSolveStart, edgeSolveEnd, v real (kind=RKIND) :: velocity_magnitude real (kind=RKIND) :: tau_12_factor - if(debug_dissipation) then - call mpas_log_write(' begin u_dissipation_3d ') - call mpas_log_write(' les model is '//trim(config_les_model)) - call mpas_log_write(' les surface is '//trim(config_les_surface)) - call mpas_log_write(' 4th order hyperviscosity is $r ', realArgs=(/h_mom_eddy_visc4/)) - call mpas_log_write(' 4th order divergence factor is $r ', realArgs=(/config_del4u_div_factor/)) - end if + if(debug_dissipation) then + call mpas_log_write(' begin u_dissipation_3d ') + call mpas_log_write(' les model is '//trim(config_les_model)) + call mpas_log_write(' les surface is '//trim(config_les_surface)) + call mpas_log_write(' 4th order hyperviscosity is $r ', realArgs=(/h_mom_eddy_visc4/)) + call mpas_log_write(' 4th order divergence factor is $r ', realArgs=(/config_del4u_div_factor/)) + end if !$OMP BARRIER - ! del^4 horizontal filter. We compute this as del^2 ( del^2 (u) ). - ! First, storage to hold the result from the first del^2 computation. + ! del^4 horizontal filter. We compute this as del^2 ( del^2 (u) ). + ! First, storage to hold the result from the first del^2 computation. 
- delsq_u(1:nVertLevels,edgeStart:edgeEnd) = 0.0 - tau_12_factor = 0.0 - if(config_les_model /= 'none') tau_12_factor = 1.0 + delsq_u(1:nVertLevels,edgeStart:edgeEnd) = 0.0 + tau_12_factor = 0.0 + if(config_les_model /= 'none') tau_12_factor = 1.0 - do iEdge=edgeStart,edgeEnd - cell1 = cellsOnEdge(1,iEdge) - cell2 = cellsOnEdge(2,iEdge) - vertex1 = verticesOnEdge(1,iEdge) - vertex2 = verticesOnEdge(2,iEdge) - r_dc = invDcEdge(iEdge) - r_dv = min(invDvEdge(iEdge), 4*invDcEdge(iEdge)) + do iEdge=edgeStart,edgeEnd + cell1 = cellsOnEdge(1,iEdge) + cell2 = cellsOnEdge(2,iEdge) + vertex1 = verticesOnEdge(1,iEdge) + vertex2 = verticesOnEdge(2,iEdge) + r_dc = invDcEdge(iEdge) + r_dv = min(invDvEdge(iEdge), 4*invDcEdge(iEdge)) !DIR$ IVDEP - do k=1,nVertLevels + do k=1,nVertLevels - ! Compute diffusion, computed as \nabla divergence - k \times \nabla vorticity - ! only valid for h_mom_eddy_visc4 == constant - u_diffusion = ( divergence(k,cell2) - divergence(k,cell1) ) * r_dc & - -( vorticity(k,vertex2) - vorticity(k,vertex1) ) * r_dv - ! for LES models we need 2 times the gradient of divergence, in contrast to what is - ! saved and used to calculate the 4th-order horizontal filter - u_diffusion_les = u_diffusion + tau_12_factor * ( divergence(k,cell2) - divergence(k,cell1) ) * r_dc + ! Compute diffusion, computed as \nabla divergence - k \times \nabla vorticity + ! only valid for h_mom_eddy_visc4 == constant + u_diffusion = ( divergence(k,cell2) - divergence(k,cell1) ) * r_dc & + -( vorticity(k,vertex2) - vorticity(k,vertex1) ) * r_dv + ! for LES models we need 2 times the gradient of divergence, in contrast to what is + ! 
saved and used to calculate the 4th-order horizontal filter + u_diffusion_les = u_diffusion + tau_12_factor * ( divergence(k,cell2) - divergence(k,cell1) ) * r_dc - delsq_u(k,iEdge) = delsq_u(k,iEdge) + u_diffusion + delsq_u(k,iEdge) = delsq_u(k,iEdge) + u_diffusion - kdiffu = 0.5*(eddy_visc_horz(k,cell1)+eddy_visc_horz(k,cell2)) + kdiffu = 0.5*(eddy_visc_horz(k,cell1)+eddy_visc_horz(k,cell2)) - ! include 2nd-order diffusion here - tend_u_euler(k,iEdge) = tend_u_euler(k,iEdge) & - + rho_edge(k,iEdge)* kdiffu * u_diffusion_les * meshScalingDel2(iEdge) + ! include 2nd-order diffusion here + tend_u_euler(k,iEdge) = tend_u_euler(k,iEdge) & + + rho_edge(k,iEdge)* kdiffu * u_diffusion_les * meshScalingDel2(iEdge) - end do end do + end do - if (h_mom_eddy_visc4 > 0.0) then ! 4th order mixing is active + if (h_mom_eddy_visc4 > 0.0) then ! 4th order mixing is active !$OMP BARRIER - do iVertex=vertexStart,vertexEnd - delsq_vorticity(1:nVertLevels,iVertex) = 0.0 - do i=1,vertexDegree - iEdge = edgesOnVertex(i,iVertex) - edge_sign = invAreaTriangle(iVertex) * dcEdge(iEdge) * edgesOnVertex_sign(i,iVertex) - do k=1,nVertLevels - delsq_vorticity(k,iVertex) = delsq_vorticity(k,iVertex) + edge_sign * delsq_u(k,iEdge) - end do + do iVertex=vertexStart,vertexEnd + delsq_vorticity(1:nVertLevels,iVertex) = 0.0 + do i=1,vertexDegree + iEdge = edgesOnVertex(i,iVertex) + edge_sign = invAreaTriangle(iVertex) * dcEdge(iEdge) * edgesOnVertex_sign(i,iVertex) + do k=1,nVertLevels + delsq_vorticity(k,iVertex) = delsq_vorticity(k,iVertex) + edge_sign * delsq_u(k,iEdge) end do end do + end do - do iCell=cellStart,cellEnd - delsq_divergence(1:nVertLevels,iCell) = 0.0 - r = invAreaCell(iCell) - do i=1,nEdgesOnCell(iCell) - iEdge = edgesOnCell(i,iCell) - edge_sign = r * dvEdge(iEdge) * edgesOnCell_sign(i,iCell) - do k=1,nVertLevels - delsq_divergence(k,iCell) = delsq_divergence(k,iCell) + edge_sign * delsq_u(k,iEdge) - end do + do iCell=cellStart,cellEnd + delsq_divergence(1:nVertLevels,iCell) = 0.0 
+ r = invAreaCell(iCell) + do i=1,nEdgesOnCell(iCell) + iEdge = edgesOnCell(i,iCell) + edge_sign = r * dvEdge(iEdge) * edgesOnCell_sign(i,iCell) + do k=1,nVertLevels + delsq_divergence(k,iCell) = delsq_divergence(k,iCell) + edge_sign * delsq_u(k,iEdge) end do end do + end do !$OMP BARRIER - do iEdge=edgeSolveStart,edgeSolveEnd - cell1 = cellsOnEdge(1,iEdge) - cell2 = cellsOnEdge(2,iEdge) - vertex1 = verticesOnEdge(1,iEdge) - vertex2 = verticesOnEdge(2,iEdge) + do iEdge=edgeSolveStart,edgeSolveEnd + cell1 = cellsOnEdge(1,iEdge) + cell2 = cellsOnEdge(2,iEdge) + vertex1 = verticesOnEdge(1,iEdge) + vertex2 = verticesOnEdge(2,iEdge) - u_mix_scale = meshScalingDel4(iEdge)*h_mom_eddy_visc4 - r_dc = u_mix_scale * config_del4u_div_factor * invDcEdge(iEdge) - r_dv = u_mix_scale * min(invDvEdge(iEdge), 4*invDcEdge(iEdge)) + u_mix_scale = meshScalingDel4(iEdge)*h_mom_eddy_visc4 + r_dc = u_mix_scale * config_del4u_div_factor * invDcEdge(iEdge) + r_dv = u_mix_scale * min(invDvEdge(iEdge), 4*invDcEdge(iEdge)) !DIR$ IVDEP - do k=1,nVertLevels + do k=1,nVertLevels - ! Compute diffusion, computed as \nabla divergence - k \times \nabla vorticity - ! only valid for h_mom_eddy_visc4 == constant - ! - ! Here, we scale the diffusion on the divergence part a factor of config_del4u_div_factor - ! relative to the rotational part. The stability constraint on the divergence component is much less - ! stringent than the rotational part, and this flexibility may be useful. - ! - u_diffusion = rho_edge(k,iEdge) * ( ( delsq_divergence(k,cell2) - delsq_divergence(k,cell1) ) * r_dc & - -( delsq_vorticity(k,vertex2) - delsq_vorticity(k,vertex1) ) * r_dv ) - tend_u_euler(k,iEdge) = tend_u_euler(k,iEdge) - u_diffusion + ! Compute diffusion, computed as \nabla divergence - k \times \nabla vorticity + ! only valid for h_mom_eddy_visc4 == constant + ! + ! Here, we scale the diffusion on the divergence part a factor of config_del4u_div_factor + ! relative to the rotational part. 
The stability constraint on the divergence component is much less + ! stringent than the rotational part, and this flexibility may be useful. + ! + u_diffusion = rho_edge(k,iEdge) * ( ( delsq_divergence(k,cell2) - delsq_divergence(k,cell1) ) * r_dc & + -( delsq_vorticity(k,vertex2) - delsq_vorticity(k,vertex1) ) * r_dv ) + tend_u_euler(k,iEdge) = tend_u_euler(k,iEdge) - u_diffusion - end do end do + end do - end if ! 4th order mixing is active + end if ! 4th order mixing is active ! ! vertical mixing for u - 2nd order filter in physical (z) space ! - if ( v_mom_eddy_visc2 > 0.0 ) then + if ( v_mom_eddy_visc2 > 0.0 ) then - if (config_mix_full) then ! mix full state + if (config_mix_full) then ! mix full state - do iEdge=edgeSolveStart,edgeSolveEnd + do iEdge=edgeSolveStart,edgeSolveEnd - cell1 = cellsOnEdge(1,iEdge) - cell2 = cellsOnEdge(2,iEdge) + cell1 = cellsOnEdge(1,iEdge) + cell2 = cellsOnEdge(2,iEdge) - do k=2,nVertLevels-1 + do k=2,nVertLevels-1 - z1 = 0.5*(zgrid(k-1,cell1)+zgrid(k-1,cell2)) - z2 = 0.5*(zgrid(k ,cell1)+zgrid(k ,cell2)) - z3 = 0.5*(zgrid(k+1,cell1)+zgrid(k+1,cell2)) - z4 = 0.5*(zgrid(k+2,cell1)+zgrid(k+2,cell2)) + z1 = 0.5*(zgrid(k-1,cell1)+zgrid(k-1,cell2)) + z2 = 0.5*(zgrid(k ,cell1)+zgrid(k ,cell2)) + z3 = 0.5*(zgrid(k+1,cell1)+zgrid(k+1,cell2)) + z4 = 0.5*(zgrid(k+2,cell1)+zgrid(k+2,cell2)) - zm = 0.5*(z1+z2) - z0 = 0.5*(z2+z3) - zp = 0.5*(z3+z4) + zm = 0.5*(z1+z2) + z0 = 0.5*(z2+z3) + zp = 0.5*(z3+z4) - tend_u_euler(k,iEdge) = tend_u_euler(k,iEdge) + rho_edge(k,iEdge) * v_mom_eddy_visc2*( & - (u(k+1,iEdge)-u(k ,iEdge))/(zp-z0) & - -(u(k ,iEdge)-u(k-1,iEdge))/(z0-zm) )/(0.5*(zp-zm)) - end do + tend_u_euler(k,iEdge) = tend_u_euler(k,iEdge) + rho_edge(k,iEdge) * v_mom_eddy_visc2*( & + (u(k+1,iEdge)-u(k ,iEdge))/(zp-z0) & + -(u(k ,iEdge)-u(k-1,iEdge))/(z0-zm) )/(0.5*(zp-zm)) end do + end do - else ! idealized cases where we mix on the perturbation from the initial 1-D state + else ! 
idealized cases where we mix on the perturbation from the initial 1-D state - do iEdge=edgeSolveStart,edgeSolveEnd + do iEdge=edgeSolveStart,edgeSolveEnd - cell1 = cellsOnEdge(1,iEdge) - cell2 = cellsOnEdge(2,iEdge) + cell1 = cellsOnEdge(1,iEdge) + cell2 = cellsOnEdge(2,iEdge) - do k=1,nVertLevels - u_mix(k) = u(k,iEdge) - u_init(k) * cos( angleEdge(iEdge) ) & - + v_init(k) * sin( angleEdge(iEdge) ) - end do + do k=1,nVertLevels + u_mix(k) = u(k,iEdge) - u_init(k) * cos( angleEdge(iEdge) ) & + + v_init(k) * sin( angleEdge(iEdge) ) + end do - do k=2,nVertLevels-1 + do k=2,nVertLevels-1 - z1 = 0.5*(zgrid(k-1,cell1)+zgrid(k-1,cell2)) - z2 = 0.5*(zgrid(k ,cell1)+zgrid(k ,cell2)) - z3 = 0.5*(zgrid(k+1,cell1)+zgrid(k+1,cell2)) - z4 = 0.5*(zgrid(k+2,cell1)+zgrid(k+2,cell2)) + z1 = 0.5*(zgrid(k-1,cell1)+zgrid(k-1,cell2)) + z2 = 0.5*(zgrid(k ,cell1)+zgrid(k ,cell2)) + z3 = 0.5*(zgrid(k+1,cell1)+zgrid(k+1,cell2)) + z4 = 0.5*(zgrid(k+2,cell1)+zgrid(k+2,cell2)) - zm = 0.5*(z1+z2) - z0 = 0.5*(z2+z3) - zp = 0.5*(z3+z4) + zm = 0.5*(z1+z2) + z0 = 0.5*(z2+z3) + zp = 0.5*(z3+z4) - tend_u_euler(k,iEdge) = tend_u_euler(k,iEdge) + rho_edge(k,iEdge) * v_mom_eddy_visc2*( & - (u_mix(k+1)-u_mix(k ))/(zp-z0) & - -(u_mix(k )-u_mix(k-1))/(z0-zm) )/(0.5*(zp-zm)) - end do + tend_u_euler(k,iEdge) = tend_u_euler(k,iEdge) + rho_edge(k,iEdge) * v_mom_eddy_visc2*( & + (u_mix(k+1)-u_mix(k ))/(zp-z0) & + -(u_mix(k )-u_mix(k-1))/(z0-zm) )/(0.5*(zp-zm)) end do + end do - end if ! mix perturbation state + end if ! mix perturbation state - end if ! vertical mixing of horizontal momentum for les formulation + end if ! vertical mixing of horizontal momentum for les formulation - if ( config_les_model /= "none") then + if ( config_les_model /= "none") then - do iEdge=edgeSolveStart,edgeSolveEnd + do iEdge=edgeSolveStart,edgeSolveEnd - cell1 = cellsOnEdge(1,iEdge) - cell2 = cellsOnEdge(2,iEdge) - turb_vflux(nVertlevels+1) = 0. ! no turbulent flux out of the domain - turb_vflux(1) = 0. ! 
lower bc flux handled where ??? - - do k=2,nVertLevels - rho_k_cell1 = fzm(k)*rho_zz(k ,cell1)*zz(k ,cell1)*eddy_visc_vert(k ,cell1) & - +fzp(k)*rho_zz(k-1,cell1)*zz(k-1,cell1)*eddy_visc_vert(k-1,cell1) - rho_k_cell2 = fzm(k)*rho_zz(k ,cell2)*zz(k ,cell2)*eddy_visc_vert(k ,cell2) & - +fzp(k)*rho_zz(k-1,cell2)*zz(k-1,cell2)*eddy_visc_vert(k-1,cell2) - rho_k_at_w = 0.5*(rho_k_cell1+rho_k_cell2) - - zz_cell1 = fzm(k)*zz(k,cell1)+fzp(k)*zz(k-1,cell1) - zz_cell2 = fzm(k)*zz(k,cell2)+fzp(k)*zz(k-1,cell2) - zz_at_w = 0.5*(zz_cell1+zz_cell2) - turb_vflux(k) = - rho_k_at_w*zz_at_w*rdzu(k)*(u(k,iEdge)-u(k-1,iEdge)) - end do + cell1 = cellsOnEdge(1,iEdge) + cell2 = cellsOnEdge(2,iEdge) + turb_vflux(nVertlevels+1) = 0. ! no turbulent flux out of the domain + turb_vflux(1) = 0. ! lower bc flux handled where ??? - if( config_les_surface == "specified" ) then - velocity_magnitude = sqrt(u(1,iEdge)**2 + v(1,iEdge)**2) - turb_vflux(1) = -rho_edge(1,iEdge)*config_surface_drag_coefficient*u(1,iEdge)*velocity_magnitude - turb_vflux(nVertLevels+1) = turb_vflux(nVertLevels) - else if ( config_les_surface == "varying" ) then - ust_edge = 0.5*(ustm(cell1) + ustm(cell2)) - velocity_magnitude = max(sqrt(u(1,iEdge)**2 + v(1,iEdge)**2),0.1) - turb_vflux(1) = -rho_edge(1,iEdge)*ust_edge*ust_edge*(u(1,iEdge)/velocity_magnitude) - turb_vflux(nVertLevels+1) = turb_vflux(nVertLevels) - ! end test conditions - else - ! test conditions for supercell case - turb_vflux(1) = turb_vflux(2) - turb_vflux(nVertLevels+1) = turb_vflux(nVertLevels) - ! 
end test conditions - end if + do k=2,nVertLevels + rho_k_cell1 = fzm(k)*rho_zz(k ,cell1)*zz(k ,cell1)*eddy_visc_vert(k ,cell1) & + +fzp(k)*rho_zz(k-1,cell1)*zz(k-1,cell1)*eddy_visc_vert(k-1,cell1) + rho_k_cell2 = fzm(k)*rho_zz(k ,cell2)*zz(k ,cell2)*eddy_visc_vert(k ,cell2) & + +fzp(k)*rho_zz(k-1,cell2)*zz(k-1,cell2)*eddy_visc_vert(k-1,cell2) + rho_k_at_w = 0.5*(rho_k_cell1+rho_k_cell2) + + zz_cell1 = fzm(k)*zz(k,cell1)+fzp(k)*zz(k-1,cell1) + zz_cell2 = fzm(k)*zz(k,cell2)+fzp(k)*zz(k-1,cell2) + zz_at_w = 0.5*(zz_cell1+zz_cell2) + turb_vflux(k) = - rho_k_at_w*zz_at_w*rdzu(k)*(u(k,iEdge)-u(k-1,iEdge)) + end do - do k=1,nVertLevels - tend_u_euler(k,iEdge) = tend_u_euler(k,iEdge) - rdzw(k)*(turb_vflux(k+1)-turb_vflux(k)) - end do + if( config_les_surface == "specified" ) then + velocity_magnitude = sqrt(u(1,iEdge)**2 + v(1,iEdge)**2) + turb_vflux(1) = -rho_edge(1,iEdge)*config_surface_drag_coefficient*u(1,iEdge)*velocity_magnitude + turb_vflux(nVertLevels+1) = turb_vflux(nVertLevels) + else if ( config_les_surface == "varying" ) then + ust_edge = 0.5*(ustm(cell1) + ustm(cell2)) + velocity_magnitude = max(sqrt(u(1,iEdge)**2 + v(1,iEdge)**2),0.1) + turb_vflux(1) = -rho_edge(1,iEdge)*ust_edge*ust_edge*(u(1,iEdge)/velocity_magnitude) + turb_vflux(nVertLevels+1) = turb_vflux(nVertLevels) + ! end test conditions + else + ! test conditions for supercell case + turb_vflux(1) = turb_vflux(2) + turb_vflux(nVertLevels+1) = turb_vflux(nVertLevels) + ! 
end test conditions + end if - end do + do k=1,nVertLevels + tend_u_euler(k,iEdge) = tend_u_euler(k,iEdge) - rdzw(k)*(turb_vflux(k+1)-turb_vflux(k)) + end do - end if + end do - if(debug_dissipation) call mpas_log_write(' exiting u_dissipation_3d ') + end if - end subroutine u_dissipation_3d + if(debug_dissipation) call mpas_log_write(' exiting u_dissipation_3d ') + + end subroutine u_dissipation_3d !------------------------ - subroutine w_dissipation_3d( cellStart, cellEnd, cellSolveStart, cellSolveEnd, & - nCells, nEdges, & - nEdgesOnCell, edgesOnCell, cellsOnEdge, edgesOnCell_sign, & - invAreaCell, invDcEdge, dvEdge, & - meshScalingDel2, meshScalingDel4, & - rdzw, rdzu, & - v_mom_eddy_visc2, h_mom_eddy_visc4, & - delsq_w, & - w, rho_edge, rho_zz, divergence, zz, & - eddy_visc_horz, eddy_visc_vert, & - config_les_model, config_les_surface, & - tend_w_euler ) + subroutine w_dissipation_3d( cellStart, cellEnd, cellSolveStart, cellSolveEnd, & + nCells, nEdges, & + nEdgesOnCell, edgesOnCell, cellsOnEdge, edgesOnCell_sign, & + invAreaCell, invDcEdge, dvEdge, & + meshScalingDel2, meshScalingDel4, & + rdzw, rdzu, & + v_mom_eddy_visc2, h_mom_eddy_visc4, & + delsq_w, & + w, rho_edge, rho_zz, divergence, zz, & + eddy_visc_horz, eddy_visc_vert, & + config_les_model, config_les_surface, & + tend_w_euler ) ! 3D w dissipation using the 3D smagorinsky eddy viscosities. @@ -789,65 +792,65 @@ subroutine w_dissipation_3d( cellStart, cellEnd, cellSolveStart, cellSolveEnd, ! !OMP BARRIER why is this openmp barrier here??? - ! del^4 horizontal filter. We compute this as del^2 ( del^2 (w) ). - ! - ! First, storage to hold the result from the first del^2 computation. - ! we copied code from the theta mixing, hence the theta* names. 
- - if(debug_dissipation) then - call mpas_log_write(' begin w_dissipation_3d ') - call mpas_log_write(' les model is '//trim(config_les_model)) - call mpas_log_write(' les surface is '//trim(config_les_surface)) - call mpas_log_write(' 4th order hyperviscosity is $r ', realArgs=(/h_mom_eddy_visc4/)) - end if + ! del^4 horizontal filter. We compute this as del^2 ( del^2 (w) ). + ! + ! First, storage to hold the result from the first del^2 computation. + ! we copied code from the theta mixing, hence the theta* names. + + if(debug_dissipation) then + call mpas_log_write(' begin w_dissipation_3d ') + call mpas_log_write(' les model is '//trim(config_les_model)) + call mpas_log_write(' les surface is '//trim(config_les_surface)) + call mpas_log_write(' 4th order hyperviscosity is $r ', realArgs=(/h_mom_eddy_visc4/)) + end if - delsq_w(1:nVertLevels,cellStart:cellEnd) = 0.0 + delsq_w(1:nVertLevels,cellStart:cellEnd) = 0.0 - do iCell=cellStart,cellEnd - tend_w_euler(1:nVertLevels+1,iCell) = 0.0 - r_areaCell = invAreaCell(iCell) - do i=1,nEdgesOnCell(iCell) - iEdge = edgesOnCell(i,iCell) + do iCell=cellStart,cellEnd + tend_w_euler(1:nVertLevels+1,iCell) = 0.0 + r_areaCell = invAreaCell(iCell) + do i=1,nEdgesOnCell(iCell) + iEdge = edgesOnCell(i,iCell) - edge_sign = 0.5 * r_areaCell*edgesOnCell_sign(i,iCell) * dvEdge(iEdge) * invDcEdge(iEdge) + edge_sign = 0.5 * r_areaCell*edgesOnCell_sign(i,iCell) * dvEdge(iEdge) * invDcEdge(iEdge) - cell1 = cellsOnEdge(1,iEdge) - cell2 = cellsOnEdge(2,iEdge) + cell1 = cellsOnEdge(1,iEdge) + cell2 = cellsOnEdge(2,iEdge) !DIR$ IVDEP - do k=2,nVertLevels - - w_turb_flux = edge_sign*(rho_edge(k,iEdge)+rho_edge(k-1,iEdge))*(w(k,cell2) - w(k,cell1)) - delsq_w(k,iCell) = delsq_w(k,iCell) + w_turb_flux - w_turb_flux = w_turb_flux * meshScalingDel2(iEdge) * 0.25 * & - ( eddy_visc_horz(k ,cell1)+eddy_visc_horz(k ,cell2) & - +eddy_visc_horz(k-1,cell1)+eddy_visc_horz(k-1,cell2) ) - tend_w_euler(k,iCell) = tend_w_euler(k,iCell) + w_turb_flux - end do 
+ do k=2,nVertLevels + + w_turb_flux = edge_sign*(rho_edge(k,iEdge)+rho_edge(k-1,iEdge))*(w(k,cell2) - w(k,cell1)) + delsq_w(k,iCell) = delsq_w(k,iCell) + w_turb_flux + w_turb_flux = w_turb_flux * meshScalingDel2(iEdge) * 0.25 * & + ( eddy_visc_horz(k ,cell1)+eddy_visc_horz(k ,cell2) & + +eddy_visc_horz(k-1,cell1)+eddy_visc_horz(k-1,cell2) ) + tend_w_euler(k,iCell) = tend_w_euler(k,iCell) + w_turb_flux end do end do + end do !$OMP BARRIER - if (h_mom_eddy_visc4 > 0.0) then ! 4th order mixing is active + if (h_mom_eddy_visc4 > 0.0) then ! 4th order mixing is active - do iCell=cellSolveStart,cellSolveEnd ! Technically updating fewer cells than before... - r_areaCell = h_mom_eddy_visc4 * invAreaCell(iCell) - do i=1,nEdgesOnCell(iCell) - iEdge = edgesOnCell(i,iCell) - cell1 = cellsOnEdge(1,iEdge) - cell2 = cellsOnEdge(2,iEdge) - - edge_sign = meshScalingDel4(iEdge)*r_areaCell*dvEdge(iEdge)*edgesOnCell_sign(i,iCell) * invDcEdge(iEdge) + do iCell=cellSolveStart,cellSolveEnd ! Technically updating fewer cells than before... + r_areaCell = h_mom_eddy_visc4 * invAreaCell(iCell) + do i=1,nEdgesOnCell(iCell) + iEdge = edgesOnCell(i,iCell) + cell1 = cellsOnEdge(1,iEdge) + cell2 = cellsOnEdge(2,iEdge) - do k=2,nVertLevels - tend_w_euler(k,iCell) = tend_w_euler(k,iCell) - edge_sign * (delsq_w(k,cell2) - delsq_w(k,cell1)) - end do + edge_sign = meshScalingDel4(iEdge)*r_areaCell*dvEdge(iEdge)*edgesOnCell_sign(i,iCell) * invDcEdge(iEdge) + do k=2,nVertLevels + tend_w_euler(k,iCell) = tend_w_euler(k,iCell) - edge_sign * (delsq_w(k,cell2) - delsq_w(k,cell1)) end do + end do + end do - end if ! 4th order mixing is active + end if ! 4th order mixing is active if ( v_mom_eddy_visc2 > 0.0 ) then ! vertical mixing @@ -862,55 +865,58 @@ subroutine w_dissipation_3d( cellStart, cellEnd, cellSolveStart, cellSolveEnd, end if - if ( config_les_model /= "none") then + if ( config_les_model /= "none") then + + do iCell = cellSolveStart,cellSolveEnd ! vertical mixing for each column + ! 
compute turbulent fluxes + do k=1,nVertLevels + turb_vflux(k) = - rho_zz(k,iCell)*eddy_visc_vert(k,iCell)*zz(k,iCell)*( & + 2.0*zz(k,iCell)*rdzw(k)*(w(k+1,iCell)-w(k,iCell)) & + + divergence(k,iCell) ) + end do - do iCell = cellSolveStart,cellSolveEnd ! vertical mixing for each column - ! compute turbulent fluxes - do k=1,nVertLevels - turb_vflux(k) = - rho_zz(k,iCell)*eddy_visc_vert(k,iCell)*zz(k,iCell)*( & - 2.0*zz(k,iCell)*rdzw(k)*(w(k+1,iCell)-w(k,iCell)) & - + divergence(k,iCell) ) - end do - turb_vflux(nVertLevels+1) = 0.0 - do k=2,nVertLevels - tend_w_euler(k,iCell) = tend_w_euler(k,iCell) & - - rdzu(k)*(turb_vflux(k)-turb_vflux(k-1)) - end do - end do + turb_vflux(nVertLevels+1) = 0.0 - end if + do k=2,nVertLevels + tend_w_euler(k,iCell) = tend_w_euler(k,iCell) & + - rdzu(k)*(turb_vflux(k)-turb_vflux(k-1)) + end do + end do + + end if - if(debug_dissipation) call mpas_log_write(' exiting w_dissipation_3d ') + if(debug_dissipation) call mpas_log_write(' exiting w_dissipation_3d ') - end subroutine w_dissipation_3d + end subroutine w_dissipation_3d !----------------------------------------------------- - subroutine scalar_dissipation_3d_les( cellStart, cellEnd, cellSolveStart, cellSolveEnd, & - nCells, nEdges, & - nEdgesOnCell, edgesOnCell, cellsOnEdge, edgesOnCell_sign, & - invAreaCell, invDcEdge, dvEdge, & - meshScalingDel2, meshScalingDel4, & - config_mix_full, t_init, zgrid, & - rdzw, rdzu, fzm, fzp, & - v_theta_eddy_visc2, h_theta_eddy_visc4, prandtl_inv, & - prandtl_3d_inv, & - delsq_theta, & - theta_m, rho_edge, rho_zz, zz, & - eddy_visc_horz, eddy_visc_vert, & - bv_freq2, config_len_disp, scalars, tend_scalars, & - index_tke, index_qv, num_scalars_dummy, mix_scalars, & - config_les_model, config_les_surface, time_of_day_seconds,& - config_surface_heat_flux, config_surface_moisture_flux, & - uReconstructZonal, uReconstructMeridional, & - hfx, qfx, & - tend_theta_euler, dynamics_substep ) + subroutine scalar_dissipation_3d_les( cellStart, cellEnd, 
cellSolveStart, cellSolveEnd, & + nCells, nEdges, & + nEdgesOnCell, edgesOnCell, cellsOnEdge, edgesOnCell_sign, & + invAreaCell, invDcEdge, dvEdge, & + meshScalingDel2, meshScalingDel4, & + config_mix_full, t_init, zgrid, & + rdzw, rdzu, fzm, fzp, & + v_theta_eddy_visc2, h_theta_eddy_visc4, prandtl_inv, & + prandtl_3d_inv, & + delsq_theta, & + theta_m, rho_edge, rho_zz, zz, & + eddy_visc_horz, eddy_visc_vert, & + bv_freq2, config_len_disp, scalars, tend_scalars, & + index_tke, index_qv, num_scalars_dummy, mix_scalars, & + config_les_model, config_les_surface, time_of_day_seconds,& + config_surface_heat_flux, config_surface_moisture_flux, & + uReconstructZonal, uReconstructMeridional, & + hfx, qfx, & + tend_theta_euler, dynamics_substep ) ! 3D theta_m dissipation using the 3D smagorinsky eddy viscosities. ! This routine also includes the simpler mixing models, and the 4th-order horizontal filter use mpas_atm_dimensions ! pull nVertLevels and maxEdges from here + implicit none integer, intent(in) :: cellStart, cellEnd @@ -982,252 +988,254 @@ subroutine scalar_dissipation_3d_les( cellStart, cellEnd, cellSolveStart, cellSo real (kind=RKIND) :: moisture_flux, heat_flux, theta_m_flux real (kind=RKIND) :: qv_cell, theta_m_cell, theta_cell - if(debug_dissipation) then - call mpas_log_write(' begin scalar_dissipation_3d ') - call mpas_log_write(' les model is '//trim(config_les_model)) - call mpas_log_write(' les surface is '//trim(config_les_surface)) - call mpas_log_write(' 4th order hyperviscosity is $r ', realArgs=(/h_theta_eddy_visc4/)) - end if - if( mix_scalars .and. 
(dynamics_substep == 1)) call mpas_log_write(' scalar mixing on ') + if(debug_dissipation) then + call mpas_log_write(' begin scalar_dissipation_3d ') + call mpas_log_write(' les model is '//trim(config_les_model)) + call mpas_log_write(' les surface is '//trim(config_les_surface)) + call mpas_log_write(' 4th order hyperviscosity is $r ', realArgs=(/h_theta_eddy_visc4/)) + end if - delsq_theta(1:nVertLevels,cellStart:cellEnd) = 0.0 + if( mix_scalars .and. (dynamics_substep == 1)) call mpas_log_write(' scalar mixing on ') + + delsq_theta(1:nVertLevels,cellStart:cellEnd) = 0.0 + + do iCell=cellStart,cellEnd + tend_theta_euler(1:nVertLevels,iCell) = 0.0 + r_areaCell = invAreaCell(iCell) + do i=1,nEdgesOnCell(iCell) + iEdge = edgesOnCell(i,iCell) + edge_sign = r_areaCell*edgesOnCell_sign(i,iCell) * dvEdge(iEdge) * invDcEdge(iEdge) + pr_scale = prandtl_inv * meshScalingDel2(iEdge) + cell1 = cellsOnEdge(1,iEdge) + cell2 = cellsOnEdge(2,iEdge) - do iCell=cellStart,cellEnd - tend_theta_euler(1:nVertLevels,iCell) = 0.0 - r_areaCell = invAreaCell(iCell) - do i=1,nEdgesOnCell(iCell) - iEdge = edgesOnCell(i,iCell) - edge_sign = r_areaCell*edgesOnCell_sign(i,iCell) * dvEdge(iEdge) * invDcEdge(iEdge) - pr_scale = prandtl_inv * meshScalingDel2(iEdge) - cell1 = cellsOnEdge(1,iEdge) - cell2 = cellsOnEdge(2,iEdge) !DIR$ IVDEP - do k=1,nVertLevels + do k=1,nVertLevels ! we are computing the Smagorinsky filter at more points than needed here so as to pick up the delsq_theta for 4th order filter below. ! This is in conservative form. 
- theta_turb_flux = edge_sign*(theta_m(k,cell2) - theta_m(k,cell1))*rho_edge(k,iEdge) - delsq_theta(k,iCell) = delsq_theta(k,iCell) + theta_turb_flux - theta_turb_flux = theta_turb_flux*0.5*(eddy_visc_horz(k,cell1)+eddy_visc_horz(k,cell2)) * pr_scale - tend_theta_euler(k,iCell) = tend_theta_euler(k,iCell) + theta_turb_flux + theta_turb_flux = edge_sign*(theta_m(k,cell2) - theta_m(k,cell1))*rho_edge(k,iEdge) + delsq_theta(k,iCell) = delsq_theta(k,iCell) + theta_turb_flux + theta_turb_flux = theta_turb_flux*0.5*(eddy_visc_horz(k,cell1)+eddy_visc_horz(k,cell2)) * pr_scale + tend_theta_euler(k,iCell) = tend_theta_euler(k,iCell) + theta_turb_flux - end do end do - end do + end do + end do !$OMP BARRIER - if (h_theta_eddy_visc4 > 0.0) then ! 4th order mixing is active + if (h_theta_eddy_visc4 > 0.0) then ! 4th order mixing is active - do iCell=cellSolveStart,cellSolveEnd ! Technically updating fewer cells than before... - r_areaCell = h_theta_eddy_visc4 * prandtl_inv * invAreaCell(iCell) - do i=1,nEdgesOnCell(iCell) + do iCell=cellSolveStart,cellSolveEnd ! Technically updating fewer cells than before... + r_areaCell = h_theta_eddy_visc4 * prandtl_inv * invAreaCell(iCell) + do i=1,nEdgesOnCell(iCell) - iEdge = edgesOnCell(i,iCell) - edge_sign = meshScalingDel4(iEdge)*r_areaCell*dvEdge(iEdge)*edgesOnCell_sign(i,iCell)*invDcEdge(iEdge) + iEdge = edgesOnCell(i,iCell) + edge_sign = meshScalingDel4(iEdge)*r_areaCell*dvEdge(iEdge)*edgesOnCell_sign(i,iCell)*invDcEdge(iEdge) - cell1 = cellsOnEdge(1,iEdge) - cell2 = cellsOnEdge(2,iEdge) + cell1 = cellsOnEdge(1,iEdge) + cell2 = cellsOnEdge(2,iEdge) - do k=1,nVertLevels - tend_theta_euler(k,iCell) = tend_theta_euler(k,iCell) - edge_sign*(delsq_theta(k,cell2) - delsq_theta(k,cell1)) - end do + do k=1,nVertLevels + tend_theta_euler(k,iCell) = tend_theta_euler(k,iCell) - edge_sign*(delsq_theta(k,cell2) - delsq_theta(k,cell1)) end do end do + end do - end if ! 4th order mixing is active + end if ! 
4th order mixing is active - if(mix_scalars .and. (dynamics_substep == 1)) then ! dissipation for scalars, including 4th-order filter. Likely needs optimization + if(mix_scalars .and. (dynamics_substep == 1)) then ! dissipation for scalars, including 4th-order filter. Likely needs optimization do iScalar=1,num_scalars - delsq_theta(1:nVertLevels,cellStart:cellEnd) = 0.0 + delsq_theta(1:nVertLevels,cellStart:cellEnd) = 0.0 + + do iCell=cellStart,cellEnd + ! tend_theta_euler(1:nVertLevels,iCell) = 0.0 + r_areaCell = invAreaCell(iCell) + do i=1,nEdgesOnCell(iCell) + iEdge = edgesOnCell(i,iCell) + edge_sign = r_areaCell*edgesOnCell_sign(i,iCell) * dvEdge(iEdge) * invDcEdge(iEdge) + pr_scale = prandtl_inv * meshScalingDel2(iEdge) + cell1 = cellsOnEdge(1,iEdge) + cell2 = cellsOnEdge(2,iEdge) - do iCell=cellStart,cellEnd - ! tend_theta_euler(1:nVertLevels,iCell) = 0.0 - r_areaCell = invAreaCell(iCell) - do i=1,nEdgesOnCell(iCell) - iEdge = edgesOnCell(i,iCell) - edge_sign = r_areaCell*edgesOnCell_sign(i,iCell) * dvEdge(iEdge) * invDcEdge(iEdge) - pr_scale = prandtl_inv * meshScalingDel2(iEdge) - cell1 = cellsOnEdge(1,iEdge) - cell2 = cellsOnEdge(2,iEdge) !DIR$ IVDEP - do k=1,nVertLevels + do k=1,nVertLevels ! we are computing the Smagorinsky filter at more points than needed here so as to pick up the delsq_theta for 4th order filter below. ! This is in conservative form. 
- theta_turb_flux = edge_sign*(scalars(iScalar,k,cell2) - scalars(iScalar,k,cell1))*rho_edge(k,iEdge) - delsq_theta(k,iCell) = delsq_theta(k,iCell) + theta_turb_flux - theta_turb_flux = theta_turb_flux*0.5*(eddy_visc_horz(k,cell1)+eddy_visc_horz(k,cell2)) * pr_scale - tend_scalars(iScalar,k,iCell) = tend_scalars(iScalar,k,iCell) + theta_turb_flux + theta_turb_flux = edge_sign*(scalars(iScalar,k,cell2) - scalars(iScalar,k,cell1))*rho_edge(k,iEdge) + delsq_theta(k,iCell) = delsq_theta(k,iCell) + theta_turb_flux + theta_turb_flux = theta_turb_flux*0.5*(eddy_visc_horz(k,cell1)+eddy_visc_horz(k,cell2)) * pr_scale + tend_scalars(iScalar,k,iCell) = tend_scalars(iScalar,k,iCell) + theta_turb_flux + end do end do end do - end do !$OMP BARRIER - if (h_theta_eddy_visc4 > 0.0) then ! 4th order mixing is active + if (h_theta_eddy_visc4 > 0.0) then ! 4th order mixing is active - do iCell=cellSolveStart,cellSolveEnd ! Technically updating fewer cells than before... - r_areaCell = h_theta_eddy_visc4 * prandtl_inv * invAreaCell(iCell) - do i=1,nEdgesOnCell(iCell) + do iCell=cellSolveStart,cellSolveEnd ! Technically updating fewer cells than before... + r_areaCell = h_theta_eddy_visc4 * prandtl_inv * invAreaCell(iCell) + do i=1,nEdgesOnCell(iCell) - iEdge = edgesOnCell(i,iCell) - edge_sign = meshScalingDel4(iEdge)*r_areaCell*dvEdge(iEdge)*edgesOnCell_sign(i,iCell)*invDcEdge(iEdge) + iEdge = edgesOnCell(i,iCell) + edge_sign = meshScalingDel4(iEdge)*r_areaCell*dvEdge(iEdge)*edgesOnCell_sign(i,iCell)*invDcEdge(iEdge) - cell1 = cellsOnEdge(1,iEdge) - cell2 = cellsOnEdge(2,iEdge) + cell1 = cellsOnEdge(1,iEdge) + cell2 = cellsOnEdge(2,iEdge) - do k=1,nVertLevels - tend_scalars(iScalar,k,iCell) = tend_scalars(iScalar,k,iCell) - edge_sign*(delsq_theta(k,cell2) - delsq_theta(k,cell1)) + do k=1,nVertLevels + tend_scalars(iScalar,k,iCell) = tend_scalars(iScalar,k,iCell) - edge_sign*(delsq_theta(k,cell2) - delsq_theta(k,cell1)) + end do end do end do - end do - end if ! 
4th order mixing is active + end if ! 4th order mixing is active - end do ! loop over scalars for horizontal mixing + end do ! loop over scalars for horizontal mixing end if ! horizontal scalar mixing - ! idealized case vertical mixing. No scalar mixing here. + ! idealized case vertical mixing. No scalar mixing here. - if ( v_theta_eddy_visc2 > 0.0 ) then ! vertical mixing for theta_m + if ( v_theta_eddy_visc2 > 0.0 ) then ! vertical mixing for theta_m - if (config_mix_full) then + if (config_mix_full) then - do iCell = cellSolveStart,cellSolveEnd - do k=2,nVertLevels-1 - z1 = zgrid(k-1,iCell) - z2 = zgrid(k ,iCell) - z3 = zgrid(k+1,iCell) - z4 = zgrid(k+2,iCell) + do iCell = cellSolveStart,cellSolveEnd + do k=2,nVertLevels-1 + z1 = zgrid(k-1,iCell) + z2 = zgrid(k ,iCell) + z3 = zgrid(k+1,iCell) + z4 = zgrid(k+2,iCell) - zm = 0.5*(z1+z2) - z0 = 0.5*(z2+z3) - zp = 0.5*(z3+z4) + zm = 0.5*(z1+z2) + z0 = 0.5*(z2+z3) + zp = 0.5*(z3+z4) - tend_theta_euler(k,iCell) = tend_theta_euler(k,iCell) + v_theta_eddy_visc2*prandtl_inv*rho_zz(k,iCell)*(& - (theta_m(k+1,iCell)-theta_m(k ,iCell))/(zp-z0) & - -(theta_m(k ,iCell)-theta_m(k-1,iCell))/(z0-zm) )/(0.5*(zp-zm)) - end do + tend_theta_euler(k,iCell) = tend_theta_euler(k,iCell) + v_theta_eddy_visc2*prandtl_inv*rho_zz(k,iCell)*(& + (theta_m(k+1,iCell)-theta_m(k ,iCell))/(zp-z0) & + -(theta_m(k ,iCell)-theta_m(k-1,iCell))/(z0-zm) )/(0.5*(zp-zm)) end do + end do else ! 
idealized cases where we mix on the perturbation from the initial 1-D state - do iCell = cellSolveStart,cellSolveEnd - do k=2,nVertLevels-1 - z1 = zgrid(k-1,iCell) - z2 = zgrid(k ,iCell) - z3 = zgrid(k+1,iCell) - z4 = zgrid(k+2,iCell) + do iCell = cellSolveStart,cellSolveEnd + do k=2,nVertLevels-1 + z1 = zgrid(k-1,iCell) + z2 = zgrid(k ,iCell) + z3 = zgrid(k+1,iCell) + z4 = zgrid(k+2,iCell) - zm = 0.5*(z1+z2) - z0 = 0.5*(z2+z3) - zp = 0.5*(z3+z4) + zm = 0.5*(z1+z2) + z0 = 0.5*(z2+z3) + zp = 0.5*(z3+z4) - tend_theta_euler(k,iCell) = tend_theta_euler(k,iCell) + v_theta_eddy_visc2*prandtl_inv*rho_zz(k,iCell)*(& - ((theta_m(k+1,iCell)-t_init(k+1,iCell))-(theta_m(k ,iCell)-t_init(k,iCell)))/(zp-z0) & - -((theta_m(k ,iCell)-t_init(k,iCell))-(theta_m(k-1,iCell)-t_init(k-1,iCell)))/(z0-zm) )/(0.5*(zp-zm)) - end do + tend_theta_euler(k,iCell) = tend_theta_euler(k,iCell) + v_theta_eddy_visc2*prandtl_inv*rho_zz(k,iCell)*(& + ((theta_m(k+1,iCell)-t_init(k+1,iCell))-(theta_m(k ,iCell)-t_init(k,iCell)))/(zp-z0) & + -((theta_m(k ,iCell)-t_init(k,iCell))-(theta_m(k-1,iCell)-t_init(k-1,iCell)))/(z0-zm) )/(0.5*(zp-zm)) end do - - end if + end do end if - if ( config_les_model /= "none") then + end if - do iCell = cellSolveStart,cellSolveEnd ! vertical mixing for each column - ! compute turbulent fluxes + if ( config_les_model /= "none") then + do iCell = cellSolveStart,cellSolveEnd ! vertical mixing for each column + ! compute turbulent fluxes - turb_vflux(nVertlevels+1) = 0. ! no turbulent flux out of the domain - turb_vflux(1) = 0. ! lower bc flux handled where ??? + turb_vflux(nVertlevels+1) = 0. ! no turbulent flux out of the domain + turb_vflux(1) = 0. ! lower bc flux handled where ??? - if ( config_les_model == "3d_smagorinsky") then - do k=2,nVertLevels - prandtl_1d_inverse(k) = prandtl_inv - end do - else ! prognostic_1.5_order, isentropic mixing length - ! do k=2,nVertLevels - ! delta_z = 0.5*(zgrid(k+1,iCell)-zgrid(k-1,iCell)) - ! 
delta_s = ((config_len_disp**2)*delta_z)**(1./3.) - ! bv_frequency2 = 0.5*(bv_freq2(k,iCell)+bv_freq2(k-1,iCell)) - ! tke_length = delta_s - ! if(bv_frequency2 .gt. 1.e-06) & - ! tke_length = 0.76*sqrt(scalars(index_tke,k,iCell))/sqrt(bv_frequency2) - ! tke_length = min(delta_z,tke_length) - ! prandtl_inverse(k) = 1. + 2.*tke_length/delta_z - ! end do - - do k=2,nVertLevels - ! prandtl_1d_inverse(k) = 0.5*(prandtl_3d_inv(k,iCell)+prandtl_3d_inv(k-1,iCell)) - prandtl_1d_inverse(k) = fzm(k)*prandtl_3d_inv(k,iCell)+fzp(k)*prandtl_3d_inv(k-1,iCell) - end do + if ( config_les_model == "3d_smagorinsky") then + do k=2,nVertLevels + prandtl_1d_inverse(k) = prandtl_inv + end do + else ! prognostic_1.5_order, isentropic mixing length + ! do k=2,nVertLevels + ! delta_z = 0.5*(zgrid(k+1,iCell)-zgrid(k-1,iCell)) + ! delta_s = ((config_len_disp**2)*delta_z)**(1./3.) + ! bv_frequency2 = 0.5*(bv_freq2(k,iCell)+bv_freq2(k-1,iCell)) + ! tke_length = delta_s + ! if(bv_frequency2 .gt. 1.e-06) & + ! tke_length = 0.76*sqrt(scalars(index_tke,k,iCell))/sqrt(bv_frequency2) + ! tke_length = min(delta_z,tke_length) + ! prandtl_inverse(k) = 1. + 2.*tke_length/delta_z + ! end do + + do k=2,nVertLevels + ! prandtl_1d_inverse(k) = 0.5*(prandtl_3d_inv(k,iCell)+prandtl_3d_inv(k-1,iCell)) + prandtl_1d_inverse(k) = fzm(k)*prandtl_3d_inv(k,iCell)+fzp(k)*prandtl_3d_inv(k-1,iCell) + end do - end if + end if - do k=2,nVertLevels + do k=2,nVertLevels - ! delta_z = 0.5*(zgrid(k+1,iCell)-zgrid(k-1,iCell)) - ! delta_s = ((config_len_disp**2)*delta_z)**(1./3.) - ! bv_frequency2 = 0.5*(bv_freq2(k)+bv_freq(k-1)) - ! bv = max( sqrt(abs(bv_frequency2)), epsilon_bv ) - rho_k_at_w = fzm(k)*rho_zz(k ,iCell)*zz(k ,iCell)*zz(k ,iCell)*eddy_visc_vert(k ,iCell) & - +fzp(k)*rho_zz(k-1,iCell)*zz(k-1,iCell)*zz(k-1,iCell)*eddy_visc_vert(k-1,iCell) - zz_at_w = fzm(k)*zz(k,iCell)+fzp(k)*zz(k-1,iCell) - turb_vflux(k) = - prandtl_1d_inverse(k)*rho_k_at_w*zz_at_w*rdzu(k)*(theta_m(k,iCell)-theta_m(k-1,iCell)) - end do + ! 
delta_z = 0.5*(zgrid(k+1,iCell)-zgrid(k-1,iCell)) + ! delta_s = ((config_len_disp**2)*delta_z)**(1./3.) + ! bv_frequency2 = 0.5*(bv_freq2(k)+bv_freq(k-1)) + ! bv = max( sqrt(abs(bv_frequency2)), epsilon_bv ) + rho_k_at_w = fzm(k)*rho_zz(k ,iCell)*zz(k ,iCell)*zz(k ,iCell)*eddy_visc_vert(k ,iCell) & + +fzp(k)*rho_zz(k-1,iCell)*zz(k-1,iCell)*zz(k-1,iCell)*eddy_visc_vert(k-1,iCell) + zz_at_w = fzm(k)*zz(k,iCell)+fzp(k)*zz(k-1,iCell) + turb_vflux(k) = - prandtl_1d_inverse(k)*rho_k_at_w*zz_at_w*rdzu(k)*(theta_m(k,iCell)-theta_m(k-1,iCell)) + end do + + ! test boundary conditions for supercell and les test cases - ! test boundary conditions for supercell and les test cases + if( config_les_surface == "specified" .or. config_les_surface == "varying" ) then - if( config_les_surface == "specified" .or. config_les_surface == "varying" ) then + if( config_les_surface == "specified" ) then + moisture_flux = config_surface_moisture_flux + heat_flux = config_surface_heat_flux - if( config_les_surface == "specified" ) then - moisture_flux = config_surface_moisture_flux - heat_flux = config_surface_heat_flux ! place holder routine for time-varying specified ! 
call flux_les_sas( heat_flux, moisture_flux, time_of_day_seconds ) - else if ( config_les_surface == "varying" ) then - heat_flux = hfx(iCell)/rho_zz(1,iCell)/cp - moisture_flux = qfx(iCell)/rho_zz(1,iCell) - endif + else if ( config_les_surface == "varying" ) then + heat_flux = hfx(iCell)/rho_zz(1,iCell)/cp + moisture_flux = qfx(iCell)/rho_zz(1,iCell) + endif - qv_cell = scalars(index_qv,1,iCell) - theta_m_cell = theta_m(1,iCell) - theta_cell = theta_m_cell/(1.0+(rv/rgas)*qv_cell) + qv_cell = scalars(index_qv,1,iCell) + theta_m_cell = theta_m(1,iCell) + theta_cell = theta_m_cell/(1.0+(rv/rgas)*qv_cell) - theta_m_flux = heat_flux*(1.0+(rv/rgas)*qv_cell)+(rv/rgas)*theta_cell*moisture_flux - turb_vflux(1) = theta_m_flux*rho_zz(1,iCell) - moisture_flux = moisture_flux*rho_zz(1,iCell) - turb_vflux(nVertLevels+1) = turb_vflux(nVertLevels) + theta_m_flux = heat_flux*(1.0+(rv/rgas)*qv_cell)+(rv/rgas)*theta_cell*moisture_flux + turb_vflux(1) = theta_m_flux*rho_zz(1,iCell) + moisture_flux = moisture_flux*rho_zz(1,iCell) + turb_vflux(nVertLevels+1) = turb_vflux(nVertLevels) - else + else - turb_vflux(1) = turb_vflux(2) - turb_vflux(nVertLevels+1) = turb_vflux(nVertLevels) - - end if + turb_vflux(1) = turb_vflux(2) + turb_vflux(nVertLevels+1) = turb_vflux(nVertLevels) + end if - do k=1,nVertLevels - tend_theta_euler(k,iCell) = tend_theta_euler(k,iCell) & - - rdzw(k)*(turb_vflux(k+1)-turb_vflux(k)) - end do + do k=1,nVertLevels + tend_theta_euler(k,iCell) = tend_theta_euler(k,iCell) & + - rdzw(k)*(turb_vflux(k+1)-turb_vflux(k)) + end do - if (mix_scalars ) then + if (mix_scalars ) then - ! compute turbulent fluxes - turb_vflux_scalars(:,nVertlevels+1) = 0. ! no turbulent flux out of the domain - turb_vflux_scalars(:,1) = 0. ! lower bc flux handled where ??? - do k=2,nVertLevels + ! compute turbulent fluxes + turb_vflux_scalars(:,nVertlevels+1) = 0. ! no turbulent flux out of the domain + turb_vflux_scalars(:,1) = 0. ! lower bc flux handled where ??? 
+ do k=2,nVertLevels rho_k_at_w = fzm(k)*rho_zz(k ,iCell)*zz(k ,iCell)*zz(k ,iCell)*eddy_visc_vert(k ,iCell) & +fzp(k)*rho_zz(k-1,iCell)*zz(k-1,iCell)*zz(k-1,iCell)*eddy_visc_vert(k-1,iCell) zz_at_w = fzm(k)*zz(k,iCell)+fzp(k)*zz(k-1,iCell) @@ -1235,30 +1243,30 @@ subroutine scalar_dissipation_3d_les( cellStart, cellEnd, cellSolveStart, cellSo turb_vflux_scalars(iScalar,k) = - prandtl_1d_inverse(k)*rho_k_at_w*zz_at_w*rdzu(k)* & (scalars(iScalar,k,iCell)-scalars(iScalar,k-1,iCell)) end do - end do + end do - if( config_les_surface == "specified" .or. config_les_surface == "varying" ) turb_vflux_scalars(index_qv,1) = moisture_flux ! lower b.c. for qv + if( config_les_surface == "specified" .or. config_les_surface == "varying" ) turb_vflux_scalars(index_qv,1) = moisture_flux ! lower b.c. for qv - do k=1,nVertLevels + do k=1,nVertLevels do iScalar=1,num_scalars - tend_scalars(iScalar,k,iCell) = tend_scalars(iScalar,k,iCell) & - - rdzw(k)*(turb_vflux_scalars(iScalar,k+1)-turb_vflux_scalars(iScalar,k)) + tend_scalars(iScalar,k,iCell) = tend_scalars(iScalar,k,iCell) & + - rdzw(k)*(turb_vflux_scalars(iScalar,k+1)-turb_vflux_scalars(iScalar,k)) end do - end do + end do - end if ! mix scalars + end if ! mix scalars - end do ! loop over cells (columns) + end do ! loop over cells (columns) - end if + end if - if(debug_dissipation) call mpas_log_write(' exiting scalar_dissipation_3d ') + if(debug_dissipation) call mpas_log_write(' exiting scalar_dissipation_3d ') - end subroutine scalar_dissipation_3d_les + end subroutine scalar_dissipation_3d_les !----------- -! subroutine flux_les_sas(heat_flux, moisture_flux, time_of_day_seconds) +! subroutine flux_les_sas(heat_flux, moisture_flux, time_of_day_seconds) ! implicit none @@ -1277,6 +1285,6 @@ end subroutine scalar_dissipation_3d_les ! heat_flux = max(0., 0.1*sin(pii*rel_time_t_flux)) ! moisture_flux = max(0., 0.15*sin(pii*rel_time_q_flux))/1000. -! end subroutine flux_les_sas +! 
end subroutine flux_les_sas end module mpas_atm_dissipation_models From 49b38aa2da701e3874d0e060bcdecfe269e5da01 Mon Sep 17 00:00:00 2001 From: Michael Duda Date: Fri, 5 Dec 2025 14:42:21 -0700 Subject: [PATCH 093/146] Use a macro for debugging print statements in mpas_atm_dissipation_models.F With a macro, when debugging prints are not enabled the compiler will see no executable code. --- .../dynamics/mpas_atm_dissipation_models.F | 61 +++++++++---------- 1 file changed, 30 insertions(+), 31 deletions(-) diff --git a/src/core_atmosphere/dynamics/mpas_atm_dissipation_models.F b/src/core_atmosphere/dynamics/mpas_atm_dissipation_models.F index 70cf9c24b3..4fd7000acd 100644 --- a/src/core_atmosphere/dynamics/mpas_atm_dissipation_models.F +++ b/src/core_atmosphere/dynamics/mpas_atm_dissipation_models.F @@ -6,6 +6,9 @@ ! distributed with this code, or at http://mpas-dev.github.com/license.html ! +#define COMMA , +#define DEBUG_WRITE(M) ! call mpas_log_write(M) + module mpas_atm_dissipation_models use mpas_kind_types, only : RKIND @@ -14,7 +17,6 @@ module mpas_atm_dissipation_models use mpas_log use mpas_derived_types, only : MPAS_LOG_CRIT - logical, parameter :: debug_dissipation = .false. logical, parameter :: les_test = .true., les_sas_test = .false. !! real (kind=RKIND), parameter :: tke_heat_flux = 0.03 ! shear case from Moeng et al., first hour ! 
real (kind=RKIND), parameter :: tke_heat_flux = 0.03 @@ -58,8 +60,8 @@ subroutine smagorinsky_2d( kdiff, u, v, c_s, config_len_disp, integer :: iCell, iEdge, k real (kind=RKIND), dimension(nVertLevels) :: d_11, d_22, d_12, dudx, dudy, dvdx, dvdy - if(debug_dissipation) call mpas_log_write(' begin smagorinsky_2d ') + DEBUG_WRITE(' begin smagorinsky_2d ') do iCell = cellStart,cellEnd dudx(1:nVertLevels) = 0.0 @@ -94,7 +96,7 @@ subroutine smagorinsky_2d( kdiff, u, v, c_s, config_len_disp, h_mom_eddy_visc4 = config_visc4_2dsmag * config_len_disp**3 h_theta_eddy_visc4 = h_mom_eddy_visc4 - if(debug_dissipation) call mpas_log_write(' exiting smagorinsky_2d ') + DEBUG_WRITE(' exiting smagorinsky_2d ') end subroutine smagorinsky_2d @@ -158,9 +160,10 @@ subroutine les_models( config_les_model, config_les_surface, dynamics_substep, e logical, parameter :: test_tke=.true. ! real (kind=RKIND), parameter :: epsilon_bv = 1.e-06 - if(debug_dissipation) call mpas_log_write(' begin les_models ') - if(debug_dissipation) call mpas_log_write(' les scheme is '//trim(config_les_model)) - if(debug_dissipation) call mpas_log_write(' les surface scheme is '//trim(config_les_surface)) + + DEBUG_WRITE(' begin les_models ') + DEBUG_WRITE(' les scheme is '//trim(config_les_model)) + DEBUG_WRITE(' les surface scheme is '//trim(config_les_surface)) pr_inv = 1./prandtl @@ -324,7 +327,7 @@ subroutine les_models( config_les_model, config_les_surface, dynamics_substep, e end do ! 
loop over all owned cells (columns) - if(debug_dissipation) call mpas_log_write(' les_models ') + DEBUG_WRITE(' les_models ') end subroutine les_models @@ -348,7 +351,7 @@ subroutine calculate_n2( bn2, theta_m, exner, pressure_b, pp, zgrid, scalars, in real (kind=RKIND), dimension(nVertLevels) :: theta, qvsw, temp, coefa logical :: dry_bv_frequency - if(debug_dissipation) call mpas_log_write(' begin BV frequency calculations ') + DEBUG_WRITE(' begin BV frequency calculations ') do iCell = cellStart,cellEnd !DIR$ IVDEP @@ -408,7 +411,7 @@ subroutine calculate_n2( bn2, theta_m, exner, pressure_b, pp, zgrid, scalars, in end do - if(debug_dissipation) call mpas_log_write(' exiting BV frequency calculations ') + DEBUG_WRITE(' exiting BV frequency calculations ') end subroutine calculate_n2 @@ -506,13 +509,11 @@ subroutine u_dissipation_3d( edgeStart, edgeEnd, edgeSolveStart, edgeSolveEnd, v real (kind=RKIND) :: velocity_magnitude real (kind=RKIND) :: tau_12_factor - if(debug_dissipation) then - call mpas_log_write(' begin u_dissipation_3d ') - call mpas_log_write(' les model is '//trim(config_les_model)) - call mpas_log_write(' les surface is '//trim(config_les_surface)) - call mpas_log_write(' 4th order hyperviscosity is $r ', realArgs=(/h_mom_eddy_visc4/)) - call mpas_log_write(' 4th order divergence factor is $r ', realArgs=(/config_del4u_div_factor/)) - end if + DEBUG_WRITE(' begin u_dissipation_3d ') + DEBUG_WRITE(' les model is '//trim(config_les_model)) + DEBUG_WRITE(' les surface is '//trim(config_les_surface)) + DEBUG_WRITE(' 4th order hyperviscosity is $r ' COMMA realArgs=(/h_mom_eddy_visc4/)) + DEBUG_WRITE(' 4th order divergence factor is $r ' COMMA realArgs=(/config_del4u_div_factor/)) !$OMP BARRIER @@ -720,7 +721,7 @@ subroutine u_dissipation_3d( edgeStart, edgeEnd, edgeSolveStart, edgeSolveEnd, v end if - if(debug_dissipation) call mpas_log_write(' exiting u_dissipation_3d ') + DEBUG_WRITE(' exiting u_dissipation_3d ') end subroutine u_dissipation_3d @@ 
-797,12 +798,11 @@ subroutine w_dissipation_3d( cellStart, cellEnd, cellSolveStart, cellSolveEnd, ! First, storage to hold the result from the first del^2 computation. ! we copied code from the theta mixing, hence the theta* names. - if(debug_dissipation) then - call mpas_log_write(' begin w_dissipation_3d ') - call mpas_log_write(' les model is '//trim(config_les_model)) - call mpas_log_write(' les surface is '//trim(config_les_surface)) - call mpas_log_write(' 4th order hyperviscosity is $r ', realArgs=(/h_mom_eddy_visc4/)) - end if + + DEBUG_WRITE(' begin w_dissipation_3d ') + DEBUG_WRITE(' les model is '//trim(config_les_model)) + DEBUG_WRITE(' les surface is '//trim(config_les_surface)) + DEBUG_WRITE(' 4th order hyperviscosity is $r ' COMMA realArgs=(/h_mom_eddy_visc4/)) delsq_w(1:nVertLevels,cellStart:cellEnd) = 0.0 @@ -885,7 +885,7 @@ subroutine w_dissipation_3d( cellStart, cellEnd, cellSolveStart, cellSolveEnd, end if - if(debug_dissipation) call mpas_log_write(' exiting w_dissipation_3d ') + DEBUG_WRITE(' exiting w_dissipation_3d ') end subroutine w_dissipation_3d @@ -988,12 +988,11 @@ subroutine scalar_dissipation_3d_les( cellStart, cellEnd, cellSolveStart, cellSo real (kind=RKIND) :: moisture_flux, heat_flux, theta_m_flux real (kind=RKIND) :: qv_cell, theta_m_cell, theta_cell - if(debug_dissipation) then - call mpas_log_write(' begin scalar_dissipation_3d ') - call mpas_log_write(' les model is '//trim(config_les_model)) - call mpas_log_write(' les surface is '//trim(config_les_surface)) - call mpas_log_write(' 4th order hyperviscosity is $r ', realArgs=(/h_theta_eddy_visc4/)) - end if + + DEBUG_WRITE(' begin scalar_dissipation_3d ') + DEBUG_WRITE(' les model is '//trim(config_les_model)) + DEBUG_WRITE(' les surface is '//trim(config_les_surface)) + DEBUG_WRITE(' 4th order hyperviscosity is $r ' COMMA realArgs=(/h_theta_eddy_visc4/)) if( mix_scalars .and. 
(dynamics_substep == 1)) call mpas_log_write(' scalar mixing on ') @@ -1260,7 +1259,7 @@ subroutine scalar_dissipation_3d_les( cellStart, cellEnd, cellSolveStart, cellSo end if - if(debug_dissipation) call mpas_log_write(' exiting scalar_dissipation_3d ') + DEBUG_WRITE(' exiting scalar_dissipation_3d ') end subroutine scalar_dissipation_3d_les From 4b8771878107e0d6506fd2d26c1ada100b962609 Mon Sep 17 00:00:00 2001 From: Michael Duda Date: Fri, 5 Dec 2025 17:46:44 -0700 Subject: [PATCH 094/146] Use integer comparisons for config_les_model and config_les_surface Rather than comparing strings, set up a local integer variable to indicate which option was chosen at runtime. --- .../dynamics/mpas_atm_dissipation_models.F | 124 ++++++++++++++++-- 1 file changed, 111 insertions(+), 13 deletions(-) diff --git a/src/core_atmosphere/dynamics/mpas_atm_dissipation_models.F b/src/core_atmosphere/dynamics/mpas_atm_dissipation_models.F index 4fd7000acd..f32cd70d98 100644 --- a/src/core_atmosphere/dynamics/mpas_atm_dissipation_models.F +++ b/src/core_atmosphere/dynamics/mpas_atm_dissipation_models.F @@ -29,6 +29,14 @@ module mpas_atm_dissipation_models real (kind=RKIND), parameter :: c_k = 0.25 + integer, parameter :: LES_MODEL_NONE = 0, & + LES_MODEL_3D_SMAGORINSKY = 1, & + LES_MODEL_PROGNOSTIC_15_ORDER = 2 + + integer, parameter :: LES_SURFACE_NONE = 0, & + LES_SURFACE_SPECIFIED = 1, & + LES_SURFACE_VARYING = 2 + contains @@ -160,11 +168,33 @@ subroutine les_models( config_les_model, config_les_surface, dynamics_substep, e logical, parameter :: test_tke=.true. ! 
real (kind=RKIND), parameter :: epsilon_bv = 1.e-06 + integer :: les_model_opt, les_surface_opt + DEBUG_WRITE(' begin les_models ') DEBUG_WRITE(' les scheme is '//trim(config_les_model)) DEBUG_WRITE(' les surface scheme is '//trim(config_les_surface)) + if (trim(config_les_model) == 'none') then + les_model_opt = LES_MODEL_NONE + else if (trim(config_les_model) == '3d_smagorinsky') then + les_model_opt = LES_MODEL_3D_SMAGORINSKY + else if (trim(config_les_model) == 'prognostic_1.5_order') then + les_model_opt = LES_MODEL_PROGNOSTIC_15_ORDER + else + ! Error + end if + + if (trim(config_les_surface) == 'none') then + les_surface_opt = LES_SURFACE_NONE + else if (trim(config_les_surface) == 'specified') then + les_surface_opt = LES_SURFACE_SPECIFIED + else if (trim(config_les_surface) == 'varying') then + les_surface_opt = LES_SURFACE_VARYING + else + ! Error + end if + pr_inv = 1./prandtl ! set up coefficients for 4th-order horizontal background filter @@ -240,7 +270,7 @@ subroutine les_models( config_les_model, config_les_surface, dynamics_substep, e d_23(k) = dwdy(k) + dvdz(k) end do - if (config_les_model == "3d_smagorinsky") then + if (les_model_opt == LES_MODEL_3D_SMAGORINSKY) then do k=1, nVertLevels def2 = 0.5*(d_11(k)**2 + d_22(k)**2 + d_33(k)**2) + d_12(k)**2 + d_13(k)**2 + d_23(k)**2 @@ -251,7 +281,7 @@ subroutine les_models( config_les_model, config_les_surface, dynamics_substep, e ! eddy_visc_vert(k,iCell) = eddy_visc_horz(k,iCell) end do - else if (config_les_model == "prognostic_1.5_order") then + else if (les_model_opt == LES_MODEL_PROGNOSTIC_15_ORDER) then do k=1,nVertLevels ! bound the tke here, currently hardwired ! 
scalars(index_tke,k,iCell) = max(0.,min(100.,scalars(index_tke,k,iCell))) @@ -509,12 +539,35 @@ subroutine u_dissipation_3d( edgeStart, edgeEnd, edgeSolveStart, edgeSolveEnd, v real (kind=RKIND) :: velocity_magnitude real (kind=RKIND) :: tau_12_factor + integer :: les_model_opt, les_surface_opt + + DEBUG_WRITE(' begin u_dissipation_3d ') DEBUG_WRITE(' les model is '//trim(config_les_model)) DEBUG_WRITE(' les surface is '//trim(config_les_surface)) DEBUG_WRITE(' 4th order hyperviscosity is $r ' COMMA realArgs=(/h_mom_eddy_visc4/)) DEBUG_WRITE(' 4th order divergence factor is $r ' COMMA realArgs=(/config_del4u_div_factor/)) + if (trim(config_les_model) == 'none') then + les_model_opt = LES_MODEL_NONE + else if (trim(config_les_model) == '3d_smagorinsky') then + les_model_opt = LES_MODEL_3D_SMAGORINSKY + else if (trim(config_les_model) == 'prognostic_1.5_order') then + les_model_opt = LES_MODEL_PROGNOSTIC_15_ORDER + else + ! Error + end if + + if (trim(config_les_surface) == 'none') then + les_surface_opt = LES_SURFACE_NONE + else if (trim(config_les_surface) == 'specified') then + les_surface_opt = LES_SURFACE_SPECIFIED + else if (trim(config_les_surface) == 'varying') then + les_surface_opt = LES_SURFACE_VARYING + else + ! Error + end if + !$OMP BARRIER ! del^4 horizontal filter. We compute this as del^2 ( del^2 (u) ). @@ -522,7 +575,7 @@ subroutine u_dissipation_3d( edgeStart, edgeEnd, edgeSolveStart, edgeSolveEnd, v delsq_u(1:nVertLevels,edgeStart:edgeEnd) = 0.0 tau_12_factor = 0.0 - if(config_les_model /= 'none') tau_12_factor = 1.0 + if(les_model_opt /= LES_MODEL_NONE) tau_12_factor = 1.0 do iEdge=edgeStart,edgeEnd cell1 = cellsOnEdge(1,iEdge) @@ -674,7 +727,7 @@ subroutine u_dissipation_3d( edgeStart, edgeEnd, edgeSolveStart, edgeSolveEnd, v end if ! 
vertical mixing of horizontal momentum for les formulation - if ( config_les_model /= "none") then + if ( les_model_opt /= LES_MODEL_NONE ) then do iEdge=edgeSolveStart,edgeSolveEnd @@ -696,11 +749,11 @@ subroutine u_dissipation_3d( edgeStart, edgeEnd, edgeSolveStart, edgeSolveEnd, v turb_vflux(k) = - rho_k_at_w*zz_at_w*rdzu(k)*(u(k,iEdge)-u(k-1,iEdge)) end do - if( config_les_surface == "specified" ) then + if( les_surface_opt == LES_SURFACE_SPECIFIED ) then velocity_magnitude = sqrt(u(1,iEdge)**2 + v(1,iEdge)**2) turb_vflux(1) = -rho_edge(1,iEdge)*config_surface_drag_coefficient*u(1,iEdge)*velocity_magnitude turb_vflux(nVertLevels+1) = turb_vflux(nVertLevels) - else if ( config_les_surface == "varying" ) then + else if ( les_surface_opt == LES_SURFACE_VARYING ) then ust_edge = 0.5*(ustm(cell1) + ustm(cell2)) velocity_magnitude = max(sqrt(u(1,iEdge)**2 + v(1,iEdge)**2),0.1) turb_vflux(1) = -rho_edge(1,iEdge)*ust_edge*ust_edge*(u(1,iEdge)/velocity_magnitude) @@ -791,6 +844,9 @@ subroutine w_dissipation_3d( cellStart, cellEnd, cellSolveStart, cellSolveEnd, integer :: cell1, cell2, iEdge, iCell, i, k real (kind=RKIND) :: r_areaCell, edge_sign, w_turb_flux + integer :: les_model_opt, les_surface_opt + + ! !OMP BARRIER why is this openmp barrier here??? ! del^4 horizontal filter. We compute this as del^2 ( del^2 (w) ). @@ -804,6 +860,26 @@ subroutine w_dissipation_3d( cellStart, cellEnd, cellSolveStart, cellSolveEnd, DEBUG_WRITE(' les surface is '//trim(config_les_surface)) DEBUG_WRITE(' 4th order hyperviscosity is $r ' COMMA realArgs=(/h_mom_eddy_visc4/)) + if (trim(config_les_model) == 'none') then + les_model_opt = LES_MODEL_NONE + else if (trim(config_les_model) == '3d_smagorinsky') then + les_model_opt = LES_MODEL_3D_SMAGORINSKY + else if (trim(config_les_model) == 'prognostic_1.5_order') then + les_model_opt = LES_MODEL_PROGNOSTIC_15_ORDER + else + ! 
Error + end if + + if (trim(config_les_surface) == 'none') then + les_surface_opt = LES_SURFACE_NONE + else if (trim(config_les_surface) == 'specified') then + les_surface_opt = LES_SURFACE_SPECIFIED + else if (trim(config_les_surface) == 'varying') then + les_surface_opt = LES_SURFACE_VARYING + else + ! Error + end if + delsq_w(1:nVertLevels,cellStart:cellEnd) = 0.0 do iCell=cellStart,cellEnd @@ -865,7 +941,7 @@ subroutine w_dissipation_3d( cellStart, cellEnd, cellSolveStart, cellSolveEnd, end if - if ( config_les_model /= "none") then + if ( les_model_opt /= LES_MODEL_NONE ) then do iCell = cellSolveStart,cellSolveEnd ! vertical mixing for each column ! compute turbulent fluxes @@ -988,12 +1064,34 @@ subroutine scalar_dissipation_3d_les( cellStart, cellEnd, cellSolveStart, cellSo real (kind=RKIND) :: moisture_flux, heat_flux, theta_m_flux real (kind=RKIND) :: qv_cell, theta_m_cell, theta_cell + integer :: les_model_opt, les_surface_opt + DEBUG_WRITE(' begin scalar_dissipation_3d ') DEBUG_WRITE(' les model is '//trim(config_les_model)) DEBUG_WRITE(' les surface is '//trim(config_les_surface)) DEBUG_WRITE(' 4th order hyperviscosity is $r ' COMMA realArgs=(/h_theta_eddy_visc4/)) + if (trim(config_les_model) == 'none') then + les_model_opt = LES_MODEL_NONE + else if (trim(config_les_model) == '3d_smagorinsky') then + les_model_opt = LES_MODEL_3D_SMAGORINSKY + else if (trim(config_les_model) == 'prognostic_1.5_order') then + les_model_opt = LES_MODEL_PROGNOSTIC_15_ORDER + else + ! Error + end if + + if (trim(config_les_surface) == 'none') then + les_surface_opt = LES_SURFACE_NONE + else if (trim(config_les_surface) == 'specified') then + les_surface_opt = LES_SURFACE_SPECIFIED + else if (trim(config_les_surface) == 'varying') then + les_surface_opt = LES_SURFACE_VARYING + else + ! Error + end if + if( mix_scalars .and. 
(dynamics_substep == 1)) call mpas_log_write(' scalar mixing on ') delsq_theta(1:nVertLevels,cellStart:cellEnd) = 0.0 @@ -1149,7 +1247,7 @@ subroutine scalar_dissipation_3d_les( cellStart, cellEnd, cellSolveStart, cellSo end if - if ( config_les_model /= "none") then + if ( les_model_opt /= LES_MODEL_NONE ) then do iCell = cellSolveStart,cellSolveEnd ! vertical mixing for each column ! compute turbulent fluxes @@ -1157,7 +1255,7 @@ subroutine scalar_dissipation_3d_les( cellStart, cellEnd, cellSolveStart, cellSo turb_vflux(nVertlevels+1) = 0. ! no turbulent flux out of the domain turb_vflux(1) = 0. ! lower bc flux handled where ??? - if ( config_les_model == "3d_smagorinsky") then + if ( les_model_opt == LES_MODEL_3D_SMAGORINSKY ) then do k=2,nVertLevels prandtl_1d_inverse(k) = prandtl_inv end do @@ -1194,16 +1292,16 @@ subroutine scalar_dissipation_3d_les( cellStart, cellEnd, cellSolveStart, cellSo ! test boundary conditions for supercell and les test cases - if( config_les_surface == "specified" .or. config_les_surface == "varying" ) then + if( les_surface_opt == LES_SURFACE_SPECIFIED .or. les_surface_opt == LES_SURFACE_VARYING ) then - if( config_les_surface == "specified" ) then + if( les_surface_opt == LES_SURFACE_SPECIFIED ) then moisture_flux = config_surface_moisture_flux heat_flux = config_surface_heat_flux ! place holder routine for time-varying specified ! call flux_les_sas( heat_flux, moisture_flux, time_of_day_seconds ) - else if ( config_les_surface == "varying" ) then + else if ( les_surface_opt == LES_SURFACE_VARYING ) then heat_flux = hfx(iCell)/rho_zz(1,iCell)/cp moisture_flux = qfx(iCell)/rho_zz(1,iCell) endif @@ -1244,7 +1342,7 @@ subroutine scalar_dissipation_3d_les( cellStart, cellEnd, cellSolveStart, cellSo end do end do - if( config_les_surface == "specified" .or. config_les_surface == "varying" ) turb_vflux_scalars(index_qv,1) = moisture_flux ! lower b.c. for qv + if( les_surface_opt == LES_SURFACE_SPECIFIED .or. 
les_surface_opt == LES_SURFACE_VARYING ) turb_vflux_scalars(index_qv,1) = moisture_flux ! lower b.c. for qv do k=1,nVertLevels do iScalar=1,num_scalars From 8a61b88b31f0312db487c3705999a230bbb8cf4b Mon Sep 17 00:00:00 2001 From: Michael Duda Date: Tue, 20 Jan 2026 17:15:05 -0700 Subject: [PATCH 095/146] Add 'lbc_tke' to the 'lbc_scalars' var_array in atmosphere core Registry.xml --- src/core_atmosphere/Registry.xml | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/src/core_atmosphere/Registry.xml b/src/core_atmosphere/Registry.xml index 2264cf1fd5..68b0952e9b 100644 --- a/src/core_atmosphere/Registry.xml +++ b/src/core_atmosphere/Registry.xml @@ -1722,9 +1722,6 @@ description="Rain number concentration" packages="mp_thompson_in;mp_thompson_aers_in"/> - - @@ -1736,6 +1733,9 @@ + + #endif @@ -2104,7 +2104,7 @@ packages="mp_thompson_aers_in"/> + description="Tendency of tke multiplied by dry air density divided by d(zeta)/dz"/> #endif @@ -2183,6 +2183,10 @@ + + From 94d24886d6bd29262b253e09ce3512bb3efd5f29 Mon Sep 17 00:00:00 2001 From: Michael Duda Date: Wed, 10 Dec 2025 15:15:35 -0700 Subject: [PATCH 096/146] Initial pass at porting mpas_atm_dissipation_models to GPUs with OpenACC Results with the NVHPC 25.9 compilers are bit-identical between CPU and GPU runs when compiling with the additional flags -Mnofma -gpu=math_uniform . 
--- .../dynamics/mpas_atm_dissipation_models.F | 314 ++++++++++++++++-- .../dynamics/mpas_atm_time_integration.F | 97 +++++- 2 files changed, 369 insertions(+), 42 deletions(-) diff --git a/src/core_atmosphere/dynamics/mpas_atm_dissipation_models.F b/src/core_atmosphere/dynamics/mpas_atm_dissipation_models.F index f32cd70d98..256aec6019 100644 --- a/src/core_atmosphere/dynamics/mpas_atm_dissipation_models.F +++ b/src/core_atmosphere/dynamics/mpas_atm_dissipation_models.F @@ -71,12 +71,25 @@ subroutine smagorinsky_2d( kdiff, u, v, c_s, config_len_disp, DEBUG_WRITE(' begin smagorinsky_2d ') + !$acc enter data create(dudx, dudy, dvdx, dvdy) + !$acc enter data create(d_11, d_22, d_12) + + !$acc parallel default(present) + + !$acc loop gang worker private(dudx, dudy, dvdx, dvdy, d_11, d_22, d_12) do iCell = cellStart,cellEnd - dudx(1:nVertLevels) = 0.0 - dudy(1:nVertLevels) = 0.0 - dvdx(1:nVertLevels) = 0.0 - dvdy(1:nVertLevels) = 0.0 + + !$acc loop vector + do k = 1, nVertLevels + dudx(k) = 0.0_RKIND + dudy(k) = 0.0_RKIND + dvdx(k) = 0.0_RKIND + dvdy(k) = 0.0_RKIND + end do + + !$acc loop seq do iEdge=1,nEdgesOnCell(iCell) + !$acc loop vector do k=1,nVertLevels dudx(k) = dudx(k) + deformation_coef_c2(iEdge,iCell)*u(k,EdgesOnCell(iEdge,iCell)) & - deformation_coef_cs(iEdge,iCell)*v(k,EdgesOnCell(iEdge,iCell)) @@ -90,6 +103,7 @@ subroutine smagorinsky_2d( kdiff, u, v, c_s, config_len_disp, end do !DIR$ IVDEP + !$acc loop vector do k=1, nVertLevels ! here is the Smagorinsky formulation, ! followed by imposition of an upper bound on the eddy viscosity @@ -101,6 +115,11 @@ subroutine smagorinsky_2d( kdiff, u, v, c_s, config_len_disp, end do end do + !$acc end parallel + + !$acc exit data delete(dudx, dudy, dvdx, dvdy) + !$acc exit data delete(d_11, d_22, d_12) + h_mom_eddy_visc4 = config_visc4_2dsmag * config_len_disp**3 h_theta_eddy_visc4 = h_mom_eddy_visc4 @@ -195,6 +214,9 @@ subroutine les_models( config_les_model, config_les_surface, dynamics_substep, e ! 
Error end if + !$acc enter data create(dudx, dudy, dvdx, dvdy, dwdx, dwdy, dudz, dvdz, dwdz) + !$acc enter data create(d_11, d_22, d_33, d_12, d_13, d_23) + pr_inv = 1./prandtl ! set up coefficients for 4th-order horizontal background filter @@ -202,25 +224,37 @@ subroutine les_models( config_les_model, config_les_surface, dynamics_substep, e h_mom_eddy_visc4 = config_visc4_2dsmag * config_len_disp**3 h_theta_eddy_visc4 = h_mom_eddy_visc4 + !$acc parallel default(present) + + !$acc loop gang worker private(dudx, dudy, dvdx, dvdy, dwdx, dwdy, dudz, dvdz, dwdz, d_11, d_22, d_33, d_12, d_13, d_23) do iCell = cellStart,cellEnd - dudx(1:nVertLevels) = 0.0 - dudy(1:nVertLevels) = 0.0 - dvdx(1:nVertLevels) = 0.0 - dvdy(1:nVertLevels) = 0.0 - dwdx(1:nVertLevels+1) = 0.0 - dwdy(1:nVertLevels+1) = 0.0 + !$acc loop vector + do k = 1, nVertLevels + dudx(k) = 0.0_RKIND + dudy(k) = 0.0_RKIND + dvdx(k) = 0.0_RKIND + dvdy(k) = 0.0_RKIND - dudz(1:nVertLevels) = 0.0 - dvdz(1:nVertLevels) = 0.0 - dwdz(1:nVertLevels) = 0.0 + dudz(k) = 0.0_RKIND + dvdz(k) = 0.0_RKIND + dwdz(k) = 0.0_RKIND + end do + + !$acc loop vector + do k = 1, nVertLevels+1 + dwdx(k) = 0.0_RKIND + dwdy(k) = 0.0_RKIND + end do + !$acc loop seq do iEdge=1,nEdgesOnCell(iCell) ie = EdgesOnCell(iEdge,iCell) cell1 = cellsOnEdge(1,ie) cell2 = cellsOnEdge(2,ie) + !$acc loop vector do k=1,nVertLevels dudx(k) = dudx(k) + deformation_coef_c2(iEdge,iCell)*u(k,ie) & - deformation_coef_cs(iEdge,iCell)*v(k,ie) @@ -232,6 +266,7 @@ subroutine les_models( config_les_model, config_les_surface, dynamics_substep, e + deformation_coef_cs(iEdge,iCell)*v(k,ie) end do + !$acc loop vector do k=1,nVertLevels+1 wk = 0.5*(w(k,cell1)+w(k,cell2)) dwdx(k) = dwdx(k) + deformation_coef_c(iEdge,iCell)*wk @@ -240,11 +275,13 @@ subroutine les_models( config_les_model, config_les_surface, dynamics_substep, e end do + !$acc loop vector do k=1,nVertLevels rdz = 1./(zgrid(k+1,iCell)-zgrid(k,iCell)) dwdz(k) = (w(k+1,iCell)-w(k,iCell))*rdz end do + !$acc 
loop vector do k=2,nVertLevels-1 rdz = 1./(zgrid(k+2,iCell)+zgrid(k+1,iCell)-zgrid(k,iCell)-zgrid(k-1,iCell)) dudz(k) = (uCell(k+1,iCell)-uCell(k-1,iCell))*rdz @@ -261,6 +298,7 @@ subroutine les_models( config_les_model, config_les_surface, dynamics_substep, e dudz(k+1) = (uCell(k+1,iCell)-uCell(k,iCell))*rdz dvdz(k+1) = (vCell(k+1,iCell)-vCell(k,iCell))*rdz + !$acc loop vector do k=1, nVertLevels d_11(k) = 2.*dudx(k) d_22(k) = 2.*dvdy(k) @@ -272,6 +310,7 @@ subroutine les_models( config_les_model, config_les_surface, dynamics_substep, e if (les_model_opt == LES_MODEL_3D_SMAGORINSKY) then + !$acc loop vector do k=1, nVertLevels def2 = 0.5*(d_11(k)**2 + d_22(k)**2 + d_33(k)**2) + d_12(k)**2 + d_13(k)**2 + d_23(k)**2 eddy_visc_horz(k,iCell) = (c_s * config_len_disp)**2 * sqrt(max(0.,def2 - pr_inv*bv_freq2(k,iCell))) @@ -283,11 +322,13 @@ subroutine les_models( config_les_model, config_les_surface, dynamics_substep, e else if (les_model_opt == LES_MODEL_PROGNOSTIC_15_ORDER) then + !$acc loop vector do k=1,nVertLevels ! bound the tke here, currently hardwired ! scalars(index_tke,k,iCell) = max(0.,min(100.,scalars(index_tke,k,iCell))) scalars(index_tke,k,iCell) = max(0.,scalars(index_tke,k,iCell)) end do + !$acc loop vector do k=1,nVertLevels delta_z = zgrid(k+1,iCell)-zgrid(k,iCell) @@ -351,12 +392,17 @@ subroutine les_models( config_les_model, config_les_surface, dynamics_substep, e else - call mpas_log_write(' in les_models, no les scheme for '//trim(config_les_model), messageType=MPAS_LOG_CRIT) +!MGD call mpas_log_write(' in les_models, no les scheme for '//trim(config_les_model), messageType=MPAS_LOG_CRIT) end if ! end of config_les_model test end do ! 
loop over all owned cells (columns) + !$acc end parallel + + !$acc exit data delete(dudx, dudy, dvdx, dvdy, dwdx, dwdy, dudz, dvdz, dwdz) + !$acc exit data delete(d_11, d_22, d_33, d_12, d_13, d_23) + DEBUG_WRITE(' les_models ') end subroutine les_models @@ -381,10 +427,18 @@ subroutine calculate_n2( bn2, theta_m, exner, pressure_b, pp, zgrid, scalars, in real (kind=RKIND), dimension(nVertLevels) :: theta, qvsw, temp, coefa logical :: dry_bv_frequency + DEBUG_WRITE(' begin BV frequency calculations ') + !$acc enter data create(theta, temp, qvsw, coefa) + + !$acc parallel default(present) + + !$acc loop gang worker private(theta, temp, qvsw, coefa) do iCell = cellStart,cellEnd + !DIR$ IVDEP + !$acc loop vector do k=1, nVertLevels theta(k) = theta_m(k,iCell) / (1._RKIND + rvord * scalars(index_qv,k,iCell)) @@ -401,6 +455,7 @@ subroutine calculate_n2( bn2, theta_m, exner, pressure_b, pp, zgrid, scalars, in end do + !$acc loop vector do k=2, nVertLevels-1 dz = 0.5 * (zgrid(k+2,iCell)+zgrid(k+1,iCell)) - 0.5 * (zgrid(k,iCell)+zgrid(k-1,iCell)) rdz = 1.0/dz @@ -441,6 +496,10 @@ subroutine calculate_n2( bn2, theta_m, exner, pressure_b, pp, zgrid, scalars, in end do + !$acc end parallel + + !$acc exit data delete(theta, temp, qvsw, coefa) + DEBUG_WRITE(' exiting BV frequency calculations ') end subroutine calculate_n2 @@ -573,10 +632,15 @@ subroutine u_dissipation_3d( edgeStart, edgeEnd, edgeSolveStart, edgeSolveEnd, v ! del^4 horizontal filter. We compute this as del^2 ( del^2 (u) ). ! First, storage to hold the result from the first del^2 computation. 
- delsq_u(1:nVertLevels,edgeStart:edgeEnd) = 0.0 + !$acc enter data create(u_mix) + !$acc enter data create(turb_vflux) + + !$acc parallel default(present) + tau_12_factor = 0.0 if(les_model_opt /= LES_MODEL_NONE) tau_12_factor = 1.0 + !$acc loop gang worker do iEdge=edgeStart,edgeEnd cell1 = cellsOnEdge(1,iEdge) cell2 = cellsOnEdge(2,iEdge) @@ -585,7 +649,13 @@ subroutine u_dissipation_3d( edgeStart, edgeEnd, edgeSolveStart, edgeSolveEnd, v r_dc = invDcEdge(iEdge) r_dv = min(invDvEdge(iEdge), 4*invDcEdge(iEdge)) + !$acc loop vector + do k = 1, nVertLevels + delsq_u(k,iEdge) = 0.0_RKIND + end do + !DIR$ IVDEP + !$acc loop vector do k=1,nVertLevels ! Compute diffusion, computed as \nabla divergence - k \times \nabla vorticity @@ -607,35 +677,61 @@ subroutine u_dissipation_3d( edgeStart, edgeEnd, edgeSolveStart, edgeSolveEnd, v end do end do + !$acc end parallel + if (h_mom_eddy_visc4 > 0.0) then ! 4th order mixing is active !$OMP BARRIER + !$acc parallel default(present) + + !$acc loop gang worker do iVertex=vertexStart,vertexEnd - delsq_vorticity(1:nVertLevels,iVertex) = 0.0 + !$acc loop vector + do k = 1, nVertLevels + delsq_vorticity(k,iVertex) = 0.0_RKIND + end do + + !$acc loop seq do i=1,vertexDegree iEdge = edgesOnVertex(i,iVertex) edge_sign = invAreaTriangle(iVertex) * dcEdge(iEdge) * edgesOnVertex_sign(i,iVertex) + + !$acc loop vector do k=1,nVertLevels delsq_vorticity(k,iVertex) = delsq_vorticity(k,iVertex) + edge_sign * delsq_u(k,iEdge) end do end do end do + !$acc loop gang worker do iCell=cellStart,cellEnd - delsq_divergence(1:nVertLevels,iCell) = 0.0 + !$acc loop vector + do k = 1, nVertLevels + delsq_divergence(k,iCell) = 0.0_RKIND + end do + r = invAreaCell(iCell) + + !$acc loop seq do i=1,nEdgesOnCell(iCell) iEdge = edgesOnCell(i,iCell) edge_sign = r * dvEdge(iEdge) * edgesOnCell_sign(i,iCell) + + !$acc loop vector do k=1,nVertLevels delsq_divergence(k,iCell) = delsq_divergence(k,iCell) + edge_sign * delsq_u(k,iEdge) end do end do end do + !$acc end 
parallel + !$OMP BARRIER + !$acc parallel default(present) + + !$acc loop gang worker do iEdge=edgeSolveStart,edgeSolveEnd cell1 = cellsOnEdge(1,iEdge) cell2 = cellsOnEdge(2,iEdge) @@ -647,6 +743,7 @@ subroutine u_dissipation_3d( edgeStart, edgeEnd, edgeSolveStart, edgeSolveEnd, v r_dv = u_mix_scale * min(invDvEdge(iEdge), 4*invDcEdge(iEdge)) !DIR$ IVDEP + !$acc loop vector do k=1,nVertLevels ! Compute diffusion, computed as \nabla divergence - k \times \nabla vorticity @@ -663,6 +760,8 @@ subroutine u_dissipation_3d( edgeStart, edgeEnd, edgeSolveStart, edgeSolveEnd, v end do end do + !$acc end parallel + end if ! 4th order mixing is active ! @@ -672,11 +771,15 @@ subroutine u_dissipation_3d( edgeStart, edgeEnd, edgeSolveStart, edgeSolveEnd, v if (config_mix_full) then ! mix full state + !$acc parallel default(present) + + !$acc loop gang worker do iEdge=edgeSolveStart,edgeSolveEnd cell1 = cellsOnEdge(1,iEdge) cell2 = cellsOnEdge(2,iEdge) + !$acc loop vector do k=2,nVertLevels-1 z1 = 0.5*(zgrid(k-1,cell1)+zgrid(k-1,cell2)) @@ -694,18 +797,25 @@ subroutine u_dissipation_3d( edgeStart, edgeEnd, edgeSolveStart, edgeSolveEnd, v end do end do + !$acc end parallel + else ! idealized cases where we mix on the perturbation from the initial 1-D state + !$acc parallel default(present) + + !$acc loop gang worker private(u_mix) do iEdge=edgeSolveStart,edgeSolveEnd cell1 = cellsOnEdge(1,iEdge) cell2 = cellsOnEdge(2,iEdge) + !$acc loop vector do k=1,nVertLevels u_mix(k) = u(k,iEdge) - u_init(k) * cos( angleEdge(iEdge) ) & + v_init(k) * sin( angleEdge(iEdge) ) end do + !$acc loop vector do k=2,nVertLevels-1 z1 = 0.5*(zgrid(k-1,cell1)+zgrid(k-1,cell2)) @@ -723,19 +833,25 @@ subroutine u_dissipation_3d( edgeStart, edgeEnd, edgeSolveStart, edgeSolveEnd, v end do end do + !$acc end parallel + end if ! mix perturbation state end if ! 
vertical mixing of horizontal momentum for les formulation if ( les_model_opt /= LES_MODEL_NONE ) then + !$acc parallel default(present) + + !$acc loop gang worker private(turb_vflux) do iEdge=edgeSolveStart,edgeSolveEnd cell1 = cellsOnEdge(1,iEdge) cell2 = cellsOnEdge(2,iEdge) - turb_vflux(nVertlevels+1) = 0. ! no turbulent flux out of the domain - turb_vflux(1) = 0. ! lower bc flux handled where ??? + turb_vflux(nVertlevels+1) = 0.0_RKIND ! no turbulent flux out of the domain + turb_vflux(1) = 0.0_RKIND ! lower bc flux handled where ??? + !$acc loop vector do k=2,nVertLevels rho_k_cell1 = fzm(k)*rho_zz(k ,cell1)*zz(k ,cell1)*eddy_visc_vert(k ,cell1) & +fzp(k)*rho_zz(k-1,cell1)*zz(k-1,cell1)*eddy_visc_vert(k-1,cell1) @@ -766,14 +882,20 @@ subroutine u_dissipation_3d( edgeStart, edgeEnd, edgeSolveStart, edgeSolveEnd, v ! end test conditions end if + !$acc loop vector do k=1,nVertLevels tend_u_euler(k,iEdge) = tend_u_euler(k,iEdge) - rdzw(k)*(turb_vflux(k+1)-turb_vflux(k)) end do end do + !$acc end parallel + end if + !$acc exit data delete(turb_vflux) + !$acc exit data delete(u_mix) + DEBUG_WRITE(' exiting u_dissipation_3d ') end subroutine u_dissipation_3d @@ -880,11 +1002,26 @@ subroutine w_dissipation_3d( cellStart, cellEnd, cellSolveStart, cellSolveEnd, ! 
Error end if - delsq_w(1:nVertLevels,cellStart:cellEnd) = 0.0 + !$acc enter data create(turb_vflux) + !$acc parallel default(present) + + !$acc loop gang worker do iCell=cellStart,cellEnd - tend_w_euler(1:nVertLevels+1,iCell) = 0.0 + + !$acc loop vector + do k = 1, nVertLevels + delsq_w(k,iCell) = 0.0_RKIND + end do + + !$acc loop vector + do k = 1, nVertLevels+1 + tend_w_euler(k,iCell) = 0.0_RKIND + end do + r_areaCell = invAreaCell(iCell) + + !$acc loop seq do i=1,nEdgesOnCell(iCell) iEdge = edgesOnCell(i,iCell) @@ -894,6 +1031,7 @@ subroutine w_dissipation_3d( cellStart, cellEnd, cellSolveStart, cellSolveEnd, cell2 = cellsOnEdge(2,iEdge) !DIR$ IVDEP + !$acc loop vector do k=2,nVertLevels w_turb_flux = edge_sign*(rho_edge(k,iEdge)+rho_edge(k-1,iEdge))*(w(k,cell2) - w(k,cell1)) @@ -906,12 +1044,20 @@ subroutine w_dissipation_3d( cellStart, cellEnd, cellSolveStart, cellSolveEnd, end do end do + !$acc end parallel + !$OMP BARRIER if (h_mom_eddy_visc4 > 0.0) then ! 4th order mixing is active + !$acc parallel default(present) + + !$acc loop gang worker do iCell=cellSolveStart,cellSolveEnd ! Technically updating fewer cells than before... + r_areaCell = h_mom_eddy_visc4 * invAreaCell(iCell) + + !$acc loop seq do i=1,nEdgesOnCell(iCell) iEdge = edgesOnCell(i,iCell) cell1 = cellsOnEdge(1,iEdge) @@ -919,6 +1065,7 @@ subroutine w_dissipation_3d( cellStart, cellEnd, cellSolveStart, cellSolveEnd, edge_sign = meshScalingDel4(iEdge)*r_areaCell*dvEdge(iEdge)*edgesOnCell_sign(i,iCell) * invDcEdge(iEdge) + !$acc loop vector do k=2,nVertLevels tend_w_euler(k,iCell) = tend_w_euler(k,iCell) - edge_sign * (delsq_w(k,cell2) - delsq_w(k,cell1)) end do @@ -926,12 +1073,18 @@ subroutine w_dissipation_3d( cellStart, cellEnd, cellSolveStart, cellSolveEnd, end do end do + !$acc end parallel + end if ! 4th order mixing is active if ( v_mom_eddy_visc2 > 0.0 ) then ! 
vertical mixing + !$acc parallel default(present) + + !$acc loop gang worker do iCell=cellSolveStart,cellSolveEnd !DIR$ IVDEP + !$acc loop vector do k=2,nVertLevels tend_w_euler(k,iCell) = tend_w_euler(k,iCell) + v_mom_eddy_visc2*0.5*(rho_zz(k,iCell)+rho_zz(k-1,iCell))*( & (w(k+1,iCell)-w(k ,iCell))*rdzw(k) & @@ -939,12 +1092,19 @@ subroutine w_dissipation_3d( cellStart, cellEnd, cellSolveStart, cellSolveEnd, end do end do + !$acc end parallel + end if if ( les_model_opt /= LES_MODEL_NONE ) then + !$acc parallel default(present) + + !$acc loop gang worker private(turb_vflux) do iCell = cellSolveStart,cellSolveEnd ! vertical mixing for each column ! compute turbulent fluxes + + !$acc loop vector do k=1,nVertLevels turb_vflux(k) = - rho_zz(k,iCell)*eddy_visc_vert(k,iCell)*zz(k,iCell)*( & 2.0*zz(k,iCell)*rdzw(k)*(w(k+1,iCell)-w(k,iCell)) & @@ -953,14 +1113,19 @@ subroutine w_dissipation_3d( cellStart, cellEnd, cellSolveStart, cellSolveEnd, turb_vflux(nVertLevels+1) = 0.0 + !$acc loop vector do k=2,nVertLevels tend_w_euler(k,iCell) = tend_w_euler(k,iCell) & - rdzu(k)*(turb_vflux(k)-turb_vflux(k-1)) end do end do + !$acc end parallel + end if + !$acc exit data delete(turb_vflux) + DEBUG_WRITE(' exiting w_dissipation_3d ') end subroutine w_dissipation_3d @@ -1059,7 +1224,7 @@ subroutine scalar_dissipation_3d_les( cellStart, cellEnd, cellSolveStart, cellSo real (kind=RKIND) :: z1, z2, z3, z4, zm, z0, zp real (kind=RKIND), dimension(nVertLevels+1) :: turb_vflux, prandtl_1d_inverse real (kind=RKIND), dimension(num_scalars,nVertLevels+1) :: turb_vflux_scalars - real (kind=RKIND) :: rho_k_at_w, zz_at_w + real (kind=RKIND), dimension(nVertLevels) :: rho_k_at_w, zz_at_w real (kind=RKIND) :: moisture_flux, heat_flux, theta_m_flux real (kind=RKIND) :: qv_cell, theta_m_cell, theta_cell @@ -1094,11 +1259,23 @@ subroutine scalar_dissipation_3d_les( cellStart, cellEnd, cellSolveStart, cellSo if( mix_scalars .and. 
(dynamics_substep == 1)) call mpas_log_write(' scalar mixing on ') - delsq_theta(1:nVertLevels,cellStart:cellEnd) = 0.0 + !$acc enter data create(turb_vflux_scalars) + !$acc enter data create(turb_vflux, prandtl_1d_inverse) + !$acc parallel default(present) + + !$acc loop gang worker do iCell=cellStart,cellEnd - tend_theta_euler(1:nVertLevels,iCell) = 0.0 + + !$acc loop vector + do k = 1, nVertLevels + delsq_theta(k,iCell) = 0.0_RKIND + tend_theta_euler(k,iCell) = 0.0_RKIND + end do + r_areaCell = invAreaCell(iCell) + + !$acc loop seq do i=1,nEdgesOnCell(iCell) iEdge = edgesOnCell(i,iCell) edge_sign = r_areaCell*edgesOnCell_sign(i,iCell) * dvEdge(iEdge) * invDcEdge(iEdge) @@ -1107,6 +1284,7 @@ subroutine scalar_dissipation_3d_les( cellStart, cellEnd, cellSolveStart, cellSo cell2 = cellsOnEdge(2,iEdge) !DIR$ IVDEP + !$acc loop vector do k=1,nVertLevels ! we are computing the Smagorinsky filter at more points than needed here so as to pick up the delsq_theta for 4th order filter below. @@ -1121,12 +1299,19 @@ subroutine scalar_dissipation_3d_les( cellStart, cellEnd, cellSolveStart, cellSo end do end do + !$acc end parallel + !$OMP BARRIER if (h_theta_eddy_visc4 > 0.0) then ! 4th order mixing is active + !$acc parallel default(present) + + !$acc loop gang worker do iCell=cellSolveStart,cellSolveEnd ! Technically updating fewer cells than before... r_areaCell = h_theta_eddy_visc4 * prandtl_inv * invAreaCell(iCell) + + !$acc loop seq do i=1,nEdgesOnCell(iCell) iEdge = edgesOnCell(i,iCell) @@ -1135,23 +1320,34 @@ subroutine scalar_dissipation_3d_les( cellStart, cellEnd, cellSolveStart, cellSo cell1 = cellsOnEdge(1,iEdge) cell2 = cellsOnEdge(2,iEdge) + !$acc loop vector do k=1,nVertLevels tend_theta_euler(k,iCell) = tend_theta_euler(k,iCell) - edge_sign*(delsq_theta(k,cell2) - delsq_theta(k,cell1)) end do end do end do + !$acc end parallel + end if ! 4th order mixing is active if(mix_scalars .and. (dynamics_substep == 1)) then ! 
dissipation for scalars, including 4th-order filter. Likely needs optimization do iScalar=1,num_scalars - delsq_theta(1:nVertLevels,cellStart:cellEnd) = 0.0 + !$acc parallel default(present) + !$acc loop gang worker do iCell=cellStart,cellEnd + !$acc loop vector + do k = 1, nVertLevels + delsq_theta(k,iCell) = 0.0_RKIND + end do + ! tend_theta_euler(1:nVertLevels,iCell) = 0.0 r_areaCell = invAreaCell(iCell) + + !$acc loop seq do i=1,nEdgesOnCell(iCell) iEdge = edgesOnCell(i,iCell) edge_sign = r_areaCell*edgesOnCell_sign(i,iCell) * dvEdge(iEdge) * invDcEdge(iEdge) @@ -1160,6 +1356,7 @@ subroutine scalar_dissipation_3d_les( cellStart, cellEnd, cellSolveStart, cellSo cell2 = cellsOnEdge(2,iEdge) !DIR$ IVDEP + !$acc loop vector do k=1,nVertLevels ! we are computing the Smagorinsky filter at more points than needed here so as to pick up the delsq_theta for 4th order filter below. @@ -1174,12 +1371,20 @@ subroutine scalar_dissipation_3d_les( cellStart, cellEnd, cellSolveStart, cellSo end do end do + !$acc end parallel + !$OMP BARRIER if (h_theta_eddy_visc4 > 0.0) then ! 4th order mixing is active + !$acc parallel default(present) + + !$acc loop gang worker do iCell=cellSolveStart,cellSolveEnd ! Technically updating fewer cells than before... + r_areaCell = h_theta_eddy_visc4 * prandtl_inv * invAreaCell(iCell) + + !$acc loop seq do i=1,nEdgesOnCell(iCell) iEdge = edgesOnCell(i,iCell) @@ -1188,12 +1393,15 @@ subroutine scalar_dissipation_3d_les( cellStart, cellEnd, cellSolveStart, cellSo cell1 = cellsOnEdge(1,iEdge) cell2 = cellsOnEdge(2,iEdge) + !$acc loop vector do k=1,nVertLevels tend_scalars(iScalar,k,iCell) = tend_scalars(iScalar,k,iCell) - edge_sign*(delsq_theta(k,cell2) - delsq_theta(k,cell1)) end do end do end do + !$acc end parallel + end if ! 4th order mixing is active end do ! 
loop over scalars for horizontal mixing @@ -1207,7 +1415,12 @@ subroutine scalar_dissipation_3d_les( cellStart, cellEnd, cellSolveStart, cellSo if (config_mix_full) then + !$acc parallel default(present) + + !$acc loop gang worker do iCell = cellSolveStart,cellSolveEnd + + !$acc loop vector do k=2,nVertLevels-1 z1 = zgrid(k-1,iCell) z2 = zgrid(k ,iCell) @@ -1224,9 +1437,16 @@ subroutine scalar_dissipation_3d_les( cellStart, cellEnd, cellSolveStart, cellSo end do end do + !$acc end parallel + else ! idealized cases where we mix on the perturbation from the initial 1-D state + !$acc parallel default(present) + + !$acc loop gang worker do iCell = cellSolveStart,cellSolveEnd + + !$acc loop vector do k=2,nVertLevels-1 z1 = zgrid(k-1,iCell) z2 = zgrid(k ,iCell) @@ -1243,12 +1463,17 @@ subroutine scalar_dissipation_3d_les( cellStart, cellEnd, cellSolveStart, cellSo end do end do + !$acc end parallel + end if end if if ( les_model_opt /= LES_MODEL_NONE ) then + !$acc parallel default(present) + + !$acc loop gang worker private(turb_vflux, turb_vflux_scalars, prandtl_1d_inverse, rho_k_at_w, zz_at_w) do iCell = cellSolveStart,cellSolveEnd ! vertical mixing for each column ! compute turbulent fluxes @@ -1256,6 +1481,7 @@ subroutine scalar_dissipation_3d_les( cellStart, cellEnd, cellSolveStart, cellSo turb_vflux(1) = 0. ! lower bc flux handled where ??? if ( les_model_opt == LES_MODEL_3D_SMAGORINSKY ) then + !$acc loop vector do k=2,nVertLevels prandtl_1d_inverse(k) = prandtl_inv end do @@ -1271,6 +1497,7 @@ subroutine scalar_dissipation_3d_les( cellStart, cellEnd, cellSolveStart, cellSo ! prandtl_inverse(k) = 1. + 2.*tke_length/delta_z ! end do + !$acc loop vector do k=2,nVertLevels ! 
prandtl_1d_inverse(k) = 0.5*(prandtl_3d_inv(k,iCell)+prandtl_3d_inv(k-1,iCell)) prandtl_1d_inverse(k) = fzm(k)*prandtl_3d_inv(k,iCell)+fzp(k)*prandtl_3d_inv(k-1,iCell) @@ -1278,16 +1505,17 @@ subroutine scalar_dissipation_3d_les( cellStart, cellEnd, cellSolveStart, cellSo end if + !$acc loop vector do k=2,nVertLevels ! delta_z = 0.5*(zgrid(k+1,iCell)-zgrid(k-1,iCell)) ! delta_s = ((config_len_disp**2)*delta_z)**(1./3.) ! bv_frequency2 = 0.5*(bv_freq2(k)+bv_freq(k-1)) ! bv = max( sqrt(abs(bv_frequency2)), epsilon_bv ) - rho_k_at_w = fzm(k)*rho_zz(k ,iCell)*zz(k ,iCell)*zz(k ,iCell)*eddy_visc_vert(k ,iCell) & - +fzp(k)*rho_zz(k-1,iCell)*zz(k-1,iCell)*zz(k-1,iCell)*eddy_visc_vert(k-1,iCell) - zz_at_w = fzm(k)*zz(k,iCell)+fzp(k)*zz(k-1,iCell) - turb_vflux(k) = - prandtl_1d_inverse(k)*rho_k_at_w*zz_at_w*rdzu(k)*(theta_m(k,iCell)-theta_m(k-1,iCell)) + rho_k_at_w(k) = fzm(k)*rho_zz(k ,iCell)*zz(k ,iCell)*zz(k ,iCell)*eddy_visc_vert(k ,iCell) & + +fzp(k)*rho_zz(k-1,iCell)*zz(k-1,iCell)*zz(k-1,iCell)*eddy_visc_vert(k-1,iCell) + zz_at_w(k) = fzm(k)*zz(k,iCell)+fzp(k)*zz(k-1,iCell) + turb_vflux(k) = - prandtl_1d_inverse(k)*rho_k_at_w(k)*zz_at_w(k)*rdzu(k)*(theta_m(k,iCell)-theta_m(k-1,iCell)) end do ! test boundary conditions for supercell and les test cases @@ -1322,6 +1550,7 @@ subroutine scalar_dissipation_3d_les( cellStart, cellEnd, cellSolveStart, cellSo end if + !$acc loop vector do k=1,nVertLevels tend_theta_euler(k,iCell) = tend_theta_euler(k,iCell) & - rdzw(k)*(turb_vflux(k+1)-turb_vflux(k)) @@ -1330,20 +1559,30 @@ subroutine scalar_dissipation_3d_les( cellStart, cellEnd, cellSolveStart, cellSo if (mix_scalars ) then ! compute turbulent fluxes - turb_vflux_scalars(:,nVertlevels+1) = 0. ! no turbulent flux out of the domain - turb_vflux_scalars(:,1) = 0. ! lower bc flux handled where ??? + !$acc loop vector + do iScalar=1,num_scalars + turb_vflux_scalars(iScalar,nVertlevels+1) = 0.0_RKIND ! 
no turbulent flux out of the domain + turb_vflux_scalars(iScalar,1) = 0.0_RKIND ! lower bc flux handled where ??? + end do + + !$acc loop vector + do k=2,nVertLevels + rho_k_at_w(k) = fzm(k)*rho_zz(k ,iCell)*zz(k ,iCell)*zz(k ,iCell)*eddy_visc_vert(k ,iCell) & + + fzp(k)*rho_zz(k-1,iCell)*zz(k-1,iCell)*zz(k-1,iCell)*eddy_visc_vert(k-1,iCell) + zz_at_w(k) = fzm(k)*zz(k,iCell)+fzp(k)*zz(k-1,iCell) + end do + + !$acc loop vector collapse(2) do k=2,nVertLevels - rho_k_at_w = fzm(k)*rho_zz(k ,iCell)*zz(k ,iCell)*zz(k ,iCell)*eddy_visc_vert(k ,iCell) & - +fzp(k)*rho_zz(k-1,iCell)*zz(k-1,iCell)*zz(k-1,iCell)*eddy_visc_vert(k-1,iCell) - zz_at_w = fzm(k)*zz(k,iCell)+fzp(k)*zz(k-1,iCell) do iScalar=1,num_scalars - turb_vflux_scalars(iScalar,k) = - prandtl_1d_inverse(k)*rho_k_at_w*zz_at_w*rdzu(k)* & + turb_vflux_scalars(iScalar,k) = - prandtl_1d_inverse(k)*rho_k_at_w(k)*zz_at_w(k)*rdzu(k)* & (scalars(iScalar,k,iCell)-scalars(iScalar,k-1,iCell)) end do end do if( les_surface_opt == LES_SURFACE_SPECIFIED .or. les_surface_opt == LES_SURFACE_VARYING ) turb_vflux_scalars(index_qv,1) = moisture_flux ! lower b.c. for qv + !$acc loop vector collapse(2) do k=1,nVertLevels do iScalar=1,num_scalars tend_scalars(iScalar,k,iCell) = tend_scalars(iScalar,k,iCell) & @@ -1355,8 +1594,13 @@ subroutine scalar_dissipation_3d_les( cellStart, cellEnd, cellSolveStart, cellSo end do ! 
loop over cells (columns) + !$acc end parallel + end if + !$acc exit data delete(turb_vflux_scalars) + !$acc exit data delete(turb_vflux, prandtl_1d_inverse) + DEBUG_WRITE(' exiting scalar_dissipation_3d ') end subroutine scalar_dissipation_3d_les diff --git a/src/core_atmosphere/dynamics/mpas_atm_time_integration.F b/src/core_atmosphere/dynamics/mpas_atm_time_integration.F index f54804cc93..3d79ef059c 100644 --- a/src/core_atmosphere/dynamics/mpas_atm_time_integration.F +++ b/src/core_atmosphere/dynamics/mpas_atm_time_integration.F @@ -273,6 +273,11 @@ subroutine mpas_atm_dynamics_init(domain) real (kind=RKIND), dimension(:), pointer :: angleEdge real (kind=RKIND), dimension(:), pointer :: meshScalingDel2 real (kind=RKIND), dimension(:), pointer :: meshScalingDel4 + real (kind=RKIND), dimension(:,:), pointer :: deformation_coef_c2 + real (kind=RKIND), dimension(:,:), pointer :: deformation_coef_s2 + real (kind=RKIND), dimension(:,:), pointer :: deformation_coef_cs + real (kind=RKIND), dimension(:,:), pointer :: deformation_coef_c + real (kind=RKIND), dimension(:,:), pointer :: deformation_coef_s #endif #ifdef MPAS_CAM_DYCORE @@ -457,6 +462,21 @@ subroutine mpas_atm_dynamics_init(domain) call mpas_pool_get_array(mesh, 'meshScalingDel4', meshScalingDel4) !$acc enter data copyin(meshScalingDel4) + + call mpas_pool_get_array(mesh, 'deformation_coef_c2', deformation_coef_c2) + !$acc enter data copyin(deformation_coef_c2) + + call mpas_pool_get_array(mesh, 'deformation_coef_s2', deformation_coef_s2) + !$acc enter data copyin(deformation_coef_s2) + + call mpas_pool_get_array(mesh, 'deformation_coef_cs', deformation_coef_cs) + !$acc enter data copyin(deformation_coef_cs) + + call mpas_pool_get_array(mesh, 'deformation_coef_c', deformation_coef_c) + !$acc enter data copyin(deformation_coef_c) + + call mpas_pool_get_array(mesh, 'deformation_coef_s', deformation_coef_s) + !$acc enter data copyin(deformation_coef_s) #endif end subroutine mpas_atm_dynamics_init @@ -547,6 
+567,11 @@ subroutine mpas_atm_dynamics_finalize(domain) real (kind=RKIND), dimension(:), pointer :: angleEdge real (kind=RKIND), dimension(:), pointer :: meshScalingDel2 real (kind=RKIND), dimension(:), pointer :: meshScalingDel4 + real (kind=RKIND), dimension(:,:), pointer :: deformation_coef_c2 + real (kind=RKIND), dimension(:,:), pointer :: deformation_coef_s2 + real (kind=RKIND), dimension(:,:), pointer :: deformation_coef_cs + real (kind=RKIND), dimension(:,:), pointer :: deformation_coef_c + real (kind=RKIND), dimension(:,:), pointer :: deformation_coef_s #endif @@ -732,6 +757,21 @@ subroutine mpas_atm_dynamics_finalize(domain) call mpas_pool_get_array(mesh, 'meshScalingDel4', meshScalingDel4) !$acc exit data delete(meshScalingDel4) + + call mpas_pool_get_array(mesh, 'deformation_coef_c2', deformation_coef_c2) + !$acc exit data delete(deformation_coef_c2) + + call mpas_pool_get_array(mesh, 'deformation_coef_s2', deformation_coef_s2) + !$acc exit data delete(deformation_coef_s2) + + call mpas_pool_get_array(mesh, 'deformation_coef_cs', deformation_coef_cs) + !$acc exit data delete(deformation_coef_cs) + + call mpas_pool_get_array(mesh, 'deformation_coef_c', deformation_coef_c) + !$acc exit data delete(deformation_coef_c) + + call mpas_pool_get_array(mesh, 'deformation_coef_s', deformation_coef_s) + !$acc exit data delete(deformation_coef_s) #endif end subroutine mpas_atm_dynamics_finalize @@ -5226,8 +5266,8 @@ subroutine atm_compute_dyn_tend_work(nCells, nEdges, nVertices, nVertLevels_dumm real (kind=RKIND), dimension( nVertLevels + 1 ) :: wduz, wdwz, wdtz, dpzx real (kind=RKIND), dimension( nVertLevels ) :: ru_edge_w, q, u_mix - real (kind=RKIND), dimension( nVertLevels+1 ) :: d_11, d_22, d_12 - real (kind=RKIND), dimension( nVertLevels+1 ) :: dudx, dudy, dvdx, dvdy +! real (kind=RKIND), dimension( nVertLevels+1 ) :: d_11, d_22, d_12 +! 
real (kind=RKIND), dimension( nVertLevels+1 ) :: dudx, dudy, dvdx, dvdy real (kind=RKIND) :: theta_turb_flux, w_turb_flux, r real (kind=RKIND) :: scalar_weight @@ -5263,6 +5303,13 @@ subroutine atm_compute_dyn_tend_work(nCells, nEdges, nVertices, nVertLevels_dumm MPAS_ACC_TIMER_START('atm_compute_dyn_tend_work [ACC_data_xfer]') + if (perturbation_coriolis) then + !$acc enter data copyin(u_init, v_init) + end if + if (trim(config_les_model) /= 'none') then + !$acc enter data copyin(exner, pressure_b, bn2) + end if + !$acc enter data copyin(ustm, hfx, qfx) if (rk_step == 1) then !$acc enter data create(tend_w_euler) !$acc enter data create(tend_u_euler) @@ -5274,7 +5321,6 @@ subroutine atm_compute_dyn_tend_work(nCells, nEdges, nVertices, nVertLevels_dumm !$acc enter data copyin(rb, rr_save) !$acc enter data copyin(divergence, vorticity) !$acc enter data copyin(v) - !$acc enter data copyin(u_init, v_init) else !$acc enter data copyin(tend_w_euler) !$acc enter data copyin(tend_u_euler) @@ -5297,9 +5343,18 @@ subroutine atm_compute_dyn_tend_work(nCells, nEdges, nVertices, nVertLevels_dumm !$acc enter data copyin(rw_save, rt_diabatic_tend) !$acc enter data create(rthdynten) !$acc enter data copyin(t_init) + if (trim(config_les_model) /= 'none') then + !$acc enter data copyin(ur_cell, vr_cell) + else #ifdef CURVATURE - !$acc enter data copyin(ur_cell, vr_cell) + !$acc enter data copyin(ur_cell, vr_cell) #endif + end if + !$acc enter data create(eddy_visc_horz) + !$acc enter data create(eddy_visc_vert) + !$acc enter data create(prandtl_3d_inv) + !$acc enter data copyin(scalars) + !$acc enter data copyin(tend_scalars) MPAS_ACC_TIMER_STOP('atm_compute_dyn_tend_work [ACC_data_xfer]') prandtl_inv = 1.0_RKIND / prandtl @@ -5336,7 +5391,16 @@ subroutine atm_compute_dyn_tend_work(nCells, nEdges, nVertices, nVertLevels_dumm else if(config_horiz_mixing == "2d_fixed") then - eddy_visc_horz(1:nVertLevels,cellStart:cellEnd) = config_h_theta_eddy_visc2 + !$acc parallel default(present) 
+ !$acc loop gang worker + do iCell = cellStart, cellEnd + !$acc loop vector + do k = 1, nVertLevels + eddy_visc_horz(k,iCell) = config_h_theta_eddy_visc2 + end do + end do + !$acc end parallel + h_mom_eddy_visc4 = config_h_mom_eddy_visc4 h_theta_eddy_visc4 = config_h_theta_eddy_visc4 @@ -5520,8 +5584,11 @@ subroutine atm_compute_dyn_tend_work(nCells, nEdges, nVertices, nVertLevels_dumm end do if (perturbation_coriolis) then ! this is correct only for constant f + !$acc loop seq do j = 1,nEdgesOnEdge(iEdge) eoe = edgesOnEdge(j,iEdge) + + !$acc loop vector do k=1,nVertLevels reference_u = u_init(k) * cos(angleEdge(eoe)) - v_init(k) * sin(angleEdge(eoe)) q(k) = q(k) - weightsOnEdge(j,iEdge) * reference_u * fEdge(iEdge) @@ -5996,6 +6063,14 @@ subroutine atm_compute_dyn_tend_work(nCells, nEdges, nVertices, nVertLevels_dumm !$acc end parallel MPAS_ACC_TIMER_START('atm_compute_dyn_tend_work [ACC_data_xfer]') + if (perturbation_coriolis) then + !$acc exit data delete(u_init, v_init) + end if + if (trim(config_les_model) /= 'none') then + !$acc exit data delete(exner, pressure_b) + !$acc exit data copyout(bn2) + end if + !$acc exit data delete(ustm, hfx, qfx) if (rk_step == 1) then !$acc exit data copyout(tend_w_euler) !$acc exit data copyout(tend_u_euler) @@ -6007,7 +6082,6 @@ subroutine atm_compute_dyn_tend_work(nCells, nEdges, nVertices, nVertLevels_dumm !$acc exit data delete(rb, rr_save) !$acc exit data delete(divergence, vorticity) !$acc exit data delete(v) - !$acc exit data delete(u_init, v_init) else !$acc exit data delete(tend_w_euler) !$acc exit data delete(tend_u_euler) @@ -6030,9 +6104,18 @@ subroutine atm_compute_dyn_tend_work(nCells, nEdges, nVertices, nVertLevels_dumm !$acc exit data delete(rw_save, rt_diabatic_tend) !$acc exit data copyout(rthdynten) !$acc exit data delete(t_init) + if (trim(config_les_model) /= 'none') then + !$acc exit data delete(ur_cell, vr_cell) + else #ifdef CURVATURE - !$acc exit data delete(ur_cell, vr_cell) + !$acc exit data 
delete(ur_cell, vr_cell) #endif + end if + !$acc exit data delete(eddy_visc_horz) + !$acc exit data delete(eddy_visc_vert) + !$acc exit data delete(prandtl_3d_inv) + !$acc exit data delete(scalars) + !$acc exit data copyout(tend_scalars) MPAS_ACC_TIMER_STOP('atm_compute_dyn_tend_work [ACC_data_xfer]') end subroutine atm_compute_dyn_tend_work From 8b874cbce1e7fb8588df1a93a0a003522833b5fe Mon Sep 17 00:00:00 2001 From: Michael Duda Date: Wed, 28 Jan 2026 17:49:51 -0700 Subject: [PATCH 097/146] Update copyright statement at the top of mpas_atm_dissipation_models.F --- src/core_atmosphere/dynamics/mpas_atm_dissipation_models.F | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/core_atmosphere/dynamics/mpas_atm_dissipation_models.F b/src/core_atmosphere/dynamics/mpas_atm_dissipation_models.F index 256aec6019..d97f0aa2c4 100644 --- a/src/core_atmosphere/dynamics/mpas_atm_dissipation_models.F +++ b/src/core_atmosphere/dynamics/mpas_atm_dissipation_models.F @@ -1,5 +1,4 @@ -! Copyright (c) 2013, Los Alamos National Security, LLC (LANS) -! and the University Corporation for Atmospheric Research (UCAR). +! Copyright (c) 2026, The University Corporation for Atmospheric Research (UCAR). ! ! Unless noted otherwise source code is licensed under the BSD license. ! Additional copyright and license information can be found in the LICENSE file From 4712ea1f10190f151859a35fe2bb2c63ee3ad90f Mon Sep 17 00:00:00 2001 From: Michael Duda Date: Wed, 28 Jan 2026 17:55:01 -0700 Subject: [PATCH 098/146] Fix LaTeX quotes in possible_values for config_les_model and config_les_surface This commit modifies the possible_values attribute for the config_les_model and config_les_surface namelist options in the atmosphere core's Registry.xml file to use LaTeX ` and ' quotes. 
--- src/core_atmosphere/Registry.xml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/core_atmosphere/Registry.xml b/src/core_atmosphere/Registry.xml index 68b0952e9b..a697eaf2e5 100644 --- a/src/core_atmosphere/Registry.xml +++ b/src/core_atmosphere/Registry.xml @@ -149,12 +149,12 @@ + possible_values="`none', `3d_smagorinsky', `prognostic_1.5_order'"/> + possible_values="`specified', `varying'"/> Date: Tue, 3 Feb 2026 20:19:37 +0000 Subject: [PATCH 099/146] Fix indentation of deformation_coef_* variables in atmosphere core Registry.xml The deformation_coef_* variables now use tabs for indentation rather than spaces to match other variables in the default stream definitions. --- src/core_atmosphere/Registry.xml | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/src/core_atmosphere/Registry.xml b/src/core_atmosphere/Registry.xml index a697eaf2e5..5ac9420576 100644 --- a/src/core_atmosphere/Registry.xml +++ b/src/core_atmosphere/Registry.xml @@ -524,11 +524,11 @@ - - - - - + + + + + #ifdef MPAS_CAM_DYCORE @@ -632,11 +632,11 @@ - - - - - + + + + + From 3cb172cf46856afa12205e5d5f69485557705557 Mon Sep 17 00:00:00 2001 From: Michael Duda Date: Tue, 3 Feb 2026 20:28:35 +0000 Subject: [PATCH 100/146] Clean up indentation in atm_init_test_coefs routine in mpas_atm_advection.F --- src/core_init_atmosphere/mpas_atm_advection.F | 274 +++++++++--------- 1 file changed, 137 insertions(+), 137 deletions(-) diff --git a/src/core_init_atmosphere/mpas_atm_advection.F b/src/core_init_atmosphere/mpas_atm_advection.F index bff8843fbc..6b4c24658e 100644 --- a/src/core_init_atmosphere/mpas_atm_advection.F +++ b/src/core_init_atmosphere/mpas_atm_advection.F @@ -1025,14 +1025,14 @@ end subroutine atm_initialize_deformation_weights !----------------------- - subroutine atm_init_test_coefs( deformation_coef_c2, deformation_coef_s2, & - deformation_coef_cs, deformation_coef_c, & - deformation_coef_s, & - is_periodic, 
on_a_sphere, & - x_period, y_period, & - xEdge, yEdge, zEdge, & - xCell, yCell, zCell, nCells, & - angleEdge, nEdgesOnCell, edgesOnCell ) + subroutine atm_init_test_coefs( deformation_coef_c2, deformation_coef_s2, & + deformation_coef_cs, deformation_coef_c, & + deformation_coef_s, & + is_periodic, on_a_sphere, & + x_period, y_period, & + xEdge, yEdge, zEdge, & + xCell, yCell, zCell, nCells, & + angleEdge, nEdgesOnCell, edgesOnCell ) implicit none @@ -1085,149 +1085,149 @@ subroutine atm_init_test_coefs( deformation_coef_c2, deformation_coef_s2, & if ( (.not. on_a_sphere) .and. (is_periodic) ) then ! test is for doubly-periodic Cartesian plane only - dudx_err_max = 0. - dudy_err_max = 0. - dvdx_err_max = 0. - dvdy_err_max = 0. - dwdx_err_max = 0. - dwdy_err_max = 0. - - dudx_err_tot = 0. - dudy_err_tot = 0. - dvdx_err_tot = 0. - dvdy_err_tot = 0. - dwdx_err_tot = 0. - dwdy_err_tot = 0. - - dudx_max = 0. - dudy_max = 0. - dvdx_max = 0. - dvdy_max = 0. - dwdx_max = 0. - dwdy_max = 0. + dudx_err_max = 0. + dudy_err_max = 0. + dvdx_err_max = 0. + dvdy_err_max = 0. + dwdx_err_max = 0. + dwdy_err_max = 0. + + dudx_err_tot = 0. + dudy_err_tot = 0. + dvdx_err_tot = 0. + dvdy_err_tot = 0. + dwdx_err_tot = 0. + dwdy_err_tot = 0. + + dudx_max = 0. + dudy_max = 0. + dvdx_max = 0. + dvdy_max = 0. + dwdx_max = 0. + dwdy_max = 0. + + do iCell = 1, nCells + + dudx = 0. + dudy = 0. + dvdx = 0. + dvdy = 0. + dwdx = 0. + dwdy = 0. 
+ + xc = xCell(iCell) + yc = yCell(iCell) + + dudx_c = dudx_cell(xc,yc,x_period,y_period) + dudy_c = dudy_cell(xc,yc,x_period,y_period) + dvdx_c = dvdx_cell(xc,yc,x_period,y_period) + dvdy_c = dvdy_cell(xc,yc,x_period,y_period) + dwdx_c = dwdx_cell(xc,yc,x_period,y_period) + dwdy_c = dwdy_cell(xc,yc,x_period,y_period) + + do iEdge = 1, nEdgesOnCell(iCell) + + ie = edgesOnCell(iEdge,iCell) + angle_e = angleEdge(ie) + xe = xEdge(ie) + ye = yEdge(ie) + + xe = mpas_fix_periodicity(xe,xc,x_period) + ye = mpas_fix_periodicity(ye,yc,y_period) + + ue = u_edge(xe,ye,angle_e,x_period,y_period) + ve = v_edge(xe,ye,angle_e,x_period,y_period) + we = w_edge(xe,ye,x_period,y_period) + + dudx = dudx + deformation_coef_c2(iEdge,iCell)*ue & + - deformation_coef_cs(iEdge,iCell)*ve + dudy = dudy + deformation_coef_cs(iEdge,iCell)*ue & + - deformation_coef_s2(iEdge,iCell)*ve + dvdx = dvdx + deformation_coef_cs(iEdge,iCell)*ue & + + deformation_coef_c2(iEdge,iCell)*ve + dvdy = dvdy + deformation_coef_s2(iEdge,iCell)*ue & + + deformation_coef_cs(iEdge,iCell)*ve + + dwdx = dwdx + deformation_coef_c(iEdge,iCell)*we + dwdy = dwdy + deformation_coef_s(iEdge,iCell)*we - do iCell = 1, nCells - - dudx = 0. - dudy = 0. - dvdx = 0. - dvdy = 0. - dwdx = 0. - dwdy = 0. 
- - xc = xCell(iCell) - yc = yCell(iCell) - - dudx_c = dudx_cell(xc,yc,x_period,y_period) - dudy_c = dudy_cell(xc,yc,x_period,y_period) - dvdx_c = dvdx_cell(xc,yc,x_period,y_period) - dvdy_c = dvdy_cell(xc,yc,x_period,y_period) - dwdx_c = dwdx_cell(xc,yc,x_period,y_period) - dwdy_c = dwdy_cell(xc,yc,x_period,y_period) - - do iEdge = 1, nEdgesOnCell(iCell) - - ie = edgesOnCell(iEdge,iCell) - angle_e = angleEdge(ie) - xe = xEdge(ie) - ye = yEdge(ie) - - xe = mpas_fix_periodicity(xe,xc,x_period) - ye = mpas_fix_periodicity(ye,yc,y_period) - - ue = u_edge(xe,ye,angle_e,x_period,y_period) - ve = v_edge(xe,ye,angle_e,x_period,y_period) - we = w_edge(xe,ye,x_period,y_period) - - dudx = dudx + deformation_coef_c2(iEdge,iCell)*ue & - - deformation_coef_cs(iEdge,iCell)*ve - dudy = dudy + deformation_coef_cs(iEdge,iCell)*ue & - - deformation_coef_s2(iEdge,iCell)*ve - dvdx = dvdx + deformation_coef_cs(iEdge,iCell)*ue & - + deformation_coef_c2(iEdge,iCell)*ve - dvdy = dvdy + deformation_coef_s2(iEdge,iCell)*ue & - + deformation_coef_cs(iEdge,iCell)*ve - - dwdx = dwdx + deformation_coef_c(iEdge,iCell)*we - dwdy = dwdy + deformation_coef_s(iEdge,iCell)*we - - end do + end do - ! call mpas_log_write(' u_x, u_y, $r, $r ', realArgs=(/dudx, dudy/)) - ! call mpas_log_write(' v_x, v_y, $r, $r ', realArgs=(/dvdx, dvdy/)) - ! call mpas_log_write(' w_x, w_y, $r, $r ', realArgs=(/dwdx, dwdy/)) + ! call mpas_log_write(' u_x, u_y, $r, $r ', realArgs=(/dudx, dudy/)) + ! call mpas_log_write(' v_x, v_y, $r, $r ', realArgs=(/dvdx, dvdy/)) + ! call mpas_log_write(' w_x, w_y, $r, $r ', realArgs=(/dwdx, dwdy/)) - ! check result for cell + ! 
check result for cell - e_int = abs(dudx_c - dudx) - dudx_err_tot = dudx_err_tot + e_int - dudx_err_max = max(dudx_err_max, e_int) + e_int = abs(dudx_c - dudx) + dudx_err_tot = dudx_err_tot + e_int + dudx_err_max = max(dudx_err_max, e_int) - e_int = abs(dudy_c - dudy) - dudy_err_tot = dudy_err_tot + e_int - dudy_err_max = max(dudy_err_max, e_int) + e_int = abs(dudy_c - dudy) + dudy_err_tot = dudy_err_tot + e_int + dudy_err_max = max(dudy_err_max, e_int) - e_int = abs(dvdx_c - dvdx) - dvdx_err_tot = dvdx_err_tot + e_int - dvdx_err_max = max(dvdx_err_max, e_int) + e_int = abs(dvdx_c - dvdx) + dvdx_err_tot = dvdx_err_tot + e_int + dvdx_err_max = max(dvdx_err_max, e_int) - e_int = abs(dvdy_c - dvdy) - dvdy_err_tot = dvdy_err_tot + e_int - dvdy_err_max = max(dvdy_err_max, e_int) + e_int = abs(dvdy_c - dvdy) + dvdy_err_tot = dvdy_err_tot + e_int + dvdy_err_max = max(dvdy_err_max, e_int) - e_int = abs(dwdx_c - dwdx) - dwdx_err_tot = dwdx_err_tot + e_int - dwdx_err_max = max(dwdx_err_max, e_int) + e_int = abs(dwdx_c - dwdx) + dwdx_err_tot = dwdx_err_tot + e_int + dwdx_err_max = max(dwdx_err_max, e_int) - e_int = abs(dwdy_c - dwdy) - dwdy_err_tot = dwdy_err_tot + e_int - dwdy_err_max = max(dwdy_err_max, e_int) + e_int = abs(dwdy_c - dwdy) + dwdy_err_tot = dwdy_err_tot + e_int + dwdy_err_max = max(dwdy_err_max, e_int) - dudx_max = max(dudx_max, abs(dudx_c)) - dudy_max = max(dudy_max, abs(dudy_c)) - dvdx_max = max(dvdx_max, abs(dvdx_c)) - dvdy_max = max(dvdy_max, abs(dvdy_c)) - dwdx_max = max(dwdx_max, abs(dwdx_c)) - dwdy_max = max(dwdy_max, abs(dwdy_c)) + dudx_max = max(dudx_max, abs(dudx_c)) + dudy_max = max(dudy_max, abs(dudy_c)) + dvdx_max = max(dvdx_max, abs(dvdx_c)) + dvdy_max = max(dvdy_max, abs(dvdy_c)) + dwdx_max = max(dwdx_max, abs(dwdx_c)) + dwdy_max = max(dwdy_max, abs(dwdy_c)) - end do + end do - ! 
scale errors - - dudx_err_max = dudx_err_max/dudx_max - dudy_err_max = dudy_err_max/dudy_max - dvdx_err_max = dvdx_err_max/dvdx_max - dvdy_err_max = dvdy_err_max/dvdy_max - dwdx_err_max = dwdx_err_max/dwdx_max - dwdy_err_max = dwdy_err_max/dwdy_max - - dudx_err_tot = dudx_err_tot/dudx_max/real(nCells) - dudy_err_tot = dudy_err_tot/dudy_max/real(nCells) - dvdx_err_tot = dvdx_err_tot/dvdx_max/real(nCells) - dvdy_err_tot = dvdy_err_tot/dvdy_max/real(nCells) - dwdx_err_tot = dwdx_err_tot/dwdx_max/real(nCells) - dwdy_err_tot = dwdy_err_tot/dwdy_max/real(nCells) - - ! output - - call mpas_log_write(' ') - call mpas_log_write(' deformation coefficients check ') - call mpas_log_write(' dudx check, max abs(dudx), max and avg error $r, $r, $r', & - realArgs=(/dudx_max, dudx_err_max, dudx_err_tot/)) - call mpas_log_write(' dudy check, max abs(dudy), max and avg error $r, $r, $r', & - realArgs=(/dudy_max, dudy_err_max, dudy_err_tot/)) - call mpas_log_write(' dvdx check, max abs(dvdx), max and avg error $r, $r, $r', & - realArgs=(/dvdx_max, dvdx_err_max, dvdx_err_tot/)) - call mpas_log_write(' dvdy check, max abs(dvdy), max and avg error $r, $r, $r', & - realArgs=(/dvdy_max, dvdy_err_max, dvdy_err_tot/)) - call mpas_log_write(' dwdx check, max abs(dwdx), max and avg error $r, $r, $r', & - realArgs=(/dwdx_max, dwdx_err_max, dwdx_err_tot/)) - call mpas_log_write(' dwdy check, max abs(dwdy), max and avg error $r, $r, $r', & - realArgs=(/dwdy_max, dwdy_err_max, dwdy_err_tot/)) - call mpas_log_write(' ') + ! 
scale errors + + dudx_err_max = dudx_err_max/dudx_max + dudy_err_max = dudy_err_max/dudy_max + dvdx_err_max = dvdx_err_max/dvdx_max + dvdy_err_max = dvdy_err_max/dvdy_max + dwdx_err_max = dwdx_err_max/dwdx_max + dwdy_err_max = dwdy_err_max/dwdy_max + + dudx_err_tot = dudx_err_tot/dudx_max/real(nCells) + dudy_err_tot = dudy_err_tot/dudy_max/real(nCells) + dvdx_err_tot = dvdx_err_tot/dvdx_max/real(nCells) + dvdy_err_tot = dvdy_err_tot/dvdy_max/real(nCells) + dwdx_err_tot = dwdx_err_tot/dwdx_max/real(nCells) + dwdy_err_tot = dwdy_err_tot/dwdy_max/real(nCells) + + ! output + + call mpas_log_write(' ') + call mpas_log_write(' deformation coefficients check ') + call mpas_log_write(' dudx check, max abs(dudx), max and avg error $r, $r, $r', & + realArgs=(/dudx_max, dudx_err_max, dudx_err_tot/)) + call mpas_log_write(' dudy check, max abs(dudy), max and avg error $r, $r, $r', & + realArgs=(/dudy_max, dudy_err_max, dudy_err_tot/)) + call mpas_log_write(' dvdx check, max abs(dvdx), max and avg error $r, $r, $r', & + realArgs=(/dvdx_max, dvdx_err_max, dvdx_err_tot/)) + call mpas_log_write(' dvdy check, max abs(dvdy), max and avg error $r, $r, $r', & + realArgs=(/dvdy_max, dvdy_err_max, dvdy_err_tot/)) + call mpas_log_write(' dwdx check, max abs(dwdx), max and avg error $r, $r, $r', & + realArgs=(/dwdx_max, dwdx_err_max, dwdx_err_tot/)) + call mpas_log_write(' dwdy check, max abs(dwdy), max and avg error $r, $r, $r', & + realArgs=(/dwdy_max, dwdy_err_max, dwdy_err_tot/)) + call mpas_log_write(' ') end if - end subroutine atm_init_test_coefs + end subroutine atm_init_test_coefs end module atm_advection From d03b67ca332b39acaceef524061ff637d14c9a43 Mon Sep 17 00:00:00 2001 From: Michael Duda Date: Tue, 3 Feb 2026 20:51:03 +0000 Subject: [PATCH 101/146] Tidy up module use statements in mpas_atm_time_integration.F --- src/core_atmosphere/dynamics/mpas_atm_time_integration.F | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git 
a/src/core_atmosphere/dynamics/mpas_atm_time_integration.F b/src/core_atmosphere/dynamics/mpas_atm_time_integration.F index 3d79ef059c..de8c7b6c9c 100644 --- a/src/core_atmosphere/dynamics/mpas_atm_time_integration.F +++ b/src/core_atmosphere/dynamics/mpas_atm_time_integration.F @@ -22,9 +22,8 @@ module atm_time_integration use mpas_dmpar use mpas_vector_reconstruction ! Added only clause to keep xlf90 from getting confused from the overloaded abs intrinsic in mpas_timekeeping - use mpas_derived_types, only : MPAS_NOW - use mpas_timekeeping, only: MPAS_Time_type, MPAS_TimeInterval_type, mpas_get_clock_time, & - mpas_set_time, mpas_set_timeInterval, mpas_get_time, operator(+), add_t_ti + use mpas_derived_types, only : MPAS_Time_type, MPAS_TimeInterval_type, MPAS_NOW + use mpas_timekeeping, only: mpas_get_clock_time, mpas_set_time, mpas_set_timeInterval, mpas_get_time, operator(+) use mpas_timer #ifdef DO_PHYSICS From 27561d05ebd3684960181edca439e29a9e55e2c7 Mon Sep 17 00:00:00 2001 From: Michael Duda Date: Tue, 3 Feb 2026 20:53:16 +0000 Subject: [PATCH 102/146] Fix indentation of call to atm_compute_dyn_tend in atm_srk3 --- .../dynamics/mpas_atm_time_integration.F | 20 +++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/src/core_atmosphere/dynamics/mpas_atm_time_integration.F b/src/core_atmosphere/dynamics/mpas_atm_time_integration.F index de8c7b6c9c..1c543c5716 100644 --- a/src/core_atmosphere/dynamics/mpas_atm_time_integration.F +++ b/src/core_atmosphere/dynamics/mpas_atm_time_integration.F @@ -1210,16 +1210,16 @@ subroutine atm_srk3(domain, dt, itimestep, exchange_halo_group) !$acc end parallel !$OMP PARALLEL DO - do thread=1,nThreads - call atm_compute_dyn_tend( tend, tend_physics, state, diag, mesh, diag_physics, & - block % configs, nVertLevels, rk_step, dynamics_substep, dt, & - cellThreadStart(thread), cellThreadEnd(thread), & - vertexThreadStart(thread), vertexThreadEnd(thread), & - edgeThreadStart(thread), 
edgeThreadEnd(thread), & - cellSolveThreadStart(thread), cellSolveThreadEnd(thread), & - vertexSolveThreadStart(thread), vertexSolveThreadEnd(thread), & - edgeSolveThreadStart(thread), edgeSolveThreadEnd(thread)) - end do + do thread=1,nThreads + call atm_compute_dyn_tend( tend, tend_physics, state, diag, mesh, diag_physics, & + block % configs, nVertLevels, rk_step, dynamics_substep, dt, & + cellThreadStart(thread), cellThreadEnd(thread), & + vertexThreadStart(thread), vertexThreadEnd(thread), & + edgeThreadStart(thread), edgeThreadEnd(thread), & + cellSolveThreadStart(thread), cellSolveThreadEnd(thread), & + vertexSolveThreadStart(thread), vertexSolveThreadEnd(thread), & + edgeSolveThreadStart(thread), edgeSolveThreadEnd(thread)) + end do !$OMP END PARALLEL DO deallocate(delsq_theta) From 73d73c6764f014d6e9931a2b6b31a59bf1825715 Mon Sep 17 00:00:00 2001 From: Michael Duda Date: Wed, 4 Feb 2026 00:28:38 +0000 Subject: [PATCH 103/146] Fix indentation of calls to calculate_n2 and les_models in atm_compute_dyn_tend_work --- .../dynamics/mpas_atm_time_integration.F | 32 +++++++++---------- 1 file changed, 16 insertions(+), 16 deletions(-) diff --git a/src/core_atmosphere/dynamics/mpas_atm_time_integration.F b/src/core_atmosphere/dynamics/mpas_atm_time_integration.F index 1c543c5716..a0ceae6422 100644 --- a/src/core_atmosphere/dynamics/mpas_atm_time_integration.F +++ b/src/core_atmosphere/dynamics/mpas_atm_time_integration.F @@ -5407,25 +5407,25 @@ subroutine atm_compute_dyn_tend_work(nCells, nEdges, nVertices, nVertLevels_dumm else if (config_les_model /= "none") then - ! call mpas_log_write(' BV call, index qv, qc, tke $i $i $i ', intArgs=(/index_qv, index_qc, index_tke/)) + ! 
call mpas_log_write(' BV call, index qv, qc, tke $i $i $i ', intArgs=(/index_qv, index_qc, index_tke/)) - call calculate_n2( bn2, theta_m, exner, pressure_b, pp, zgrid, scalars, index_qv, index_qc, qtot, & - cellStart, cellEnd, nCells) + call calculate_n2( bn2, theta_m, exner, pressure_b, pp, zgrid, scalars, index_qv, index_qc, qtot, & + cellStart, cellEnd, nCells) - currTime = mpas_get_clock_time(clock, MPAS_NOW, ierr) - call mpas_get_time(curr_time=currTime, H=H, M=M, S=S, S_n=S_n, S_d=S_d) - time_of_day_seconds = real(H)*3600. + real(M)*60. + real(S) + real(S_n)/real(S_d) + 0.5*dt - ! call mpas_log_write(' les integration, timestep midpoint time of day in seconds, $r ', realArgs=(/time_of_day_seconds/)) + currTime = mpas_get_clock_time(clock, MPAS_NOW, ierr) + call mpas_get_time(curr_time=currTime, H=H, M=M, S=S, S_n=S_n, S_d=S_d) + time_of_day_seconds = real(H)*3600. + real(M)*60. + real(S) + real(S_n)/real(S_d) + 0.5*dt + ! call mpas_log_write(' les integration, timestep midpoint time of day in seconds, $r ', realArgs=(/time_of_day_seconds/)) - call les_models( config_les_model, config_les_surface, dynamics_substep, eddy_visc_horz, eddy_visc_vert, & - u, v, ur_cell, vr_cell, & - w, c_s, bn2, zgrid, config_len_disp, & - deformation_coef_c2, deformation_coef_s2, deformation_coef_cs, & - deformation_coef_c, deformation_coef_s, prandtl_3d_inv, & - invDt, h_mom_eddy_visc4, config_visc4_2dsmag, h_theta_eddy_visc4, & - scalars, tend_scalars, index_tke, rho_zz, meshScalingDel2, & - cellStart, cellEnd, nEdgesOnCell, edgesOnCell, cellsOnEdge, & - nCells, nEdges, nVertLevels, maxEdges, num_scalars ) + call les_models( config_les_model, config_les_surface, dynamics_substep, eddy_visc_horz, eddy_visc_vert, & + u, v, ur_cell, vr_cell, & + w, c_s, bn2, zgrid, config_len_disp, & + deformation_coef_c2, deformation_coef_s2, deformation_coef_cs, & + deformation_coef_c, deformation_coef_s, prandtl_3d_inv, & + invDt, h_mom_eddy_visc4, config_visc4_2dsmag, h_theta_eddy_visc4, & + 
scalars, tend_scalars, index_tke, rho_zz, meshScalingDel2, & + cellStart, cellEnd, nEdgesOnCell, edgesOnCell, cellsOnEdge, & + nCells, nEdges, nVertLevels, maxEdges, num_scalars ) end if From b8094d5eb62420467f7a9097cbee44b56f9eaa7d Mon Sep 17 00:00:00 2001 From: Michael Duda Date: Wed, 4 Feb 2026 00:34:57 +0000 Subject: [PATCH 104/146] Remove commented-out calls to non-existent dissipation routines This commit removes commented-out calls to the u_dissipation, w_dissipation, theta_dissipation, and theta_dissipation_3d routines in the atm_compute_dyn_tend_work routine; the implementations of these routines no longer exist, and they had only been used for testing during the migration to new dissipation routines for u, w, and scalars. This commit also removes the local variable test_dissipation_3d, which was used to control whether the aforementioned routines were called. --- .../dynamics/mpas_atm_time_integration.F | 157 +++++------------- 1 file changed, 45 insertions(+), 112 deletions(-) diff --git a/src/core_atmosphere/dynamics/mpas_atm_time_integration.F b/src/core_atmosphere/dynamics/mpas_atm_time_integration.F index a0ceae6422..c063760f36 100644 --- a/src/core_atmosphere/dynamics/mpas_atm_time_integration.F +++ b/src/core_atmosphere/dynamics/mpas_atm_time_integration.F @@ -5238,7 +5238,6 @@ subroutine atm_compute_dyn_tend_work(nCells, nEdges, nVertices, nVertLevels_dumm real (kind=RKIND) :: config_visc4_2dsmag real (kind=RKIND) :: config_len_disp real (kind=RKIND) :: config_h_mom_eddy_visc2, config_v_mom_eddy_visc2, config_h_theta_eddy_visc2, config_v_theta_eddy_visc2 - logical, parameter :: test_dissipation_3d=.true. integer, intent(in) :: rk_step, dynamics_substep real (kind=RKIND), intent(in) :: dt @@ -5628,36 +5627,20 @@ subroutine atm_compute_dyn_tend_work(nCells, nEdges, nVertices, nVertLevels_dumm !$OMP BARRIER -! 
if(test_dissipation_3d) then - - call u_dissipation_3d( edgeStart, edgeEnd, edgeSolveStart, edgeSolveEnd, vertexStart, vertexEnd, & - cellStart, cellEnd, nCells, nEdges, nVertices, vertexDegree, & - cellsOnEdge, verticesOnEdge, edgesOnCell, edgesOnVertex, & - nEdgesOnCell, edgesOnCell_sign, edgesOnVertex_sign, & - invAreaCell, invAreaTriangle, invDvEdge, invDcEdge, & - angleEdge, dcEdge, dvEdge, meshScalingDel2, meshScalingDel4, & - config_mix_full, h_mom_eddy_visc4, v_mom_eddy_visc2, & - config_del4u_div_factor, zgrid, & - eddy_visc_horz, eddy_visc_vert, zz, rdzu, rdzw, & - fzm, fzp, config_les_model, config_les_surface, & - config_surface_drag_coefficient, & - delsq_u, delsq_vorticity, delsq_divergence, & - u, v, divergence, vorticity, rho_edge, rho_zz, u_init, v_init, ustm, tend_u_euler ) - -! else ! this is the original MPAS dissipation code -! -! call u_dissipation( edgeStart, edgeEnd, edgeSolveStart, edgeSolveEnd, vertexStart, vertexEnd, & -! cellStart, cellEnd, nCells, nEdges, nVertices, vertexDegree, & -! cellsOnEdge, verticesOnEdge, edgesOnCell, edgesOnVertex, & -! nEdgesOnCell, edgesOnCell_sign, edgesOnVertex_sign, & -! invAreaCell, invAreaTriangle, invDvEdge, invDcEdge, & -! angleEdge, dcEdge, dvEdge, meshScalingDel2, meshScalingDel4, & -! config_mix_full, h_mom_eddy_visc4, v_mom_eddy_visc2, & -! config_del4u_div_factor, zgrid, kdiff, & -! delsq_u, delsq_vorticity, delsq_divergence, & -! u, divergence, vorticity, rho_edge, u_init, v_init, tend_u_euler ) -! -! 
end if + call u_dissipation_3d( edgeStart, edgeEnd, edgeSolveStart, edgeSolveEnd, vertexStart, vertexEnd, & + cellStart, cellEnd, nCells, nEdges, nVertices, vertexDegree, & + cellsOnEdge, verticesOnEdge, edgesOnCell, edgesOnVertex, & + nEdgesOnCell, edgesOnCell_sign, edgesOnVertex_sign, & + invAreaCell, invAreaTriangle, invDvEdge, invDcEdge, & + angleEdge, dcEdge, dvEdge, meshScalingDel2, meshScalingDel4, & + config_mix_full, h_mom_eddy_visc4, v_mom_eddy_visc2, & + config_del4u_div_factor, zgrid, & + eddy_visc_horz, eddy_visc_vert, zz, rdzu, rdzw, & + fzm, fzp, config_les_model, config_les_surface, & + config_surface_drag_coefficient, & + delsq_u, delsq_vorticity, delsq_divergence, & + u, v, divergence, vorticity, rho_edge, rho_zz, u_init, v_init, ustm, tend_u_euler ) + end if ! (rk_step 1 test for computing mixing terms) @@ -5785,35 +5768,18 @@ subroutine atm_compute_dyn_tend_work(nCells, nEdges, nVertices, nVertLevels_dumm if (rk_step == 1) then -! if(test_dissipation_3d) then - - call w_dissipation_3d( cellStart, cellEnd, cellSolveStart, cellSolveEnd, & - nCells, nEdges, & - nEdgesOnCell, edgesOnCell, cellsOnEdge, edgesOnCell_sign, & - invAreaCell, invDcEdge, dvEdge, & - meshScalingDel2, meshScalingDel4, & - rdzw, rdzu, & - v_mom_eddy_visc2, h_mom_eddy_visc4, & - delsq_w, & - w, rho_edge, rho_zz, divergence, zz, & - eddy_visc_horz, eddy_visc_vert, & - config_les_model, config_les_surface, & - tend_w_euler ) - -! else -! -! call w_dissipation( cellStart, cellEnd, cellSolveStart, cellSolveEnd, & -! nCells, nEdges, & -! nEdgesOnCell, edgesOnCell, cellsOnEdge, edgesOnCell_sign, & -! invAreaCell, invDcEdge, dvEdge, & -! meshScalingDel2, meshScalingDel4, & -! rdzw, rdzu, & -! v_mom_eddy_visc2, h_mom_eddy_visc4, & -! delsq_w, & -! w, rho_edge, kdiff, rho_zz, & -! tend_w_euler ) -! -! 
end if + call w_dissipation_3d( cellStart, cellEnd, cellSolveStart, cellSolveEnd, & + nCells, nEdges, & + nEdgesOnCell, edgesOnCell, cellsOnEdge, edgesOnCell_sign, & + invAreaCell, invDcEdge, dvEdge, & + meshScalingDel2, meshScalingDel4, & + rdzw, rdzu, & + v_mom_eddy_visc2, h_mom_eddy_visc4, & + delsq_w, & + w, rho_edge, rho_zz, divergence, zz, & + eddy_visc_horz, eddy_visc_vert, & + config_les_model, config_les_surface, & + tend_w_euler ) end if ! mixing for w computed in first rk_step @@ -5958,58 +5924,25 @@ subroutine atm_compute_dyn_tend_work(nCells, nEdges, nVertices, nVertLevels_dumm if (rk_step == 1) then -! if(test_dissipation_3d) then - - !call theta_dissipation_3d( cellStart, cellEnd, cellSolveStart, cellSolveEnd, & - ! nCells, nEdges, & - ! nEdgesOnCell, edgesOnCell, cellsOnEdge, edgesOnCell_sign, & - ! invAreaCell, invDcEdge, dvEdge, & - ! meshScalingDel2, meshScalingDel4, & - ! config_mix_full, t_init, zgrid, & - ! rdzw, rdzu, fzm, fzp, & - ! v_theta_eddy_visc2, h_theta_eddy_visc4, prandtl_inv, & - ! delsq_theta, & - ! theta_m, rho_edge, rho_zz, zz, & - ! eddy_visc_horz, eddy_visc_vert, & - ! config_les_model, & - ! tend_theta_euler ) - - call scalar_dissipation_3d_les( cellStart, cellEnd, cellSolveStart, cellSolveEnd, & - nCells, nEdges, & - nEdgesOnCell, edgesOnCell, cellsOnEdge, edgesOnCell_sign, & - invAreaCell, invDcEdge, dvEdge, & - meshScalingDel2, meshScalingDel4, & - config_mix_full, t_init, zgrid, & - rdzw, rdzu, fzm, fzp, & - v_theta_eddy_visc2, h_theta_eddy_visc4, prandtl_inv, & - prandtl_3d_inv, & - delsq_theta, & - theta_m, rho_edge, rho_zz, zz, & - eddy_visc_horz, eddy_visc_vert, & - bn2, config_len_disp, scalars, tend_scalars, & - index_tke, index_qv, num_scalars, config_mix_scalars, & - config_les_model, config_les_surface, time_of_day_seconds,& - config_surface_heat_flux, config_surface_moisture_flux, & - ur_cell, vr_cell, & - hfx, qfx, & - tend_theta_euler, dynamics_substep ) - - -! else ! 
this is the original MPAS dissipation code -! -! call theta_dissipation( cellStart, cellEnd, cellSolveStart, cellSolveEnd, & -! nCells, nEdges, & -! nEdgesOnCell, edgesOnCell, cellsOnEdge, edgesOnCell_sign, & -! invAreaCell, invDcEdge, dvEdge, & -! meshScalingDel2, meshScalingDel4, & -! config_mix_full, t_init, zgrid, & -! rdzw, rdzu, & -! v_theta_eddy_visc2, h_theta_eddy_visc4, prandtl_inv, & -! delsq_theta, & -! theta_m, rho_edge, kdiff, rho_zz, & -! tend_theta_euler ) -! -! end if + call scalar_dissipation_3d_les( cellStart, cellEnd, cellSolveStart, cellSolveEnd, & + nCells, nEdges, & + nEdgesOnCell, edgesOnCell, cellsOnEdge, edgesOnCell_sign, & + invAreaCell, invDcEdge, dvEdge, & + meshScalingDel2, meshScalingDel4, & + config_mix_full, t_init, zgrid, & + rdzw, rdzu, fzm, fzp, & + v_theta_eddy_visc2, h_theta_eddy_visc4, prandtl_inv, & + prandtl_3d_inv, & + delsq_theta, & + theta_m, rho_edge, rho_zz, zz, & + eddy_visc_horz, eddy_visc_vert, & + bn2, config_len_disp, scalars, tend_scalars, & + index_tke, index_qv, num_scalars, config_mix_scalars, & + config_les_model, config_les_surface, time_of_day_seconds,& + config_surface_heat_flux, config_surface_moisture_flux, & + ur_cell, vr_cell, & + hfx, qfx, & + tend_theta_euler, dynamics_substep ) end if ! theta mixing calculated first rk_step From c13e7f5517cb64f46e4b5ea2b5d3adbb47ed4152 Mon Sep 17 00:00:00 2001 From: Michael Duda Date: Wed, 4 Feb 2026 00:39:23 +0000 Subject: [PATCH 105/146] Set the Prandtl number back to 1.0 in mpas_constants The value of the Prandtl number had been changed to 1/3 during development of the initial LES capability for MPAS-Atmosphere. 
--- src/framework/mpas_constants.F | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/framework/mpas_constants.F b/src/framework/mpas_constants.F index 5822f9c0e6..2c8168510a 100644 --- a/src/framework/mpas_constants.F +++ b/src/framework/mpas_constants.F @@ -53,7 +53,7 @@ module mpas_constants real (kind=RKIND), parameter :: cvpm = -cv / cp ! #endif real (kind=RKIND), parameter :: p0 = 1.0e5_RKIND !< Constant: 100000 Pa - real (kind=RKIND), parameter :: prandtl = 1.0_RKIND/3.0_RKIND !< Constant: Prandtl number + real (kind=RKIND), parameter :: prandtl = 1.0_RKIND !< Constant: Prandtl number contains From 50013056497b2f51111313e493ecb9d827406d55 Mon Sep 17 00:00:00 2001 From: Michael Duda Date: Wed, 4 Feb 2026 01:01:57 +0000 Subject: [PATCH 106/146] Remove unnecessary variables from the atmosphere core's restart stream This commit removes the following variables from the definition of the restart stream in the atmosphere core: deriv_two defc_a defc_b deformation_coef_c2 deformation_coef_s2 deformation_coef_cs deformation_coef_c deformation_coef_s coeffs_reconstruct east north These variables are either already indirectly included in the restart stream through the invariant stream or they are not needed in restart files. 
--- src/core_atmosphere/Registry.xml | 11 ----------- 1 file changed, 11 deletions(-) diff --git a/src/core_atmosphere/Registry.xml b/src/core_atmosphere/Registry.xml index 5ac9420576..fa243580ca 100644 --- a/src/core_atmosphere/Registry.xml +++ b/src/core_atmosphere/Registry.xml @@ -629,17 +629,6 @@ #endif - - - - - - - - - - - From bfba0ca9bff5212525265f45ea7777840a2be137 Mon Sep 17 00:00:00 2001 From: Michael Duda Date: Wed, 4 Feb 2026 01:34:38 +0000 Subject: [PATCH 107/146] Move computation of time_of_day_seconds into flux_les_sas routine The logic to compute time_of_day_seconds was previously in the atm_compute_dyn_tend_work routine, though the time_of_day_seconds variable was only used in the commented-out routine flux_les_sas in the mpas_atm_dissipation_models module. In an attempt to keep the atm_compute_dyn_tend_work routine cleaner, this commit pushes the computation of time_of_day_seconds down to the flux_les_sas routine where it is actually used. In order to do this, the simulation clock and timestep, dt, are now passed as arguments to scalar_dissipation_3d_les and thereafter into flux_les_sas. --- .../dynamics/mpas_atm_dissipation_models.F | 25 ++++++++++++++----- .../dynamics/mpas_atm_time_integration.F | 14 ++--------- 2 files changed, 21 insertions(+), 18 deletions(-) diff --git a/src/core_atmosphere/dynamics/mpas_atm_dissipation_models.F b/src/core_atmosphere/dynamics/mpas_atm_dissipation_models.F index d97f0aa2c4..c524bb5332 100644 --- a/src/core_atmosphere/dynamics/mpas_atm_dissipation_models.F +++ b/src/core_atmosphere/dynamics/mpas_atm_dissipation_models.F @@ -14,7 +14,8 @@ module mpas_atm_dissipation_models use mpas_atmphys_constants use mpas_constants use mpas_log - use mpas_derived_types, only : MPAS_LOG_CRIT + use mpas_timekeeping, only : mpas_get_clock_time, mpas_get_time + use mpas_derived_types, only : MPAS_Clock_type, MPAS_Time_type, MPAS_NOW, MPAS_LOG_CRIT logical, parameter :: les_test = .true., les_sas_test = .false. !! 
real (kind=RKIND), parameter :: tke_heat_flux = 0.03 ! shear case from Moeng et al., first hour @@ -1145,7 +1146,7 @@ subroutine scalar_dissipation_3d_les( cellStart, cellEnd, cellSolveStart, cellSo eddy_visc_horz, eddy_visc_vert, & bv_freq2, config_len_disp, scalars, tend_scalars, & index_tke, index_qv, num_scalars_dummy, mix_scalars, & - config_les_model, config_les_surface, time_of_day_seconds,& + config_les_model, config_les_surface, clock, dt, & config_surface_heat_flux, config_surface_moisture_flux, & uReconstructZonal, uReconstructMeridional, & hfx, qfx, & @@ -1166,7 +1167,6 @@ subroutine scalar_dissipation_3d_les( cellStart, cellEnd, cellSolveStart, cellSo integer, intent(in) :: index_tke, index_qv integer, intent(in) :: dynamics_substep - real (kind=RKIND), intent(in) :: time_of_day_seconds real (kind=RKIND), intent(in) :: config_surface_heat_flux real (kind=RKIND), intent(in) :: config_surface_moisture_flux @@ -1175,6 +1175,9 @@ subroutine scalar_dissipation_3d_les( cellStart, cellEnd, cellSolveStart, cellSo character (len=StrKIND) :: config_les_model character (len=StrKIND) :: config_les_surface + type (MPAS_Clock_type), intent(in) :: clock + real (kind=RKIND), intent(in) :: dt + integer, dimension(nCells+1), intent(in) :: nEdgesOnCell integer, dimension(maxEdges,nCells+1), intent(in) :: EdgesOnCell @@ -1526,7 +1529,7 @@ subroutine scalar_dissipation_3d_les( cellStart, cellEnd, cellSolveStart, cellSo heat_flux = config_surface_heat_flux ! place holder routine for time-varying specified -! call flux_les_sas( heat_flux, moisture_flux, time_of_day_seconds ) +! call flux_les_sas( heat_flux, moisture_flux, clock, dt ) else if ( les_surface_opt == LES_SURFACE_VARYING ) then heat_flux = hfx(iCell)/rho_zz(1,iCell)/cp @@ -1606,18 +1609,28 @@ end subroutine scalar_dissipation_3d_les !----------- -! subroutine flux_les_sas(heat_flux, moisture_flux, time_of_day_seconds) +! subroutine flux_les_sas(heat_flux, moisture_flux, clock, dt) ! implicit none -! 
real (kind=RKIND), intent(in) :: time_of_day_seconds ! real (kind=RKIND), intent(out) :: heat_flux, moisture_flux +! type (MPAS_Clock_type), intent(in) :: clock +! real (kind=RKIND), intent(in) :: dt ! real (kind=RKIND), parameter:: t_start_t_flux = 3600.*6.0 ! real (kind=RKIND), parameter:: t_end_t_flux = 3600.*19.50 ! real (kind=RKIND), parameter:: t_start_q_flux = 3600.*7.0 ! real (kind=RKIND), parameter:: t_end_q_flux = 3600.*19.50 ! real (kind=RKIND) :: rel_time_t_flux, rel_time_q_flux +! real (kind=RKIND) :: time_of_day_seconds +! type (MPAS_Time_type) :: currTime +! integer :: H, M, S, S_n, S_d +! integer :: ierr + +! currTime = mpas_get_clock_time(clock, MPAS_NOW, ierr) +! call mpas_get_time(curr_time=currTime, H=H, M=M, S=S, S_n=S_n, S_d=S_d) +! time_of_day_seconds = real(H)*3600. + real(M)*60. + real(S) + real(S_n)/real(S_d) + 0.5*dt +! call mpas_log_write(' les integration, timestep midpoint time of day in seconds, $r ', realArgs=(/time_of_day_seconds/)) ! rel_time_t_flux = max(0.,(time_of_day_seconds - t_start_t_flux)/(t_end_t_flux - t_start_t_flux)) ! rel_time_q_flux = max(0.,(time_of_day_seconds - t_start_q_flux)/(t_end_q_flux - t_start_q_flux)) diff --git a/src/core_atmosphere/dynamics/mpas_atm_time_integration.F b/src/core_atmosphere/dynamics/mpas_atm_time_integration.F index c063760f36..25179e987c 100644 --- a/src/core_atmosphere/dynamics/mpas_atm_time_integration.F +++ b/src/core_atmosphere/dynamics/mpas_atm_time_integration.F @@ -23,7 +23,7 @@ module atm_time_integration use mpas_vector_reconstruction ! 
Added only clause to keep xlf90 from getting confused from the overloaded abs intrinsic in mpas_timekeeping use mpas_derived_types, only : MPAS_Time_type, MPAS_TimeInterval_type, MPAS_NOW - use mpas_timekeeping, only: mpas_get_clock_time, mpas_set_time, mpas_set_timeInterval, mpas_get_time, operator(+) + use mpas_timekeeping, only: mpas_set_time, mpas_set_timeInterval, mpas_get_time, operator(+) use mpas_timer #ifdef DO_PHYSICS @@ -5287,11 +5287,6 @@ subroutine atm_compute_dyn_tend_work(nCells, nEdges, nVertices, nVertLevels_dumm logical, parameter :: perturbation_coriolis = .true. real (kind=RKIND) :: reference_u - type (MPAS_Time_Type) :: currTime - integer :: H, M, S, S_n, S_d - integer :: ierr - real(kind=RKIND) :: time_of_day_seconds - flux4(q_im2, q_im1, q_i, q_ip1, ua) = & ua*( 7.*(q_i + q_im1) - (q_ip1 + q_im2) )/12.0 @@ -5410,11 +5405,6 @@ subroutine atm_compute_dyn_tend_work(nCells, nEdges, nVertices, nVertLevels_dumm call calculate_n2( bn2, theta_m, exner, pressure_b, pp, zgrid, scalars, index_qv, index_qc, qtot, & cellStart, cellEnd, nCells) - - currTime = mpas_get_clock_time(clock, MPAS_NOW, ierr) - call mpas_get_time(curr_time=currTime, H=H, M=M, S=S, S_n=S_n, S_d=S_d) - time_of_day_seconds = real(H)*3600. + real(M)*60. + real(S) + real(S_n)/real(S_d) + 0.5*dt - ! 
call mpas_log_write(' les integration, timestep midpoint time of day in seconds, $r ', realArgs=(/time_of_day_seconds/)) call les_models( config_les_model, config_les_surface, dynamics_substep, eddy_visc_horz, eddy_visc_vert, & u, v, ur_cell, vr_cell, & @@ -5938,7 +5928,7 @@ subroutine atm_compute_dyn_tend_work(nCells, nEdges, nVertices, nVertLevels_dumm eddy_visc_horz, eddy_visc_vert, & bn2, config_len_disp, scalars, tend_scalars, & index_tke, index_qv, num_scalars, config_mix_scalars, & - config_les_model, config_les_surface, time_of_day_seconds,& + config_les_model, config_les_surface, clock, dt, & config_surface_heat_flux, config_surface_moisture_flux, & ur_cell, vr_cell, & hfx, qfx, & From 676bb7ff07bf64018e7e2765b2331c16a3c07b5b Mon Sep 17 00:00:00 2001 From: Michael Duda Date: Wed, 4 Feb 2026 16:48:49 -0700 Subject: [PATCH 108/146] Fix implementation of "CAM-MPAS" 2nd-order horizontal filter The implementation of the "CAM-MPAS" 2nd-order horizontal filter contained code to place a lower-bound on the 'kdiff' variable, which is no longer used, and it also included out-dated code to apply the filter over only the top three layers in the model. This commit updates the filter code (active when config_mpas_cam_coef > 0.0) so that it correctly enforces a lower-bound on 'eddy_visc_horz' over a variable number of layers below the model top. As part of the changes in this commit, the variable 'kdiff' can be removed entirely from the atmosphere core, as it has been supplanted by the 'eddy_visc_horz' variable. 
--- src/core_atmosphere/Registry.xml | 3 --- .../dynamics/mpas_atm_time_integration.F | 15 ++++----------- 2 files changed, 4 insertions(+), 14 deletions(-) diff --git a/src/core_atmosphere/Registry.xml b/src/core_atmosphere/Registry.xml index fa243580ca..a0a94ad03c 100644 --- a/src/core_atmosphere/Registry.xml +++ b/src/core_atmosphere/Registry.xml @@ -1973,9 +1973,6 @@ - - diff --git a/src/core_atmosphere/dynamics/mpas_atm_time_integration.F b/src/core_atmosphere/dynamics/mpas_atm_time_integration.F index 25179e987c..4307dca8f0 100644 --- a/src/core_atmosphere/dynamics/mpas_atm_time_integration.F +++ b/src/core_atmosphere/dynamics/mpas_atm_time_integration.F @@ -4820,7 +4820,7 @@ subroutine atm_compute_dyn_tend(tend, tend_physics, state, diag, mesh, diag_phys real (kind=RKIND), dimension(:,:), pointer :: weightsOnEdge, zgrid, rho_edge, rho_zz, ru, u, v, tend_u, & divergence, vorticity, ke, pv_edge, theta_m, rw, tend_rho, & rt_diabatic_tend, tend_theta, tend_w, w, cqw, rb, rr, pp, pressure_b, zz, zxu, cqu, & - h_divergence, kdiff, bn2, edgesOnCell_sign, edgesOnVertex_sign, rw_save, ru_save + h_divergence, bn2, edgesOnCell_sign, edgesOnVertex_sign, rw_save, ru_save real (kind=RKIND), dimension(:,:), pointer :: eddy_visc_horz, eddy_visc_vert @@ -4929,7 +4929,6 @@ subroutine atm_compute_dyn_tend(tend, tend_physics, state, diag, mesh, diag_phys call mpas_pool_get_array(diag, 'rho_p', rr) call mpas_pool_get_array(diag, 'rho_p_save', rr_save) call mpas_pool_get_array(diag, 'v', v) - call mpas_pool_get_array(diag, 'kdiff', kdiff) call mpas_pool_get_array(diag, 'eddy_visc_horz', eddy_visc_horz) call mpas_pool_get_array(diag, 'eddy_visc_vert', eddy_visc_vert) call mpas_pool_get_array(diag, 'bn2', bn2) @@ -5060,7 +5059,7 @@ subroutine atm_compute_dyn_tend(tend, tend_physics, state, diag, mesh, diag_phys weightsOnEdge, zgrid, rho_edge, rho_zz, ru, u, v, tend_u, & divergence, vorticity, ke, pv_edge, theta_m, rw, tend_rho, & rt_diabatic_tend, tend_theta, tend_w, w, cqw, rb, 
rr, pp, pressure_b, zz, zxu, cqu, & - h_divergence, kdiff, bn2, eddy_visc_horz, eddy_visc_vert, index_tke, & + h_divergence, bn2, eddy_visc_horz, eddy_visc_vert, index_tke, & edgesOnCell_sign, edgesOnVertex_sign, rw_save, ru_save, & theta_m_save, exner, rr_save, scalars, tend_u_euler, tend_w_euler, tend_theta_euler, deriv_two, & cellsOnEdge, verticesOnEdge, edgesOnCell, edgesOnEdge, cellsOnCell, edgesOnVertex, nEdgesOnCell, nEdgesOnEdge, & @@ -5095,7 +5094,7 @@ subroutine atm_compute_dyn_tend_work(nCells, nEdges, nVertices, nVertLevels_dumm weightsOnEdge, zgrid, rho_edge, rho_zz, ru, u, v, tend_u, & divergence, vorticity, ke, pv_edge, theta_m, rw, tend_rho, & rt_diabatic_tend, tend_theta, tend_w, w, cqw, rb, rr, pp, pressure_b, zz, zxu, cqu, & - h_divergence, kdiff, bn2, eddy_visc_horz, eddy_visc_vert, index_tke, & + h_divergence, bn2, eddy_visc_horz, eddy_visc_vert, index_tke, & edgesOnCell_sign, edgesOnVertex_sign, rw_save, ru_save, & theta_m_save, exner, rr_save, scalars, tend_u_euler, tend_w_euler, tend_theta_euler, deriv_two, & cellsOnEdge, verticesOnEdge, edgesOnCell, edgesOnEdge, cellsOnCell, edgesOnVertex, nEdgesOnCell, nEdgesOnEdge, & @@ -5164,7 +5163,6 @@ subroutine atm_compute_dyn_tend_work(nCells, nEdges, nVertices, nVertLevels_dumm real (kind=RKIND), dimension(nVertLevels,nEdges+1) :: zxu real (kind=RKIND), dimension(nVertLevels,nEdges+1) :: cqu real (kind=RKIND), dimension(nVertLevels,nCells+1) :: h_divergence - real (kind=RKIND), dimension(nVertLevels,nCells+1) :: kdiff real (kind=RKIND), dimension(nVertLevels,nCells+1) :: eddy_visc_horz real (kind=RKIND), dimension(nVertLevels,nCells+1) :: eddy_visc_vert real (kind=RKIND), dimension(nVertLevels,nCells+1) :: bn2 @@ -5309,7 +5307,6 @@ subroutine atm_compute_dyn_tend_work(nCells, nEdges, nVertices, nVertLevels_dumm !$acc enter data create(tend_theta_euler) !$acc enter data create(tend_rho) - !$acc enter data create(kdiff) !$acc enter data copyin(tend_rho_physics) !$acc enter data copyin(rb, rr_save) 
!$acc enter data copyin(divergence, vorticity) @@ -5431,11 +5428,8 @@ subroutine atm_compute_dyn_tend_work(nCells, nEdges, nVertices, nVertLevels_dumm do k = nVertLevels-config_number_cam_damping_levels + 1, nVertLevels visc2cam = 4.0*2.0833*config_len_disp*config_mpas_cam_coef visc2cam = visc2cam*(1.0-real(nVertLevels-k)/real(config_number_cam_damping_levels)) - kdiff(k ,iCell) = max(kdiff(k ,iCell),visc2cam) + eddy_visc_horz(k,iCell) = max(eddy_visc_horz(k,iCell),visc2cam) end do - eddy_visc_horz(nVertLevels-2,iCell) = max(eddy_visc_horz(nVertLevels-2,iCell), 2.0833*config_len_disp*config_mpas_cam_coef) - eddy_visc_horz(nVertLevels-1,iCell) = max(eddy_visc_horz(nVertLevels-1,iCell),2.0*2.0833*config_len_disp*config_mpas_cam_coef) - eddy_visc_horz(nVertLevels ,iCell) = max(eddy_visc_horz(nVertLevels ,iCell),4.0*2.0833*config_len_disp*config_mpas_cam_coef) end do !$acc end parallel @@ -5999,7 +5993,6 @@ subroutine atm_compute_dyn_tend_work(nCells, nEdges, nVertices, nVertLevels_dumm !$acc exit data copyout(tend_theta_euler) !$acc exit data copyout(tend_rho) - !$acc exit data delete(kdiff) !$acc exit data delete(tend_rho_physics) !$acc exit data delete(rb, rr_save) !$acc exit data delete(divergence, vorticity) From 60e765b03efe0a0bc7b31e0c06e666dc70cdefa0 Mon Sep 17 00:00:00 2001 From: Michael Duda Date: Fri, 13 Feb 2026 11:15:43 -0700 Subject: [PATCH 109/146] fixup: tidy up whitespace and comments in mpas_atm_advection.F --- src/core_init_atmosphere/mpas_atm_advection.F | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/core_init_atmosphere/mpas_atm_advection.F b/src/core_init_atmosphere/mpas_atm_advection.F index 6b4c24658e..4852b17117 100644 --- a/src/core_init_atmosphere/mpas_atm_advection.F +++ b/src/core_init_atmosphere/mpas_atm_advection.F @@ -778,7 +778,6 @@ subroutine atm_initialize_deformation_weights( mesh, nCells, on_a_sphere, sphere real (kind=RKIND) :: pii real (kind=RKIND), dimension(25) :: xp, yp real (kind=RKIND) :: xe, ye - 
real (kind=RKIND) :: length_scale integer, dimension(25) :: cell_list @@ -790,6 +789,7 @@ subroutine atm_initialize_deformation_weights( mesh, nCells, on_a_sphere, sphere logical, pointer :: is_periodic real(kind=RKIND), pointer :: x_period, y_period + call mpas_pool_get_config(mesh, 'is_periodic', is_periodic) call mpas_pool_get_config(mesh, 'x_period', x_period) call mpas_pool_get_config(mesh, 'y_period', y_period) @@ -1023,7 +1023,6 @@ subroutine atm_initialize_deformation_weights( mesh, nCells, on_a_sphere, sphere end subroutine atm_initialize_deformation_weights -!----------------------- subroutine atm_init_test_coefs( deformation_coef_c2, deformation_coef_s2, & deformation_coef_cs, deformation_coef_c, & From 8bc6582f3031c305b24444849e733b7da5700cd1 Mon Sep 17 00:00:00 2001 From: Michael Duda Date: Fri, 13 Feb 2026 12:21:31 -0700 Subject: [PATCH 110/146] fixup: eliminate duplicated code to convert namelist options to integer parameters for LES This commit also converts namelist string options to integers earlier in the call tree, in atm_compute_dyn_tend. --- .../dynamics/mpas_atm_dissipation_models.F | 201 ++++++++---------- .../dynamics/mpas_atm_time_integration.F | 39 ++-- 2 files changed, 113 insertions(+), 127 deletions(-) diff --git a/src/core_atmosphere/dynamics/mpas_atm_dissipation_models.F b/src/core_atmosphere/dynamics/mpas_atm_dissipation_models.F index c524bb5332..450ed045c1 100644 --- a/src/core_atmosphere/dynamics/mpas_atm_dissipation_models.F +++ b/src/core_atmosphere/dynamics/mpas_atm_dissipation_models.F @@ -28,6 +28,7 @@ module mpas_atm_dissipation_models ! real (kind=RKIND), parameter :: c_k = 0.1 real (kind=RKIND), parameter :: c_k = 0.25 + integer, parameter :: LES_INVALID_OPT = -1 integer, parameter :: LES_MODEL_NONE = 0, & LES_MODEL_3D_SMAGORINSKY = 1, & @@ -40,6 +41,82 @@ module mpas_atm_dissipation_models contains + !----------------------------------------------------------------------- + ! routine les_model_from_string + ! 
+ !> \brief Converts an LES model option from a string to an integer parameter + !> \author Michael Duda + !> \date 13 February 2026 + !> \details + !> Given a string that contains the name of a valid LES model option, this + !> routine returns an integer parameter corresponding to that option. + !> + !> If the given string is not recognized as a valid LES model option, the + !> integer parameter LES_INVALID_OPT is returned. + ! + !----------------------------------------------------------------------- + pure function les_model_from_string(les_model_str) result(les_model_opt) + + implicit none + + ! Arguments + character(len=*), intent(in) :: les_model_str + + ! Return value + integer :: les_model_opt + + + if (trim(les_model_str) == 'none') then + les_model_opt = LES_MODEL_NONE + else if (trim(les_model_str) == '3d_smagorinsky') then + les_model_opt = LES_MODEL_3D_SMAGORINSKY + else if (trim(les_model_str) == 'prognostic_1.5_order') then + les_model_opt = LES_MODEL_PROGNOSTIC_15_ORDER + else + les_model_opt = LES_INVALID_OPT + end if + + end function les_model_from_string + + + !----------------------------------------------------------------------- + ! routine les_surface_from_string + ! + !> \brief Converts an LES surface option from a string to an integer parameter + !> \author Michael Duda + !> \date 13 February 2026 + !> \details + !> Given a string that contains the name of a valid LES surface option, this + !> routine returns an integer parameter corresponding to that option. + !> + !> If the given string is not recognized as a valid LES surface option, the + !> integer parameter LES_INVALID_OPT is returned. + ! + !----------------------------------------------------------------------- + pure function les_surface_from_string(les_surface_str) result(les_surface_opt) + + implicit none + + ! Arguments + character(len=*), intent(in) :: les_surface_str + + ! 
Return value + integer :: les_surface_opt + + + if (trim(les_surface_str) == 'none') then + les_surface_opt = LES_SURFACE_NONE + else if (trim(les_surface_str) == 'specified') then + les_surface_opt = LES_SURFACE_SPECIFIED + else if (trim(les_surface_str) == 'varying') then + les_surface_opt = LES_SURFACE_VARYING + else + les_surface_opt = LES_INVALID_OPT + end if + + end function les_surface_from_string + + subroutine smagorinsky_2d( kdiff, u, v, c_s, config_len_disp, & deformation_coef_c2, deformation_coef_s2, deformation_coef_cs, & invDt, h_mom_eddy_visc4, config_visc4_2dsmag, h_theta_eddy_visc4, & @@ -129,7 +206,7 @@ end subroutine smagorinsky_2d !--------------------------------------- - subroutine les_models( config_les_model, config_les_surface, dynamics_substep, eddy_visc_horz, eddy_visc_vert, & + subroutine les_models( les_model_opt, les_surface_opt, dynamics_substep, eddy_visc_horz, eddy_visc_vert, & u, v, uCell, vCell, & w, c_s, bv_freq2, zgrid, config_len_disp, & deformation_coef_c2, deformation_coef_s2, deformation_coef_cs, & @@ -141,8 +218,8 @@ subroutine les_models( config_les_model, config_les_surface, dynamics_substep, e implicit none - character (len=StrKIND), intent(in) :: config_les_model - character (len=StrKIND), intent(in) :: config_les_surface + integer, intent(in) :: les_model_opt + integer, intent(in) :: les_surface_opt integer, intent(in) :: cellStart, cellEnd, nCells, nEdges, nVertLevels, maxEdges, index_tke, num_scalars integer, intent(in) :: dynamics_substep @@ -187,32 +264,6 @@ subroutine les_models( config_les_model, config_les_surface, dynamics_substep, e logical, parameter :: test_tke=.true. ! 
real (kind=RKIND), parameter :: epsilon_bv = 1.e-06 - integer :: les_model_opt, les_surface_opt - - - DEBUG_WRITE(' begin les_models ') - DEBUG_WRITE(' les scheme is '//trim(config_les_model)) - DEBUG_WRITE(' les surface scheme is '//trim(config_les_surface)) - - if (trim(config_les_model) == 'none') then - les_model_opt = LES_MODEL_NONE - else if (trim(config_les_model) == '3d_smagorinsky') then - les_model_opt = LES_MODEL_3D_SMAGORINSKY - else if (trim(config_les_model) == 'prognostic_1.5_order') then - les_model_opt = LES_MODEL_PROGNOSTIC_15_ORDER - else - ! Error - end if - - if (trim(config_les_surface) == 'none') then - les_surface_opt = LES_SURFACE_NONE - else if (trim(config_les_surface) == 'specified') then - les_surface_opt = LES_SURFACE_SPECIFIED - else if (trim(config_les_surface) == 'varying') then - les_surface_opt = LES_SURFACE_VARYING - else - ! Error - end if !$acc enter data create(dudx, dudy, dvdx, dvdy, dwdx, dwdy, dudz, dvdz, dwdz) !$acc enter data create(d_11, d_22, d_33, d_12, d_13, d_23) @@ -394,7 +445,7 @@ subroutine les_models( config_les_model, config_les_surface, dynamics_substep, e !MGD call mpas_log_write(' in les_models, no les scheme for '//trim(config_les_model), messageType=MPAS_LOG_CRIT) - end if ! end of config_les_model test + end if ! end of les_model_opt test end do ! 
loop over all owned cells (columns) @@ -515,7 +566,7 @@ subroutine u_dissipation_3d( edgeStart, edgeEnd, edgeSolveStart, edgeSolveEnd, v config_mix_full, h_mom_eddy_visc4, v_mom_eddy_visc2, & config_del4u_div_factor, zgrid, & eddy_visc_horz, eddy_visc_vert, zz, rdzu, rdzw, & - fzm, fzp, config_les_model, config_les_surface, & + fzm, fzp, les_model_opt, les_surface_opt, & config_surface_drag_coefficient, & delsq_u, delsq_vorticity, delsq_divergence, & u, v, divergence, vorticity, rho_edge, rho_zz, u_init, v_init, ustm, & @@ -531,8 +582,8 @@ subroutine u_dissipation_3d( edgeStart, edgeEnd, edgeSolveStart, edgeSolveEnd, v integer, intent(in) :: nCells, nEdges, nVertices logical, intent(in) :: config_mix_full - character (len=StrKIND) :: config_les_model - character (len=StrKIND) :: config_les_surface + integer, intent(in) :: les_model_opt + integer, intent(in) :: les_surface_opt integer, dimension(2,nEdges+1), intent(in) :: cellsOnEdge integer, dimension(2,nEdges+1), intent(in) :: verticesOnEdge @@ -598,35 +649,11 @@ subroutine u_dissipation_3d( edgeStart, edgeEnd, edgeSolveStart, edgeSolveEnd, v real (kind=RKIND) :: velocity_magnitude real (kind=RKIND) :: tau_12_factor - integer :: les_model_opt, les_surface_opt - DEBUG_WRITE(' begin u_dissipation_3d ') - DEBUG_WRITE(' les model is '//trim(config_les_model)) - DEBUG_WRITE(' les surface is '//trim(config_les_surface)) DEBUG_WRITE(' 4th order hyperviscosity is $r ' COMMA realArgs=(/h_mom_eddy_visc4/)) DEBUG_WRITE(' 4th order divergence factor is $r ' COMMA realArgs=(/config_del4u_div_factor/)) - if (trim(config_les_model) == 'none') then - les_model_opt = LES_MODEL_NONE - else if (trim(config_les_model) == '3d_smagorinsky') then - les_model_opt = LES_MODEL_3D_SMAGORINSKY - else if (trim(config_les_model) == 'prognostic_1.5_order') then - les_model_opt = LES_MODEL_PROGNOSTIC_15_ORDER - else - ! 
Error - end if - - if (trim(config_les_surface) == 'none') then - les_surface_opt = LES_SURFACE_NONE - else if (trim(config_les_surface) == 'specified') then - les_surface_opt = LES_SURFACE_SPECIFIED - else if (trim(config_les_surface) == 'varying') then - les_surface_opt = LES_SURFACE_VARYING - else - ! Error - end if - !$OMP BARRIER ! del^4 horizontal filter. We compute this as del^2 ( del^2 (u) ). @@ -912,7 +939,7 @@ subroutine w_dissipation_3d( cellStart, cellEnd, cellSolveStart, cellSolveEnd, delsq_w, & w, rho_edge, rho_zz, divergence, zz, & eddy_visc_horz, eddy_visc_vert, & - config_les_model, config_les_surface, & + les_model_opt, les_surface_opt, & tend_w_euler ) @@ -932,8 +959,8 @@ subroutine w_dissipation_3d( cellStart, cellEnd, cellSolveStart, cellSolveEnd, integer, dimension(2,nEdges+1), intent(in) :: cellsOnEdge - character (len=StrKIND) :: config_les_model - character (len=StrKIND) :: config_les_surface + integer, intent(in) :: les_model_opt + integer, intent(in) :: les_surface_opt real (kind=RKIND), intent(in) :: h_mom_eddy_visc4 real (kind=RKIND), intent(in) :: v_mom_eddy_visc2 @@ -966,8 +993,6 @@ subroutine w_dissipation_3d( cellStart, cellEnd, cellSolveStart, cellSolveEnd, integer :: cell1, cell2, iEdge, iCell, i, k real (kind=RKIND) :: r_areaCell, edge_sign, w_turb_flux - integer :: les_model_opt, les_surface_opt - ! !OMP BARRIER why is this openmp barrier here??? 
@@ -978,30 +1003,8 @@ subroutine w_dissipation_3d( cellStart, cellEnd, cellSolveStart, cellSolveEnd, DEBUG_WRITE(' begin w_dissipation_3d ') - DEBUG_WRITE(' les model is '//trim(config_les_model)) - DEBUG_WRITE(' les surface is '//trim(config_les_surface)) DEBUG_WRITE(' 4th order hyperviscosity is $r ' COMMA realArgs=(/h_mom_eddy_visc4/)) - if (trim(config_les_model) == 'none') then - les_model_opt = LES_MODEL_NONE - else if (trim(config_les_model) == '3d_smagorinsky') then - les_model_opt = LES_MODEL_3D_SMAGORINSKY - else if (trim(config_les_model) == 'prognostic_1.5_order') then - les_model_opt = LES_MODEL_PROGNOSTIC_15_ORDER - else - ! Error - end if - - if (trim(config_les_surface) == 'none') then - les_surface_opt = LES_SURFACE_NONE - else if (trim(config_les_surface) == 'specified') then - les_surface_opt = LES_SURFACE_SPECIFIED - else if (trim(config_les_surface) == 'varying') then - les_surface_opt = LES_SURFACE_VARYING - else - ! Error - end if - !$acc enter data create(turb_vflux) !$acc parallel default(present) @@ -1146,7 +1149,7 @@ subroutine scalar_dissipation_3d_les( cellStart, cellEnd, cellSolveStart, cellSo eddy_visc_horz, eddy_visc_vert, & bv_freq2, config_len_disp, scalars, tend_scalars, & index_tke, index_qv, num_scalars_dummy, mix_scalars, & - config_les_model, config_les_surface, clock, dt, & + les_model_opt, les_surface_opt, clock, dt, & config_surface_heat_flux, config_surface_moisture_flux, & uReconstructZonal, uReconstructMeridional, & hfx, qfx, & @@ -1172,8 +1175,8 @@ subroutine scalar_dissipation_3d_les( cellStart, cellEnd, cellSolveStart, cellSo logical, intent(in) :: config_mix_full, mix_scalars - character (len=StrKIND) :: config_les_model - character (len=StrKIND) :: config_les_surface + integer, intent(in) :: les_model_opt + integer, intent(in) :: les_surface_opt type (MPAS_Clock_type), intent(in) :: clock real (kind=RKIND), intent(in) :: dt @@ -1231,34 +1234,10 @@ subroutine scalar_dissipation_3d_les( cellStart, cellEnd, 
cellSolveStart, cellSo real (kind=RKIND) :: moisture_flux, heat_flux, theta_m_flux real (kind=RKIND) :: qv_cell, theta_m_cell, theta_cell - integer :: les_model_opt, les_surface_opt - DEBUG_WRITE(' begin scalar_dissipation_3d ') - DEBUG_WRITE(' les model is '//trim(config_les_model)) - DEBUG_WRITE(' les surface is '//trim(config_les_surface)) DEBUG_WRITE(' 4th order hyperviscosity is $r ' COMMA realArgs=(/h_theta_eddy_visc4/)) - if (trim(config_les_model) == 'none') then - les_model_opt = LES_MODEL_NONE - else if (trim(config_les_model) == '3d_smagorinsky') then - les_model_opt = LES_MODEL_3D_SMAGORINSKY - else if (trim(config_les_model) == 'prognostic_1.5_order') then - les_model_opt = LES_MODEL_PROGNOSTIC_15_ORDER - else - ! Error - end if - - if (trim(config_les_surface) == 'none') then - les_surface_opt = LES_SURFACE_NONE - else if (trim(config_les_surface) == 'specified') then - les_surface_opt = LES_SURFACE_SPECIFIED - else if (trim(config_les_surface) == 'varying') then - les_surface_opt = LES_SURFACE_VARYING - else - ! Error - end if - if( mix_scalars .and. (dynamics_substep == 1)) call mpas_log_write(' scalar mixing on ') !$acc enter data create(turb_vflux_scalars) diff --git a/src/core_atmosphere/dynamics/mpas_atm_time_integration.F b/src/core_atmosphere/dynamics/mpas_atm_time_integration.F index 4307dca8f0..644963c3e6 100644 --- a/src/core_atmosphere/dynamics/mpas_atm_time_integration.F +++ b/src/core_atmosphere/dynamics/mpas_atm_time_integration.F @@ -4791,6 +4791,8 @@ subroutine atm_compute_dyn_tend(tend, tend_physics, state, diag, mesh, diag_phys ! !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! + use mpas_atm_dissipation_models, only : les_model_from_string, les_surface_from_string + implicit none ! 
@@ -4868,6 +4870,7 @@ subroutine atm_compute_dyn_tend(tend, tend_physics, state, diag, mesh, diag_phys character (len=StrKIND), pointer :: config_horiz_mixing character (len=StrKIND), pointer :: config_les_model character (len=StrKIND), pointer :: config_les_surface + integer :: les_model_opt, les_surface_opt real (kind=RKIND), pointer :: config_surface_heat_flux real (kind=RKIND), pointer :: config_surface_moisture_flux real (kind=RKIND), pointer :: config_surface_drag_coefficient @@ -5052,6 +5055,9 @@ subroutine atm_compute_dyn_tend(tend, tend_physics, state, diag, mesh, diag_phys call mpas_pool_get_array(mesh, 'cf2', cf2) call mpas_pool_get_array(mesh, 'cf3', cf3) + les_model_opt = les_model_from_string(config_les_model) + les_surface_opt = les_surface_from_string(config_les_surface) + call atm_compute_dyn_tend_work(nCells, nEdges, nVertices, nVertLevels, & nCellsSolve, nEdgesSolve, vertexDegree, maxEdges, maxEdges2, num_scalars, index_qv, index_qc, moist_start, moist_end, & tend_scalars, & @@ -5066,8 +5072,8 @@ subroutine atm_compute_dyn_tend(tend, tend_physics, state, diag, mesh, diag_phys latCell, latEdge, angleEdge, u_init, v_init, advCellsForEdge, nAdvCellsForEdge, adv_coefs, adv_coefs_3rd, & rdzu, rdzw, fzm, fzp, qv_init, t_init, cf1, cf2, cf3, r_earth, ur_cell, vr_cell, defc_a, defc_b, & deformation_coef_c2,deformation_coef_s2,deformation_coef_cs,deformation_coef_c,deformation_coef_s, & - tend_w_pgf, tend_w_buoy, coef_3rd_order, c_s, config_mix_full, config_mix_scalars, config_horiz_mixing, config_les_model, & - config_les_surface, prandtl_3d_inv, config_del4u_div_factor, & + tend_w_pgf, tend_w_buoy, coef_3rd_order, c_s, config_mix_full, config_mix_scalars, config_horiz_mixing, les_model_opt, & + les_surface_opt, prandtl_3d_inv, config_del4u_div_factor, & config_h_mom_eddy_visc2, config_v_mom_eddy_visc2, config_h_theta_eddy_visc2, config_v_theta_eddy_visc2, & config_h_theta_eddy_visc4, config_h_mom_eddy_visc4, config_visc4_2dsmag, config_len_disp, 
rk_step, dynamics_substep, dt, & config_mpas_cam_coef, & @@ -5101,8 +5107,8 @@ subroutine atm_compute_dyn_tend_work(nCells, nEdges, nVertices, nVertLevels_dumm latCell, latEdge, angleEdge, u_init, v_init, advCellsForEdge, nAdvCellsForEdge, adv_coefs, adv_coefs_3rd, & rdzu, rdzw, fzm, fzp, qv_init, t_init, cf1, cf2, cf3, r_earth, ur_cell, vr_cell, defc_a, defc_b, & deformation_coef_c2,deformation_coef_s2,deformation_coef_cs,deformation_coef_c,deformation_coef_s, & - tend_w_pgf, tend_w_buoy, coef_3rd_order, c_s, config_mix_full, config_mix_scalars, config_horiz_mixing, config_les_model, & - config_les_surface, prandtl_3d_inv, config_del4u_div_factor, & + tend_w_pgf, tend_w_buoy, coef_3rd_order, c_s, config_mix_full, config_mix_scalars, config_horiz_mixing, les_model_opt, & + les_surface_opt, prandtl_3d_inv, config_del4u_div_factor, & config_h_mom_eddy_visc2, config_v_mom_eddy_visc2, config_h_theta_eddy_visc2, config_v_theta_eddy_visc2, & config_h_theta_eddy_visc4, config_h_mom_eddy_visc4, config_visc4_2dsmag, config_len_disp, rk_step, dynamics_substep, dt, & config_mpas_cam_coef, & @@ -5115,6 +5121,7 @@ subroutine atm_compute_dyn_tend_work(nCells, nEdges, nVertices, nVertLevels_dumm use mpas_atm_dimensions + use mpas_atm_dissipation_models, only : LES_MODEL_NONE implicit none @@ -5225,8 +5232,8 @@ subroutine atm_compute_dyn_tend_work(nCells, nEdges, nVertices, nVertLevels_dumm real (kind=RKIND) :: coef_3rd_order, c_s logical :: config_mix_full, config_mix_scalars character (len=StrKIND) :: config_horiz_mixing - character (len=StrKIND) :: config_les_model - character (len=StrKIND) :: config_les_surface + integer, intent(in) :: les_model_opt + integer, intent(in) :: les_surface_opt real (kind=RKIND) :: config_surface_heat_flux real (kind=RKIND) :: config_surface_moisture_flux real (kind=RKIND) :: config_surface_drag_coefficient @@ -5297,7 +5304,7 @@ subroutine atm_compute_dyn_tend_work(nCells, nEdges, nVertices, nVertLevels_dumm if (perturbation_coriolis) then !$acc 
enter data copyin(u_init, v_init) end if - if (trim(config_les_model) /= 'none') then + if (les_model_opt /= LES_MODEL_NONE) then !$acc enter data copyin(exner, pressure_b, bn2) end if !$acc enter data copyin(ustm, hfx, qfx) @@ -5333,7 +5340,7 @@ subroutine atm_compute_dyn_tend_work(nCells, nEdges, nVertices, nVertLevels_dumm !$acc enter data copyin(rw_save, rt_diabatic_tend) !$acc enter data create(rthdynten) !$acc enter data copyin(t_init) - if (trim(config_les_model) /= 'none') then + if (les_model_opt /= LES_MODEL_NONE) then !$acc enter data copyin(ur_cell, vr_cell) else #ifdef CURVATURE @@ -5369,7 +5376,7 @@ subroutine atm_compute_dyn_tend_work(nCells, nEdges, nVertices, nVertLevels_dumm ! Smagorinsky eddy viscosity, based on horizontal deformation (in this case on model coordinate surfaces). ! The integration coefficients were precomputed and stored in deformation_coef_* - if(config_les_model == "none") then + if(les_model_opt == LES_MODEL_NONE) then if(config_horiz_mixing == "2d_smagorinsky") then @@ -5396,14 +5403,14 @@ subroutine atm_compute_dyn_tend_work(nCells, nEdges, nVertices, nVertLevels_dumm end if - else if (config_les_model /= "none") then + else if (les_model_opt /= LES_MODEL_NONE) then ! 
call mpas_log_write(' BV call, index qv, qc, tke $i $i $i ', intArgs=(/index_qv, index_qc, index_tke/)) call calculate_n2( bn2, theta_m, exner, pressure_b, pp, zgrid, scalars, index_qv, index_qc, qtot, & cellStart, cellEnd, nCells) - call les_models( config_les_model, config_les_surface, dynamics_substep, eddy_visc_horz, eddy_visc_vert, & + call les_models( les_model_opt, les_surface_opt, dynamics_substep, eddy_visc_horz, eddy_visc_vert, & u, v, ur_cell, vr_cell, & w, c_s, bn2, zgrid, config_len_disp, & deformation_coef_c2, deformation_coef_s2, deformation_coef_cs, & @@ -5620,7 +5627,7 @@ subroutine atm_compute_dyn_tend_work(nCells, nEdges, nVertices, nVertLevels_dumm config_mix_full, h_mom_eddy_visc4, v_mom_eddy_visc2, & config_del4u_div_factor, zgrid, & eddy_visc_horz, eddy_visc_vert, zz, rdzu, rdzw, & - fzm, fzp, config_les_model, config_les_surface, & + fzm, fzp, les_model_opt, les_surface_opt, & config_surface_drag_coefficient, & delsq_u, delsq_vorticity, delsq_divergence, & u, v, divergence, vorticity, rho_edge, rho_zz, u_init, v_init, ustm, tend_u_euler ) @@ -5762,7 +5769,7 @@ subroutine atm_compute_dyn_tend_work(nCells, nEdges, nVertices, nVertLevels_dumm delsq_w, & w, rho_edge, rho_zz, divergence, zz, & eddy_visc_horz, eddy_visc_vert, & - config_les_model, config_les_surface, & + les_model_opt, les_surface_opt, & tend_w_euler ) end if ! 
mixing for w computed in first rk_step @@ -5922,7 +5929,7 @@ subroutine atm_compute_dyn_tend_work(nCells, nEdges, nVertices, nVertLevels_dumm eddy_visc_horz, eddy_visc_vert, & bn2, config_len_disp, scalars, tend_scalars, & index_tke, index_qv, num_scalars, config_mix_scalars, & - config_les_model, config_les_surface, clock, dt, & + les_model_opt, les_surface_opt, clock, dt, & config_surface_heat_flux, config_surface_moisture_flux, & ur_cell, vr_cell, & hfx, qfx, & @@ -5982,7 +5989,7 @@ subroutine atm_compute_dyn_tend_work(nCells, nEdges, nVertices, nVertLevels_dumm if (perturbation_coriolis) then !$acc exit data delete(u_init, v_init) end if - if (trim(config_les_model) /= 'none') then + if (les_model_opt /= LES_MODEL_NONE) then !$acc exit data delete(exner, pressure_b) !$acc exit data copyout(bn2) end if @@ -6019,7 +6026,7 @@ subroutine atm_compute_dyn_tend_work(nCells, nEdges, nVertices, nVertLevels_dumm !$acc exit data delete(rw_save, rt_diabatic_tend) !$acc exit data copyout(rthdynten) !$acc exit data delete(t_init) - if (trim(config_les_model) /= 'none') then + if (les_model_opt /= LES_MODEL_NONE) then !$acc exit data delete(ur_cell, vr_cell) else #ifdef CURVATURE From b762d35567c0ff801020f5312ff488af38dcb6d0 Mon Sep 17 00:00:00 2001 From: Abishek Gopal Date: Tue, 13 May 2025 11:23:11 -0600 Subject: [PATCH 111/146] Consolidating OpenACC device-host memory transfers This PR consolidates much of the OpenACC host and device data transfers during the course of the dynamical execution to two subroutines mpas_atm_pre_dynamics _h2d and mpas_atm_post_dynamics_d2h that are called before and after the call to atm_srk3 subroutine. Due to atm_compute_solve_diagnostics also being called once before the start of model run, we also have a pair of subroutines mpas_atm _pre_computesolvediag_h2d and mpas_atm_post_computesolvediag_d2h to handle data movements around the first call to atm_compute_solve_diagnostics. 
Any fields copied onto the device in these subroutines are removed from explicit data movement statements in the dynamical core. The mesh/time-invariant fields are still copied onto the device in mpas_atm_ dynamics_init and removed from the device in mpas_atm_dynamics_finalize, with the exception of select fields moved in mpas_atm_pre_computesolvediag_h2d and mpas_atm_post_computesolvediag_d2h. This is a special case due to atm_compute_ solve_diagnostics being called for the first time before the call to mpas_atm_ dynamics_init This PR also includes explicit host-device data transfers in the mpas_atm_iau, mpas_atmphys_interface and mpas_atmphys_todynamics modules to ensure that the physics and IAU regions, which run on CPU, use the latest values from the dynamical core running on GPUs, and vice versa. In addition, this PR also includes explicit data transfers around halo exchanges in the atm_srk3 subroutine. These subroutines for data routines, and the acc update statements are an interim solution until we have a book-keeping method in place. This PR also introduces a couple of new timers to keep track of the cost of data transfers. 
--- .../dynamics/mpas_atm_boundaries.F | 32 - src/core_atmosphere/dynamics/mpas_atm_iau.F | 14 + .../dynamics/mpas_atm_time_integration.F | 1377 ++++++++++++----- src/core_atmosphere/mpas_atm_core.F | 5 +- .../physics/mpas_atmphys_interface.F | 20 + .../physics/mpas_atmphys_todynamics.F | 17 + 6 files changed, 1077 insertions(+), 388 deletions(-) diff --git a/src/core_atmosphere/dynamics/mpas_atm_boundaries.F b/src/core_atmosphere/dynamics/mpas_atm_boundaries.F index 787e7719a1..6c19ed7931 100644 --- a/src/core_atmosphere/dynamics/mpas_atm_boundaries.F +++ b/src/core_atmosphere/dynamics/mpas_atm_boundaries.F @@ -395,18 +395,14 @@ subroutine mpas_atm_get_bdy_tend(clock, block, vertDim, horizDim, field, delta_t nullify(tend) call mpas_pool_get_array(lbc, 'lbc_'//trim(field), tend, 1) - MPAS_ACC_TIMER_START('mpas_atm_get_bdy_tend [ACC_data_xfer]') if (associated(tend)) then - !$acc enter data copyin(tend) else call mpas_pool_get_array(lbc, 'lbc_scalars', tend_scalars, 1) - !$acc enter data copyin(tend_scalars) ! Ensure the integer pointed to by idx_ptr is copied to the gpu device call mpas_pool_get_dimension(lbc, 'index_'//trim(field), idx_ptr) idx = idx_ptr end if - MPAS_ACC_TIMER_STOP('mpas_atm_get_bdy_tend [ACC_data_xfer]') !$acc parallel default(present) if (associated(tend)) then @@ -426,13 +422,6 @@ subroutine mpas_atm_get_bdy_tend(clock, block, vertDim, horizDim, field, delta_t end if !$acc end parallel - MPAS_ACC_TIMER_START('mpas_atm_get_bdy_tend [ACC_data_xfer]') - if (associated(tend)) then - !$acc exit data delete(tend) - else - !$acc exit data delete(tend_scalars) - end if - MPAS_ACC_TIMER_STOP('mpas_atm_get_bdy_tend [ACC_data_xfer]') end subroutine mpas_atm_get_bdy_tend @@ -533,9 +522,6 @@ subroutine mpas_atm_get_bdy_state_2d(clock, block, vertDim, horizDim, field, del ! query the field as a scalar constituent ! if (associated(tend) .and. 
associated(state)) then - MPAS_ACC_TIMER_START('mpas_atm_get_bdy_state_2d [ACC_data_xfer]') - !$acc enter data copyin(tend, state) - MPAS_ACC_TIMER_STOP('mpas_atm_get_bdy_state_2d [ACC_data_xfer]') !$acc parallel default(present) !$acc loop gang vector collapse(2) @@ -546,9 +532,6 @@ subroutine mpas_atm_get_bdy_state_2d(clock, block, vertDim, horizDim, field, del end do !$acc end parallel - MPAS_ACC_TIMER_START('mpas_atm_get_bdy_state_2d [ACC_data_xfer]') - !$acc exit data delete(tend, state) - MPAS_ACC_TIMER_STOP('mpas_atm_get_bdy_state_2d [ACC_data_xfer]') else call mpas_pool_get_array(lbc, 'lbc_scalars', tend_scalars, 1) call mpas_pool_get_array(lbc, 'lbc_scalars', state_scalars, 2) @@ -556,10 +539,6 @@ subroutine mpas_atm_get_bdy_state_2d(clock, block, vertDim, horizDim, field, del idx=idx_ptr ! Avoid non-array pointer for OpenACC - MPAS_ACC_TIMER_START('mpas_atm_get_bdy_state_2d [ACC_data_xfer]') - !$acc enter data copyin(tend_scalars, state_scalars) - MPAS_ACC_TIMER_STOP('mpas_atm_get_bdy_state_2d [ACC_data_xfer]') - !$acc parallel default(present) !$acc loop gang vector collapse(2) do i=1, horizDim+1 @@ -569,9 +548,6 @@ subroutine mpas_atm_get_bdy_state_2d(clock, block, vertDim, horizDim, field, del end do !$acc end parallel - MPAS_ACC_TIMER_START('mpas_atm_get_bdy_state_2d [ACC_data_xfer]') - !$acc exit data delete(tend_scalars, state_scalars) - MPAS_ACC_TIMER_STOP('mpas_atm_get_bdy_state_2d [ACC_data_xfer]') end if end subroutine mpas_atm_get_bdy_state_2d @@ -652,10 +628,6 @@ subroutine mpas_atm_get_bdy_state_3d(clock, block, innerDim, vertDim, horizDim, call mpas_pool_get_array(lbc, 'lbc_'//trim(field), tend, 1) call mpas_pool_get_array(lbc, 'lbc_'//trim(field), state, 2) - MPAS_ACC_TIMER_START('mpas_atm_get_bdy_state_3d [ACC_data_xfer]') - !$acc enter data copyin(tend, state) - MPAS_ACC_TIMER_STOP('mpas_atm_get_bdy_state_3d [ACC_data_xfer]') - !$acc parallel default(present) !$acc loop gang vector collapse(3) do i=1, horizDim+1 @@ -667,10 +639,6 @@ 
subroutine mpas_atm_get_bdy_state_3d(clock, block, innerDim, vertDim, horizDim, end do !$acc end parallel - MPAS_ACC_TIMER_START('mpas_atm_get_bdy_state_3d [ACC_data_xfer]') - !$acc exit data delete(tend, state) - MPAS_ACC_TIMER_STOP('mpas_atm_get_bdy_state_3d [ACC_data_xfer]') - end subroutine mpas_atm_get_bdy_state_3d diff --git a/src/core_atmosphere/dynamics/mpas_atm_iau.F b/src/core_atmosphere/dynamics/mpas_atm_iau.F index 654fd3ae82..b380e3c0e8 100644 --- a/src/core_atmosphere/dynamics/mpas_atm_iau.F +++ b/src/core_atmosphere/dynamics/mpas_atm_iau.F @@ -13,9 +13,20 @@ module mpas_atm_iau use mpas_dmpar use mpas_constants use mpas_log, only : mpas_log_write + use mpas_timer !public :: atm_compute_iau_coef, atm_add_tend_anal_incr + + #ifdef MPAS_OPENACC + #define MPAS_ACC_TIMER_START(X) call mpas_timer_start(X) + #define MPAS_ACC_TIMER_STOP(X) call mpas_timer_stop(X) + #else + #define MPAS_ACC_TIMER_START(X) + #define MPAS_ACC_TIMER_STOP(X) + #endif + + contains !================================================================================================== @@ -137,6 +148,7 @@ subroutine atm_add_tend_anal_incr (configs, structs, itimestep, dt, tend_ru, ten call mpas_pool_get_array(state, 'scalars', scalars, 1) call mpas_pool_get_array(state, 'rho_zz', rho_zz, 2) call mpas_pool_get_array(diag , 'rho_edge', rho_edge) + !$acc update self(theta_m, scalars, rho_zz, rho_edge) call mpas_pool_get_dimension(state, 'moist_start', moist_start) call mpas_pool_get_dimension(state, 'moist_end', moist_end) @@ -149,6 +161,8 @@ subroutine atm_add_tend_anal_incr (configs, structs, itimestep, dt, tend_ru, ten ! call mpas_pool_get_array(tend, 'rho_zz', tend_rho) ! 
call mpas_pool_get_array(tend, 'theta_m', tend_theta) call mpas_pool_get_array(tend, 'scalars_tend', tend_scalars) + !$acc update self(tend_scalars) + MPAS_ACC_TIMER_STOP('atm_srk3: physics ACC_data_xfer') call mpas_pool_get_array(tend_iau, 'theta', theta_amb) call mpas_pool_get_array(tend_iau, 'rho', rho_amb) diff --git a/src/core_atmosphere/dynamics/mpas_atm_time_integration.F b/src/core_atmosphere/dynamics/mpas_atm_time_integration.F index 644963c3e6..3f046bf758 100644 --- a/src/core_atmosphere/dynamics/mpas_atm_time_integration.F +++ b/src/core_atmosphere/dynamics/mpas_atm_time_integration.F @@ -272,6 +272,8 @@ subroutine mpas_atm_dynamics_init(domain) real (kind=RKIND), dimension(:), pointer :: angleEdge real (kind=RKIND), dimension(:), pointer :: meshScalingDel2 real (kind=RKIND), dimension(:), pointer :: meshScalingDel4 + real (kind=RKIND), dimension(:), pointer :: u_init, v_init, qv_init + real (kind=RKIND), dimension(:,:), pointer :: t_init real (kind=RKIND), dimension(:,:), pointer :: deformation_coef_c2 real (kind=RKIND), dimension(:,:), pointer :: deformation_coef_s2 real (kind=RKIND), dimension(:,:), pointer :: deformation_coef_cs @@ -297,6 +299,7 @@ subroutine mpas_atm_dynamics_init(domain) nullify(mesh) call mpas_pool_get_subpool(domain % blocklist % structs, 'mesh', mesh) + MPAS_ACC_TIMER_START('mpas_dynamics_init [ACC_data_xfer]') call mpas_pool_get_array(mesh, 'dvEdge', dvEdge) !$acc enter data copyin(dvEdge) @@ -461,24 +464,919 @@ subroutine mpas_atm_dynamics_init(domain) call mpas_pool_get_array(mesh, 'meshScalingDel4', meshScalingDel4) !$acc enter data copyin(meshScalingDel4) + + call mpas_pool_get_array(mesh, 'u_init', u_init) + !$acc enter data copyin(u_init) + call mpas_pool_get_array(mesh, 'v_init', v_init) + !$acc enter data copyin(v_init) + call mpas_pool_get_array(mesh, 't_init', t_init) + !$acc enter data copyin(t_init) + call mpas_pool_get_array(mesh, 'qv_init', qv_init) + !$acc enter data copyin(qv_init) + + call 
mpas_pool_get_array(mesh, 'deformation_coef_c2', deformation_coef_c2) + !$acc enter data copyin(deformation_coef_c2) + + call mpas_pool_get_array(mesh, 'deformation_coef_s2', deformation_coef_s2) + !$acc enter data copyin(deformation_coef_s2) + + call mpas_pool_get_array(mesh, 'deformation_coef_cs', deformation_coef_cs) + !$acc enter data copyin(deformation_coef_cs) + + call mpas_pool_get_array(mesh, 'deformation_coef_c', deformation_coef_c) + !$acc enter data copyin(deformation_coef_c) + + call mpas_pool_get_array(mesh, 'deformation_coef_s', deformation_coef_s) + !$acc enter data copyin(deformation_coef_s) + + MPAS_ACC_TIMER_STOP('mpas_dynamics_init [ACC_data_xfer]') +#endif + + end subroutine mpas_atm_dynamics_init + + subroutine mpas_atm_pre_computesolvediag_h2d(block) + + implicit none + + type (block_type), intent(inout) :: block + + +#ifdef MPAS_OPENACC + type (mpas_pool_type), pointer :: mesh + type (mpas_pool_type), pointer :: diag + type (mpas_pool_type), pointer :: state + type (mpas_pool_type), pointer :: tend_physics + real (kind=RKIND), dimension(:,:), pointer :: rthdynten + + real (kind=RKIND), dimension(:,:), pointer :: h_edge, v, vorticity, ke, pv_edge, & + pv_vertex, pv_cell, gradPVn, gradPVt, divergence + real (kind=RKIND), dimension(:,:), pointer :: u, h + + real (kind=RKIND), dimension(:,:), pointer :: zz + real (kind=RKIND), dimension(:,:,:), pointer :: zb_cell + real (kind=RKIND), dimension(:,:,:), pointer :: zb3_cell + real (kind=RKIND), dimension(:), pointer :: fzm + real (kind=RKIND), dimension(:), pointer :: fzp + real (kind=RKIND), dimension(:,:,:), pointer :: zb + real (kind=RKIND), dimension(:,:,:), pointer :: zb3 + + + real (kind=RKIND), dimension(:), pointer :: dvEdge + integer, dimension(:,:), pointer :: cellsOnCell + integer, dimension(:,:), pointer :: cellsOnEdge + integer, dimension(:,:), pointer :: advCellsForEdge + integer, dimension(:,:), pointer :: edgesOnCell + integer, dimension(:), pointer :: nAdvCellsForEdge + integer, 
dimension(:), pointer :: nEdgesOnCell + real (kind=RKIND), dimension(:,:), pointer :: adv_coefs + real (kind=RKIND), dimension(:,:), pointer :: adv_coefs_3rd + real (kind=RKIND), dimension(:,:), pointer :: edgesOnCell_sign + real (kind=RKIND), dimension(:), pointer :: invAreaCell + integer, dimension(:), pointer :: bdyMaskCell + integer, dimension(:), pointer :: bdyMaskEdge + real (kind=RKIND), dimension(:), pointer :: specZoneMaskEdge + real (kind=RKIND), dimension(:), pointer :: invDvEdge + real (kind=RKIND), dimension(:), pointer :: dcEdge + real (kind=RKIND), dimension(:), pointer :: invDcEdge + integer, dimension(:,:), pointer :: edgesOnEdge + integer, dimension(:,:), pointer :: edgesOnVertex + real (kind=RKIND), dimension(:,:), pointer :: edgesOnVertex_sign + integer, dimension(:), pointer :: nEdgesOnEdge + real (kind=RKIND), dimension(:,:), pointer :: weightsOnEdge + integer, dimension(:,:), pointer :: cellsOnVertex + integer, dimension(:,:), pointer :: verticesOnCell + integer, dimension(:,:), pointer :: verticesOnEdge + real (kind=RKIND), dimension(:), pointer :: invAreaTriangle + integer, dimension(:,:), pointer :: kiteForCell + real (kind=RKIND), dimension(:,:), pointer :: kiteAreasOnVertex + real (kind=RKIND), dimension(:), pointer :: fEdge + real (kind=RKIND), dimension(:), pointer :: fVertex + + nullify(mesh) + call mpas_pool_get_subpool(block % structs, 'mesh', mesh) + nullify(state) + call mpas_pool_get_subpool(block % structs, 'state', state) + nullify(diag) + call mpas_pool_get_subpool(block % structs, 'diag', diag) + + MPAS_ACC_TIMER_START('first_compute_solve_diagnostics [ACC_data_xfer]') + call mpas_pool_get_array(state, 'rho_zz', h, 1) + !$acc enter data create(h) + call mpas_pool_get_array(state, 'u', u, 1) + !$acc enter data copyin(u) + + call mpas_pool_get_array(diag, 'v', v) + !$acc enter data copyin(v) + call mpas_pool_get_array(diag, 'rho_edge', h_edge) + !$acc enter data copyin(h_edge) + call mpas_pool_get_array(diag, 'vorticity', 
vorticity) + !$acc enter data copyin(vorticity) + call mpas_pool_get_array(diag, 'divergence', divergence) + !$acc enter data copyin(divergence) + call mpas_pool_get_array(diag, 'ke', ke) + !$acc enter data copyin(ke) + call mpas_pool_get_array(diag, 'pv_edge', pv_edge) + !$acc enter data copyin(pv_edge) + call mpas_pool_get_array(diag, 'pv_vertex', pv_vertex) + !$acc enter data copyin(pv_vertex) + call mpas_pool_get_array(diag, 'pv_cell', pv_cell) + !$acc enter data copyin(pv_cell) + call mpas_pool_get_array(diag, 'gradPVn', gradPVn) + !$acc enter data copyin(gradPVn) + call mpas_pool_get_array(diag, 'gradPVt', gradPVt) + !$acc enter data copyin(gradPVt) + + ! Required by atm_init_coupled_diagnostics + call mpas_pool_get_array(mesh, 'zz', zz) + !$acc enter data copyin(zz) + + call mpas_pool_get_array(mesh, 'zb_cell', zb_cell) + !$acc enter data copyin(zb_cell) + + call mpas_pool_get_array(mesh, 'zb3_cell', zb3_cell) + !$acc enter data copyin(zb3_cell) + + call mpas_pool_get_array(mesh, 'fzm', fzm) + !$acc enter data copyin(fzm) + + call mpas_pool_get_array(mesh, 'fzp', fzp) + !$acc enter data copyin(fzp) + + call mpas_pool_get_array(mesh, 'zb', zb) + !$acc enter data copyin(zb) + + call mpas_pool_get_array(mesh, 'zb3', zb3) + !$acc enter data copyin(zb3) + + ! 
Required by atm_compute_solve_diagnostics + call mpas_pool_get_array(mesh, 'dvEdge', dvEdge) + !$acc enter data copyin(dvEdge) + + call mpas_pool_get_array(mesh, 'cellsOnEdge', cellsOnEdge) + !$acc enter data copyin(cellsOnEdge) + + call mpas_pool_get_array(mesh, 'edgesOnCell', edgesOnCell) + !$acc enter data copyin(edgesOnCell) + + call mpas_pool_get_array(mesh, 'nEdgesOnCell', nEdgesOnCell) + !$acc enter data copyin(nEdgesOnCell) + + call mpas_pool_get_array(mesh, 'edgesOnCell_sign', edgesOnCell_sign) + !$acc enter data copyin(edgesOnCell_sign) + + call mpas_pool_get_array(mesh, 'invAreaCell', invAreaCell) + !$acc enter data copyin(invAreaCell) + + call mpas_pool_get_array(mesh, 'invDvEdge', invDvEdge) + !$acc enter data copyin(invDvEdge) + + call mpas_pool_get_array(mesh, 'dcEdge', dcEdge) + !$acc enter data copyin(dcEdge) + + call mpas_pool_get_array(mesh, 'invDcEdge', invDcEdge) + !$acc enter data copyin(invDcEdge) + + call mpas_pool_get_array(mesh, 'edgesOnEdge', edgesOnEdge) + !$acc enter data copyin(edgesOnEdge) + + call mpas_pool_get_array(mesh, 'edgesOnVertex', edgesOnVertex) + !$acc enter data copyin(edgesOnVertex) + + call mpas_pool_get_array(mesh, 'edgesOnVertex_sign', edgesOnVertex_sign) + !$acc enter data copyin(edgesOnVertex_sign) + + call mpas_pool_get_array(mesh, 'nEdgesOnEdge', nEdgesOnEdge) + !$acc enter data copyin(nEdgesOnEdge) + + call mpas_pool_get_array(mesh, 'weightsOnEdge', weightsOnEdge) + !$acc enter data copyin(weightsOnEdge) + + call mpas_pool_get_array(mesh, 'verticesOnCell', verticesOnCell) + !$acc enter data copyin(verticesOnCell) + + call mpas_pool_get_array(mesh, 'verticesOnEdge', verticesOnEdge) + !$acc enter data copyin(verticesOnEdge) + + call mpas_pool_get_array(mesh, 'invAreaTriangle', invAreaTriangle) + !$acc enter data copyin(invAreaTriangle) + + call mpas_pool_get_array(mesh, 'kiteForCell', kiteForCell) + !$acc enter data copyin(kiteForCell) + + call mpas_pool_get_array(mesh, 'kiteAreasOnVertex', kiteAreasOnVertex) + 
!$acc enter data copyin(kiteAreasOnVertex) + + call mpas_pool_get_array(mesh, 'fVertex', fVertex) + !$acc enter data copyin(fVertex) + + MPAS_ACC_TIMER_STOP('first_compute_solve_diagnostics [ACC_data_xfer]') +#endif + + end subroutine mpas_atm_pre_computesolvediag_h2d + + + subroutine mpas_atm_post_computesolvediag_d2h(block) + + implicit none + + type (block_type), intent(inout) :: block + + +#ifdef MPAS_OPENACC + type (mpas_pool_type), pointer :: mesh + type (mpas_pool_type), pointer :: diag + type (mpas_pool_type), pointer :: state + type (mpas_pool_type), pointer :: tend_physics + real (kind=RKIND), dimension(:,:), pointer :: rthdynten + + real (kind=RKIND), dimension(:,:), pointer :: h_edge, v, vorticity, ke, pv_edge, & + pv_vertex, pv_cell, gradPVn, gradPVt, divergence + real (kind=RKIND), dimension(:,:), pointer :: u, h + + real (kind=RKIND), dimension(:,:), pointer :: zz + real (kind=RKIND), dimension(:,:,:), pointer :: zb_cell + real (kind=RKIND), dimension(:,:,:), pointer :: zb3_cell + real (kind=RKIND), dimension(:), pointer :: fzm + real (kind=RKIND), dimension(:), pointer :: fzp + real (kind=RKIND), dimension(:,:,:), pointer :: zb + real (kind=RKIND), dimension(:,:,:), pointer :: zb3 + + + real (kind=RKIND), dimension(:), pointer :: dvEdge + integer, dimension(:,:), pointer :: cellsOnCell + integer, dimension(:,:), pointer :: cellsOnEdge + integer, dimension(:,:), pointer :: advCellsForEdge + integer, dimension(:,:), pointer :: edgesOnCell + integer, dimension(:), pointer :: nAdvCellsForEdge + integer, dimension(:), pointer :: nEdgesOnCell + real (kind=RKIND), dimension(:,:), pointer :: adv_coefs + real (kind=RKIND), dimension(:,:), pointer :: adv_coefs_3rd + real (kind=RKIND), dimension(:,:), pointer :: edgesOnCell_sign + real (kind=RKIND), dimension(:), pointer :: invAreaCell + integer, dimension(:), pointer :: bdyMaskCell + integer, dimension(:), pointer :: bdyMaskEdge + real (kind=RKIND), dimension(:), pointer :: specZoneMaskEdge + real 
(kind=RKIND), dimension(:), pointer :: invDvEdge + real (kind=RKIND), dimension(:), pointer :: dcEdge + real (kind=RKIND), dimension(:), pointer :: invDcEdge + integer, dimension(:,:), pointer :: edgesOnEdge + integer, dimension(:,:), pointer :: edgesOnVertex + real (kind=RKIND), dimension(:,:), pointer :: edgesOnVertex_sign + integer, dimension(:), pointer :: nEdgesOnEdge + real (kind=RKIND), dimension(:,:), pointer :: weightsOnEdge + integer, dimension(:,:), pointer :: cellsOnVertex + integer, dimension(:,:), pointer :: verticesOnCell + integer, dimension(:,:), pointer :: verticesOnEdge + real (kind=RKIND), dimension(:), pointer :: invAreaTriangle + integer, dimension(:,:), pointer :: kiteForCell + real (kind=RKIND), dimension(:,:), pointer :: kiteAreasOnVertex + real (kind=RKIND), dimension(:), pointer :: fEdge + real (kind=RKIND), dimension(:), pointer :: fVertex + + nullify(mesh) + call mpas_pool_get_subpool(block % structs, 'mesh', mesh) + nullify(state) + call mpas_pool_get_subpool(block % structs, 'state', state) + nullify(diag) + call mpas_pool_get_subpool(block % structs, 'diag', diag) + + MPAS_ACC_TIMER_START('first_compute_solve_diagnostics [ACC_data_xfer]') + + call mpas_pool_get_array(state, 'rho_zz', h, 1) + !$acc exit data copyout(h) + call mpas_pool_get_array(state, 'u', u, 1) + !$acc exit data copyout(u) + + call mpas_pool_get_array(diag, 'v', v) + !$acc exit data copyout(v) + call mpas_pool_get_array(diag, 'rho_edge', h_edge) + !$acc exit data copyout(h_edge) + call mpas_pool_get_array(diag, 'vorticity', vorticity) + !$acc exit data copyout(vorticity) + call mpas_pool_get_array(diag, 'divergence', divergence) + !$acc exit data copyout(divergence) + call mpas_pool_get_array(diag, 'ke', ke) + !$acc exit data copyout(ke) + call mpas_pool_get_array(diag, 'pv_edge', pv_edge) + !$acc exit data copyout(pv_edge) + call mpas_pool_get_array(diag, 'pv_vertex', pv_vertex) + !$acc exit data copyout(pv_vertex) + call mpas_pool_get_array(diag, 'pv_cell', 
pv_cell) + !$acc exit data copyout(pv_cell) + call mpas_pool_get_array(diag, 'gradPVn', gradPVn) + !$acc exit data copyout(gradPVn) + call mpas_pool_get_array(diag, 'gradPVt', gradPVt) + !$acc exit data copyout(gradPVt) + + ! Required by atm_init_coupled_diagnostics + call mpas_pool_get_array(mesh, 'zz', zz) + !$acc exit data delete(zz) + + call mpas_pool_get_array(mesh, 'zb_cell', zb_cell) + !$acc exit data delete(zb_cell) + + call mpas_pool_get_array(mesh, 'zb3_cell', zb3_cell) + !$acc exit data delete(zb3_cell) + + call mpas_pool_get_array(mesh, 'fzm', fzm) + !$acc exit data delete(fzm) + + call mpas_pool_get_array(mesh, 'fzp', fzp) + !$acc exit data delete(fzp) + + call mpas_pool_get_array(mesh, 'zb', zb) + !$acc exit data delete(zb) + + call mpas_pool_get_array(mesh, 'zb3', zb3) + !$acc exit data delete(zb3) + + + call mpas_pool_get_array(mesh, 'dvEdge', dvEdge) + !$acc exit data delete(dvEdge) + + call mpas_pool_get_array(mesh, 'cellsOnEdge', cellsOnEdge) + !$acc exit data delete(cellsOnEdge) + + call mpas_pool_get_array(mesh, 'edgesOnCell', edgesOnCell) + !$acc exit data delete(edgesOnCell) + + call mpas_pool_get_array(mesh, 'nEdgesOnCell', nEdgesOnCell) + !$acc exit data delete(nEdgesOnCell) + + call mpas_pool_get_array(mesh, 'edgesOnCell_sign', edgesOnCell_sign) + !$acc exit data delete(edgesOnCell_sign) + + call mpas_pool_get_array(mesh, 'invAreaCell', invAreaCell) + !$acc exit data delete(invAreaCell) + + call mpas_pool_get_array(mesh, 'invDvEdge', invDvEdge) + !$acc exit data delete(invDvEdge) + + call mpas_pool_get_array(mesh, 'dcEdge', dcEdge) + !$acc exit data delete(dcEdge) + + call mpas_pool_get_array(mesh, 'invDcEdge', invDcEdge) + !$acc exit data delete(invDcEdge) + + call mpas_pool_get_array(mesh, 'edgesOnEdge', edgesOnEdge) + !$acc exit data delete(edgesOnEdge) + + call mpas_pool_get_array(mesh, 'edgesOnVertex', edgesOnVertex) + !$acc exit data delete(edgesOnVertex) + + call mpas_pool_get_array(mesh, 'edgesOnVertex_sign', edgesOnVertex_sign) + 
!$acc exit data delete(edgesOnVertex_sign) + + call mpas_pool_get_array(mesh, 'nEdgesOnEdge', nEdgesOnEdge) + !$acc exit data delete(nEdgesOnEdge) + + call mpas_pool_get_array(mesh, 'weightsOnEdge', weightsOnEdge) + !$acc exit data delete(weightsOnEdge) + + call mpas_pool_get_array(mesh, 'verticesOnCell', verticesOnCell) + !$acc exit data delete(verticesOnCell) + + call mpas_pool_get_array(mesh, 'verticesOnEdge', verticesOnEdge) + !$acc exit data delete(verticesOnEdge) + + call mpas_pool_get_array(mesh, 'invAreaTriangle', invAreaTriangle) + !$acc exit data delete(invAreaTriangle) + + call mpas_pool_get_array(mesh, 'kiteForCell', kiteForCell) + !$acc exit data delete(kiteForCell) + + call mpas_pool_get_array(mesh, 'kiteAreasOnVertex', kiteAreasOnVertex) + !$acc exit data delete(kiteAreasOnVertex) + + call mpas_pool_get_array(mesh, 'fVertex', fVertex) + !$acc exit data delete(fVertex) + + MPAS_ACC_TIMER_STOP('first_compute_solve_diagnostics [ACC_data_xfer]') +#endif + + end subroutine mpas_atm_post_computesolvediag_d2h + + subroutine mpas_atm_pre_dynamics_h2d(domain) + + implicit none + + type (domain_type), intent(inout) :: domain + + +#ifdef MPAS_OPENACC + type (mpas_pool_type), pointer :: state + type (mpas_pool_type), pointer :: diag + type (mpas_pool_type), pointer :: tend + type (mpas_pool_type), pointer :: tend_physics + type (mpas_pool_type), pointer :: lbc + + + real (kind=RKIND), dimension(:,:), pointer :: ru, ru_p + real (kind=RKIND), dimension(:,:), pointer :: ru_save + real (kind=RKIND), dimension(:,:), pointer :: rw, rw_p + real (kind=RKIND), dimension(:,:), pointer :: rw_save + real (kind=RKIND), dimension(:,:), pointer :: rtheta_p + real (kind=RKIND), dimension(:,:), pointer :: exner, exner_base + real (kind=RKIND), dimension(:,:), pointer :: rtheta_base, rho_base + real (kind=RKIND), dimension(:,:), pointer :: rtheta_p_save + real (kind=RKIND), dimension(:,:), pointer :: rho_p, rho_pp, rho, theta, theta_base + real (kind=RKIND), dimension(:,:), 
pointer :: rho_p_save + real (kind=RKIND), dimension(:,:), pointer :: rho_zz_old_split + real (kind=RKIND), dimension(:,:), pointer :: cqw, rtheta_pp_old, rtheta_pp + real (kind=RKIND), dimension(:,:), pointer :: cqu, pressure_base, pressure_p, pressure, v + real (kind=RKIND), dimension(:,:), pointer :: kdiff, pv_edge, pv_vertex, pv_cell, rho_edge, h_divergence, ke + real (kind=RKIND), dimension(:,:), pointer :: cofwr, cofwz, coftz, cofwt, a_tri, alpha_tri, gamma_tri + real (kind=RKIND), dimension(:), pointer :: cofrz + real (kind=RKIND), dimension(:,:), pointer :: gradPVn, gradPVt + + + real (kind=RKIND), dimension(:,:), pointer :: u_1, u_2 + real (kind=RKIND), dimension(:,:), pointer :: w_1, w_2 + real (kind=RKIND), dimension(:,:), pointer :: theta_m_1, theta_m_2 + real (kind=RKIND), dimension(:,:), pointer :: rho_zz_1, rho_zz_2 + real (kind=RKIND), dimension(:,:,:), pointer :: scalars_1, scalars_2 + real (kind=RKIND), dimension(:,:), pointer :: ruAvg, wwAvg, ruAvg_split, wwAvg_split + + real (kind=RKIND), dimension(:,:), pointer :: tend_ru, tend_rt, tend_rho, tend_rw, rt_diabatic_tend + real (kind=RKIND), dimension(:,:), pointer :: tend_u_euler, tend_w_euler, tend_theta_euler + real(kind=RKIND), dimension(:,:), pointer :: tend_w_pgf, tend_w_buoy + real(kind=RKIND), dimension(:,:,:), pointer :: scalar_tend_save + + real (kind=RKIND), dimension(:,:), pointer :: rthdynten, divergence, vorticity + + real (kind=RKIND), dimension(:,:), pointer :: lbc_u, lbc_w, lbc_ru, lbc_rho_edge, lbc_rho, lbc_rtheta_m, lbc_rho_zz, lbc_theta + real (kind=RKIND), dimension(:,:), pointer :: lbc_tend_u, lbc_tend_w, lbc_tend_ru, lbc_tend_rho_edge, lbc_tend_rho + real (kind=RKIND), dimension(:,:), pointer :: lbc_tend_rtheta_m, lbc_tend_rho_zz, lbc_tend_theta + + real (kind=RKIND), dimension(:,:,:), pointer :: lbc_scalars, lbc_tend_scalars + + nullify(state) + nullify(diag) + nullify(tend) + nullify(tend_physics) + nullify(lbc) + call mpas_pool_get_subpool(domain % blocklist % structs, 
'state', state) + call mpas_pool_get_subpool(domain % blocklist % structs, 'diag', diag) + call mpas_pool_get_subpool(domain % blocklist % structs, 'tend', tend) + call mpas_pool_get_subpool(domain % blocklist % structs, 'tend_physics', tend_physics) + call mpas_pool_get_subpool(domain % blocklist % structs, 'lbc', lbc) + + MPAS_ACC_TIMER_START('atm_srk3 [ACC_data_xfer]') + call mpas_pool_get_array(diag, 'ru', ru) + !$acc enter data copyin(ru) ! use values from atm_init_coupled_diagnostics + call mpas_pool_get_array(diag, 'ru_p', ru_p) + !$acc enter data copyin(ru_p) + call mpas_pool_get_array(diag, 'ru_save', ru_save) + !$acc enter data copyin(ru_save) + call mpas_pool_get_array(diag, 'rw', rw) + !$acc enter data copyin(rw) ! use values from atm_init_coupled_diagnostics + call mpas_pool_get_array(diag, 'rw_p', rw_p) + !$acc enter data copyin(rw_p) + call mpas_pool_get_array(diag, 'rw_save', rw_save) + !$acc enter data copyin(rw_save) + call mpas_pool_get_array(diag, 'rtheta_p', rtheta_p) + !$acc enter data copyin(rtheta_p) ! use values from atm_init_coupled_diagnostics + call mpas_pool_get_array(diag, 'rtheta_p_save', rtheta_p_save) + !$acc enter data copyin(rtheta_p_save) + call mpas_pool_get_array(diag, 'exner', exner) + !$acc enter data copyin(exner) ! use values from atm_init_coupled_diagnostics + call mpas_pool_get_array(diag, 'exner_base', exner_base) + !$acc enter data copyin(exner_base) ! use values from atm_init_coupled_diagnostics + call mpas_pool_get_array(diag, 'rtheta_base', rtheta_base) + !$acc enter data copyin(rtheta_base) ! 
use values from atm_init_coupled_diagnostics + call mpas_pool_get_array(diag, 'rho_base', rho_base) + !$acc enter data copyin(rho_base) + call mpas_pool_get_array(diag, 'rho', rho) + !$acc enter data copyin(rho) + call mpas_pool_get_array(diag, 'theta', theta) + !$acc enter data copyin(theta) + call mpas_pool_get_array(diag, 'theta_base', theta_base) + !$acc enter data copyin(theta_base) + call mpas_pool_get_array(diag, 'rho_p', rho_p) + !$acc enter data copyin(rho_p) ! use values from atm_init_coupled_diagnostics + call mpas_pool_get_array(diag, 'rho_p_save', rho_p_save) + !$acc enter data copyin(rho_p_save) + call mpas_pool_get_array(diag, 'rho_pp', rho_pp) + !$acc enter data copyin(rho_pp) + call mpas_pool_get_array(diag, 'rho_zz_old_split', rho_zz_old_split) + !$acc enter data copyin(rho_zz_old_split) + call mpas_pool_get_array(diag, 'cqw', cqw) + !$acc enter data copyin(cqw) + call mpas_pool_get_array(diag, 'cqu', cqu) + !$acc enter data copyin(cqu) + call mpas_pool_get_array(diag, 'pressure_p', pressure_p) + !$acc enter data copyin(pressure_p) ! use values from atm_init_coupled_diagnostics + call mpas_pool_get_array(diag, 'pressure_base', pressure_base) + !$acc enter data copyin(pressure_base) ! use values from atm_init_coupled_diagnostics + call mpas_pool_get_array(diag, 'pressure', pressure) + !$acc enter data copyin(pressure) + call mpas_pool_get_array(diag, 'v', v) + !$acc enter data copyin(v) ! use values from atm_compute_solve_diagnostics + call mpas_pool_get_array(diag, 'rtheta_pp', rtheta_pp) + !$acc enter data copyin(rtheta_pp) + call mpas_pool_get_array(diag, 'rtheta_pp_old', rtheta_pp_old) + !$acc enter data copyin(rtheta_pp_old) + call mpas_pool_get_array(diag, 'kdiff', kdiff) + !$acc enter data copyin(kdiff) + call mpas_pool_get_array(diag, 'pv_edge', pv_edge) + !$acc enter data copyin(pv_edge) ! 
use values from atm_compute_solve_diagnostics + call mpas_pool_get_array(diag, 'pv_vertex', pv_vertex) + !$acc enter data copyin(pv_vertex) + call mpas_pool_get_array(diag, 'pv_cell', pv_cell) + !$acc enter data copyin(pv_cell) + call mpas_pool_get_array(diag, 'rho_edge', rho_edge) + !$acc enter data copyin(rho_edge) ! use values from atm_compute_solve_diagnostics + call mpas_pool_get_array(diag, 'h_divergence', h_divergence) + !$acc enter data copyin(h_divergence) + call mpas_pool_get_array(diag, 'ke', ke) + !$acc enter data copyin(ke) ! use values from atm_compute_solve_diagnostics + call mpas_pool_get_array(diag, 'gradPVn', gradPVn) + !$acc enter data copyin(gradPVn) + call mpas_pool_get_array(diag, 'gradPVt', gradPVt) + !$acc enter data copyin(gradPVt) - call mpas_pool_get_array(mesh, 'deformation_coef_c2', deformation_coef_c2) - !$acc enter data copyin(deformation_coef_c2) + call mpas_pool_get_array(diag, 'alpha_tri', alpha_tri) + !$acc enter data copyin(alpha_tri) + call mpas_pool_get_array(diag, 'gamma_tri', gamma_tri) + !$acc enter data copyin(gamma_tri) + call mpas_pool_get_array(diag, 'a_tri', a_tri) + !$acc enter data copyin(a_tri) + call mpas_pool_get_array(diag, 'cofwr', cofwr) + !$acc enter data copyin(cofwr) + call mpas_pool_get_array(diag, 'cofwz', cofwz) + !$acc enter data copyin(cofwz) + call mpas_pool_get_array(diag, 'coftz', coftz) + !$acc enter data copyin(coftz) + call mpas_pool_get_array(diag, 'cofwt', cofwt) + !$acc enter data copyin(cofwt) + call mpas_pool_get_array(diag, 'cofrz', cofrz) + !$acc enter data copyin(cofrz) + call mpas_pool_get_array(diag, 'vorticity', vorticity) + !$acc enter data copyin(vorticity) + call mpas_pool_get_array(diag, 'divergence', divergence) + !$acc enter data copyin(divergence) + call mpas_pool_get_array(diag, 'ruAvg', ruAvg) + !$acc enter data copyin(ruAvg) + call mpas_pool_get_array(diag, 'ruAvg_split', ruAvg_split) + !$acc enter data copyin(ruAvg_split) + call mpas_pool_get_array(diag, 'wwAvg', wwAvg) + 
!$acc enter data copyin(wwAvg) + call mpas_pool_get_array(diag, 'wwAvg_split', wwAvg_split) + !$acc enter data copyin(wwAvg_split) - call mpas_pool_get_array(mesh, 'deformation_coef_s2', deformation_coef_s2) - !$acc enter data copyin(deformation_coef_s2) + call mpas_pool_get_array(state, 'u', u_1, 1) + !$acc enter data copyin(u_1) + call mpas_pool_get_array(state, 'u', u_2, 2) + !$acc enter data copyin(u_2) + call mpas_pool_get_array(state, 'w', w_1, 1) + !$acc enter data copyin(w_1) + call mpas_pool_get_array(state, 'w', w_2, 2) + !$acc enter data copyin(w_2) + call mpas_pool_get_array(state, 'theta_m', theta_m_1, 1) + !$acc enter data copyin(theta_m_1) ! use values from atm_init_coupled_diagnostics + call mpas_pool_get_array(state, 'theta_m', theta_m_2, 2) + !$acc enter data copyin(theta_m_2) + call mpas_pool_get_array(state, 'rho_zz', rho_zz_1, 1) + !$acc enter data copyin(rho_zz_1) + call mpas_pool_get_array(state, 'rho_zz', rho_zz_2, 2) + !$acc enter data copyin(rho_zz_2) + call mpas_pool_get_array(state, 'scalars', scalars_1, 1) + !$acc enter data copyin(scalars_1) + call mpas_pool_get_array(state, 'scalars', scalars_2, 2) + !$acc enter data copyin(scalars_2) - call mpas_pool_get_array(mesh, 'deformation_coef_cs', deformation_coef_cs) - !$acc enter data copyin(deformation_coef_cs) - call mpas_pool_get_array(mesh, 'deformation_coef_c', deformation_coef_c) - !$acc enter data copyin(deformation_coef_c) + call mpas_pool_get_array(tend, 'u', tend_ru) + !$acc enter data copyin(tend_ru) + call mpas_pool_get_array(tend, 'rho_zz', tend_rho) + !$acc enter data copyin(tend_rho) + call mpas_pool_get_array(tend, 'theta_m', tend_rt) + !$acc enter data copyin(tend_rt) + call mpas_pool_get_array(tend, 'w', tend_rw) + !$acc enter data copyin(tend_rw) + call mpas_pool_get_array(tend, 'rt_diabatic_tend', rt_diabatic_tend) + !$acc enter data copyin(rt_diabatic_tend) + call mpas_pool_get_array(tend, 'u_euler', tend_u_euler) + !$acc enter data copyin(tend_u_euler) + call 
mpas_pool_get_array(tend, 'theta_euler', tend_theta_euler) + !$acc enter data copyin(tend_theta_euler) + call mpas_pool_get_array(tend, 'w_euler', tend_w_euler) + !$acc enter data copyin(tend_w_euler) + call mpas_pool_get_array(tend, 'w_pgf', tend_w_pgf) + !$acc enter data copyin(tend_w_pgf) + call mpas_pool_get_array(tend, 'w_buoy', tend_w_buoy) + !$acc enter data copyin(tend_w_buoy) + call mpas_pool_get_array(tend, 'scalars_tend', scalar_tend_save) + !$acc enter data copyin(scalar_tend_save) - call mpas_pool_get_array(mesh, 'deformation_coef_s', deformation_coef_s) - !$acc enter data copyin(deformation_coef_s) + + call mpas_pool_get_array(lbc, 'lbc_u', lbc_u, 2) + !$acc enter data copyin(lbc_u) + call mpas_pool_get_array(lbc, 'lbc_w', lbc_w, 2) + !$acc enter data copyin(lbc_w) + call mpas_pool_get_array(lbc, 'lbc_ru', lbc_ru, 2) + !$acc enter data copyin(lbc_ru) + call mpas_pool_get_array(lbc, 'lbc_rho_edge', lbc_rho_edge, 2) + !$acc enter data copyin(lbc_rho_edge) + call mpas_pool_get_array(lbc, 'lbc_theta', lbc_theta, 2) + !$acc enter data copyin(lbc_theta) + call mpas_pool_get_array(lbc, 'lbc_rtheta_m', lbc_rtheta_m, 2) + !$acc enter data copyin(lbc_rtheta_m) + call mpas_pool_get_array(lbc, 'lbc_rho_zz', lbc_rho_zz, 2) + !$acc enter data copyin(lbc_rho_zz) + call mpas_pool_get_array(lbc, 'lbc_rho', lbc_rho, 2) + !$acc enter data copyin(lbc_rho) + call mpas_pool_get_array(lbc, 'lbc_scalars', lbc_scalars, 2) + !$acc enter data copyin(lbc_scalars) + + + call mpas_pool_get_array(lbc, 'lbc_u', lbc_tend_u, 1) + !$acc enter data copyin(lbc_tend_u) + call mpas_pool_get_array(lbc, 'lbc_ru', lbc_tend_ru, 1) + !$acc enter data copyin(lbc_tend_ru) + call mpas_pool_get_array(lbc, 'lbc_rho_edge', lbc_tend_rho_edge, 1) + !$acc enter data copyin(lbc_tend_rho_edge) + call mpas_pool_get_array(lbc, 'lbc_w', lbc_tend_w, 1) + !$acc enter data copyin(lbc_tend_w) + call mpas_pool_get_array(lbc, 'lbc_theta', lbc_tend_theta, 1) + !$acc enter data copyin(lbc_tend_theta) + call 
mpas_pool_get_array(lbc, 'lbc_rtheta_m', lbc_tend_rtheta_m, 1) + !$acc enter data copyin(lbc_tend_rtheta_m) + call mpas_pool_get_array(lbc, 'lbc_rho_zz', lbc_tend_rho_zz, 1) + !$acc enter data copyin(lbc_tend_rho_zz) + call mpas_pool_get_array(lbc, 'lbc_rho', lbc_tend_rho, 1) + !$acc enter data copyin(lbc_tend_rho) + call mpas_pool_get_array(lbc, 'lbc_scalars', lbc_tend_scalars, 1) + !$acc enter data copyin(lbc_tend_scalars) + + call mpas_pool_get_array(tend_physics, 'rthdynten', rthdynten) + !$acc enter data copyin(rthdynten) + + MPAS_ACC_TIMER_STOP('atm_srk3 [ACC_data_xfer]') #endif - end subroutine mpas_atm_dynamics_init + end subroutine mpas_atm_pre_dynamics_h2d + + + subroutine mpas_atm_post_dynamics_d2h(domain) + + implicit none + + type (domain_type), intent(inout) :: domain + + +#ifdef MPAS_OPENACC + type (mpas_pool_type), pointer :: state + type (mpas_pool_type), pointer :: diag + type (mpas_pool_type), pointer :: tend + type (mpas_pool_type), pointer :: tend_physics + type (mpas_pool_type), pointer :: lbc + + + real (kind=RKIND), dimension(:,:), pointer :: ru, ru_p + real (kind=RKIND), dimension(:,:), pointer :: ru_save + real (kind=RKIND), dimension(:,:), pointer :: rw, rw_p + real (kind=RKIND), dimension(:,:), pointer :: rw_save + real (kind=RKIND), dimension(:,:), pointer :: rtheta_p + real (kind=RKIND), dimension(:,:), pointer :: exner, exner_base + real (kind=RKIND), dimension(:,:), pointer :: rtheta_base, rho_base + real (kind=RKIND), dimension(:,:), pointer :: rtheta_p_save + real (kind=RKIND), dimension(:,:), pointer :: rho_p, rho_pp, rho, theta, theta_base + real (kind=RKIND), dimension(:,:), pointer :: rho_p_save + real (kind=RKIND), dimension(:,:), pointer :: rho_zz_old_split + real (kind=RKIND), dimension(:,:), pointer :: cqw, rtheta_pp_old, rtheta_pp + real (kind=RKIND), dimension(:,:), pointer :: cqu, pressure_base, pressure_p, pressure, v + real (kind=RKIND), dimension(:,:), pointer :: kdiff, pv_edge, pv_vertex, pv_cell, rho_edge, 
h_divergence, ke + real (kind=RKIND), dimension(:,:), pointer :: cofwr, cofwz, coftz, cofwt, a_tri, alpha_tri, gamma_tri + real (kind=RKIND), dimension(:), pointer :: cofrz + real (kind=RKIND), dimension(:,:), pointer :: gradPVn, gradPVt + + + real (kind=RKIND), dimension(:,:), pointer :: u_1, u_2 + real (kind=RKIND), dimension(:,:), pointer :: w_1, w_2 + real (kind=RKIND), dimension(:,:), pointer :: theta_m_1, theta_m_2 + real (kind=RKIND), dimension(:,:), pointer :: rho_zz_1, rho_zz_2 + real (kind=RKIND), dimension(:,:,:), pointer :: scalars_1, scalars_2 + real (kind=RKIND), dimension(:,:), pointer :: ruAvg, wwAvg, ruAvg_split, wwAvg_split + + real (kind=RKIND), dimension(:,:), pointer :: tend_ru, tend_rt, tend_rho, tend_rw, rt_diabatic_tend + real (kind=RKIND), dimension(:,:), pointer :: tend_u_euler, tend_w_euler, tend_theta_euler + real(kind=RKIND), dimension(:,:), pointer :: tend_w_pgf, tend_w_buoy + real(kind=RKIND), dimension(:,:,:), pointer :: scalar_tend_save + + real (kind=RKIND), dimension(:,:), pointer :: rthdynten, divergence, vorticity + + real (kind=RKIND), dimension(:,:), pointer :: lbc_u, lbc_w, lbc_ru, lbc_rho_edge, lbc_rho, lbc_rtheta_m, lbc_rho_zz, lbc_theta + real (kind=RKIND), dimension(:,:), pointer :: lbc_tend_u, lbc_tend_w, lbc_tend_ru, lbc_tend_rho_edge, lbc_tend_rho + real (kind=RKIND), dimension(:,:), pointer :: lbc_tend_rtheta_m, lbc_tend_rho_zz, lbc_tend_theta + + real (kind=RKIND), dimension(:,:,:), pointer :: lbc_scalars, lbc_tend_scalars + + nullify(state) + nullify(diag) + nullify(tend) + nullify(tend_physics) + nullify(lbc) + call mpas_pool_get_subpool(domain % blocklist % structs, 'state', state) + call mpas_pool_get_subpool(domain % blocklist % structs, 'diag', diag) + call mpas_pool_get_subpool(domain % blocklist % structs, 'tend', tend) + call mpas_pool_get_subpool(domain % blocklist % structs, 'tend_physics', tend_physics) + call mpas_pool_get_subpool(domain % blocklist % structs, 'lbc', lbc) + + 
MPAS_ACC_TIMER_START('atm_srk3 [ACC_data_xfer]') + call mpas_pool_get_array(diag, 'ru', ru) + !$acc exit data copyout(ru) ! use values from atm_init_coupled_diagnostics + call mpas_pool_get_array(diag, 'ru_p', ru_p) + !$acc exit data copyout(ru_p) + call mpas_pool_get_array(diag, 'ru_save', ru_save) + !$acc exit data delete(ru_save) + call mpas_pool_get_array(diag, 'rw', rw) + !$acc exit data copyout(rw) ! use values from atm_init_coupled_diagnostics + call mpas_pool_get_array(diag, 'rw_p', rw_p) + !$acc exit data copyout(rw_p) + call mpas_pool_get_array(diag, 'rw_save', rw_save) + !$acc exit data delete(rw_save) + call mpas_pool_get_array(diag, 'rtheta_p', rtheta_p) + !$acc exit data copyout(rtheta_p) ! use values from atm_init_coupled_diagnostics + call mpas_pool_get_array(diag, 'rtheta_p_save', rtheta_p_save) + !$acc exit data delete(rtheta_p_save) + call mpas_pool_get_array(diag, 'exner', exner) + !$acc exit data copyout(exner) ! use values from atm_init_coupled_diagnostics + call mpas_pool_get_array(diag, 'exner_base', exner_base) + !$acc exit data copyout(exner_base) ! use values from atm_init_coupled_diagnostics + call mpas_pool_get_array(diag, 'rtheta_base', rtheta_base) + !$acc exit data copyout(rtheta_base) ! use values from atm_init_coupled_diagnostics + call mpas_pool_get_array(diag, 'rho_base', rho_base) + !$acc exit data copyout(rho_base) + call mpas_pool_get_array(diag, 'rho', rho) + !$acc exit data copyout(rho) + call mpas_pool_get_array(diag, 'theta', theta) + !$acc exit data copyout(theta) + call mpas_pool_get_array(diag, 'theta_base', theta_base) + !$acc exit data copyout(theta_base) + call mpas_pool_get_array(diag, 'rho_p', rho_p) + !$acc exit data copyout(rho_p) ! 
use values from atm_init_coupled_diagnostics + call mpas_pool_get_array(diag, 'rho_p_save', rho_p_save) + !$acc exit data delete(rho_p_save) + call mpas_pool_get_array(diag, 'rho_pp', rho_pp) + !$acc exit data copyout(rho_pp) + call mpas_pool_get_array(diag, 'rho_zz_old_split', rho_zz_old_split) + !$acc exit data delete(rho_zz_old_split) + call mpas_pool_get_array(diag, 'cqw', cqw) + !$acc exit data delete(cqw) + call mpas_pool_get_array(diag, 'cqu', cqu) + !$acc exit data copyout(cqu) + call mpas_pool_get_array(diag, 'pressure_p', pressure_p) + !$acc exit data copyout(pressure_p) ! use values from atm_init_coupled_diagnostics + call mpas_pool_get_array(diag, 'pressure_base', pressure_base) + !$acc exit data copyout(pressure_base) ! use values from atm_init_coupled_diagnostics + call mpas_pool_get_array(diag, 'pressure', pressure) + !$acc exit data copyout(pressure) + call mpas_pool_get_array(diag, 'v', v) + !$acc exit data copyout(v) ! use values from atm_compute_solve_diagnostics + call mpas_pool_get_array(diag, 'rtheta_pp', rtheta_pp) + !$acc exit data copyout(rtheta_pp) + call mpas_pool_get_array(diag, 'rtheta_pp_old', rtheta_pp_old) + !$acc exit data copyout(rtheta_pp_old) + call mpas_pool_get_array(diag, 'kdiff', kdiff) + !$acc exit data copyout(kdiff) + call mpas_pool_get_array(diag, 'pv_edge', pv_edge) + !$acc exit data copyout(pv_edge) ! use values from atm_compute_solve_diagnostics + call mpas_pool_get_array(diag, 'pv_vertex', pv_vertex) + !$acc exit data copyout(pv_vertex) + call mpas_pool_get_array(diag, 'pv_cell', pv_cell) + !$acc exit data delete(pv_cell) + call mpas_pool_get_array(diag, 'rho_edge', rho_edge) + !$acc exit data copyout(rho_edge) ! use values from atm_compute_solve_diagnostics + call mpas_pool_get_array(diag, 'h_divergence', h_divergence) + !$acc exit data copyout(h_divergence) + call mpas_pool_get_array(diag, 'ke', ke) + !$acc exit data copyout(ke) ! 
use values from atm_compute_solve_diagnostics + call mpas_pool_get_array(diag, 'gradPVn', gradPVn) + !$acc exit data delete(gradPVn) + call mpas_pool_get_array(diag, 'gradPVt', gradPVt) + !$acc exit data delete(gradPVt) + + call mpas_pool_get_array(diag, 'alpha_tri', alpha_tri) + !$acc exit data delete(alpha_tri) + call mpas_pool_get_array(diag, 'gamma_tri', gamma_tri) + !$acc exit data delete(gamma_tri) + call mpas_pool_get_array(diag, 'a_tri', a_tri) + !$acc exit data delete(a_tri) + call mpas_pool_get_array(diag, 'cofwr', cofwr) + !$acc exit data delete(cofwr) + call mpas_pool_get_array(diag, 'cofwz', cofwz) + !$acc exit data delete(cofwz) + call mpas_pool_get_array(diag, 'coftz', coftz) + !$acc exit data delete(coftz) + call mpas_pool_get_array(diag, 'cofwt', cofwt) + !$acc exit data delete(cofwt) + call mpas_pool_get_array(diag, 'cofrz', cofrz) + !$acc exit data delete(cofrz) + call mpas_pool_get_array(diag, 'vorticity', vorticity) + !$acc exit data copyout(vorticity) + call mpas_pool_get_array(diag, 'divergence', divergence) + !$acc exit data copyout(divergence) + call mpas_pool_get_array(diag, 'ruAvg', ruAvg) + !$acc exit data copyout(ruAvg) + call mpas_pool_get_array(diag, 'ruAvg_split', ruAvg_split) + !$acc exit data copyout(ruAvg_split) + call mpas_pool_get_array(diag, 'wwAvg', wwAvg) + !$acc exit data copyout(wwAvg) + call mpas_pool_get_array(diag, 'wwAvg_split', wwAvg_split) + !$acc exit data copyout(wwAvg_split) + + call mpas_pool_get_array(state, 'u', u_1, 1) + !$acc exit data copyout(u_1) + call mpas_pool_get_array(state, 'u', u_2, 2) + !$acc exit data delete(u_2) + call mpas_pool_get_array(state, 'w', w_1, 1) + !$acc exit data copyout(w_1) + call mpas_pool_get_array(state, 'w', w_2, 2) + !$acc exit data delete(w_2) + call mpas_pool_get_array(state, 'theta_m', theta_m_1, 1) + !$acc exit data copyout(theta_m_1) ! 
use values from atm_init_coupled_diagnostics + call mpas_pool_get_array(state, 'theta_m', theta_m_2, 2) + !$acc exit data copyout(theta_m_2) ! Delete gives incorrect results + call mpas_pool_get_array(state, 'rho_zz', rho_zz_1, 1) + !$acc exit data copyout(rho_zz_1) + call mpas_pool_get_array(state, 'rho_zz', rho_zz_2, 2) + !$acc exit data delete(rho_zz_2) + call mpas_pool_get_array(state, 'scalars', scalars_1, 1) + !$acc exit data copyout(scalars_1) + call mpas_pool_get_array(state, 'scalars', scalars_2, 2) + !$acc exit data copyout(scalars_2) ! Delete gives incorrect results + + + call mpas_pool_get_array(tend, 'u', tend_ru) + !$acc exit data copyout(tend_ru) + call mpas_pool_get_array(tend, 'rho_zz', tend_rho) + !$acc exit data copyout(tend_rho) + call mpas_pool_get_array(tend, 'theta_m', tend_rt) + !$acc exit data copyout(tend_rt) + call mpas_pool_get_array(tend, 'w', tend_rw) + !$acc exit data copyout(tend_rw) + call mpas_pool_get_array(tend, 'rt_diabatic_tend', rt_diabatic_tend) + !$acc exit data copyout(rt_diabatic_tend) + call mpas_pool_get_array(tend, 'u_euler', tend_u_euler) + !$acc exit data copyout(tend_u_euler) + call mpas_pool_get_array(tend, 'theta_euler', tend_theta_euler) + !$acc exit data copyout(tend_theta_euler) + call mpas_pool_get_array(tend, 'w_euler', tend_w_euler) + !$acc exit data copyout(tend_w_euler) + call mpas_pool_get_array(tend, 'w_pgf', tend_w_pgf) + !$acc exit data copyout(tend_w_pgf) + call mpas_pool_get_array(tend, 'w_buoy', tend_w_buoy) + !$acc exit data copyout(tend_w_buoy) + call mpas_pool_get_array(tend, 'scalars_tend', scalar_tend_save) + !$acc exit data copyout(scalar_tend_save) + + + call mpas_pool_get_array(lbc, 'lbc_u', lbc_u, 2) + !$acc exit data delete(lbc_u) + call mpas_pool_get_array(lbc, 'lbc_w', lbc_w, 2) + !$acc exit data delete(lbc_w) + call mpas_pool_get_array(lbc, 'lbc_ru', lbc_ru, 2) + !$acc exit data delete(lbc_ru) + call mpas_pool_get_array(lbc, 'lbc_rho_edge', lbc_rho_edge, 2) + !$acc exit data 
delete(lbc_rho_edge) + call mpas_pool_get_array(lbc, 'lbc_theta', lbc_theta, 2) + !$acc exit data delete(lbc_theta) + call mpas_pool_get_array(lbc, 'lbc_rtheta_m', lbc_rtheta_m, 2) + !$acc exit data delete(lbc_rtheta_m) + call mpas_pool_get_array(lbc, 'lbc_rho_zz', lbc_rho_zz, 2) + !$acc exit data delete(lbc_rho_zz) + call mpas_pool_get_array(lbc, 'lbc_rho', lbc_rho, 2) + !$acc exit data delete(lbc_rho) + call mpas_pool_get_array(lbc, 'lbc_scalars', lbc_scalars, 2) + !$acc exit data delete(lbc_scalars) + + + call mpas_pool_get_array(lbc, 'lbc_u', lbc_tend_u, 1) + !$acc exit data delete(lbc_tend_u) + call mpas_pool_get_array(lbc, 'lbc_ru', lbc_tend_ru, 1) + !$acc exit data delete(lbc_tend_ru) + call mpas_pool_get_array(lbc, 'lbc_rho_edge', lbc_tend_rho_edge, 1) + !$acc exit data delete(lbc_tend_rho_edge) + call mpas_pool_get_array(lbc, 'lbc_w', lbc_tend_w, 1) + !$acc exit data delete(lbc_tend_w) + call mpas_pool_get_array(lbc, 'lbc_theta', lbc_tend_theta, 1) + !$acc exit data delete(lbc_tend_theta) + call mpas_pool_get_array(lbc, 'lbc_rtheta_m', lbc_tend_rtheta_m, 1) + !$acc exit data delete(lbc_tend_rtheta_m) + call mpas_pool_get_array(lbc, 'lbc_rho_zz', lbc_tend_rho_zz, 1) + !$acc exit data delete(lbc_tend_rho_zz) + call mpas_pool_get_array(lbc, 'lbc_rho', lbc_tend_rho, 1) + !$acc exit data delete(lbc_tend_rho) + call mpas_pool_get_array(lbc, 'lbc_scalars', lbc_tend_scalars, 1) + !$acc exit data delete(lbc_tend_scalars) + + call mpas_pool_get_array(tend_physics, 'rthdynten', rthdynten) + !$acc exit data copyout(rthdynten) + MPAS_ACC_TIMER_STOP('atm_srk3 [ACC_data_xfer]') +#endif + + end subroutine mpas_atm_post_dynamics_d2h !---------------------------------------------------------------------------- @@ -814,12 +1712,14 @@ subroutine atm_timestep(domain, dt, nowTime, itimestep, exchange_halo_group) config_apply_lbcs = config_apply_lbcs_ptr + call mpas_atm_pre_dynamics_h2d(domain) if (trim(config_time_integration) == 'SRK3') then call atm_srk3(domain, dt, 
itimestep, exchange_halo_group) else call mpas_log_write('Unknown time integration option '//trim(config_time_integration), messageType=MPAS_LOG_ERR) call mpas_log_write('Currently, only ''SRK3'' is supported.', messageType=MPAS_LOG_CRIT) end if + call mpas_atm_post_dynamics_d2h(domain) call mpas_set_timeInterval(dtInterval, dt=dt) currTime = nowTime + dtInterval @@ -913,6 +1813,8 @@ subroutine atm_srk3(domain, dt, itimestep, exchange_halo_group) real (kind=RKIND), dimension(:,:,:), pointer :: scalars, scalars_1, scalars_2 real (kind=RKIND), dimension(:,:), pointer :: rqvdynten, rthdynten, theta_m + real (kind=RKIND), dimension(:,:), pointer :: pressure_p, rtheta_p, exner, tend_u + real (kind=RKIND), dimension(:,:), pointer :: rho_pp, rtheta_pp, ru_p, rw_p, pv_edge, rho_edge real (kind=RKIND) :: theta_local, fac_m #ifndef MPAS_CAM_DYCORE @@ -1080,7 +1982,15 @@ subroutine atm_srk3(domain, dt, itimestep, exchange_halo_group) ! ! Communicate halos for theta_m, scalars, pressure_p, and rtheta_p ! 
+ MPAS_ACC_TIMER_START('atm_srk3: halo_exchanges + ACC_data_xfer') + call mpas_pool_get_array(state, 'theta_m', theta_m, 1) + call mpas_pool_get_array(state, 'scalars', scalars_1, 1) + call mpas_pool_get_array(diag, 'pressure_p', pressure_p) + call mpas_pool_get_array(diag, 'rtheta_p', rtheta_p) + !$acc update self(theta_m,scalars_1,pressure_p,rtheta_p) call exchange_halo_group(domain, 'dynamics:theta_m,scalars,pressure_p,rtheta_p') + !$acc update device(theta_m,scalars_1,pressure_p,rtheta_p) + MPAS_ACC_TIMER_STOP('atm_srk3: halo_exchanges + ACC_data_xfer') call mpas_timer_start('atm_rk_integration_setup') @@ -1142,6 +2052,8 @@ subroutine atm_srk3(domain, dt, itimestep, exchange_halo_group) tend_ru_physics, tend_rtheta_physics, tend_rho_physics) end if + !$acc enter data copyin(tend_rtheta_physics,tend_rho_physics,tend_ru_physics) + DYNAMICS_SUBSTEPS : do dynamics_substep = 1, dynamics_split @@ -1161,8 +2073,12 @@ subroutine atm_srk3(domain, dt, itimestep, exchange_halo_group) !$OMP END PARALLEL DO call mpas_timer_stop('atm_compute_vert_imp_coefs') + MPAS_ACC_TIMER_START('atm_srk3: halo_exchanges + ACC_data_xfer') + call mpas_pool_get_array(diag, 'exner', exner) + !$acc update self(exner) call exchange_halo_group(domain, 'dynamics:exner') - + !$acc update device(exner) + MPAS_ACC_TIMER_STOP('atm_srk3: halo_exchanges + ACC_data_xfer') !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! ! BEGIN Runge-Kutta loop @@ -1241,7 +2157,12 @@ subroutine atm_srk3(domain, dt, itimestep, exchange_halo_group) !*********************************** ! 
tend_u + MPAS_ACC_TIMER_START('atm_srk3: halo_exchanges + ACC_data_xfer') + call mpas_pool_get_array(tend, 'u', tend_u) + !$acc update self(tend_u) call exchange_halo_group(domain, 'dynamics:tend_u') + !$acc update device(tend_u) + MPAS_ACC_TIMER_STOP('atm_srk3: halo_exchanges + ACC_data_xfer') call mpas_timer_start('small_step_prep') @@ -1317,7 +2238,12 @@ subroutine atm_srk3(domain, dt, itimestep, exchange_halo_group) do small_step = 1, number_sub_steps(rk_step) + MPAS_ACC_TIMER_START('atm_srk3: halo_exchanges + ACC_data_xfer') + call mpas_pool_get_array(diag, 'rho_pp', rho_pp) + !$acc update self(rho_pp) call exchange_halo_group(domain, 'dynamics:rho_pp') + !$acc update device(rho_pp) + MPAS_ACC_TIMER_STOP('atm_srk3: halo_exchanges + ACC_data_xfer') call mpas_timer_start('atm_advance_acoustic_step') @@ -1339,8 +2265,12 @@ subroutine atm_srk3(domain, dt, itimestep, exchange_halo_group) ! rtheta_pp ! This is the only communications needed during the acoustic steps because we solve for u on all edges of owned cells - + MPAS_ACC_TIMER_START('atm_srk3: halo_exchanges + ACC_data_xfer') + call mpas_pool_get_array(diag, 'rtheta_pp', rtheta_pp) + !$acc update self(rtheta_pp) call exchange_halo_group(domain, 'dynamics:rtheta_pp') + !$acc update device(rtheta_pp) + MPAS_ACC_TIMER_STOP('atm_srk3: halo_exchanges + ACC_data_xfer') ! complete update of horizontal momentum by including 3d divergence damping at the end of the acoustic step @@ -1360,7 +2290,15 @@ subroutine atm_srk3(domain, dt, itimestep, exchange_halo_group) ! ! Communicate halos for rw_p[1,2], ru_p[1,2], rho_pp[1,2], rtheta_pp[2] ! 
+ MPAS_ACC_TIMER_START('atm_srk3: halo_exchanges + ACC_data_xfer') + call mpas_pool_get_array(diag, 'ru_p', ru_p) + call mpas_pool_get_array(diag, 'rw_p', rw_p) + call mpas_pool_get_array(diag, 'rho_pp', rho_pp) + call mpas_pool_get_array(diag, 'rtheta_pp', rtheta_pp) + !$acc update self(rw_p,ru_p,rho_pp,rtheta_pp) call exchange_halo_group(domain, 'dynamics:rw_p,ru_p,rho_pp,rtheta_pp') + !$acc update device(rw_p,ru_p,rho_pp,rtheta_pp) + MPAS_ACC_TIMER_STOP('atm_srk3: halo_exchanges + ACC_data_xfer') call mpas_timer_start('atm_recover_large_step_variables') @@ -1395,7 +2333,6 @@ subroutine atm_srk3(domain, dt, itimestep, exchange_halo_group) call mpas_atm_get_bdy_state(clock, block, nVertLevels, nEdges, 'u', time_dyn_step, ru_driving_values) ! do this inline at present - it is simple enough - !$acc enter data copyin(u) !$acc parallel default(present) !$acc loop gang worker do iEdge = 1, nEdgesSolve @@ -1407,12 +2344,10 @@ subroutine atm_srk3(domain, dt, itimestep, exchange_halo_group) end if end do !$acc end parallel - !$acc exit data copyout(u) call mpas_atm_get_bdy_state(clock, block, nVertLevels, nEdges, 'ru', time_dyn_step, ru_driving_values) call mpas_pool_get_array(diag, 'ru', u) ! do this inline at present - it is simple enough - !$acc enter data copyin(u) !$acc parallel default(present) !$acc loop gang worker do iEdge = 1, nEdges @@ -1424,7 +2359,6 @@ subroutine atm_srk3(domain, dt, itimestep, exchange_halo_group) end if end do !$acc end parallel - !$acc exit data copyout(u) deallocate(ru_driving_values) @@ -1432,12 +2366,17 @@ subroutine atm_srk3(domain, dt, itimestep, exchange_halo_group) !------------------------------------------------------------------- + MPAS_ACC_TIMER_START('atm_srk3: halo_exchanges + ACC_data_xfer') + call mpas_pool_get_array(state, 'u', u, 2) + !$acc update self(u) ! 
u if (config_apply_lbcs) then call exchange_halo_group(domain, 'dynamics:u_123') else call exchange_halo_group(domain, 'dynamics:u_3') end if + !$acc update device(u) + MPAS_ACC_TIMER_STOP('atm_srk3: halo_exchanges + ACC_data_xfer') ! scalar advection: RK3 scheme of Skamarock and Gassmann (2011). ! PD or monotonicity constraints applied only on the final Runge-Kutta substep. @@ -1449,7 +2388,12 @@ subroutine atm_srk3(domain, dt, itimestep, exchange_halo_group) if (config_apply_lbcs) then ! adjust boundary tendencies for regional_MPAS scalar transport + MPAS_ACC_TIMER_START('atm_srk3: halo_exchanges + ACC_data_xfer') + call mpas_pool_get_array(state, 'scalars', scalars_2, 2) + !$acc update self(scalars_2) call exchange_halo_group(domain, 'dynamics:scalars') + !$acc update device(scalars_2) + MPAS_ACC_TIMER_STOP('atm_srk3: halo_exchanges + ACC_data_xfer') allocate(scalars_driving(num_scalars,nVertLevels,nCells+1)) @@ -1501,17 +2445,27 @@ subroutine atm_srk3(domain, dt, itimestep, exchange_halo_group) call mpas_timer_stop('atm_compute_solve_diagnostics') + MPAS_ACC_TIMER_START('atm_srk3: halo_exchanges + ACC_data_xfer') + call mpas_pool_get_array(state, 'w', w, 2) + call mpas_pool_get_array(diag, 'pv_edge', pv_edge) + call mpas_pool_get_array(diag, 'rho_edge', rho_edge) + !$acc update self(w,pv_edge,rho_edge) if (config_scalar_advection .and. (.not. config_split_dynamics_transport) ) then ! ! Communicate halos for w[1,2], pv_edge[1,2], rho_edge[1,2], scalars[1,2] ! + call mpas_pool_get_array(state, 'scalars', scalars_2, 2) + !$acc update self(scalars_2) call exchange_halo_group(domain, 'dynamics:w,pv_edge,rho_edge,scalars') + !$acc update device(scalars_2) else ! ! Communicate halos for w[1,2], pv_edge[1,2], rho_edge[1,2] ! call exchange_halo_group(domain, 'dynamics:w,pv_edge,rho_edge') end if + !$acc update device(w,pv_edge,rho_edge) + MPAS_ACC_TIMER_STOP('atm_srk3: halo_exchanges + ACC_data_xfer') ! 
set the zero-gradient condition on w for regional_MPAS @@ -1524,8 +2478,13 @@ subroutine atm_srk3(domain, dt, itimestep, exchange_halo_group) end do !$OMP END PARALLEL DO + MPAS_ACC_TIMER_START('atm_srk3: halo_exchanges + ACC_data_xfer') ! w halo values needs resetting after regional boundary update + call mpas_pool_get_array(state, 'w', w, 2) + !$acc update self(w) call exchange_halo_group(domain, 'dynamics:w') + !$acc update device(w) + MPAS_ACC_TIMER_STOP('atm_srk3: halo_exchanges + ACC_data_xfer') end if ! end of regional_MPAS addition @@ -1536,7 +2495,14 @@ subroutine atm_srk3(domain, dt, itimestep, exchange_halo_group) ! ! Communicate halos for theta_m[1,2], pressure_p[1,2], and rtheta_p[1,2] ! + MPAS_ACC_TIMER_START('atm_srk3: halo_exchanges + ACC_data_xfer') + call mpas_pool_get_array(state, 'theta_m', theta_m, 2) + call mpas_pool_get_array(diag, 'pressure_p', pressure_p) + call mpas_pool_get_array(diag, 'rtheta_p', rtheta_p) + !$acc update self(theta_m,pressure_p,rtheta_p) call exchange_halo_group(domain, 'dynamics:theta_m,pressure_p,rtheta_p') + !$acc update device(theta_m,pressure_p,rtheta_p) + MPAS_ACC_TIMER_STOP('atm_srk3: halo_exchanges + ACC_data_xfer') ! ! Note: A halo exchange for 'exner' here as well as after the call @@ -1573,6 +2539,7 @@ subroutine atm_srk3(domain, dt, itimestep, exchange_halo_group) deallocate(qtot) ! we are finished with these now + !$acc exit data delete(tend_rtheta_physics,tend_rho_physics,tend_ru_physics) #ifndef MPAS_CAM_DYCORE call mpas_deallocate_scratch_field(tend_rtheta_physicsField) call mpas_deallocate_scratch_field(tend_rho_physicsField) @@ -1600,8 +2567,13 @@ subroutine atm_srk3(domain, dt, itimestep, exchange_halo_group) if (config_apply_lbcs) then ! adjust boundary tendencies for regional_MPAS scalar transport + MPAS_ACC_TIMER_START('atm_srk3: halo_exchanges + ACC_data_xfer') ! 
need to fill halo for horizontal filter + call mpas_pool_get_array(state, 'scalars', scalars_2, 2) + !$acc update self(scalars_2) call exchange_halo_group(domain, 'dynamics:scalars') + !$acc update device(scalars_2) + MPAS_ACC_TIMER_STOP('atm_srk3: halo_exchanges + ACC_data_xfer') allocate(scalars_driving(num_scalars,nVertLevels,nCells+1)) @@ -1627,7 +2599,12 @@ subroutine atm_srk3(domain, dt, itimestep, exchange_halo_group) !------------------------------------------------------------------------------------------------------------------------ if (rk_step < 3) then + MPAS_ACC_TIMER_START('atm_srk3: halo_exchanges + ACC_data_xfer') + call mpas_pool_get_array(state, 'scalars', scalars_2, 2) + !$acc update self(scalars_2) call exchange_halo_group(domain, 'dynamics:scalars') + !$acc update device(scalars_2) + MPAS_ACC_TIMER_STOP('atm_srk3: halo_exchanges + ACC_data_xfer') end if end do RK3_SPLIT_TRANSPORT @@ -1659,16 +2636,25 @@ subroutine atm_srk3(domain, dt, itimestep, exchange_halo_group) ! #ifdef DO_PHYSICS + MPAS_ACC_TIMER_START('atm_srk3: physics ACC_data_xfer') call mpas_pool_get_array(state, 'scalars', scalars_1, 1) + !$acc update self(scalars_1) call mpas_pool_get_array(state, 'scalars', scalars_2, 2) + !$acc update self(scalars_2) + MPAS_ACC_TIMER_STOP('atm_srk3: physics ACC_data_xfer') if(config_convection_scheme == 'cu_grell_freitas' .or. 
& config_convection_scheme == 'cu_ntiedtke') then + MPAS_ACC_TIMER_START('atm_srk3: physics ACC_data_xfer') call mpas_pool_get_array(tend_physics, 'rqvdynten', rqvdynten) call mpas_pool_get_array(state, 'theta_m', theta_m, 2) + !$acc update self(theta_m) call mpas_pool_get_array(tend_physics, 'rthdynten', rthdynten) + !$acc update self(rthdynten) + MPAS_ACC_TIMER_STOP('atm_srk3: physics ACC_data_xfer') + !NOTE: The calculation of the tendency due to horizontal and vertical advection for the water vapor mixing ratio !requires that the subroutine atm_advance_scalars_mono was called on the third Runge Kutta step, so that a halo @@ -1693,6 +2679,9 @@ subroutine atm_srk3(domain, dt, itimestep, exchange_halo_group) where ( scalars_2(:,:,:) < 0.0) & scalars_2(:,:,:) = 0.0 + MPAS_ACC_TIMER_START('atm_srk3: physics ACC_data_xfer') + !$acc update device(scalars_2, rthdynten) + MPAS_ACC_TIMER_STOP('atm_srk3: physics ACC_data_xfer') !call microphysics schemes: if (trim(config_microp_scheme) /= 'off') then call mpas_timer_start('microphysics') @@ -1740,7 +2729,12 @@ subroutine atm_srk3(domain, dt, itimestep, exchange_halo_group) if (config_apply_lbcs) then ! 
adjust boundary values for regional_MPAS scalar transport + MPAS_ACC_TIMER_START('atm_srk3: halo_exchanges + ACC_data_xfer') + call mpas_pool_get_array(state, 'scalars', scalars_2, 2) + !$acc update self(scalars_2) call exchange_halo_group(domain, 'dynamics:scalars') + !$acc update device(scalars_2) + MPAS_ACC_TIMER_STOP('atm_srk3: halo_exchanges + ACC_data_xfer') allocate(scalars_driving(num_scalars,nVertLevels,nCells+1)) @@ -2017,12 +3011,6 @@ subroutine atm_rk_integration_setup( state, diag, nVertLevels, num_scalars, & call mpas_pool_get_array(state, 'scalars', scalars_1, 1) call mpas_pool_get_array(state, 'scalars', scalars_2, 2) - MPAS_ACC_TIMER_START('atm_rk_integration_setup [ACC_data_xfer]') - !$acc enter data create(ru_save, u_2, rw_save, rtheta_p_save, rho_p_save, & - !$acc w_2, theta_m_2, rho_zz_2, rho_zz_old_split, scalars_2) & - !$acc copyin(ru, rw, rtheta_p, rho_p, u_1, w_1, theta_m_1, & - !$acc rho_zz_1, scalars_1) - MPAS_ACC_TIMER_STOP('atm_rk_integration_setup [ACC_data_xfer]') !$acc kernels theta_m_2(:,cellEnd+1) = 0.0_RKIND @@ -2070,12 +3058,6 @@ subroutine atm_rk_integration_setup( state, diag, nVertLevels, num_scalars, & end do !$acc end parallel - MPAS_ACC_TIMER_START('atm_rk_integration_setup [ACC_data_xfer]') - !$acc exit data copyout(ru_save, rw_save, rtheta_p_save, rho_p_save, u_2, & - !$acc w_2, theta_m_2, rho_zz_2, rho_zz_old_split, scalars_2) & - !$acc delete(ru, rw, rtheta_p, rho_p, u_1, w_1, theta_m_1, & - !$acc rho_zz_1, scalars_1) - MPAS_ACC_TIMER_STOP('atm_rk_integration_setup [ACC_data_xfer]') end subroutine atm_rk_integration_setup @@ -2126,11 +3108,6 @@ subroutine atm_compute_moist_coefficients( dims, state, diag, mesh, & moist_start = moist_start_ptr moist_end = moist_end_ptr - MPAS_ACC_TIMER_START('atm_compute_moist_coefficients [ACC_data_xfer]') - !$acc enter data create(cqw, cqu) & - !$acc copyin(scalars) - MPAS_ACC_TIMER_STOP('atm_compute_moist_coefficients [ACC_data_xfer]') - !$acc parallel default(present) !$acc loop gang 
worker ! do iCell = cellSolveStart,cellSolveEnd @@ -2179,10 +3156,6 @@ subroutine atm_compute_moist_coefficients( dims, state, diag, mesh, & end do !$acc end parallel - MPAS_ACC_TIMER_START('atm_compute_moist_coefficients [ACC_data_xfer]') - !$acc exit data copyout(cqw, cqu) & - !$acc delete(scalars) - MPAS_ACC_TIMER_STOP('atm_compute_moist_coefficients [ACC_data_xfer]') end subroutine atm_compute_moist_coefficients @@ -2315,9 +3288,7 @@ subroutine atm_compute_vert_imp_coefs_work(nCells, moist_start, moist_end, dts, real (kind=RKIND), dimension( nVertLevels ) :: b_tri, c_tri MPAS_ACC_TIMER_START('atm_compute_vert_imp_coefs_work [ACC_data_xfer]') - !$acc enter data copyin(cqw, p, t, rb, rtb, rt, pb) - !$acc enter data create(cofrz, cofwr, cofwz, coftz, cofwt, a_tri, b_tri, & - !$acc c_tri, alpha_tri, gamma_tri) + !$acc enter data create(b_tri, c_tri) MPAS_ACC_TIMER_STOP('atm_compute_vert_imp_coefs_work [ACC_data_xfer]') ! set coefficients @@ -2399,9 +3370,7 @@ subroutine atm_compute_vert_imp_coefs_work(nCells, moist_start, moist_end, dts, !$acc end parallel MPAS_ACC_TIMER_START('atm_compute_vert_imp_coefs_work [ACC_data_xfer]') - !$acc exit data copyout(cofrz, cofwr, cofwz, coftz, cofwt, a_tri, b_tri, & - !$acc c_tri, alpha_tri, gamma_tri) - !$acc exit data delete(cqw, p, t, rb, rtb, rt, pb) + !$acc exit data copyout(b_tri, c_tri) MPAS_ACC_TIMER_STOP('atm_compute_vert_imp_coefs_work [ACC_data_xfer]') end subroutine atm_compute_vert_imp_coefs_work @@ -2506,9 +3475,6 @@ subroutine atm_set_smlstep_pert_variables_work(nCells, nEdges, & integer :: iCell, iEdge, i, k real (kind=RKIND) :: flux - MPAS_ACC_TIMER_START('atm_set_smlstep_pert_variables [ACC_data_xfer]') - !$acc enter data copyin(u_tend, w_tend) - MPAS_ACC_TIMER_STOP('atm_set_smlstep_pert_variables [ACC_data_xfer]') ! we solve for omega instead of w (see Klemp et al MWR 2007), ! 
so here we change the w_p tendency to an omega_p tendency @@ -2541,10 +3507,6 @@ subroutine atm_set_smlstep_pert_variables_work(nCells, nEdges, & end do !$acc end parallel - MPAS_ACC_TIMER_START('atm_set_smlstep_pert_variables [ACC_data_xfer]') - !$acc exit data delete(u_tend) - !$acc exit data copyout(w_tend) - MPAS_ACC_TIMER_STOP('atm_set_smlstep_pert_variables [ACC_data_xfer]') end subroutine atm_set_smlstep_pert_variables_work @@ -2777,17 +3739,6 @@ subroutine atm_advance_acoustic_step_work(nCells, nEdges, nCellsSolve, cellStart resm = (1.0 - epssm) / (1.0 + epssm) rdts = 1./dts - MPAS_ACC_TIMER_START('atm_advance_acoustic_step [ACC_data_xfer]') - !$acc enter data copyin(exner,cqu,cofwt,coftz,cofrz,cofwr,cofwz, & - !$acc a_tri,alpha_tri,gamma_tri,rho_zz,theta_m,w, & - !$acc tend_ru,tend_rho,tend_rt,tend_rw,rw,rw_save) - !$acc enter data create(rtheta_pp_old) - if(small_step == 1) then - !$acc enter data create(ru_p,ruAvg,rho_pp,rtheta_pp,wwAvg,rw_p) - else - !$acc enter data copyin(ru_p,ruAvg,rho_pp,rtheta_pp,wwAvg,rw_p) - end if - MPAS_ACC_TIMER_STOP('atm_advance_acoustic_step [ACC_data_xfer]') if(small_step /= 1) then ! not needed on first small step @@ -3014,13 +3965,6 @@ subroutine atm_advance_acoustic_step_work(nCells, nEdges, nCellsSolve, cellStart end do ! 
end of loop over cells !$acc end parallel - MPAS_ACC_TIMER_START('atm_advance_acoustic_step [ACC_data_xfer]') - !$acc exit data delete(exner,cqu,cofwt,coftz,cofrz,cofwr,cofwz, & - !$acc a_tri,alpha_tri,gamma_tri,rho_zz,theta_m,w, & - !$acc tend_ru,tend_rho,tend_rt,tend_rw,rw,rw_save) - !$acc exit data copyout(rtheta_pp_old,ru_p,ruAvg,rho_pp, & - !$acc rtheta_pp,wwAvg,rw_p) - MPAS_ACC_TIMER_STOP('atm_advance_acoustic_step [ACC_data_xfer]') end subroutine atm_advance_acoustic_step_work @@ -3072,9 +4016,6 @@ subroutine atm_divergence_damping_3d( state, diag, mesh, configs, dts, edgeStart nCellsSolve = nCellsSolve_ptr nVertLevels = nVertLevels_ptr - MPAS_ACC_TIMER_START('atm_divergence_damping_3d [ACC_data_xfer]') - !$acc enter data copyin(ru_p, rtheta_pp, rtheta_pp_old, theta_m) - MPAS_ACC_TIMER_STOP('atm_divergence_damping_3d [ACC_data_xfer]') !$acc parallel default(present) !$acc loop gang worker @@ -3107,10 +4048,6 @@ subroutine atm_divergence_damping_3d( state, diag, mesh, configs, dts, edgeStart end do ! 
end loop over edges !$acc end parallel - MPAS_ACC_TIMER_START('atm_divergence_damping_3d [ACC_data_xfer]') - !$acc exit data copyout(ru_p) & - !$acc delete(rtheta_pp, rtheta_pp_old, theta_m) - MPAS_ACC_TIMER_STOP('atm_divergence_damping_3d [ACC_data_xfer]') end subroutine atm_divergence_damping_3d @@ -3301,17 +4238,6 @@ subroutine atm_recover_large_step_variables_work(nCells, nEdges, nCellsSolve, nE integer :: i, iCell, iEdge, k, cell1, cell2 real (kind=RKIND) :: invNs, rcv, p0, flux - MPAS_ACC_TIMER_START('atm_recover_large_step_variables [ACC_data_xfer]') - !$acc enter data copyin(rho_p_save,rho_pp,rho_base,rw_save,rw_p, & - !$acc rtheta_p_save,rtheta_pp,rtheta_base, & - !$acc ru_save,ru_p,wwAvg,ruAvg) & - !$acc create(rho_zz,rho_p,rw,w,rtheta_p,theta_m, & - !$acc ru,u) - if (rk_step == 3) then - !$acc enter data copyin(rt_diabatic_tend,exner_base) & - !$acc create(exner,pressure_p) - end if - MPAS_ACC_TIMER_STOP('atm_recover_large_step_variables [ACC_data_xfer]') rcv = rgas/(cp-rgas) p0 = 1.0e+05 ! this should come from somewhere else... @@ -3457,17 +4383,6 @@ subroutine atm_recover_large_step_variables_work(nCells, nEdges, nCellsSolve, nE end do !$acc end parallel - MPAS_ACC_TIMER_START('atm_recover_large_step_variables [ACC_data_xfer]') - !$acc exit data delete(rho_p_save,rho_pp,rho_base,rw_save,rw_p, & - !$acc rtheta_p_save,rtheta_pp,rtheta_base, & - !$acc ru_save,ru_p) & - !$acc copyout(rho_zz,rho_p,rw,w,rtheta_p,theta_m, & - !$acc ru,u,wwAvg,ruAvg) - if (rk_step == 3) then - !$acc exit data delete(rt_diabatic_tend,exner_base) & - !$acc copyout(exner,pressure_p) - end if - MPAS_ACC_TIMER_STOP('atm_recover_large_step_variables [ACC_data_xfer]') end subroutine atm_recover_large_step_variables_work @@ -3702,10 +4617,6 @@ subroutine atm_advance_scalars_work(nCells, num_scalars, dt, & weight_time_old = 1. 
- weight_time_new - MPAS_ACC_TIMER_START('atm_advance_scalars [ACC_data_xfer]') - !$acc enter data copyin(uhAvg, scalar_new) - MPAS_ACC_TIMER_STOP('atm_advance_scalars [ACC_data_xfer]') - !$acc parallel async !$acc loop gang worker private(scalar_weight2, ica) do iEdge=edgeStart,edgeEnd @@ -3800,12 +4711,6 @@ subroutine atm_advance_scalars_work(nCells, num_scalars, dt, & ! MPAS_ACC_TIMER_START('atm_advance_scalars [ACC_data_xfer]') -#ifndef DO_PHYSICS - !$acc enter data create(scalar_tend_save) -#else - !$acc enter data copyin(scalar_tend_save) -#endif - !$acc enter data copyin(scalar_old, fnm, fnp, rdnw, wwAvg, rho_zz_old, rho_zz_new) !$acc enter data create(scalar_tend_column) MPAS_ACC_TIMER_STOP('atm_advance_scalars [ACC_data_xfer]') @@ -3888,9 +4793,7 @@ subroutine atm_advance_scalars_work(nCells, num_scalars, dt, & !$acc end parallel MPAS_ACC_TIMER_START('atm_advance_scalars [ACC_data_xfer]') - !$acc exit data copyout(scalar_new) - !$acc exit data delete(scalar_tend_column, uhAvg, wwAvg, scalar_old, fnm, fnp, & - !$acc rdnw, rho_zz_old, rho_zz_new, scalar_tend_save) + !$acc exit data delete(scalar_tend_column) MPAS_ACC_TIMER_STOP('atm_advance_scalars [ACC_data_xfer]') end subroutine atm_advance_scalars_work @@ -4149,19 +5052,9 @@ subroutine atm_advance_scalars_mono_work(field_name, block, state, nCells, nEdge MPAS_ACC_TIMER_START('atm_advance_scalars_mono [ACC_data_xfer]') - !$acc data present(nEdgesOnCell, edgesOnCell, edgesOnCell_sign, & - !$acc invAreaCell, cellsOnCell, cellsOnEdge, nAdvCellsForEdge, & - !$acc advCellsForEdge, adv_coefs, adv_coefs_3rd, dvEdge, bdyMaskCell) - -#ifdef DO_PHYSICS - !$acc enter data copyin(scalar_tend) -#else - !$acc enter data create(scalar_tend) -#endif if (local_advance_density) then !$acc enter data copyin(rho_zz_int) end if - !$acc enter data copyin(scalars_old, rho_zz_old, rdnw, uhAvg, wwAvg) MPAS_ACC_TIMER_STOP('atm_advance_scalars_mono [ACC_data_xfer]') !$acc parallel @@ -4186,8 +5079,6 @@ subroutine 
atm_advance_scalars_mono_work(field_name, block, state, nCells, nEdge !$acc end parallel MPAS_ACC_TIMER_START('atm_advance_scalars_mono [ACC_data_xfer]') - !$acc exit data copyout(scalar_tend) - !$acc update self(scalars_old) MPAS_ACC_TIMER_STOP('atm_advance_scalars_mono [ACC_data_xfer]') @@ -4251,10 +5142,6 @@ subroutine atm_advance_scalars_mono_work(field_name, block, state, nCells, nEdge end if MPAS_ACC_TIMER_START('atm_advance_scalars_mono [ACC_data_xfer]') - if (.not. local_advance_density) then - !$acc enter data copyin(rho_zz_new) - end if - !$acc enter data copyin(scalars_new, fnm, fnp) !$acc enter data create(scale_arr) MPAS_ACC_TIMER_STOP('atm_advance_scalars_mono [ACC_data_xfer]') @@ -4762,14 +5649,8 @@ subroutine atm_advance_scalars_mono_work(field_name, block, state, nCells, nEdge MPAS_ACC_TIMER_START('atm_advance_scalars_mono [ACC_data_xfer]') if (local_advance_density) then !$acc exit data copyout(rho_zz_int) - else - !$acc exit data delete(rho_zz_new) end if - !$acc exit data copyout(scalars_new) - !$acc exit data delete(scalars_old, scale_arr, rho_zz_old, wwAvg, & - !$acc uhAvg, fnm, fnp, rdnw) - - !$acc end data + !$acc exit data delete(scale_arr) MPAS_ACC_TIMER_STOP('atm_advance_scalars_mono [ACC_data_xfer]') end subroutine atm_advance_scalars_mono_work @@ -5301,45 +6182,11 @@ subroutine atm_compute_dyn_tend_work(nCells, nEdges, nVertices, nVertLevels_dumm MPAS_ACC_TIMER_START('atm_compute_dyn_tend_work [ACC_data_xfer]') - if (perturbation_coriolis) then - !$acc enter data copyin(u_init, v_init) - end if if (les_model_opt /= LES_MODEL_NONE) then !$acc enter data copyin(exner, pressure_b, bn2) end if !$acc enter data copyin(ustm, hfx, qfx) - if (rk_step == 1) then - !$acc enter data create(tend_w_euler) - !$acc enter data create(tend_u_euler) - !$acc enter data create(tend_theta_euler) - !$acc enter data create(tend_rho) - - !$acc enter data copyin(tend_rho_physics) - !$acc enter data copyin(rb, rr_save) - !$acc enter data copyin(divergence, 
vorticity) - !$acc enter data copyin(v) - else - !$acc enter data copyin(tend_w_euler) - !$acc enter data copyin(tend_u_euler) - !$acc enter data copyin(tend_theta_euler) - !$acc enter data copyin(tend_rho) - end if - !$acc enter data create(tend_u) - !$acc enter data copyin(cqu, pp, u, w, pv_edge, rho_edge, ke) - !$acc enter data create(h_divergence) - !$acc enter data copyin(ru, rw) !$acc enter data create(rayleigh_damp_coef) - !$acc enter data copyin(tend_ru_physics) - !$acc enter data create(tend_w) - !$acc enter data copyin(rho_zz) - !$acc enter data create(tend_theta) - !$acc enter data copyin(theta_m) - !$acc enter data copyin(ru_save, theta_m_save) - !$acc enter data copyin(cqw) - !$acc enter data copyin(tend_rtheta_physics) - !$acc enter data copyin(rw_save, rt_diabatic_tend) - !$acc enter data create(rthdynten) - !$acc enter data copyin(t_init) if (les_model_opt /= LES_MODEL_NONE) then !$acc enter data copyin(ur_cell, vr_cell) else @@ -5350,8 +6197,6 @@ subroutine atm_compute_dyn_tend_work(nCells, nEdges, nVertices, nVertLevels_dumm !$acc enter data create(eddy_visc_horz) !$acc enter data create(eddy_visc_vert) !$acc enter data create(prandtl_3d_inv) - !$acc enter data copyin(scalars) - !$acc enter data copyin(tend_scalars) MPAS_ACC_TIMER_STOP('atm_compute_dyn_tend_work [ACC_data_xfer]') prandtl_inv = 1.0_RKIND / prandtl @@ -5986,46 +6831,12 @@ subroutine atm_compute_dyn_tend_work(nCells, nEdges, nVertices, nVertLevels_dumm !$acc end parallel MPAS_ACC_TIMER_START('atm_compute_dyn_tend_work [ACC_data_xfer]') - if (perturbation_coriolis) then - !$acc exit data delete(u_init, v_init) - end if if (les_model_opt /= LES_MODEL_NONE) then !$acc exit data delete(exner, pressure_b) !$acc exit data copyout(bn2) end if !$acc exit data delete(ustm, hfx, qfx) - if (rk_step == 1) then - !$acc exit data copyout(tend_w_euler) - !$acc exit data copyout(tend_u_euler) - !$acc exit data copyout(tend_theta_euler) - !$acc exit data copyout(tend_rho) - - !$acc exit data 
delete(tend_rho_physics) - !$acc exit data delete(rb, rr_save) - !$acc exit data delete(divergence, vorticity) - !$acc exit data delete(v) - else - !$acc exit data delete(tend_w_euler) - !$acc exit data delete(tend_u_euler) - !$acc exit data delete(tend_theta_euler) - !$acc exit data delete(tend_rho) - end if - !$acc exit data copyout(tend_u) - !$acc exit data delete(cqu, pp, u, w, pv_edge, rho_edge, ke) - !$acc exit data copyout(h_divergence) - !$acc exit data delete(ru, rw) !$acc exit data delete(rayleigh_damp_coef) - !$acc exit data delete(tend_ru_physics) - !$acc exit data copyout(tend_w) - !$acc exit data delete(rho_zz) - !$acc exit data copyout(tend_theta) - !$acc exit data delete(theta_m) - !$acc exit data delete(ru_save, theta_m_save) - !$acc exit data delete(cqw) - !$acc exit data delete(tend_rtheta_physics) - !$acc exit data delete(rw_save, rt_diabatic_tend) - !$acc exit data copyout(rthdynten) - !$acc exit data delete(t_init) if (les_model_opt /= LES_MODEL_NONE) then !$acc exit data delete(ur_cell, vr_cell) else @@ -6036,8 +6847,6 @@ subroutine atm_compute_dyn_tend_work(nCells, nEdges, nVertices, nVertLevels_dumm !$acc exit data delete(eddy_visc_horz) !$acc exit data delete(eddy_visc_vert) !$acc exit data delete(prandtl_3d_inv) - !$acc exit data delete(scalars) - !$acc exit data copyout(tend_scalars) MPAS_ACC_TIMER_STOP('atm_compute_dyn_tend_work [ACC_data_xfer]') end subroutine atm_compute_dyn_tend_work @@ -6206,26 +7015,10 @@ subroutine atm_compute_solve_diagnostics_work(nCells, nEdges, nVertices, & logical :: reconstruct_v - MPAS_ACC_TIMER_START('atm_compute_solve_diagnostics [ACC_data_xfer]') - !$acc enter data copyin(cellsOnEdge,dcEdge,dvEdge, & - !$acc edgesOnVertex,edgesOnVertex_sign,invAreaTriangle, & - !$acc nEdgesOnCell,edgesOnCell, & - !$acc edgesOnCell_sign,invAreaCell, & - !$acc invAreaTriangle,edgesOnVertex, & - !$acc verticesOnCell,kiteForCell,kiteAreasOnVertex, & - !$acc nEdgesOnEdge,edgesOnEdge,weightsOnEdge, & - !$acc fVertex, & - !$acc 
verticesOnEdge, & - !$acc invDvEdge,invDcEdge) - !$acc enter data copyin(u,h) - MPAS_ACC_TIMER_STOP('atm_compute_solve_diagnostics [ACC_data_xfer]') ! ! Compute height on cell edges at velocity locations ! - MPAS_ACC_TIMER_START('atm_compute_solve_diagnostics [ACC_data_xfer]') - !$acc enter data create(h_edge,vorticity,divergence) - MPAS_ACC_TIMER_STOP('atm_compute_solve_diagnostics [ACC_data_xfer]') !$acc parallel default(present) !$acc loop gang do iEdge=edgeStart,edgeEnd @@ -6310,9 +7103,6 @@ subroutine atm_compute_solve_diagnostics_work(nCells, nEdges, nVertices, & ! ! Replace 2.0 with 2 in exponentiation to avoid outside chance that ! compiler will actually allow "float raised to float" operation - MPAS_ACC_TIMER_START('atm_compute_solve_diagnostics [ACC_data_xfer]') - !$acc enter data create(ke) - MPAS_ACC_TIMER_STOP('atm_compute_solve_diagnostics [ACC_data_xfer]') !$acc parallel default(present) !$acc loop gang do iCell=cellStart,cellEnd @@ -6407,14 +7197,6 @@ subroutine atm_compute_solve_diagnostics_work(nCells, nEdges, nVertices, & if(rk_step /= 3) reconstruct_v = .false. end if - MPAS_ACC_TIMER_START('atm_compute_solve_diagnostics [ACC_data_xfer]') - if (reconstruct_v) then - !$acc enter data create(v) - else - !$acc enter data copyin(v) - end if - MPAS_ACC_TIMER_STOP('atm_compute_solve_diagnostics [ACC_data_xfer]') - if (reconstruct_v) then !$acc parallel default(present) !$acc loop gang @@ -6442,9 +7224,6 @@ subroutine atm_compute_solve_diagnostics_work(nCells, nEdges, nVertices, & ! ! Avoid dividing h_vertex by areaTriangle and move areaTriangle into ! 
numerator for the pv_vertex calculation - MPAS_ACC_TIMER_START('atm_compute_solve_diagnostics [ACC_data_xfer]') - !$acc enter data create(pv_vertex) - MPAS_ACC_TIMER_STOP('atm_compute_solve_diagnostics [ACC_data_xfer]') !$acc parallel default(present) !$acc loop collapse(2) do iVertex = vertexStart,vertexEnd @@ -6468,9 +7247,6 @@ subroutine atm_compute_solve_diagnostics_work(nCells, nEdges, nVertices, & ! Compute pv at the edges ! ( this computes pv_edge at all edges bounding real cells ) ! - MPAS_ACC_TIMER_START('atm_compute_solve_diagnostics [ACC_data_xfer]') - !$acc enter data create(pv_edge) - MPAS_ACC_TIMER_STOP('atm_compute_solve_diagnostics [ACC_data_xfer]') !$acc parallel default(present) !$acc loop collapse(2) do iEdge = edgeStart,edgeEnd @@ -6488,9 +7264,6 @@ subroutine atm_compute_solve_diagnostics_work(nCells, nEdges, nVertices, & ! ( this computes pv_cell for all real cells ) ! only needed for APVM upwinding ! - MPAS_ACC_TIMER_START('atm_compute_solve_diagnostics [ACC_data_xfer]') - !$acc enter data create(pv_cell) - MPAS_ACC_TIMER_STOP('atm_compute_solve_diagnostics [ACC_data_xfer]') !$acc parallel default(present) !$acc loop gang do iCell=cellStart,cellEnd @@ -6529,9 +7302,6 @@ subroutine atm_compute_solve_diagnostics_work(nCells, nEdges, nVertices, & ! Merged loops for calculating gradPVt, gradPVn and pv_edge ! Also precomputed inverses of dvEdge and dcEdge to avoid repeated divisions ! 
- MPAS_ACC_TIMER_START('atm_compute_solve_diagnostics [ACC_data_xfer]') - !$acc enter data create(gradPVt,gradPVn) - MPAS_ACC_TIMER_STOP('atm_compute_solve_diagnostics [ACC_data_xfer]') r = config_apvm_upwinding * dt !$acc parallel default(present) !$acc loop gang @@ -6548,31 +7318,10 @@ subroutine atm_compute_solve_diagnostics_work(nCells, nEdges, nVertices, & end do !$acc end parallel - MPAS_ACC_TIMER_START('atm_compute_solve_diagnostics [ACC_data_xfer]') - !$acc exit data delete(pv_cell,gradPVt,gradPVn) - MPAS_ACC_TIMER_STOP('atm_compute_solve_diagnostics [ACC_data_xfer]') end if ! apvm upwinding - MPAS_ACC_TIMER_START('atm_compute_solve_diagnostics [ACC_data_xfer]') - !$acc exit data delete(cellsOnEdge,dcEdge,dvEdge, & - !$acc edgesOnVertex,edgesOnVertex_sign,invAreaTriangle, & - !$acc nEdgesOnCell,edgesOnCell, & - !$acc edgesOnCell_sign,invAreaCell, & - !$acc invAreaTriangle,edgesOnVertex, & - !$acc verticesOnCell,kiteForCell,kiteAreasOnVertex, & - !$acc nEdgesOnEdge,edgesOnEdge,weightsOnEdge, & - !$acc verticesOnEdge, & - !$acc fVertex,invDvEdge,invDcEdge) - !$acc exit data delete(u,h) - !$acc exit data copyout(h_edge,vorticity,divergence, & - !$acc ke, & - !$acc v, & - !$acc pv_vertex, & - !$acc pv_edge) - MPAS_ACC_TIMER_STOP('atm_compute_solve_diagnostics [ACC_data_xfer]') - end subroutine atm_compute_solve_diagnostics_work @@ -6661,17 +7410,13 @@ subroutine atm_init_coupled_diagnostics(state, time_lev, diag, mesh, configs, & call mpas_pool_get_array(mesh, 'zb3_cell', zb3_cell) MPAS_ACC_TIMER_START('atm_init_coupled_diagnostics [ACC_data_xfer]') - ! copyin invariant fields - !$acc enter data copyin(cellsOnEdge,nEdgesOnCell,edgesOnCell, & - !$acc edgesOnCell_sign,zz,fzm,fzp,zb,zb3, & - !$acc zb_cell,zb3_cell) ! copyin the data that is only on the right-hand side - !$acc enter data copyin(scalars(index_qv,:,:),u,w,rho,theta, & + !$acc enter data copyin(scalars(index_qv,:,:),w,rho,theta, & !$acc rho_base,theta_base) ! 
copyin the data that will be modified in this routine - !$acc enter data create(theta_m,rho_zz,ru,rw,rho_p,rtheta_base, & + !$acc enter data create(theta_m,ru,rw,rho_p,rtheta_base, & !$acc rtheta_p,exner,exner_base,pressure_p, & !$acc pressure_base) MPAS_ACC_TIMER_STOP('atm_init_coupled_diagnostics [ACC_data_xfer]') @@ -6795,17 +7540,12 @@ subroutine atm_init_coupled_diagnostics(state, time_lev, diag, mesh, configs, & !$acc end parallel MPAS_ACC_TIMER_START('atm_init_coupled_diagnostics [ACC_data_xfer]') - ! delete invariant fields - !$acc exit data delete(cellsOnEdge,nEdgesOnCell,edgesOnCell, & - !$acc edgesOnCell_sign,zz,fzm,fzp,zb,zb3, & - !$acc zb_cell,zb3_cell) - ! delete the data that is only on the right-hand side - !$acc exit data delete(scalars(index_qv,:,:),u,w,rho,theta, & + !$acc exit data delete(scalars(index_qv,:,:),w,rho,theta, & !$acc rho_base,theta_base) ! copyout the data that will be modified in this routine - !$acc exit data copyout(theta_m,rho_zz,ru,rw,rho_p,rtheta_base, & + !$acc exit data copyout(theta_m,ru,rw,rho_p,rtheta_base, & !$acc rtheta_p,exner,exner_base,pressure_p, & !$acc pressure_base) MPAS_ACC_TIMER_STOP('atm_init_coupled_diagnostics [ACC_data_xfer]') @@ -6872,13 +7612,6 @@ subroutine atm_rk_dynamics_substep_finish( state, diag, nVertLevels, dynamics_su call mpas_pool_get_array(state, 'rho_zz', rho_zz_1, 1) call mpas_pool_get_array(state, 'rho_zz', rho_zz_2, 2) - MPAS_ACC_TIMER_START('atm_rk_dynamics_substep_finish [ACC_data_xfer]') - !$acc enter data create(ru_save, u_1, rtheta_p_save, theta_m_1, rho_p_save, rw_save, & - !$acc w_1, rho_zz_1) & - !$acc copyin(ru, u_2, rtheta_p, rho_p, theta_m_2, rho_zz_2, rw, & - !$acc w_2, ruAvg, wwAvg, ruAvg_split, wwAvg_split, rho_zz_old_split) - MPAS_ACC_TIMER_STOP('atm_rk_dynamics_substep_finish [ACC_data_xfer]') - ! Interim fix for the atm_compute_dyn_tend_work subroutine accessing uninitialized values ! 
in garbage cells of theta_m !$acc kernels @@ -6983,13 +7716,6 @@ subroutine atm_rk_dynamics_substep_finish( state, diag, nVertLevels, dynamics_su !$acc end parallel end if - MPAS_ACC_TIMER_START('atm_rk_dynamics_substep_finish [ACC_data_xfer]') - !$acc exit data copyout(ru_save, u_1, rtheta_p_save, rho_p_save, rw_save, & - !$acc w_1, theta_m_1, rho_zz_1, ruAvg, wwAvg, ruAvg_split, & - !$acc wwAvg_split) & - !$acc delete(ru, u_2, rtheta_p, rho_p, theta_m_2, rho_zz_2, rw, & - !$acc w_2, rho_zz_old_split) - MPAS_ACC_TIMER_STOP('atm_rk_dynamics_substep_finish [ACC_data_xfer]') end subroutine atm_rk_dynamics_substep_finish @@ -7044,9 +7770,6 @@ subroutine atm_zero_gradient_w_bdy_work( w, bdyMaskCell, nearestRelaxationCell, integer :: iCell, k - MPAS_ACC_TIMER_START('atm_zero_gradient_w_bdy_work [ACC_data_xfer]') - !$acc enter data copyin(w) - MPAS_ACC_TIMER_STOP('atm_zero_gradient_w_bdy_work [ACC_data_xfer]') !$acc parallel default(present) !$acc loop gang worker @@ -7062,9 +7785,6 @@ subroutine atm_zero_gradient_w_bdy_work( w, bdyMaskCell, nearestRelaxationCell, end do !$acc end parallel - MPAS_ACC_TIMER_START('atm_zero_gradient_w_bdy_work [ACC_data_xfer]') - !$acc exit data copyout(w) - MPAS_ACC_TIMER_STOP('atm_zero_gradient_w_bdy_work [ACC_data_xfer]') end subroutine atm_zero_gradient_w_bdy_work @@ -7105,11 +7825,6 @@ subroutine atm_bdy_adjust_dynamics_speczone_tend( tend, mesh, config, nVertLevel call mpas_pool_get_array(mesh, 'bdyMaskEdge', bdyMaskEdge) call mpas_pool_get_array(tend, 'rt_diabatic_tend', rt_diabatic_tend) - MPAS_ACC_TIMER_START('atm_bdy_adjust_dynamics_speczone_tend [ACC_data_xfer]') - !$acc enter data copyin(tend_ru,tend_rho,tend_rt,tend_rw, & - !$acc rt_diabatic_tend) - MPAS_ACC_TIMER_STOP('atm_bdy_adjust_dynamics_speczone_tend [ACC_data_xfer]') - !$acc parallel default(present) !$acc loop gang worker do iCell = cellSolveStart, cellSolveEnd @@ -7136,11 +7851,6 @@ subroutine atm_bdy_adjust_dynamics_speczone_tend( tend, mesh, config, nVertLevel end 
if end do !$acc end parallel - - MPAS_ACC_TIMER_START('atm_bdy_adjust_dynamics_speczone_tend [ACC_data_xfer]') - !$acc exit data copyout(tend_ru,tend_rho,tend_rt, & - !$acc tend_rw,rt_diabatic_tend) - MPAS_ACC_TIMER_STOP('atm_bdy_adjust_dynamics_speczone_tend [ACC_data_xfer]') end subroutine atm_bdy_adjust_dynamics_speczone_tend @@ -7227,7 +7937,6 @@ subroutine atm_bdy_adjust_dynamics_relaxzone_tend( config, tend, state, diag, me vertexDegree = vertexDegree_ptr MPAS_ACC_TIMER_START('atm_bdy_adjust_dynamics_relaxzone_tend [ACC_data_xfer]') - !$acc enter data copyin(tend_rho, tend_rt, rho_zz, theta_m, tend_ru, ru) !$acc enter data create(divergence1, divergence2, vorticity1, vorticity2) MPAS_ACC_TIMER_STOP('atm_bdy_adjust_dynamics_relaxzone_tend [ACC_data_xfer]') @@ -7375,9 +8084,7 @@ subroutine atm_bdy_adjust_dynamics_relaxzone_tend( config, tend, state, diag, me !$acc end parallel MPAS_ACC_TIMER_START('atm_bdy_adjust_dynamics_relaxzone_tend [ACC_data_xfer]') - !$acc exit data copyout(tend_rho, tend_rt, tend_ru) - !$acc exit data delete(rho_zz, theta_m, ru, & - !$acc divergence1, divergence2, vorticity1, vorticity2) + !$acc exit data delete(divergence1, divergence2, vorticity1, vorticity2) MPAS_ACC_TIMER_STOP('atm_bdy_adjust_dynamics_relaxzone_tend [ACC_data_xfer]') end subroutine atm_bdy_adjust_dynamics_relaxzone_tend @@ -7412,10 +8119,6 @@ subroutine atm_bdy_reset_speczone_values( state, diag, mesh, nVertLevels, & call mpas_pool_get_array(state, 'theta_m', theta_m, 2) call mpas_pool_get_array(diag, 'rtheta_p', rtheta_p) call mpas_pool_get_array(diag, 'rtheta_base', rtheta_base) - - MPAS_ACC_TIMER_START('atm_bdy_reset_speczone_values [ACC_data_xfer]') - !$acc enter data copyin(rtheta_base, theta_m, rtheta_p) - MPAS_ACC_TIMER_STOP('atm_bdy_reset_speczone_values [ACC_data_xfer]') !$acc parallel default(present) !$acc loop gang worker @@ -7430,11 +8133,6 @@ subroutine atm_bdy_reset_speczone_values( state, diag, mesh, nVertLevels, & end do !$acc end parallel - 
MPAS_ACC_TIMER_START('atm_bdy_reset_speczone_values [ACC_data_xfer]') - !$acc exit data copyout(theta_m, rtheta_p) & - !$acc delete(rtheta_base) - MPAS_ACC_TIMER_STOP('atm_bdy_reset_speczone_values [ACC_data_xfer]') - end subroutine atm_bdy_reset_speczone_values !------------------------------------------------------------------------- @@ -7524,8 +8222,7 @@ subroutine atm_bdy_adjust_scalars_work( scalars_new, scalars_driving, dt, dt_rk, !--- MPAS_ACC_TIMER_START('atm_bdy_adjust_scalars [ACC_data_xfer]') - !$acc enter data create(scalars_tmp) & - !$acc copyin(scalars_new) + !$acc enter data create(scalars_tmp) MPAS_ACC_TIMER_STOP('atm_bdy_adjust_scalars [ACC_data_xfer]') !$acc parallel default(present) @@ -7609,8 +8306,7 @@ subroutine atm_bdy_adjust_scalars_work( scalars_new, scalars_driving, dt, dt_rk, !$acc end parallel MPAS_ACC_TIMER_START('atm_bdy_adjust_scalars [ACC_data_xfer]') - !$acc exit data delete(scalars_tmp) & - !$acc copyout(scalars_new) + !$acc exit data delete(scalars_tmp) MPAS_ACC_TIMER_STOP('atm_bdy_adjust_scalars [ACC_data_xfer]') end subroutine atm_bdy_adjust_scalars_work @@ -7681,10 +8377,6 @@ subroutine atm_bdy_set_scalars_work( scalars_driving, scalars_new, & !--- - MPAS_ACC_TIMER_START('atm_bdy_set_scalars_work [ACC_data_xfer]') - !$acc enter data copyin(scalars_new) - MPAS_ACC_TIMER_STOP('atm_bdy_set_scalars_work [ACC_data_xfer]') - !$acc parallel default(present) !$acc loop gang worker do iCell = cellSolveStart, cellSolveEnd ! threaded over cells @@ -7705,10 +8397,6 @@ subroutine atm_bdy_set_scalars_work( scalars_driving, scalars_new, & end do ! 
updates now in temp storage !$acc end parallel - - MPAS_ACC_TIMER_START('atm_bdy_set_scalars_work [ACC_data_xfer]') - !$acc exit data copyout(scalars_new) - MPAS_ACC_TIMER_STOP('atm_bdy_set_scalars_work [ACC_data_xfer]') end subroutine atm_bdy_set_scalars_work @@ -7778,16 +8466,6 @@ subroutine summarize_timestep(domain) nVertLevels = nVertLevels_ptr num_scalars = num_scalars_ptr - MPAS_ACC_TIMER_START('summarize_timestep [ACC_data_xfer]') - if (config_print_detailed_minmax_vel) then - !$acc enter data copyin(w,u,v) - else if (config_print_global_minmax_vel) then - !$acc enter data copyin(w,u) - end if - if (config_print_global_minmax_sca) then - !$acc enter data copyin(scalars) - end if - MPAS_ACC_TIMER_STOP('summarize_timestep [ACC_data_xfer]') if (config_print_detailed_minmax_vel) then call mpas_log_write('') @@ -8146,17 +8824,6 @@ subroutine summarize_timestep(domain) end if - MPAS_ACC_TIMER_START('summarize_timestep [ACC_data_xfer]') - if (config_print_detailed_minmax_vel) then - !$acc exit data delete(w,u,v) - else if (config_print_global_minmax_vel) then - !$acc exit data delete(w,u) - end if - if (config_print_global_minmax_sca) then - !$acc exit data delete(scalars) - end if - MPAS_ACC_TIMER_STOP('summarize_timestep [ACC_data_xfer]') - end subroutine summarize_timestep end module atm_time_integration diff --git a/src/core_atmosphere/mpas_atm_core.F b/src/core_atmosphere/mpas_atm_core.F index f7d04a1f0c..d1b9931c6c 100644 --- a/src/core_atmosphere/mpas_atm_core.F +++ b/src/core_atmosphere/mpas_atm_core.F @@ -43,7 +43,8 @@ function atm_core_init(domain, startTimeStamp) result(ierr) use mpas_atm_dimensions, only : mpas_atm_set_dims use mpas_atm_diagnostics_manager, only : mpas_atm_diag_setup use mpas_atm_threading, only : mpas_atm_threading_init - use atm_time_integration, only : mpas_atm_dynamics_init + use atm_time_integration, only : mpas_atm_dynamics_init, & + mpas_atm_pre_dynamics_h2d, mpas_atm_post_dynamics_d2h use mpas_timer, only : mpas_timer_start, 
mpas_timer_stop use mpas_attlist, only : mpas_modify_att use mpas_string_utils, only : mpas_string_replace @@ -509,6 +510,7 @@ subroutine atm_mpas_init_block(dminfo, stream_manager, block, mesh, dt) call mpas_pool_get_dimension(block % dimensions, 'edgeSolveThreadStart', edgeSolveThreadStart) call mpas_pool_get_dimension(block % dimensions, 'edgeSolveThreadEnd', edgeSolveThreadEnd) + call mpas_atm_pre_computesolvediag_h2d(block) !$OMP PARALLEL DO do thread=1,nThreads if (.not. config_do_restart .or. (config_do_restart .and. config_do_DAcycling)) then @@ -527,6 +529,7 @@ subroutine atm_mpas_init_block(dminfo, stream_manager, block, mesh, dt) edgeThreadStart(thread), edgeThreadEnd(thread)) end do !$OMP END PARALLEL DO + call mpas_atm_post_computesolvediag_d2h(block) deallocate(ke_vertex) deallocate(ke_edge) diff --git a/src/core_atmosphere/physics/mpas_atmphys_interface.F b/src/core_atmosphere/physics/mpas_atmphys_interface.F index 71e46dfcd2..afd8ed2810 100644 --- a/src/core_atmosphere/physics/mpas_atmphys_interface.F +++ b/src/core_atmosphere/physics/mpas_atmphys_interface.F @@ -12,6 +12,16 @@ module mpas_atmphys_interface use mpas_atmphys_constants use mpas_atmphys_vars + use mpas_timer + +#ifdef MPAS_OPENACC +#define MPAS_ACC_TIMER_START(X) call mpas_timer_start(X) +#define MPAS_ACC_TIMER_STOP(X) call mpas_timer_stop(X) +#else +#define MPAS_ACC_TIMER_START(X) +#define MPAS_ACC_TIMER_STOP(X) +#endif + implicit none private @@ -588,6 +598,7 @@ subroutine microphysics_from_MPAS(configs,mesh,state,time_lev,diag,diag_physics, call mpas_pool_get_array(mesh,'zgrid',zgrid) call mpas_pool_get_array(mesh,'zz' ,zz ) + MPAS_ACC_TIMER_START('atm_srk3: physics ACC_data_xfer') call mpas_pool_get_array(diag,'exner' ,exner ) call mpas_pool_get_array(diag,'pressure_base',pressure_b) call mpas_pool_get_array(diag,'pressure_p' ,pressure_p) @@ -595,11 +606,14 @@ subroutine microphysics_from_MPAS(configs,mesh,state,time_lev,diag,diag_physics, call mpas_pool_get_array(state,'rho_zz' 
,rho_zz ,time_lev) call mpas_pool_get_array(state,'theta_m',theta_m,time_lev) call mpas_pool_get_array(state,'w' ,w ,time_lev) + !$acc update host(exner, pressure_b, pressure_p, rho_zz, theta_m, w) call mpas_pool_get_dimension(state,'index_qv',index_qv) call mpas_pool_get_dimension(state,'index_qc',index_qc) call mpas_pool_get_dimension(state,'index_qr',index_qr) call mpas_pool_get_array(state,'scalars',scalars,time_lev) + !$acc update host(scalars) + MPAS_ACC_TIMER_STOP('atm_srk3: physics ACC_data_xfer') qv => scalars(index_qv,:,:) qc => scalars(index_qc,:,:) qr => scalars(index_qr,:,:) @@ -1040,6 +1054,12 @@ subroutine microphysics_to_MPAS(configs,mesh,state,time_lev,diag,diag_physics,te case default end select mp_tend_select + MPAS_ACC_TIMER_START('atm_srk3: physics ACC_data_xfer') + !$acc update device(exner, exner_b, pressure_b, pressure_p, rtheta_b) + !$acc update device(rtheta_p, rho_zz, theta_m, scalars) + !$acc update device(rt_diabatic_tend) + MPAS_ACC_TIMER_STOP('atm_srk3: physics ACC_data_xfer') + end subroutine microphysics_to_MPAS !================================================================================================================= diff --git a/src/core_atmosphere/physics/mpas_atmphys_todynamics.F b/src/core_atmosphere/physics/mpas_atmphys_todynamics.F index 284b072851..290cc56330 100644 --- a/src/core_atmosphere/physics/mpas_atmphys_todynamics.F +++ b/src/core_atmosphere/physics/mpas_atmphys_todynamics.F @@ -13,11 +13,20 @@ module mpas_atmphys_todynamics use mpas_atm_dimensions use mpas_atmphys_constants, only: R_d,R_v,degrad + use mpas_timer implicit none private public:: physics_get_tend +#ifdef MPAS_OPENACC +#define MPAS_ACC_TIMER_START(X) call mpas_timer_start(X) +#define MPAS_ACC_TIMER_STOP(X) call mpas_timer_stop(X) +#else +#define MPAS_ACC_TIMER_START(X) +#define MPAS_ACC_TIMER_STOP(X) +#endif + !Interface between the physics parameterizations and the non-hydrostatic dynamical core. !Laura D. 
Fowler (send comments to laura@ucar.edu). @@ -127,12 +136,14 @@ subroutine physics_get_tend(block,mesh,state,diag,tend,tend_physics,configs,rk_s call mpas_pool_get_config(configs,'config_radt_lw_scheme' ,radt_lw_scheme ) call mpas_pool_get_config(configs,'config_radt_sw_scheme' ,radt_sw_scheme ) + MPAS_ACC_TIMER_START('atm_srk3: physics ACC_data_xfer') call mpas_pool_get_array(state,'theta_m' ,theta_m,1) call mpas_pool_get_array(state,'scalars' ,scalars,1) call mpas_pool_get_array(state,'rho_zz' ,mass,2 ) call mpas_pool_get_array(diag ,'rho_edge',mass_edge) call mpas_pool_get_array(diag ,'tend_u_phys',tend_u_phys) + !$acc update self(theta_m, scalars, mass, mass_edge) call mpas_pool_get_dimension(state,'index_qv',index_qv) call mpas_pool_get_dimension(state,'index_qc',index_qc) call mpas_pool_get_dimension(state,'index_qr',index_qr) @@ -170,6 +181,8 @@ subroutine physics_get_tend(block,mesh,state,diag,tend,tend_physics,configs,rk_s call mpas_pool_get_array(tend_physics,'rthratensw',rthratensw) call mpas_pool_get_array(tend,'scalars_tend',tend_scalars) +!$acc update self(tend_scalars) ! 
Probably not needed +MPAS_ACC_TIMER_STOP('atm_srk3: physics ACC_data_xfer') !initialize the tendency for the potential temperature and all scalars due to PBL, convection, @@ -219,6 +232,10 @@ subroutine physics_get_tend(block,mesh,state,diag,tend,tend_physics,configs,rk_s tend_th,tend_rtheta_physics,tend_scalars,tend_ru_physics,tend_u_phys, & exchange_halo_group) +MPAS_ACC_TIMER_START('atm_srk3: physics ACC_data_xfer') +!$acc update device(tend_scalars) +MPAS_ACC_TIMER_STOP('atm_srk3: physics ACC_data_xfer') + !clean up any pointers that were allocated with zero size before the call to physics_get_tend_work: if(size(rucuten) == 0 ) deallocate(rucuten ) if(size(rvcuten) == 0 ) deallocate(rvcuten ) From a175fc7c3bee6502b5d099983ec5baa4de415f13 Mon Sep 17 00:00:00 2001 From: Abishek Gopal Date: Thu, 22 May 2025 13:56:08 -0600 Subject: [PATCH 112/146] Fixing bug associated with rho_zz_2 not being copied out at the end of dynamics --- src/core_atmosphere/dynamics/mpas_atm_time_integration.F | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/core_atmosphere/dynamics/mpas_atm_time_integration.F b/src/core_atmosphere/dynamics/mpas_atm_time_integration.F index 3f046bf758..70c2c5ada3 100644 --- a/src/core_atmosphere/dynamics/mpas_atm_time_integration.F +++ b/src/core_atmosphere/dynamics/mpas_atm_time_integration.F @@ -1301,7 +1301,7 @@ subroutine mpas_atm_post_dynamics_d2h(domain) call mpas_pool_get_array(state, 'rho_zz', rho_zz_1, 1) !$acc exit data copyout(rho_zz_1) call mpas_pool_get_array(state, 'rho_zz', rho_zz_2, 2) - !$acc exit data delete(rho_zz_2) + !$acc exit data copyout(rho_zz_2) call mpas_pool_get_array(state, 'scalars', scalars_1, 1) !$acc exit data copyout(scalars_1) call mpas_pool_get_array(state, 'scalars', scalars_2, 2) From 78f19b0eda8c3c02bbfdb4baa35c9ced55eb7f6f Mon Sep 17 00:00:00 2001 From: Abishek Gopal Date: Fri, 13 Jun 2025 18:19:15 -0600 Subject: [PATCH 113/146] Moving some OpenACC data movements to subroutines --- 
src/core_atmosphere/dynamics/mpas_atm_iau.F | 57 ++++++++--- .../dynamics/mpas_atm_time_integration.F | 7 +- .../physics/mpas_atmphys_interface.F | 96 ++++++++++++++++--- .../physics/mpas_atmphys_todynamics.F | 81 ++++++++++++---- 4 files changed, 194 insertions(+), 47 deletions(-) diff --git a/src/core_atmosphere/dynamics/mpas_atm_iau.F b/src/core_atmosphere/dynamics/mpas_atm_iau.F index b380e3c0e8..d5999e18c7 100644 --- a/src/core_atmosphere/dynamics/mpas_atm_iau.F +++ b/src/core_atmosphere/dynamics/mpas_atm_iau.F @@ -5,6 +5,15 @@ ! Additional copyright and license information can be found in the LICENSE file ! distributed with this code, or at http://mpas-dev.github.com/license.html ! + + #ifdef MPAS_OPENACC + #define MPAS_ACC_TIMER_START(X) call mpas_timer_start(X) + #define MPAS_ACC_TIMER_STOP(X) call mpas_timer_stop(X) + #else + #define MPAS_ACC_TIMER_START(X) + #define MPAS_ACC_TIMER_STOP(X) + #endif + module mpas_atm_iau use mpas_derived_types @@ -15,17 +24,7 @@ module mpas_atm_iau use mpas_log, only : mpas_log_write use mpas_timer - !public :: atm_compute_iau_coef, atm_add_tend_anal_incr - - - #ifdef MPAS_OPENACC - #define MPAS_ACC_TIMER_START(X) call mpas_timer_start(X) - #define MPAS_ACC_TIMER_STOP(X) call mpas_timer_stop(X) - #else - #define MPAS_ACC_TIMER_START(X) - #define MPAS_ACC_TIMER_STOP(X) - #endif - + !public :: atm_compute_iau_coef, atm_add_tend_anal_incr contains @@ -87,6 +86,39 @@ real (kind=RKIND) function atm_iau_coef(configs, itimestep, dt) result(wgt_iau) end if end function atm_iau_coef + +!================================================================================================== + subroutine update_d2h_pre_add_tend_anal_incr(configs,structs) +!================================================================================================== + + implicit none + + type (mpas_pool_type), intent(in) :: configs + type (mpas_pool_type), intent(inout) :: structs + + type (mpas_pool_type), pointer :: tend + type (mpas_pool_type), 
pointer :: state + type (mpas_pool_type), pointer :: diag + + real (kind=RKIND), dimension(:,:), pointer :: rho_edge, rho_zz, theta_m + real(kind=RKIND),dimension(:,:,:), pointer :: scalars, tend_scalars + + call mpas_pool_get_subpool(structs, 'tend', tend) + call mpas_pool_get_subpool(structs, 'state', state) + call mpas_pool_get_subpool(structs, 'diag', diag) + + MPAS_ACC_TIMER_START('atm_srk3: physics ACC_data_xfer') + call mpas_pool_get_array(state, 'theta_m', theta_m, 1) + call mpas_pool_get_array(state, 'scalars', scalars, 1) + call mpas_pool_get_array(state, 'rho_zz', rho_zz, 2) + call mpas_pool_get_array(diag , 'rho_edge', rho_edge) + !$acc update self(theta_m, scalars, rho_zz, rho_edge) + + call mpas_pool_get_array(tend, 'scalars_tend', tend_scalars) + !$acc update self(tend_scalars) + MPAS_ACC_TIMER_STOP('atm_srk3: physics ACC_data_xfer') + + end subroutine update_d2h_pre_add_tend_anal_incr !================================================================================================== subroutine atm_add_tend_anal_incr (configs, structs, itimestep, dt, tend_ru, tend_rtheta, tend_rho) @@ -148,7 +180,6 @@ subroutine atm_add_tend_anal_incr (configs, structs, itimestep, dt, tend_ru, ten call mpas_pool_get_array(state, 'scalars', scalars, 1) call mpas_pool_get_array(state, 'rho_zz', rho_zz, 2) call mpas_pool_get_array(diag , 'rho_edge', rho_edge) - !$acc update self(theta_m, scalars, rho_zz, rho_edge) call mpas_pool_get_dimension(state, 'moist_start', moist_start) call mpas_pool_get_dimension(state, 'moist_end', moist_end) @@ -161,8 +192,6 @@ subroutine atm_add_tend_anal_incr (configs, structs, itimestep, dt, tend_ru, ten ! call mpas_pool_get_array(tend, 'rho_zz', tend_rho) ! 
call mpas_pool_get_array(tend, 'theta_m', tend_theta) call mpas_pool_get_array(tend, 'scalars_tend', tend_scalars) - !$acc update self(tend_scalars) - MPAS_ACC_TIMER_STOP('atm_srk3: physics ACC_data_xfer') call mpas_pool_get_array(tend_iau, 'theta', theta_amb) call mpas_pool_get_array(tend_iau, 'rho', rho_amb) diff --git a/src/core_atmosphere/dynamics/mpas_atm_time_integration.F b/src/core_atmosphere/dynamics/mpas_atm_time_integration.F index 70c2c5ada3..92cbb677b1 100644 --- a/src/core_atmosphere/dynamics/mpas_atm_time_integration.F +++ b/src/core_atmosphere/dynamics/mpas_atm_time_integration.F @@ -28,6 +28,7 @@ module atm_time_integration #ifdef DO_PHYSICS use mpas_atmphys_driver_microphysics + use mpas_atmphys_interface, only: update_d2h_pre_microphysics, update_h2d_post_microphysics use mpas_atmphys_todynamics use mpas_atmphys_utilities #endif @@ -2025,6 +2026,7 @@ subroutine atm_srk3(domain, dt, itimestep, exchange_halo_group) call mpas_timer_stop('atm_compute_moist_coefficients') #ifdef DO_PHYSICS + call update_d2h_pre_physics_get_tend(block % configs, state, diag, tend) call mpas_timer_start('physics_get_tend') rk_step = 1 dynamics_substep = 1 @@ -2033,6 +2035,7 @@ subroutine atm_srk3(domain, dt, itimestep, exchange_halo_group) tend_ru_physics, tend_rtheta_physics, tend_rho_physics, & exchange_halo_group ) call mpas_timer_stop('physics_get_tend') + call update_h2d_post_physics_get_tend(block % configs, state, diag, tend) #else #ifndef MPAS_CAM_DYCORE ! @@ -2048,6 +2051,7 @@ subroutine atm_srk3(domain, dt, itimestep, exchange_halo_group) ! IAU - Incremental Analysis Update ! 
if (trim(config_IAU_option) /= 'off') then + call update_d2h_pre_add_tend_anal_incr(block % configs, block % structs) call atm_add_tend_anal_incr(block % configs, block % structs, itimestep, dt, & tend_ru_physics, tend_rtheta_physics, tend_rho_physics) end if @@ -2655,7 +2659,6 @@ subroutine atm_srk3(domain, dt, itimestep, exchange_halo_group) !$acc update self(rthdynten) MPAS_ACC_TIMER_STOP('atm_srk3: physics ACC_data_xfer') - !NOTE: The calculation of the tendency due to horizontal and vertical advection for the water vapor mixing ratio !requires that the subroutine atm_advance_scalars_mono was called on the third Runge Kutta step, so that a halo !update for the scalars at time_levs(1) is applied. A halo update for the scalars at time_levs(2) is done above. @@ -2684,6 +2687,7 @@ subroutine atm_srk3(domain, dt, itimestep, exchange_halo_group) MPAS_ACC_TIMER_STOP('atm_srk3: physics ACC_data_xfer') !call microphysics schemes: if (trim(config_microp_scheme) /= 'off') then + call update_d2h_pre_microphysics( block % configs, state, diag, 2) call mpas_timer_start('microphysics') !$OMP PARALLEL DO do thread=1,nThreads @@ -2692,6 +2696,7 @@ subroutine atm_srk3(domain, dt, itimestep, exchange_halo_group) end do !$OMP END PARALLEL DO call mpas_timer_stop('microphysics') + call update_h2d_post_microphysics( block % configs, state, diag, tend, 2) end if ! diff --git a/src/core_atmosphere/physics/mpas_atmphys_interface.F b/src/core_atmosphere/physics/mpas_atmphys_interface.F index afd8ed2810..4e65cffd7a 100644 --- a/src/core_atmosphere/physics/mpas_atmphys_interface.F +++ b/src/core_atmosphere/physics/mpas_atmphys_interface.F @@ -6,13 +6,6 @@ ! distributed with this code, or at http://mpas-dev.github.com/license.html ! 
!================================================================================================================= - module mpas_atmphys_interface - use mpas_kind_types - use mpas_pool_routines - - use mpas_atmphys_constants - use mpas_atmphys_vars - use mpas_timer #ifdef MPAS_OPENACC #define MPAS_ACC_TIMER_START(X) call mpas_timer_start(X) @@ -22,6 +15,13 @@ module mpas_atmphys_interface #define MPAS_ACC_TIMER_STOP(X) #endif + module mpas_atmphys_interface + use mpas_kind_types + use mpas_pool_routines + + use mpas_atmphys_constants + use mpas_atmphys_vars + use mpas_timer implicit none private @@ -555,6 +555,40 @@ subroutine MPAS_to_physics(configs,mesh,state,time_lev,diag,diag_physics,its,ite end subroutine MPAS_to_physics +!================================================================================================================= + subroutine update_d2h_pre_microphysics(configs,state,diag,time_lev) +!================================================================================================================= + +!input variables: + type(mpas_pool_type),intent(in):: configs + type(mpas_pool_type),intent(in):: state + type(mpas_pool_type),intent(in):: diag + + integer:: time_lev + +!local pointers: + real(kind=RKIND),dimension(:,:),pointer :: exner,pressure_b,w + real(kind=RKIND),dimension(:,:),pointer :: rho_zz,theta_m,pressure_p + real(kind=RKIND),dimension(:,:,:),pointer:: scalars + + + MPAS_ACC_TIMER_START('update_d2h_pre_microphysics [ACC_data_xfer]') + call mpas_pool_get_array(diag,'exner' ,exner ) + call mpas_pool_get_array(diag,'pressure_base',pressure_b) + call mpas_pool_get_array(diag,'pressure_p' ,pressure_p) + + call mpas_pool_get_array(state,'rho_zz' ,rho_zz ,time_lev) + call mpas_pool_get_array(state,'theta_m',theta_m,time_lev) + call mpas_pool_get_array(state,'w' ,w ,time_lev) + !$acc update host(exner, pressure_b, pressure_p, rho_zz, theta_m, w) + + call mpas_pool_get_array(state,'scalars',scalars,time_lev) + !$acc update host(scalars) + 
+ MPAS_ACC_TIMER_STOP('update_d2h_pre_microphysics [ACC_data_xfer]') + +end subroutine update_d2h_pre_microphysics + !================================================================================================================= subroutine microphysics_from_MPAS(configs,mesh,state,time_lev,diag,diag_physics,tend_physics,its,ite) !================================================================================================================= @@ -598,7 +632,6 @@ subroutine microphysics_from_MPAS(configs,mesh,state,time_lev,diag,diag_physics, call mpas_pool_get_array(mesh,'zgrid',zgrid) call mpas_pool_get_array(mesh,'zz' ,zz ) - MPAS_ACC_TIMER_START('atm_srk3: physics ACC_data_xfer') call mpas_pool_get_array(diag,'exner' ,exner ) call mpas_pool_get_array(diag,'pressure_base',pressure_b) call mpas_pool_get_array(diag,'pressure_p' ,pressure_p) @@ -606,14 +639,11 @@ subroutine microphysics_from_MPAS(configs,mesh,state,time_lev,diag,diag_physics, call mpas_pool_get_array(state,'rho_zz' ,rho_zz ,time_lev) call mpas_pool_get_array(state,'theta_m',theta_m,time_lev) call mpas_pool_get_array(state,'w' ,w ,time_lev) - !$acc update host(exner, pressure_b, pressure_p, rho_zz, theta_m, w) call mpas_pool_get_dimension(state,'index_qv',index_qv) call mpas_pool_get_dimension(state,'index_qc',index_qc) call mpas_pool_get_dimension(state,'index_qr',index_qr) - call mpas_pool_get_array(state,'scalars',scalars,time_lev) - !$acc update host(scalars) - MPAS_ACC_TIMER_STOP('atm_srk3: physics ACC_data_xfer') + call mpas_pool_get_array(state,'scalars',scalars,time_lev) qv => scalars(index_qv,:,:) qc => scalars(index_qc,:,:) qr => scalars(index_qr,:,:) @@ -1054,13 +1084,49 @@ subroutine microphysics_to_MPAS(configs,mesh,state,time_lev,diag,diag_physics,te case default end select mp_tend_select - MPAS_ACC_TIMER_START('atm_srk3: physics ACC_data_xfer') + end subroutine microphysics_to_MPAS + + 
!================================================================================================================= + subroutine update_h2d_post_microphysics(configs,state,diag,tend,time_lev) +!================================================================================================================= + +!input variables: + type(mpas_pool_type),intent(in):: configs + type(mpas_pool_type),intent(in):: state + type(mpas_pool_type),intent(in):: diag + type(mpas_pool_type),intent(inout):: tend + + + integer:: time_lev + +!local pointers: + real(kind=RKIND),dimension(:,:),pointer :: exner,exner_b,pressure_b,rtheta_p,rtheta_b + real(kind=RKIND),dimension(:,:),pointer :: rho_zz,theta_m,pressure_p + real(kind=RKIND),dimension(:,:,:),pointer:: scalars + real(kind=RKIND),dimension(:,:),pointer :: rt_diabatic_tend + + call mpas_pool_get_array(diag,'exner' ,exner ) + call mpas_pool_get_array(diag,'exner_base' ,exner_b ) + call mpas_pool_get_array(diag,'pressure_base',pressure_b) + call mpas_pool_get_array(diag,'pressure_p' ,pressure_p) + call mpas_pool_get_array(diag,'rtheta_base' ,rtheta_b ) + call mpas_pool_get_array(diag,'rtheta_p' ,rtheta_p ) + + call mpas_pool_get_array(state,'rho_zz' ,rho_zz ,time_lev) + call mpas_pool_get_array(state,'theta_m',theta_m,time_lev) + + call mpas_pool_get_array(state,'scalars',scalars,time_lev) + + call mpas_pool_get_array(tend,'rt_diabatic_tend',rt_diabatic_tend) + + + MPAS_ACC_TIMER_START('update_h2d_post_microphysics [ACC_data_xfer]') !$acc update device(exner, exner_b, pressure_b, pressure_p, rtheta_b) !$acc update device(rtheta_p, rho_zz, theta_m, scalars) !$acc update device(rt_diabatic_tend) - MPAS_ACC_TIMER_STOP('atm_srk3: physics ACC_data_xfer') + MPAS_ACC_TIMER_STOP('update_h2d_post_microphysics [ACC_data_xfer]') - end subroutine microphysics_to_MPAS +end subroutine update_h2d_post_microphysics !================================================================================================================= end module 
mpas_atmphys_interface diff --git a/src/core_atmosphere/physics/mpas_atmphys_todynamics.F b/src/core_atmosphere/physics/mpas_atmphys_todynamics.F index 290cc56330..2cb94a7ba5 100644 --- a/src/core_atmosphere/physics/mpas_atmphys_todynamics.F +++ b/src/core_atmosphere/physics/mpas_atmphys_todynamics.F @@ -6,6 +6,15 @@ ! distributed with this code, or at http://mpas-dev.github.com/license.html ! !================================================================================================================= + +#ifdef MPAS_OPENACC +#define MPAS_ACC_TIMER_START(X) call mpas_timer_start(X) +#define MPAS_ACC_TIMER_STOP(X) call mpas_timer_stop(X) +#else +#define MPAS_ACC_TIMER_START(X) +#define MPAS_ACC_TIMER_STOP(X) +#endif + module mpas_atmphys_todynamics use mpas_kind_types use mpas_pool_routines @@ -17,15 +26,7 @@ module mpas_atmphys_todynamics implicit none private - public:: physics_get_tend - -#ifdef MPAS_OPENACC -#define MPAS_ACC_TIMER_START(X) call mpas_timer_start(X) -#define MPAS_ACC_TIMER_STOP(X) call mpas_timer_stop(X) -#else -#define MPAS_ACC_TIMER_START(X) -#define MPAS_ACC_TIMER_STOP(X) -#endif + public:: physics_get_tend, update_d2h_pre_physics_get_tend, update_h2d_post_physics_get_tend !Interface between the physics parameterizations and the non-hydrostatic dynamical core. @@ -69,6 +70,40 @@ end subroutine halo_exchange_routine contains + +!================================================================================================================= + subroutine update_d2h_pre_physics_get_tend(configs,state,diag,tend) +!================================================================================================================= + +!input variables: + type(mpas_pool_type),intent(in):: configs + type(mpas_pool_type),intent(in):: state + type(mpas_pool_type),intent(in):: diag + type(mpas_pool_type),intent(in):: tend + +!local variables: + real(kind=RKIND),dimension(:,:),pointer:: mass ! 
time level 2 rho_zz + real(kind=RKIND),dimension(:,:),pointer:: mass_edge ! diag rho_edge + real(kind=RKIND),dimension(:,:),pointer:: theta_m ! time level 1 + real(kind=RKIND),dimension(:,:,:),pointer:: scalars + + real(kind=RKIND),dimension(:,:),pointer:: tend_u_phys + real(kind=RKIND),dimension(:,:,:),pointer:: tend_scalars + + MPAS_ACC_TIMER_START('atm_srk3: physics ACC_data_xfer') + call mpas_pool_get_array(state,'theta_m' ,theta_m,1) + call mpas_pool_get_array(state,'scalars' ,scalars,1) + call mpas_pool_get_array(state,'rho_zz' ,mass,2 ) + call mpas_pool_get_array(diag ,'rho_edge',mass_edge) + call mpas_pool_get_array(diag ,'tend_u_phys',tend_u_phys) + + !$acc update self(theta_m, scalars, mass, mass_edge) + + call mpas_pool_get_array(tend,'scalars_tend',tend_scalars) + !$acc update self(tend_scalars) ! Probably not needed + MPAS_ACC_TIMER_STOP('atm_srk3: physics ACC_data_xfer') + + end subroutine update_d2h_pre_physics_get_tend !================================================================================================================= subroutine physics_get_tend(block,mesh,state,diag,tend,tend_physics,configs,rk_step,dynamics_substep, & @@ -136,14 +171,12 @@ subroutine physics_get_tend(block,mesh,state,diag,tend,tend_physics,configs,rk_s call mpas_pool_get_config(configs,'config_radt_lw_scheme' ,radt_lw_scheme ) call mpas_pool_get_config(configs,'config_radt_sw_scheme' ,radt_sw_scheme ) - MPAS_ACC_TIMER_START('atm_srk3: physics ACC_data_xfer') call mpas_pool_get_array(state,'theta_m' ,theta_m,1) call mpas_pool_get_array(state,'scalars' ,scalars,1) call mpas_pool_get_array(state,'rho_zz' ,mass,2 ) call mpas_pool_get_array(diag ,'rho_edge',mass_edge) call mpas_pool_get_array(diag ,'tend_u_phys',tend_u_phys) - !$acc update self(theta_m, scalars, mass, mass_edge) call mpas_pool_get_dimension(state,'index_qv',index_qv) call mpas_pool_get_dimension(state,'index_qc',index_qc) call mpas_pool_get_dimension(state,'index_qr',index_qr) @@ -181,8 +214,6 @@ 
subroutine physics_get_tend(block,mesh,state,diag,tend,tend_physics,configs,rk_s call mpas_pool_get_array(tend_physics,'rthratensw',rthratensw) call mpas_pool_get_array(tend,'scalars_tend',tend_scalars) -!$acc update self(tend_scalars) ! Probably not needed -MPAS_ACC_TIMER_STOP('atm_srk3: physics ACC_data_xfer') !initialize the tendency for the potential temperature and all scalars due to PBL, convection, @@ -232,10 +263,6 @@ subroutine physics_get_tend(block,mesh,state,diag,tend,tend_physics,configs,rk_s tend_th,tend_rtheta_physics,tend_scalars,tend_ru_physics,tend_u_phys, & exchange_halo_group) -MPAS_ACC_TIMER_START('atm_srk3: physics ACC_data_xfer') -!$acc update device(tend_scalars) -MPAS_ACC_TIMER_STOP('atm_srk3: physics ACC_data_xfer') - !clean up any pointers that were allocated with zero size before the call to physics_get_tend_work: if(size(rucuten) == 0 ) deallocate(rucuten ) if(size(rvcuten) == 0 ) deallocate(rvcuten ) @@ -262,6 +289,26 @@ subroutine physics_get_tend(block,mesh,state,diag,tend,tend_physics,configs,rk_s end subroutine physics_get_tend + !================================================================================================================= + subroutine update_h2d_post_physics_get_tend(configs,state,diag,tend) +!================================================================================================================= + +!input variables: + type(mpas_pool_type),intent(in):: configs + type(mpas_pool_type),intent(in):: state + type(mpas_pool_type),intent(in):: diag + type(mpas_pool_type),intent(in):: tend + +!local variables: + real(kind=RKIND),dimension(:,:,:),pointer:: tend_scalars + + MPAS_ACC_TIMER_START('atm_srk3: physics ACC_data_xfer') + call mpas_pool_get_array(tend,'scalars_tend',tend_scalars) + !$acc update device(tend_scalars) + MPAS_ACC_TIMER_STOP('atm_srk3: physics ACC_data_xfer') + + end subroutine update_h2d_post_physics_get_tend + 
!================================================================================================================= subroutine physics_get_tend_work( & block,mesh,nCells,nEdges,nCellsSolve,nEdgesSolve,rk_step,dynamics_substep, & From 66a3179b99af3654edcd8290116abe8b113b8213 Mon Sep 17 00:00:00 2001 From: Abishek Gopal Date: Thu, 3 Jul 2025 17:10:06 -0600 Subject: [PATCH 114/146] Removing acc data xfer timers for device variables using create/delete --- .../dynamics/mpas_atm_time_integration.F | 14 +------------- 1 file changed, 1 insertion(+), 13 deletions(-) diff --git a/src/core_atmosphere/dynamics/mpas_atm_time_integration.F b/src/core_atmosphere/dynamics/mpas_atm_time_integration.F index 92cbb677b1..24c280cb54 100644 --- a/src/core_atmosphere/dynamics/mpas_atm_time_integration.F +++ b/src/core_atmosphere/dynamics/mpas_atm_time_integration.F @@ -3292,9 +3292,7 @@ subroutine atm_compute_vert_imp_coefs_work(nCells, moist_start, moist_end, dts, real (kind=RKIND) :: dtseps, c2, qtotal, rcv real (kind=RKIND), dimension( nVertLevels ) :: b_tri, c_tri - MPAS_ACC_TIMER_START('atm_compute_vert_imp_coefs_work [ACC_data_xfer]') !$acc enter data create(b_tri, c_tri) - MPAS_ACC_TIMER_STOP('atm_compute_vert_imp_coefs_work [ACC_data_xfer]') ! set coefficients dtseps = .5*dts*(1.+epssm) @@ -3374,9 +3372,7 @@ subroutine atm_compute_vert_imp_coefs_work(nCells, moist_start, moist_end, dts, end do ! 
loop over cells !$acc end parallel - MPAS_ACC_TIMER_START('atm_compute_vert_imp_coefs_work [ACC_data_xfer]') - !$acc exit data copyout(b_tri, c_tri) - MPAS_ACC_TIMER_STOP('atm_compute_vert_imp_coefs_work [ACC_data_xfer]') + !$acc exit data delete(b_tri, c_tri) end subroutine atm_compute_vert_imp_coefs_work @@ -7941,9 +7937,7 @@ subroutine atm_bdy_adjust_dynamics_relaxzone_tend( config, tend, state, diag, me divdamp_coef = divdamp_coef_ptr vertexDegree = vertexDegree_ptr - MPAS_ACC_TIMER_START('atm_bdy_adjust_dynamics_relaxzone_tend [ACC_data_xfer]') !$acc enter data create(divergence1, divergence2, vorticity1, vorticity2) - MPAS_ACC_TIMER_STOP('atm_bdy_adjust_dynamics_relaxzone_tend [ACC_data_xfer]') ! First, Rayleigh damping terms for ru, rtheta_m and rho_zz !$acc parallel default(present) @@ -8088,9 +8082,7 @@ subroutine atm_bdy_adjust_dynamics_relaxzone_tend( config, tend, state, diag, me end do ! end of loop over edges !$acc end parallel - MPAS_ACC_TIMER_START('atm_bdy_adjust_dynamics_relaxzone_tend [ACC_data_xfer]') !$acc exit data delete(divergence1, divergence2, vorticity1, vorticity2) - MPAS_ACC_TIMER_STOP('atm_bdy_adjust_dynamics_relaxzone_tend [ACC_data_xfer]') end subroutine atm_bdy_adjust_dynamics_relaxzone_tend @@ -8226,9 +8218,7 @@ subroutine atm_bdy_adjust_scalars_work( scalars_new, scalars_driving, dt, dt_rk, integer :: iCell, iEdge, iScalar, i, k, cell1, cell2 !--- - MPAS_ACC_TIMER_START('atm_bdy_adjust_scalars [ACC_data_xfer]') !$acc enter data create(scalars_tmp) - MPAS_ACC_TIMER_STOP('atm_bdy_adjust_scalars [ACC_data_xfer]') !$acc parallel default(present) !$acc loop gang worker @@ -8310,9 +8300,7 @@ subroutine atm_bdy_adjust_scalars_work( scalars_new, scalars_driving, dt, dt_rk, end do !$acc end parallel - MPAS_ACC_TIMER_START('atm_bdy_adjust_scalars [ACC_data_xfer]') !$acc exit data delete(scalars_tmp) - MPAS_ACC_TIMER_STOP('atm_bdy_adjust_scalars [ACC_data_xfer]') end subroutine atm_bdy_adjust_scalars_work From 
f34dd04f26caccd21b8ecfb9d732f6ad89c0f71d Mon Sep 17 00:00:00 2001 From: Abishek Gopal Date: Mon, 7 Jul 2025 19:10:23 -0600 Subject: [PATCH 115/146] Using acc declare create for rho_zz_int and corresponding cleanup --- .../dynamics/mpas_atm_time_integration.F | 10 +--------- 1 file changed, 1 insertion(+), 9 deletions(-) diff --git a/src/core_atmosphere/dynamics/mpas_atm_time_integration.F b/src/core_atmosphere/dynamics/mpas_atm_time_integration.F index 24c280cb54..a1cf331ee5 100644 --- a/src/core_atmosphere/dynamics/mpas_atm_time_integration.F +++ b/src/core_atmosphere/dynamics/mpas_atm_time_integration.F @@ -86,6 +86,7 @@ end subroutine halo_exchange_routine !$acc declare create(s_max_arr, s_min_arr) !$acc declare create(flux_array, flux_upwind_tmp_arr) !$acc declare create(flux_tmp_arr, wdtn_arr) + !$acc declare create(rho_zz_int) real (kind=RKIND), dimension(:,:), allocatable :: ru_driving_tend ! regional_MPAS addition real (kind=RKIND), dimension(:,:), allocatable :: rt_driving_tend ! regional_MPAS addition @@ -5052,12 +5053,6 @@ subroutine atm_advance_scalars_mono_work(field_name, block, state, nCells, nEdge ! The transport will maintain this positive definite solution and optionally, shape preservation (monotonicity). - MPAS_ACC_TIMER_START('atm_advance_scalars_mono [ACC_data_xfer]') - if (local_advance_density) then - !$acc enter data copyin(rho_zz_int) - end if - MPAS_ACC_TIMER_STOP('atm_advance_scalars_mono [ACC_data_xfer]') - !$acc parallel !$acc loop gang worker @@ -5648,9 +5643,6 @@ subroutine atm_advance_scalars_mono_work(field_name, block, state, nCells, nEdge end do ! 
loop over scalars MPAS_ACC_TIMER_START('atm_advance_scalars_mono [ACC_data_xfer]') - if (local_advance_density) then - !$acc exit data copyout(rho_zz_int) - end if !$acc exit data delete(scale_arr) MPAS_ACC_TIMER_STOP('atm_advance_scalars_mono [ACC_data_xfer]') From bd75dbad0646b71c7297140cd88c873a12362787 Mon Sep 17 00:00:00 2001 From: Abishek Gopal Date: Mon, 7 Jul 2025 19:12:52 -0600 Subject: [PATCH 116/146] Removing atm_advance_scalars_mono ACC_data_xfer timers around create/delete --- src/core_atmosphere/dynamics/mpas_atm_time_integration.F | 4 ---- 1 file changed, 4 deletions(-) diff --git a/src/core_atmosphere/dynamics/mpas_atm_time_integration.F b/src/core_atmosphere/dynamics/mpas_atm_time_integration.F index a1cf331ee5..42da40ff43 100644 --- a/src/core_atmosphere/dynamics/mpas_atm_time_integration.F +++ b/src/core_atmosphere/dynamics/mpas_atm_time_integration.F @@ -5137,9 +5137,7 @@ subroutine atm_advance_scalars_mono_work(field_name, block, state, nCells, nEdge end if - MPAS_ACC_TIMER_START('atm_advance_scalars_mono [ACC_data_xfer]') !$acc enter data create(scale_arr) - MPAS_ACC_TIMER_STOP('atm_advance_scalars_mono [ACC_data_xfer]') do iScalar = 1, num_scalars @@ -5642,9 +5640,7 @@ subroutine atm_advance_scalars_mono_work(field_name, block, state, nCells, nEdge end do ! loop over scalars - MPAS_ACC_TIMER_START('atm_advance_scalars_mono [ACC_data_xfer]') !$acc exit data delete(scale_arr) - MPAS_ACC_TIMER_STOP('atm_advance_scalars_mono [ACC_data_xfer]') end subroutine atm_advance_scalars_mono_work From 688f3995a229c3b89b5cd30d54c8f54807e5ed1a Mon Sep 17 00:00:00 2001 From: Abishek Gopal Date: Tue, 8 Jul 2025 16:31:09 -0600 Subject: [PATCH 117/146] Simplifying OpenACC data transfers around the call to mpas_reconstruct_2d This commit introduces two OpenACC data transfer routines, mpas_reconstruct_2d_h2d and mpas_reconstruct_2d_d2h in order to remove the data transfers from the mpas_reconstruct_2d routine itself. 
This also allows us to remove extraneous data movements within the atm_srk3 routine. mpas_reconstruct_2d_h2d and mpas_reconstruct_2d_d2h are called before and after the call to mpas_reconstruct in atm_mpas_init_block. And the reconstructed vector fields are also copied to and from the device before and after every dynamics call in mpas_atm_pre_dynamics_h2d and mpas_atm_post_dynamics_d2h. --- .../dynamics/mpas_atm_time_integration.F | 40 ++++++ src/core_atmosphere/mpas_atm_core.F | 4 + src/operators/mpas_vector_reconstruction.F | 115 ++++++++++++++++-- 3 files changed, 148 insertions(+), 11 deletions(-) diff --git a/src/core_atmosphere/dynamics/mpas_atm_time_integration.F b/src/core_atmosphere/dynamics/mpas_atm_time_integration.F index 42da40ff43..d113931054 100644 --- a/src/core_atmosphere/dynamics/mpas_atm_time_integration.F +++ b/src/core_atmosphere/dynamics/mpas_atm_time_integration.F @@ -866,6 +866,7 @@ subroutine mpas_atm_pre_dynamics_h2d(domain) #ifdef MPAS_OPENACC + type (mpas_pool_type), pointer :: mesh type (mpas_pool_type), pointer :: state type (mpas_pool_type), pointer :: diag type (mpas_pool_type), pointer :: tend @@ -899,6 +900,10 @@ subroutine mpas_atm_pre_dynamics_h2d(domain) real (kind=RKIND), dimension(:,:,:), pointer :: scalars_1, scalars_2 real (kind=RKIND), dimension(:,:), pointer :: ruAvg, wwAvg, ruAvg_split, wwAvg_split + integer, pointer :: nCells_ptr + integer :: nCells + real (kind=RKIND), dimension(:,:), pointer :: uReconstructZonal, uReconstructMeridional, uReconstructX, uReconstructY, uReconstructZ + real (kind=RKIND), dimension(:,:), pointer :: tend_ru, tend_rt, tend_rho, tend_rw, rt_diabatic_tend real (kind=RKIND), dimension(:,:), pointer :: tend_u_euler, tend_w_euler, tend_theta_euler real(kind=RKIND), dimension(:,:), pointer :: tend_w_pgf, tend_w_buoy @@ -912,11 +917,13 @@ subroutine mpas_atm_pre_dynamics_h2d(domain) real (kind=RKIND), dimension(:,:,:), pointer :: lbc_scalars, lbc_tend_scalars + nullify(mesh) nullify(state) 
nullify(diag) nullify(tend) nullify(tend_physics) nullify(lbc) + call mpas_pool_get_subpool(domain % blocklist % structs, 'mesh', mesh) call mpas_pool_get_subpool(domain % blocklist % structs, 'state', state) call mpas_pool_get_subpool(domain % blocklist % structs, 'diag', diag) call mpas_pool_get_subpool(domain % blocklist % structs, 'tend', tend) @@ -1026,6 +1033,19 @@ subroutine mpas_atm_pre_dynamics_h2d(domain) call mpas_pool_get_array(diag, 'wwAvg_split', wwAvg_split) !$acc enter data copyin(wwAvg_split) + call mpas_pool_get_dimension(mesh, 'nCellsSolve', nCells_ptr) + nCells = nCells_ptr + call mpas_pool_get_array(diag, 'uReconstructX', uReconstructX) + !$acc enter data create(uReconstructX(:,1:nCells)) + call mpas_pool_get_array(diag, 'uReconstructY', uReconstructY) + !$acc enter data create(uReconstructY(:,1:nCells)) + call mpas_pool_get_array(diag, 'uReconstructZ', uReconstructZ) + !$acc enter data create(uReconstructZ(:,1:nCells)) + call mpas_pool_get_array(diag, 'uReconstructZonal', uReconstructZonal) + !$acc enter data create(uReconstructZonal(:,1:nCells)) + call mpas_pool_get_array(diag, 'uReconstructMeridional', uReconstructMeridional) + !$acc enter data create(uReconstructMeridional(:,1:nCells)) + call mpas_pool_get_array(state, 'u', u_1, 1) !$acc enter data copyin(u_1) call mpas_pool_get_array(state, 'u', u_2, 2) @@ -1128,6 +1148,7 @@ subroutine mpas_atm_post_dynamics_d2h(domain) #ifdef MPAS_OPENACC + type (mpas_pool_type), pointer :: mesh type (mpas_pool_type), pointer :: state type (mpas_pool_type), pointer :: diag type (mpas_pool_type), pointer :: tend @@ -1161,6 +1182,10 @@ subroutine mpas_atm_post_dynamics_d2h(domain) real (kind=RKIND), dimension(:,:,:), pointer :: scalars_1, scalars_2 real (kind=RKIND), dimension(:,:), pointer :: ruAvg, wwAvg, ruAvg_split, wwAvg_split + integer, pointer :: nCells_ptr + integer :: nCells + real (kind=RKIND), dimension(:,:), pointer :: uReconstructZonal, uReconstructMeridional, uReconstructX, uReconstructY, 
uReconstructZ + real (kind=RKIND), dimension(:,:), pointer :: tend_ru, tend_rt, tend_rho, tend_rw, rt_diabatic_tend real (kind=RKIND), dimension(:,:), pointer :: tend_u_euler, tend_w_euler, tend_theta_euler real(kind=RKIND), dimension(:,:), pointer :: tend_w_pgf, tend_w_buoy @@ -1174,11 +1199,13 @@ subroutine mpas_atm_post_dynamics_d2h(domain) real (kind=RKIND), dimension(:,:,:), pointer :: lbc_scalars, lbc_tend_scalars + nullify(mesh) nullify(state) nullify(diag) nullify(tend) nullify(tend_physics) nullify(lbc) + call mpas_pool_get_subpool(domain % blocklist % structs, 'mesh', mesh) call mpas_pool_get_subpool(domain % blocklist % structs, 'state', state) call mpas_pool_get_subpool(domain % blocklist % structs, 'diag', diag) call mpas_pool_get_subpool(domain % blocklist % structs, 'tend', tend) @@ -1288,6 +1315,19 @@ subroutine mpas_atm_post_dynamics_d2h(domain) call mpas_pool_get_array(diag, 'wwAvg_split', wwAvg_split) !$acc exit data copyout(wwAvg_split) + call mpas_pool_get_dimension(mesh, 'nCellsSolve', nCells_ptr) + nCells = nCells_ptr + call mpas_pool_get_array(diag, 'uReconstructX', uReconstructX) + !$acc exit data copyout(uReconstructX(:,1:nCells)) + call mpas_pool_get_array(diag, 'uReconstructY', uReconstructY) + !$acc exit data copyout(uReconstructY(:,1:nCells)) + call mpas_pool_get_array(diag, 'uReconstructZ', uReconstructZ) + !$acc exit data copyout(uReconstructZ(:,1:nCells)) + call mpas_pool_get_array(diag, 'uReconstructZonal', uReconstructZonal) + !$acc exit data copyout(uReconstructZonal(:,1:nCells)) + call mpas_pool_get_array(diag, 'uReconstructMeridional', uReconstructMeridional) + !$acc exit data copyout(uReconstructMeridional(:,1:nCells)) + call mpas_pool_get_array(state, 'u', u_1, 1) !$acc exit data copyout(u_1) call mpas_pool_get_array(state, 'u', u_2, 2) diff --git a/src/core_atmosphere/mpas_atm_core.F b/src/core_atmosphere/mpas_atm_core.F index d1b9931c6c..087cfc2f2c 100644 --- a/src/core_atmosphere/mpas_atm_core.F +++ 
b/src/core_atmosphere/mpas_atm_core.F @@ -543,6 +543,8 @@ subroutine atm_mpas_init_block(dminfo, stream_manager, block, mesh, dt) call mpas_pool_get_array(diag, 'uReconstructZ', uReconstructZ) call mpas_pool_get_array(diag, 'uReconstructZonal', uReconstructZonal) call mpas_pool_get_array(diag, 'uReconstructMeridional', uReconstructMeridional) + call mpas_reconstruct_2d_h2d(mesh, u, uReconstructX, uReconstructY, uReconstructZ, & + uReconstructZonal, uReconstructMeridional) call mpas_reconstruct(mesh, u, & uReconstructX, & uReconstructY, & @@ -550,6 +552,8 @@ subroutine atm_mpas_init_block(dminfo, stream_manager, block, mesh, dt) uReconstructZonal, & uReconstructMeridional & ) + call mpas_reconstruct_2d_d2h(mesh, u, uReconstructX, uReconstructY, uReconstructZ, & + uReconstructZonal, uReconstructMeridional) #ifdef DO_PHYSICS !proceed with initialization of physics parameterization if moist_physics is set to true: diff --git a/src/operators/mpas_vector_reconstruction.F b/src/operators/mpas_vector_reconstruction.F index 605da9cd6d..2aa4ca2aee 100644 --- a/src/operators/mpas_vector_reconstruction.F +++ b/src/operators/mpas_vector_reconstruction.F @@ -258,16 +258,6 @@ subroutine mpas_reconstruct_2d(meshPool, u, uReconstructX, uReconstructY, uRecon call mpas_pool_get_config(meshPool, 'on_a_sphere', on_a_sphere) - MPAS_ACC_TIMER_START('mpas_reconstruct_2d [ACC_data_xfer]') - ! Only use sections needed, nCells may be all cells or only non-halo cells - !$acc enter data copyin(coeffs_reconstruct(:,:,1:nCells),nEdgesOnCell(1:nCells), & - !$acc edgesOnCell(:,1:nCells),latCell(1:nCells),lonCell(1:nCells)) - !$acc enter data copyin(u(:,:)) - !$acc enter data create(uReconstructX(:,1:nCells),uReconstructY(:,1:nCells), & - !$acc uReconstructZ(:,1:nCells),uReconstructZonal(:,1:nCells), & - !$acc uReconstructMeridional(:,1:nCells)) - MPAS_ACC_TIMER_STOP('mpas_reconstruct_2d [ACC_data_xfer]') - ! 
loop over cell centers !$omp do schedule(runtime) !$acc parallel default(present) @@ -337,6 +327,109 @@ subroutine mpas_reconstruct_2d(meshPool, u, uReconstructX, uReconstructY, uRecon !$omp end do end if + end subroutine mpas_reconstruct_2d!}}} + + + subroutine mpas_reconstruct_2d_h2d(meshPool, u, uReconstructX, uReconstructY, uReconstructZ, uReconstructZonal, uReconstructMeridional, includeHalos)!{{{ + + implicit none + + type (mpas_pool_type), intent(in) :: meshPool !< Input: Mesh information + real (kind=RKIND), dimension(:,:), intent(in) :: u !< Input: Velocity field on edges + real (kind=RKIND), dimension(:,:), intent(out) :: uReconstructX !< Output: X Component of velocity reconstructed to cell centers + real (kind=RKIND), dimension(:,:), intent(out) :: uReconstructY !< Output: Y Component of velocity reconstructed to cell centers + real (kind=RKIND), dimension(:,:), intent(out) :: uReconstructZ !< Output: Z Component of velocity reconstructed to cell centers + real (kind=RKIND), dimension(:,:), intent(out) :: uReconstructZonal !< Output: Zonal Component of velocity reconstructed to cell centers + real (kind=RKIND), dimension(:,:), intent(out) :: uReconstructMeridional !< Output: Meridional Component of velocity reconstructed to cell centers + logical, optional, intent(in) :: includeHalos !< Input: Optional logical that allows reconstruction over halo regions + + logical :: includeHalosLocal + integer, dimension(:,:), pointer :: edgesOnCell + integer, dimension(:), pointer :: nEdgesOnCell + integer :: nCells + integer, pointer :: nCells_ptr + real(kind=RKIND), dimension(:), pointer :: latCell, lonCell + real (kind=RKIND), dimension(:,:,:), pointer :: coeffs_reconstruct + + if ( present(includeHalos) ) then + includeHalosLocal = includeHalos + else + includeHalosLocal = .false. + end if + + ! stored arrays used during compute procedure + call mpas_pool_get_array(meshPool, 'coeffs_reconstruct', coeffs_reconstruct) + + ! 
temporary variables + call mpas_pool_get_array(meshPool, 'nEdgesOnCell', nEdgesOnCell) + call mpas_pool_get_array(meshPool, 'edgesOnCell', edgesOnCell) + call mpas_pool_get_array(meshPool, 'latCell', latCell) + call mpas_pool_get_array(meshPool, 'lonCell', lonCell) + + if ( includeHalosLocal ) then + call mpas_pool_get_dimension(meshPool, 'nCells', nCells_ptr) + else + call mpas_pool_get_dimension(meshPool, 'nCellsSolve', nCells_ptr) + end if + nCells = nCells_ptr + + MPAS_ACC_TIMER_START('mpas_reconstruct_2d [ACC_data_xfer]') + ! Only use sections needed, nCells may be all cells or only non-halo cells + !$acc enter data copyin(coeffs_reconstruct(:,:,1:nCells),nEdgesOnCell(1:nCells), & + !$acc edgesOnCell(:,1:nCells),latCell(1:nCells),lonCell(1:nCells)) + !$acc enter data copyin(u(:,:)) + !$acc enter data create(uReconstructX(:,1:nCells),uReconstructY(:,1:nCells), & + !$acc uReconstructZ(:,1:nCells),uReconstructZonal(:,1:nCells), & + !$acc uReconstructMeridional(:,1:nCells)) + MPAS_ACC_TIMER_STOP('mpas_reconstruct_2d [ACC_data_xfer]') + + end subroutine mpas_reconstruct_2d_h2d + + + + subroutine mpas_reconstruct_2d_d2h(meshPool, u, uReconstructX, uReconstructY, uReconstructZ, uReconstructZonal, uReconstructMeridional, includeHalos)!{{{ + + implicit none + + type (mpas_pool_type), intent(in) :: meshPool !< Input: Mesh information + real (kind=RKIND), dimension(:,:), intent(in) :: u !< Input: Velocity field on edges + real (kind=RKIND), dimension(:,:), intent(out) :: uReconstructX !< Output: X Component of velocity reconstructed to cell centers + real (kind=RKIND), dimension(:,:), intent(out) :: uReconstructY !< Output: Y Component of velocity reconstructed to cell centers + real (kind=RKIND), dimension(:,:), intent(out) :: uReconstructZ !< Output: Z Component of velocity reconstructed to cell centers + real (kind=RKIND), dimension(:,:), intent(out) :: uReconstructZonal !< Output: Zonal Component of velocity reconstructed to cell centers + real (kind=RKIND), 
dimension(:,:), intent(out) :: uReconstructMeridional !< Output: Meridional Component of velocity reconstructed to cell centers + logical, optional, intent(in) :: includeHalos !< Input: Optional logical that allows reconstruction over halo regions + + logical :: includeHalosLocal + integer, dimension(:,:), pointer :: edgesOnCell + integer, dimension(:), pointer :: nEdgesOnCell + integer :: nCells + integer, pointer :: nCells_ptr + real(kind=RKIND), dimension(:), pointer :: latCell, lonCell + real (kind=RKIND), dimension(:,:,:), pointer :: coeffs_reconstruct + + if ( present(includeHalos) ) then + includeHalosLocal = includeHalos + else + includeHalosLocal = .false. + end if + + ! stored arrays used during compute procedure + call mpas_pool_get_array(meshPool, 'coeffs_reconstruct', coeffs_reconstruct) + + ! temporary variables + call mpas_pool_get_array(meshPool, 'nEdgesOnCell', nEdgesOnCell) + call mpas_pool_get_array(meshPool, 'edgesOnCell', edgesOnCell) + call mpas_pool_get_array(meshPool, 'latCell', latCell) + call mpas_pool_get_array(meshPool, 'lonCell', lonCell) + + if ( includeHalosLocal ) then + call mpas_pool_get_dimension(meshPool, 'nCells', nCells_ptr) + else + call mpas_pool_get_dimension(meshPool, 'nCellsSolve', nCells_ptr) + end if + nCells = nCells_ptr + MPAS_ACC_TIMER_START('mpas_reconstruct_2d [ACC_data_xfer]') !$acc exit data delete(coeffs_reconstruct(:,:,1:nCells),nEdgesOnCell(1:nCells), & !$acc edgesOnCell(:,1:nCells),latCell(1:nCells),lonCell(1:nCells)) @@ -346,7 +439,7 @@ subroutine mpas_reconstruct_2d(meshPool, u, uReconstructX, uReconstructY, uRecon !$acc uReconstructMeridional(:,1:nCells)) MPAS_ACC_TIMER_STOP('mpas_reconstruct_2d [ACC_data_xfer]') - end subroutine mpas_reconstruct_2d!}}} + end subroutine mpas_reconstruct_2d_d2h !*********************************************************************** From 59d0a4ba2d2e962a7dccba7517a50c37aba57942 Mon Sep 17 00:00:00 2001 From: Abishek Gopal Date: Thu, 14 Aug 2025 10:31:53 -0600 Subject: [PATCH 
118/146] Need to copyout u_2 and w_2 at the end of dynamics --- src/core_atmosphere/dynamics/mpas_atm_time_integration.F | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/core_atmosphere/dynamics/mpas_atm_time_integration.F b/src/core_atmosphere/dynamics/mpas_atm_time_integration.F index d113931054..3a41cdfecf 100644 --- a/src/core_atmosphere/dynamics/mpas_atm_time_integration.F +++ b/src/core_atmosphere/dynamics/mpas_atm_time_integration.F @@ -1331,11 +1331,11 @@ subroutine mpas_atm_post_dynamics_d2h(domain) call mpas_pool_get_array(state, 'u', u_1, 1) !$acc exit data copyout(u_1) call mpas_pool_get_array(state, 'u', u_2, 2) - !$acc exit data delete(u_2) + !$acc exit data copyout(u_2) call mpas_pool_get_array(state, 'w', w_1, 1) !$acc exit data copyout(w_1) call mpas_pool_get_array(state, 'w', w_2, 2) - !$acc exit data delete(w_2) + !$acc exit data copyout(w_2) call mpas_pool_get_array(state, 'theta_m', theta_m_1, 1) !$acc exit data copyout(theta_m_1) ! use values from atm_init_coupled_diagnostics call mpas_pool_get_array(state, 'theta_m', theta_m_2, 2) From 3e657cf46da9a920a19dfd8a462650cc9bdc3fce Mon Sep 17 00:00:00 2001 From: Abishek Gopal Date: Fri, 22 Aug 2025 13:36:24 -0600 Subject: [PATCH 119/146] Fixes to produce correct results with CURVATURE This commit introduces changes to ensure that building with -DCURVATURE still produces the correct results, compared to the nvhpc cpu reference. This involves removing the data movement of the reconstructed zonal and meridional velocities in the atm_compute_dyn_tend_work subroutine and instead using copyin for the same fields in mpas_atm_pre_dynamics_h2d. 
This commit also removes the ACC data Xfer timers for the atm_compute_dyn_tend_work subroutine, as we only have create/delete statements --- src/core_atmosphere/dynamics/mpas_atm_time_integration.F | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/core_atmosphere/dynamics/mpas_atm_time_integration.F b/src/core_atmosphere/dynamics/mpas_atm_time_integration.F index 3a41cdfecf..58db6793a3 100644 --- a/src/core_atmosphere/dynamics/mpas_atm_time_integration.F +++ b/src/core_atmosphere/dynamics/mpas_atm_time_integration.F @@ -1042,9 +1042,9 @@ subroutine mpas_atm_pre_dynamics_h2d(domain) call mpas_pool_get_array(diag, 'uReconstructZ', uReconstructZ) !$acc enter data create(uReconstructZ(:,1:nCells)) call mpas_pool_get_array(diag, 'uReconstructZonal', uReconstructZonal) - !$acc enter data create(uReconstructZonal(:,1:nCells)) + !$acc enter data copyin(uReconstructZonal(:,1:nCells)) call mpas_pool_get_array(diag, 'uReconstructMeridional', uReconstructMeridional) - !$acc enter data create(uReconstructMeridional(:,1:nCells)) + !$acc enter data copyin(uReconstructMeridional(:,1:nCells)) call mpas_pool_get_array(state, 'u', u_1, 1) !$acc enter data copyin(u_1) From 4467748a2c40f2ccb9d2ca39befeb70349807948 Mon Sep 17 00:00:00 2001 From: Abishek Gopal Date: Fri, 3 Oct 2025 15:05:50 -0600 Subject: [PATCH 120/146] Adding option to enable GPU execution of mpas_reconstruct_2d --- .../dynamics/mpas_atm_time_integration.F | 3 ++- src/core_atmosphere/mpas_atm_core.F | 3 ++- src/operators/mpas_vector_reconstruction.F | 17 +++++++++++++---- 3 files changed, 17 insertions(+), 6 deletions(-) diff --git a/src/core_atmosphere/dynamics/mpas_atm_time_integration.F b/src/core_atmosphere/dynamics/mpas_atm_time_integration.F index 58db6793a3..fdf6882baf 100644 --- a/src/core_atmosphere/dynamics/mpas_atm_time_integration.F +++ b/src/core_atmosphere/dynamics/mpas_atm_time_integration.F @@ -2671,7 +2671,8 @@ subroutine atm_srk3(domain, dt, itimestep, exchange_halo_group) 
uReconstructY, & uReconstructZ, & uReconstructZonal, & - uReconstructMeridional & + uReconstructMeridional, & + lACC = .true. & ) diff --git a/src/core_atmosphere/mpas_atm_core.F b/src/core_atmosphere/mpas_atm_core.F index 087cfc2f2c..248cdf2393 100644 --- a/src/core_atmosphere/mpas_atm_core.F +++ b/src/core_atmosphere/mpas_atm_core.F @@ -550,7 +550,8 @@ subroutine atm_mpas_init_block(dminfo, stream_manager, block, mesh, dt) uReconstructY, & uReconstructZ, & uReconstructZonal, & - uReconstructMeridional & + uReconstructMeridional, & + lACC = .true. & ) call mpas_reconstruct_2d_d2h(mesh, u, uReconstructX, uReconstructY, uReconstructZ, & uReconstructZonal, uReconstructMeridional) diff --git a/src/operators/mpas_vector_reconstruction.F b/src/operators/mpas_vector_reconstruction.F index 2aa4ca2aee..88d87474ab 100644 --- a/src/operators/mpas_vector_reconstruction.F +++ b/src/operators/mpas_vector_reconstruction.F @@ -202,7 +202,8 @@ end subroutine mpas_init_reconstruct!}}} !> Input: grid meta data and vector component data residing at cell edges !> Output: reconstructed vector field (measured in X,Y,Z) located at cell centers !----------------------------------------------------------------------- - subroutine mpas_reconstruct_2d(meshPool, u, uReconstructX, uReconstructY, uReconstructZ, uReconstructZonal, uReconstructMeridional, includeHalos)!{{{ + subroutine mpas_reconstruct_2d(meshPool, u, uReconstructX, uReconstructY, uReconstructZ, & + uReconstructZonal, uReconstructMeridional, includeHalos, lACC)!{{{ implicit none @@ -214,9 +215,11 @@ subroutine mpas_reconstruct_2d(meshPool, u, uReconstructX, uReconstructY, uRecon real (kind=RKIND), dimension(:,:), intent(out) :: uReconstructZonal !< Output: Zonal Component of velocity reconstructed to cell centers real (kind=RKIND), dimension(:,:), intent(out) :: uReconstructMeridional !< Output: Meridional Component of velocity reconstructed to cell centers logical, optional, intent(in) :: includeHalos !< Input: Optional logical 
that allows reconstruction over halo regions + logical, optional, intent(in) :: lACC !< Input: Optional logical that controls execution on the GPU with OpenACC ! temporary arrays needed in the compute procedure logical :: includeHalosLocal + logical :: lACCLocal integer, pointer :: nCells_ptr, nVertLevels_ptr integer :: nCells, nVertLevels integer, dimension(:,:), pointer :: edgesOnCell @@ -236,6 +239,12 @@ subroutine mpas_reconstruct_2d(meshPool, u, uReconstructX, uReconstructY, uRecon includeHalosLocal = .false. end if + if ( present(lACC) ) then + lACCLocal = lACC + else + lACCLocal = .false. + end if + ! stored arrays used during compute procedure call mpas_pool_get_array(meshPool, 'coeffs_reconstruct', coeffs_reconstruct) @@ -260,7 +269,7 @@ subroutine mpas_reconstruct_2d(meshPool, u, uReconstructX, uReconstructY, uRecon ! loop over cell centers !$omp do schedule(runtime) - !$acc parallel default(present) + !$acc parallel default(present) if(lACCLocal) !$acc loop gang do iCell = 1, nCells ! 
initialize the reconstructed vectors @@ -295,7 +304,7 @@ subroutine mpas_reconstruct_2d(meshPool, u, uReconstructX, uReconstructY, uRecon if (on_a_sphere) then !$omp do schedule(runtime) - !$acc parallel default(present) + !$acc parallel default(present) if(lACCLocal) !$acc loop gang do iCell = 1, nCells clat = cos(latCell(iCell)) @@ -315,7 +324,7 @@ subroutine mpas_reconstruct_2d(meshPool, u, uReconstructX, uReconstructY, uRecon !$omp end do else !$omp do schedule(runtime) - !$acc parallel default(present) + !$acc parallel default(present) if(lACCLocal) !$acc loop gang vector collapse(2) do iCell = 1, nCells do k = 1, nVertLevels From 0b45ee2111c1eb9a4c6eb1162fd876a84d0589ab Mon Sep 17 00:00:00 2001 From: Abishek Gopal Date: Mon, 13 Oct 2025 14:00:23 -0600 Subject: [PATCH 121/146] fixes needed with intel compiler --- src/core_atmosphere/dynamics/mpas_atm_iau.F | 14 +++++++------- .../physics/mpas_atmphys_interface.F | 2 ++ 2 files changed, 9 insertions(+), 7 deletions(-) diff --git a/src/core_atmosphere/dynamics/mpas_atm_iau.F b/src/core_atmosphere/dynamics/mpas_atm_iau.F index d5999e18c7..7459de89b4 100644 --- a/src/core_atmosphere/dynamics/mpas_atm_iau.F +++ b/src/core_atmosphere/dynamics/mpas_atm_iau.F @@ -6,13 +6,13 @@ ! distributed with this code, or at http://mpas-dev.github.com/license.html ! 
- #ifdef MPAS_OPENACC - #define MPAS_ACC_TIMER_START(X) call mpas_timer_start(X) - #define MPAS_ACC_TIMER_STOP(X) call mpas_timer_stop(X) - #else - #define MPAS_ACC_TIMER_START(X) - #define MPAS_ACC_TIMER_STOP(X) - #endif +#ifdef MPAS_OPENACC +#define MPAS_ACC_TIMER_START(X) call mpas_timer_start(X) +#define MPAS_ACC_TIMER_STOP(X) call mpas_timer_stop(X) +#else +#define MPAS_ACC_TIMER_START(X) +#define MPAS_ACC_TIMER_STOP(X) +#endif module mpas_atm_iau diff --git a/src/core_atmosphere/physics/mpas_atmphys_interface.F b/src/core_atmosphere/physics/mpas_atmphys_interface.F index 4e65cffd7a..76a7e4fb6d 100644 --- a/src/core_atmosphere/physics/mpas_atmphys_interface.F +++ b/src/core_atmosphere/physics/mpas_atmphys_interface.F @@ -27,6 +27,8 @@ module mpas_atmphys_interface private public:: allocate_forall_physics, & deallocate_forall_physics, & + update_d2h_pre_microphysics, & + update_h2d_post_microphysics, & MPAS_to_physics, & microphysics_from_MPAS, & microphysics_to_MPAS From 88d8a4e21b84159486b8b71e19a120dbb8b1 Mon Sep 17 00:00:00 2001 From: Abishek Gopal Date: Wed, 12 Nov 2025 07:51:48 -0700 Subject: [PATCH 122/146] Make ACC data movements of lbc variables contingent on config_apply_lbcs Modifying the existing OpenACC data movements of lbc_* variables in the mpas_atm_pre_dynamics_h2d and mpas_atm_post_dynamics_d2h subroutines to be contingent on config_apply_lbcs being true. This avoids unexpected behavior when lbc_* variables are uninitialized.
--- .../dynamics/mpas_atm_time_integration.F | 164 ++++++++++-------- 1 file changed, 88 insertions(+), 76 deletions(-) diff --git a/src/core_atmosphere/dynamics/mpas_atm_time_integration.F b/src/core_atmosphere/dynamics/mpas_atm_time_integration.F index fdf6882baf..7674b5adae 100644 --- a/src/core_atmosphere/dynamics/mpas_atm_time_integration.F +++ b/src/core_atmosphere/dynamics/mpas_atm_time_integration.F @@ -873,6 +873,8 @@ subroutine mpas_atm_pre_dynamics_h2d(domain) type (mpas_pool_type), pointer :: tend_physics type (mpas_pool_type), pointer :: lbc + logical, pointer :: config_apply_lbcs_ptr + logical :: config_apply_lbcs real (kind=RKIND), dimension(:,:), pointer :: ru, ru_p real (kind=RKIND), dimension(:,:), pointer :: ru_save @@ -930,6 +932,9 @@ subroutine mpas_atm_pre_dynamics_h2d(domain) call mpas_pool_get_subpool(domain % blocklist % structs, 'tend_physics', tend_physics) call mpas_pool_get_subpool(domain % blocklist % structs, 'lbc', lbc) + call mpas_pool_get_config(domain % blocklist % configs, 'config_apply_lbcs', config_apply_lbcs_ptr) + config_apply_lbcs = config_apply_lbcs_ptr + MPAS_ACC_TIMER_START('atm_srk3 [ACC_data_xfer]') call mpas_pool_get_array(diag, 'ru', ru) !$acc enter data copyin(ru) ! 
use values from atm_init_coupled_diagnostics @@ -1092,44 +1097,46 @@ subroutine mpas_atm_pre_dynamics_h2d(domain) !$acc enter data copyin(scalar_tend_save) - call mpas_pool_get_array(lbc, 'lbc_u', lbc_u, 2) - !$acc enter data copyin(lbc_u) - call mpas_pool_get_array(lbc, 'lbc_w', lbc_w, 2) - !$acc enter data copyin(lbc_w) - call mpas_pool_get_array(lbc, 'lbc_ru', lbc_ru, 2) - !$acc enter data copyin(lbc_ru) - call mpas_pool_get_array(lbc, 'lbc_rho_edge', lbc_rho_edge, 2) - !$acc enter data copyin(lbc_rho_edge) - call mpas_pool_get_array(lbc, 'lbc_theta', lbc_theta, 2) - !$acc enter data copyin(lbc_theta) - call mpas_pool_get_array(lbc, 'lbc_rtheta_m', lbc_rtheta_m, 2) - !$acc enter data copyin(lbc_rtheta_m) - call mpas_pool_get_array(lbc, 'lbc_rho_zz', lbc_rho_zz, 2) - !$acc enter data copyin(lbc_rho_zz) - call mpas_pool_get_array(lbc, 'lbc_rho', lbc_rho, 2) - !$acc enter data copyin(lbc_rho) - call mpas_pool_get_array(lbc, 'lbc_scalars', lbc_scalars, 2) - !$acc enter data copyin(lbc_scalars) - - - call mpas_pool_get_array(lbc, 'lbc_u', lbc_tend_u, 1) - !$acc enter data copyin(lbc_tend_u) - call mpas_pool_get_array(lbc, 'lbc_ru', lbc_tend_ru, 1) - !$acc enter data copyin(lbc_tend_ru) - call mpas_pool_get_array(lbc, 'lbc_rho_edge', lbc_tend_rho_edge, 1) - !$acc enter data copyin(lbc_tend_rho_edge) - call mpas_pool_get_array(lbc, 'lbc_w', lbc_tend_w, 1) - !$acc enter data copyin(lbc_tend_w) - call mpas_pool_get_array(lbc, 'lbc_theta', lbc_tend_theta, 1) - !$acc enter data copyin(lbc_tend_theta) - call mpas_pool_get_array(lbc, 'lbc_rtheta_m', lbc_tend_rtheta_m, 1) - !$acc enter data copyin(lbc_tend_rtheta_m) - call mpas_pool_get_array(lbc, 'lbc_rho_zz', lbc_tend_rho_zz, 1) - !$acc enter data copyin(lbc_tend_rho_zz) - call mpas_pool_get_array(lbc, 'lbc_rho', lbc_tend_rho, 1) - !$acc enter data copyin(lbc_tend_rho) - call mpas_pool_get_array(lbc, 'lbc_scalars', lbc_tend_scalars, 1) - !$acc enter data copyin(lbc_tend_scalars) + if(config_apply_lbcs) then + call 
mpas_pool_get_array(lbc, 'lbc_u', lbc_u, 2) + !$acc enter data copyin(lbc_u) + call mpas_pool_get_array(lbc, 'lbc_w', lbc_w, 2) + !$acc enter data copyin(lbc_w) + call mpas_pool_get_array(lbc, 'lbc_ru', lbc_ru, 2) + !$acc enter data copyin(lbc_ru) + call mpas_pool_get_array(lbc, 'lbc_rho_edge', lbc_rho_edge, 2) + !$acc enter data copyin(lbc_rho_edge) + call mpas_pool_get_array(lbc, 'lbc_theta', lbc_theta, 2) + !$acc enter data copyin(lbc_theta) + call mpas_pool_get_array(lbc, 'lbc_rtheta_m', lbc_rtheta_m, 2) + !$acc enter data copyin(lbc_rtheta_m) + call mpas_pool_get_array(lbc, 'lbc_rho_zz', lbc_rho_zz, 2) + !$acc enter data copyin(lbc_rho_zz) + call mpas_pool_get_array(lbc, 'lbc_rho', lbc_rho, 2) + !$acc enter data copyin(lbc_rho) + call mpas_pool_get_array(lbc, 'lbc_scalars', lbc_scalars, 2) + !$acc enter data copyin(lbc_scalars) + + + call mpas_pool_get_array(lbc, 'lbc_u', lbc_tend_u, 1) + !$acc enter data copyin(lbc_tend_u) + call mpas_pool_get_array(lbc, 'lbc_ru', lbc_tend_ru, 1) + !$acc enter data copyin(lbc_tend_ru) + call mpas_pool_get_array(lbc, 'lbc_rho_edge', lbc_tend_rho_edge, 1) + !$acc enter data copyin(lbc_tend_rho_edge) + call mpas_pool_get_array(lbc, 'lbc_w', lbc_tend_w, 1) + !$acc enter data copyin(lbc_tend_w) + call mpas_pool_get_array(lbc, 'lbc_theta', lbc_tend_theta, 1) + !$acc enter data copyin(lbc_tend_theta) + call mpas_pool_get_array(lbc, 'lbc_rtheta_m', lbc_tend_rtheta_m, 1) + !$acc enter data copyin(lbc_tend_rtheta_m) + call mpas_pool_get_array(lbc, 'lbc_rho_zz', lbc_tend_rho_zz, 1) + !$acc enter data copyin(lbc_tend_rho_zz) + call mpas_pool_get_array(lbc, 'lbc_rho', lbc_tend_rho, 1) + !$acc enter data copyin(lbc_tend_rho) + call mpas_pool_get_array(lbc, 'lbc_scalars', lbc_tend_scalars, 1) + !$acc enter data copyin(lbc_tend_scalars) + end if call mpas_pool_get_array(tend_physics, 'rthdynten', rthdynten) !$acc enter data copyin(rthdynten) @@ -1155,6 +1162,8 @@ subroutine mpas_atm_post_dynamics_d2h(domain) type (mpas_pool_type), pointer :: 
tend_physics type (mpas_pool_type), pointer :: lbc + logical, pointer :: config_apply_lbcs_ptr + logical :: config_apply_lbcs real (kind=RKIND), dimension(:,:), pointer :: ru, ru_p real (kind=RKIND), dimension(:,:), pointer :: ru_save @@ -1212,6 +1221,9 @@ subroutine mpas_atm_post_dynamics_d2h(domain) call mpas_pool_get_subpool(domain % blocklist % structs, 'tend_physics', tend_physics) call mpas_pool_get_subpool(domain % blocklist % structs, 'lbc', lbc) + call mpas_pool_get_config(domain % blocklist % configs, 'config_apply_lbcs', config_apply_lbcs_ptr) + config_apply_lbcs = config_apply_lbcs_ptr + MPAS_ACC_TIMER_START('atm_srk3 [ACC_data_xfer]') call mpas_pool_get_array(diag, 'ru', ru) !$acc exit data copyout(ru) ! use values from atm_init_coupled_diagnostics @@ -1373,45 +1385,45 @@ subroutine mpas_atm_post_dynamics_d2h(domain) call mpas_pool_get_array(tend, 'scalars_tend', scalar_tend_save) !$acc exit data copyout(scalar_tend_save) - - call mpas_pool_get_array(lbc, 'lbc_u', lbc_u, 2) - !$acc exit data delete(lbc_u) - call mpas_pool_get_array(lbc, 'lbc_w', lbc_w, 2) - !$acc exit data delete(lbc_w) - call mpas_pool_get_array(lbc, 'lbc_ru', lbc_ru, 2) - !$acc exit data delete(lbc_ru) - call mpas_pool_get_array(lbc, 'lbc_rho_edge', lbc_rho_edge, 2) - !$acc exit data delete(lbc_rho_edge) - call mpas_pool_get_array(lbc, 'lbc_theta', lbc_theta, 2) - !$acc exit data delete(lbc_theta) - call mpas_pool_get_array(lbc, 'lbc_rtheta_m', lbc_rtheta_m, 2) - !$acc exit data delete(lbc_rtheta_m) - call mpas_pool_get_array(lbc, 'lbc_rho_zz', lbc_rho_zz, 2) - !$acc exit data delete(lbc_rho_zz) - call mpas_pool_get_array(lbc, 'lbc_rho', lbc_rho, 2) - !$acc exit data delete(lbc_rho) - call mpas_pool_get_array(lbc, 'lbc_scalars', lbc_scalars, 2) - !$acc exit data delete(lbc_scalars) - + if(config_apply_lbcs) then + call mpas_pool_get_array(lbc, 'lbc_u', lbc_u, 2) + !$acc exit data delete(lbc_u) + call mpas_pool_get_array(lbc, 'lbc_w', lbc_w, 2) + !$acc exit data delete(lbc_w) + call 
mpas_pool_get_array(lbc, 'lbc_ru', lbc_ru, 2) + !$acc exit data delete(lbc_ru) + call mpas_pool_get_array(lbc, 'lbc_rho_edge', lbc_rho_edge, 2) + !$acc exit data delete(lbc_rho_edge) + call mpas_pool_get_array(lbc, 'lbc_theta', lbc_theta, 2) + !$acc exit data delete(lbc_theta) + call mpas_pool_get_array(lbc, 'lbc_rtheta_m', lbc_rtheta_m, 2) + !$acc exit data delete(lbc_rtheta_m) + call mpas_pool_get_array(lbc, 'lbc_rho_zz', lbc_rho_zz, 2) + !$acc exit data delete(lbc_rho_zz) + call mpas_pool_get_array(lbc, 'lbc_rho', lbc_rho, 2) + !$acc exit data delete(lbc_rho) + call mpas_pool_get_array(lbc, 'lbc_scalars', lbc_scalars, 2) + !$acc exit data delete(lbc_scalars) - call mpas_pool_get_array(lbc, 'lbc_u', lbc_tend_u, 1) - !$acc exit data delete(lbc_tend_u) - call mpas_pool_get_array(lbc, 'lbc_ru', lbc_tend_ru, 1) - !$acc exit data delete(lbc_tend_ru) - call mpas_pool_get_array(lbc, 'lbc_rho_edge', lbc_tend_rho_edge, 1) - !$acc exit data delete(lbc_tend_rho_edge) - call mpas_pool_get_array(lbc, 'lbc_w', lbc_tend_w, 1) - !$acc exit data delete(lbc_tend_w) - call mpas_pool_get_array(lbc, 'lbc_theta', lbc_tend_theta, 1) - !$acc exit data delete(lbc_tend_theta) - call mpas_pool_get_array(lbc, 'lbc_rtheta_m', lbc_tend_rtheta_m, 1) - !$acc exit data delete(lbc_tend_rtheta_m) - call mpas_pool_get_array(lbc, 'lbc_rho_zz', lbc_tend_rho_zz, 1) - !$acc exit data delete(lbc_tend_rho_zz) - call mpas_pool_get_array(lbc, 'lbc_rho', lbc_tend_rho, 1) - !$acc exit data delete(lbc_tend_rho) - call mpas_pool_get_array(lbc, 'lbc_scalars', lbc_tend_scalars, 1) - !$acc exit data delete(lbc_tend_scalars) + call mpas_pool_get_array(lbc, 'lbc_u', lbc_tend_u, 1) + !$acc exit data delete(lbc_tend_u) + call mpas_pool_get_array(lbc, 'lbc_ru', lbc_tend_ru, 1) + !$acc exit data delete(lbc_tend_ru) + call mpas_pool_get_array(lbc, 'lbc_rho_edge', lbc_tend_rho_edge, 1) + !$acc exit data delete(lbc_tend_rho_edge) + call mpas_pool_get_array(lbc, 'lbc_w', lbc_tend_w, 1) + !$acc exit data delete(lbc_tend_w) 
+ call mpas_pool_get_array(lbc, 'lbc_theta', lbc_tend_theta, 1) + !$acc exit data delete(lbc_tend_theta) + call mpas_pool_get_array(lbc, 'lbc_rtheta_m', lbc_tend_rtheta_m, 1) + !$acc exit data delete(lbc_tend_rtheta_m) + call mpas_pool_get_array(lbc, 'lbc_rho_zz', lbc_tend_rho_zz, 1) + !$acc exit data delete(lbc_tend_rho_zz) + call mpas_pool_get_array(lbc, 'lbc_rho', lbc_tend_rho, 1) + !$acc exit data delete(lbc_tend_rho) + call mpas_pool_get_array(lbc, 'lbc_scalars', lbc_tend_scalars, 1) + !$acc exit data delete(lbc_tend_scalars) + end if call mpas_pool_get_array(tend_physics, 'rthdynten', rthdynten) !$acc exit data copyout(rthdynten) From 35cc1440789904a3ce71d9c1dcbf9fc237c0f776 Mon Sep 17 00:00:00 2001 From: Abishek Gopal Date: Mon, 23 Feb 2026 17:01:19 -0700 Subject: [PATCH 123/146] Some more fixes to get GPU runs working --- .../dynamics/mpas_atm_time_integration.F | 17 +---------------- 1 file changed, 1 insertion(+), 16 deletions(-) diff --git a/src/core_atmosphere/dynamics/mpas_atm_time_integration.F b/src/core_atmosphere/dynamics/mpas_atm_time_integration.F index 7674b5adae..6296130382 100644 --- a/src/core_atmosphere/dynamics/mpas_atm_time_integration.F +++ b/src/core_atmosphere/dynamics/mpas_atm_time_integration.F @@ -6225,17 +6225,10 @@ subroutine atm_compute_dyn_tend_work(nCells, nEdges, nVertices, nVertLevels_dumm MPAS_ACC_TIMER_START('atm_compute_dyn_tend_work [ACC_data_xfer]') if (les_model_opt /= LES_MODEL_NONE) then - !$acc enter data copyin(exner, pressure_b, bn2) + !$acc enter data copyin(bn2) end if !$acc enter data copyin(ustm, hfx, qfx) !$acc enter data create(rayleigh_damp_coef) - if (les_model_opt /= LES_MODEL_NONE) then - !$acc enter data copyin(ur_cell, vr_cell) - else -#ifdef CURVATURE - !$acc enter data copyin(ur_cell, vr_cell) -#endif - end if !$acc enter data create(eddy_visc_horz) !$acc enter data create(eddy_visc_vert) !$acc enter data create(prandtl_3d_inv) @@ -6874,18 +6867,10 @@ subroutine atm_compute_dyn_tend_work(nCells, 
nEdges, nVertices, nVertLevels_dumm MPAS_ACC_TIMER_START('atm_compute_dyn_tend_work [ACC_data_xfer]') if (les_model_opt /= LES_MODEL_NONE) then - !$acc exit data delete(exner, pressure_b) !$acc exit data copyout(bn2) end if !$acc exit data delete(ustm, hfx, qfx) !$acc exit data delete(rayleigh_damp_coef) - if (les_model_opt /= LES_MODEL_NONE) then - !$acc exit data delete(ur_cell, vr_cell) - else -#ifdef CURVATURE - !$acc exit data delete(ur_cell, vr_cell) -#endif - end if !$acc exit data delete(eddy_visc_horz) !$acc exit data delete(eddy_visc_vert) !$acc exit data delete(prandtl_3d_inv) From 84602ed2571196fc5a068806b6dc8433cd7340c9 Mon Sep 17 00:00:00 2001 From: "G. Dylan Dickerson" Date: Wed, 7 May 2025 11:50:32 -0600 Subject: [PATCH 124/146] Add data movement for some fields under the mpas_halo_groups --- src/framework/mpas_halo.F | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/src/framework/mpas_halo.F b/src/framework/mpas_halo.F index 4ab8817c23..a2c75327a3 100644 --- a/src/framework/mpas_halo.F +++ b/src/framework/mpas_halo.F @@ -280,6 +280,13 @@ subroutine mpas_halo_exch_group_complete(domain, groupName, iErr) call refactor_lists(domain, groupName, iErr) + !$acc enter data copyin(newGroup) + !$acc enter data copyin(newGroup % fields(:), newGroup % sendBuf(:)) + do i = 1, newGroup % nFields + !$acc enter data copyin(newGroup % fields(i)) + !$acc enter data copyin(newGroup % fields(i) % sendListSrc(:,:,:)) + end do + end subroutine mpas_halo_exch_group_complete @@ -350,6 +357,7 @@ subroutine mpas_halo_exch_group_destroy(domain, groupName, iErr) deallocate(cursor % fields(i) % compactSendLists) deallocate(cursor % fields(i) % compactRecvLists) deallocate(cursor % fields(i) % nSendLists) + !$acc exit data delete(cursor % fields(i) % sendListSrc(:,:,:)) deallocate(cursor % fields(i) % sendListSrc) deallocate(cursor % fields(i) % sendListDst) deallocate(cursor % fields(i) % packOffsets) @@ -357,7 +365,10 @@ subroutine 
mpas_halo_exch_group_destroy(domain, groupName, iErr) deallocate(cursor % fields(i) % recvListSrc) deallocate(cursor % fields(i) % recvListDst) deallocate(cursor % fields(i) % unpackOffsets) + !$acc exit data delete(cursor % fields(i)) end do + ! Use finalize here in-case the copyins in ..._complete increment the reference counter + !$acc exit data finalize delete(cursor % fields(:)) deallocate(cursor % fields) deallocate(cursor % groupPackOffsets) deallocate(cursor % groupSendNeighbors) @@ -368,10 +379,12 @@ subroutine mpas_halo_exch_group_destroy(domain, groupName, iErr) deallocate(cursor % groupToFieldRecvIdx) deallocate(cursor % groupRecvOffsets) deallocate(cursor % groupRecvCounts) + !$acc exit data delete(cursor % sendBuf(:)) deallocate(cursor % sendBuf) deallocate(cursor % recvBuf) deallocate(cursor % sendRequests) deallocate(cursor % recvRequests) + !$acc exit data delete(cursor) deallocate(cursor) end subroutine mpas_halo_exch_group_destroy From 994674c2c10697c741e32d33eb5191c37d80b7ae Mon Sep 17 00:00:00 2001 From: "G. Dylan Dickerson" Date: Wed, 7 May 2025 12:30:01 -0600 Subject: [PATCH 125/146] Add a data region and acc kernels to the 2D packing code --- src/framework/mpas_halo.F | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/src/framework/mpas_halo.F b/src/framework/mpas_halo.F index a2c75327a3..aef3759d88 100644 --- a/src/framework/mpas_halo.F +++ b/src/framework/mpas_halo.F @@ -676,6 +676,15 @@ subroutine mpas_halo_exch_group_full_halo_exch(domain, groupName, iErr) ! ! Pack send buffer for all neighbors for current field ! + + ! Use data regions for specificity and so the reference or attachment counters are easier to make sense of + ! Present should also cause an attach action. 
OpenACC Spec2.7 Section 2.7.2 describes 'attach action' + !$acc data present(group) present(group % fields(i)) present(group % sendBuf(:), group % fields(i) % sendListSrc(:,:,:)) + !$acc data copyin(group % fields(i) % r2arr(:,:)) & + !$acc copyin(group % fields(i) % sendListSrc(:,:,:), group % fields(i) % sendListDst(:,:,:), group % fields(i) % nSendLists(:,:), group % fields(i) % packOffsets(:)) + + ! Kernels is good enough, use default present to force a run-time error if programmer forgot something + !$acc kernels default(present) do iEndp = 1, nSendEndpts do iHalo = 1, nHalos do j = 1, maxNSendList @@ -688,6 +697,9 @@ subroutine mpas_halo_exch_group_full_halo_exch(domain, groupName, iErr) end do end do end do + !$acc end kernels + !$acc end data + !$acc end data ! ! Packing code for 3-d real-valued fields From 3ea454f90d08079ec7ec0999bfa4972a9083216f Mon Sep 17 00:00:00 2001 From: "G. Dylan Dickerson" Date: Wed, 7 May 2025 13:31:41 -0600 Subject: [PATCH 126/146] Add the update directives that should have been part of the last commit This commit does work and matches the previous results! --- src/framework/mpas_halo.F | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/framework/mpas_halo.F b/src/framework/mpas_halo.F index aef3759d88..778e27649c 100644 --- a/src/framework/mpas_halo.F +++ b/src/framework/mpas_halo.F @@ -680,6 +680,7 @@ subroutine mpas_halo_exch_group_full_halo_exch(domain, groupName, iErr) ! Use data regions for specificity and so the reference or attachment counters are easier to make sense of ! Present should also cause an attach action. 
OpenACC Spec2.7 Section 2.7.2 describes 'attach action' !$acc data present(group) present(group % fields(i)) present(group % sendBuf(:), group % fields(i) % sendListSrc(:,:,:)) + !$acc update device(group % sendBuf(:)) !$acc data copyin(group % fields(i) % r2arr(:,:)) & !$acc copyin(group % fields(i) % sendListSrc(:,:,:), group % fields(i) % sendListDst(:,:,:), group % fields(i) % nSendLists(:,:), group % fields(i) % packOffsets(:)) @@ -699,6 +700,7 @@ subroutine mpas_halo_exch_group_full_halo_exch(domain, groupName, iErr) end do !$acc end kernels !$acc end data + !$acc update host(group % sendBuf(:)) !$acc end data ! From 227a8d0d080e3b0c9ad0e05fb3d9bac86d0cdb48 Mon Sep 17 00:00:00 2001 From: "G. Dylan Dickerson" Date: Wed, 7 May 2025 13:34:26 -0600 Subject: [PATCH 127/146] Comment out data present region, see if this causes an error --- src/framework/mpas_halo.F | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/framework/mpas_halo.F b/src/framework/mpas_halo.F index 778e27649c..3b77b8fbb1 100644 --- a/src/framework/mpas_halo.F +++ b/src/framework/mpas_halo.F @@ -679,7 +679,7 @@ subroutine mpas_halo_exch_group_full_halo_exch(domain, groupName, iErr) ! Use data regions for specificity and so the reference or attachment counters are easier to make sense of ! Present should also cause an attach action. OpenACC Spec2.7 Section 2.7.2 describes 'attach action' - !$acc data present(group) present(group % fields(i)) present(group % sendBuf(:), group % fields(i) % sendListSrc(:,:,:)) + ! 
!$acc data present(group) present(group % fields(i)) present(group % sendBuf(:), group % fields(i) % sendListSrc(:,:,:)) !$acc update device(group % sendBuf(:)) !$acc data copyin(group % fields(i) % r2arr(:,:)) & !$acc copyin(group % fields(i) % sendListSrc(:,:,:), group % fields(i) % sendListDst(:,:,:), group % fields(i) % nSendLists(:,:), group % fields(i) % packOffsets(:)) @@ -701,7 +701,7 @@ subroutine mpas_halo_exch_group_full_halo_exch(domain, groupName, iErr) !$acc end kernels !$acc end data !$acc update host(group % sendBuf(:)) - !$acc end data + ! !$acc end data ! ! Packing code for 3-d real-valued fields From c7e19fc08adab86b9022caa54921fd577fe3968c Mon Sep 17 00:00:00 2001 From: "G. Dylan Dickerson" Date: Wed, 7 May 2025 14:24:48 -0600 Subject: [PATCH 128/146] Expand the data managed on the GPU for the halo exchange NOTE: The last commit was successful! --- src/framework/mpas_halo.F | 27 ++++++++++++++++++++++----- 1 file changed, 22 insertions(+), 5 deletions(-) diff --git a/src/framework/mpas_halo.F b/src/framework/mpas_halo.F index 3b77b8fbb1..44736740af 100644 --- a/src/framework/mpas_halo.F +++ b/src/framework/mpas_halo.F @@ -280,11 +280,21 @@ subroutine mpas_halo_exch_group_complete(domain, groupName, iErr) call refactor_lists(domain, groupName, iErr) + ! Always copy in the main data member first !$acc enter data copyin(newGroup) - !$acc enter data copyin(newGroup % fields(:), newGroup % sendBuf(:)) + ! 
Then the data in the members of the type + !$acc enter data copyin(newGroup % recvBuf(:), newGroup % sendBuf(:)) + !$acc enter data copyin(newGroup % fields(:)) do i = 1, newGroup % nFields !$acc enter data copyin(newGroup % fields(i)) + !$acc enter data copyin(newGroup % fields(i) % nSendLists(:,:)) + !$acc enter data copyin(newGroup % fields(i) % packOffsets(:)) !$acc enter data copyin(newGroup % fields(i) % sendListSrc(:,:,:)) + !$acc enter data copyin(newGroup % fields(i) % sendListDst(:,:,:)) + !$acc enter data copyin(newGroup % fields(i) % nRecvLists(:,:)) + !$acc enter data copyin(newGroup % fields(i) % unpackOffsets(:)) + !$acc enter data copyin(newGroup % fields(i) % recvListSrc(:,:,:)) + !$acc enter data copyin(newGroup % fields(i) % recvListDst(:,:,:)) end do end subroutine mpas_halo_exch_group_complete @@ -356,14 +366,21 @@ subroutine mpas_halo_exch_group_destroy(domain, groupName, iErr) deallocate(cursor % fields(i) % compactHaloInfo) deallocate(cursor % fields(i) % compactSendLists) deallocate(cursor % fields(i) % compactRecvLists) + !$acc exit data delete(cursor % fields(i) % nSendLists(:,:)) deallocate(cursor % fields(i) % nSendLists) !$acc exit data delete(cursor % fields(i) % sendListSrc(:,:,:)) deallocate(cursor % fields(i) % sendListSrc) + !$acc exit data delete(cursor % fields(i) % sendListDst(:,:,:)) deallocate(cursor % fields(i) % sendListDst) + !$acc exit data delete(cursor % fields(i) % packOffsets(:)) deallocate(cursor % fields(i) % packOffsets) + !$acc exit data delete(cursor % fields(i) % nRecvLists(:,:)) deallocate(cursor % fields(i) % nRecvLists) + !$acc exit data delete(cursor % fields(i) % recvListSrc(:,:,:)) deallocate(cursor % fields(i) % recvListSrc) + !$acc exit data delete(cursor % fields(i) % recvListDst(:,:,:)) deallocate(cursor % fields(i) % recvListDst) + !$acc exit data delete(cursor % fields(i) % unpackOffsets(:)) deallocate(cursor % fields(i) % unpackOffsets) !$acc exit data delete(cursor % fields(i)) end do @@ -381,10 
+398,12 @@ subroutine mpas_halo_exch_group_destroy(domain, groupName, iErr) deallocate(cursor % groupRecvCounts) !$acc exit data delete(cursor % sendBuf(:)) deallocate(cursor % sendBuf) + !$acc exit data delete(cursor % recvBuf(:)) deallocate(cursor % recvBuf) deallocate(cursor % sendRequests) deallocate(cursor % recvRequests) - !$acc exit data delete(cursor) + ! Finalize here as well, just in-case + !$acc exit data finalize delete(cursor) deallocate(cursor) end subroutine mpas_halo_exch_group_destroy @@ -680,9 +699,7 @@ subroutine mpas_halo_exch_group_full_halo_exch(domain, groupName, iErr) ! Use data regions for specificity and so the reference or attachment counters are easier to make sense of ! Present should also cause an attach action. OpenACC Spec2.7 Section 2.7.2 describes 'attach action' ! !$acc data present(group) present(group % fields(i)) present(group % sendBuf(:), group % fields(i) % sendListSrc(:,:,:)) - !$acc update device(group % sendBuf(:)) - !$acc data copyin(group % fields(i) % r2arr(:,:)) & - !$acc copyin(group % fields(i) % sendListSrc(:,:,:), group % fields(i) % sendListDst(:,:,:), group % fields(i) % nSendLists(:,:), group % fields(i) % packOffsets(:)) + !$acc data copyin(group % fields(i) % r2arr(:,:)) ! Kernels is good enough, use default present to force a run-time error if programmer forgot something !$acc kernels default(present) From ad0dc4d9fa8ac2762430a25732ed95b5a196a384 Mon Sep 17 00:00:00 2001 From: "G. Dylan Dickerson" Date: Wed, 7 May 2025 15:01:58 -0600 Subject: [PATCH 129/146] Remove the OpenACC management of recvBuf Last commit had differences from the baseline. 
It's either this, or the change dropping 'update device(group % sendBuf(:)' in the last commit --- src/framework/mpas_halo.F | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/framework/mpas_halo.F b/src/framework/mpas_halo.F index 44736740af..d6d7b503db 100644 --- a/src/framework/mpas_halo.F +++ b/src/framework/mpas_halo.F @@ -283,7 +283,8 @@ subroutine mpas_halo_exch_group_complete(domain, groupName, iErr) ! Always copy in the main data member first !$acc enter data copyin(newGroup) ! Then the data in the members of the type - !$acc enter data copyin(newGroup % recvBuf(:), newGroup % sendBuf(:)) + ! !$acc enter data copyin(newGroup % recvBuf(:), newGroup % sendBuf(:)) + !$acc enter data copyin(newGroup % sendBuf(:)) !$acc enter data copyin(newGroup % fields(:)) do i = 1, newGroup % nFields !$acc enter data copyin(newGroup % fields(i)) @@ -398,7 +399,7 @@ subroutine mpas_halo_exch_group_destroy(domain, groupName, iErr) deallocate(cursor % groupRecvCounts) !$acc exit data delete(cursor % sendBuf(:)) deallocate(cursor % sendBuf) - !$acc exit data delete(cursor % recvBuf(:)) + ! !$acc exit data delete(cursor % recvBuf(:)) deallocate(cursor % recvBuf) deallocate(cursor % sendRequests) deallocate(cursor % recvRequests) From 9367137e2e77b34a395d834add2b6e3da22b3d6c Mon Sep 17 00:00:00 2001 From: "G. Dylan Dickerson" Date: Wed, 7 May 2025 15:25:45 -0600 Subject: [PATCH 130/146] Add update host(sendBuf) back, address answer diff Last commit still had answer differences --- src/framework/mpas_halo.F | 1 + 1 file changed, 1 insertion(+) diff --git a/src/framework/mpas_halo.F b/src/framework/mpas_halo.F index d6d7b503db..bf9003c87a 100644 --- a/src/framework/mpas_halo.F +++ b/src/framework/mpas_halo.F @@ -700,6 +700,7 @@ subroutine mpas_halo_exch_group_full_halo_exch(domain, groupName, iErr) ! Use data regions for specificity and so the reference or attachment counters are easier to make sense of ! Present should also cause an attach action. 
OpenACC Spec2.7 Section 2.7.2 describes 'attach action' ! !$acc data present(group) present(group % fields(i)) present(group % sendBuf(:), group % fields(i) % sendListSrc(:,:,:)) + !$acc update device(group % sendBuf(:)) !$acc data copyin(group % fields(i) % r2arr(:,:)) ! Kernels is good enough, use default present to force a run-time error if programmer forgot something From 5a4a3bb7ce38a0c6203c0121b311b226947e01cf Mon Sep 17 00:00:00 2001 From: "G. Dylan Dickerson" Date: Wed, 7 May 2025 15:56:47 -0600 Subject: [PATCH 131/146] Expand to other packing kernels, only update sendBuf after packing finishes --- src/framework/mpas_halo.F | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/src/framework/mpas_halo.F b/src/framework/mpas_halo.F index bf9003c87a..f84dfeb1dc 100644 --- a/src/framework/mpas_halo.F +++ b/src/framework/mpas_halo.F @@ -672,9 +672,11 @@ subroutine mpas_halo_exch_group_full_halo_exch(domain, groupName, iErr) call mpas_pool_get_array(domain % blocklist % allFields, trim(group % fields(i) % fieldName), & group % fields(i) % r1arr, timeLevel=group % fields(i) % timeLevel) + !$acc data copyin(group % fields(i) % r1arr(:)) ! ! Pack send buffer for all neighbors for current field ! + !$acc kernels default(present) do iEndp = 1, nSendEndpts do iHalo = 1, nHalos do j = 1, maxNSendList @@ -685,6 +687,8 @@ subroutine mpas_halo_exch_group_full_halo_exch(domain, groupName, iErr) end do end do end do + !$acc end kernels + !$acc end data ! ! Packing code for 2-d real-valued fields @@ -700,7 +704,6 @@ subroutine mpas_halo_exch_group_full_halo_exch(domain, groupName, iErr) ! Use data regions for specificity and so the reference or attachment counters are easier to make sense of ! Present should also cause an attach action. OpenACC Spec2.7 Section 2.7.2 describes 'attach action' ! 
!$acc data present(group) present(group % fields(i)) present(group % sendBuf(:), group % fields(i) % sendListSrc(:,:,:)) - !$acc update device(group % sendBuf(:)) !$acc data copyin(group % fields(i) % r2arr(:,:)) ! Kernels is good enough, use default present to force a run-time error if programmer forgot something @@ -719,7 +722,6 @@ subroutine mpas_halo_exch_group_full_halo_exch(domain, groupName, iErr) end do !$acc end kernels !$acc end data - !$acc update host(group % sendBuf(:)) ! !$acc end data ! @@ -728,10 +730,12 @@ subroutine mpas_halo_exch_group_full_halo_exch(domain, groupName, iErr) case (3) call mpas_pool_get_array(domain % blocklist % allFields, trim(group % fields(i) % fieldName), & group % fields(i) % r3arr, group % fields(i) % timeLevel) + !$acc data copyin(group % fields(i) % r3arr(:,:,:)) ! ! Pack send buffer for all neighbors for current field ! + !$acc kernels default(present) do iEndp = 1, nSendEndpts do iHalo = 1, nHalos do j = 1, maxNSendList @@ -747,10 +751,13 @@ subroutine mpas_halo_exch_group_full_halo_exch(domain, groupName, iErr) end do end do end do + !$acc end kernels + !$acc end data end select end if end do + !$acc update host(group % sendBuf(:)) ! ! Initiate non-blocking sends to all neighbors From 0cf0dbf4417e620298be760d394028a4ddc2407d Mon Sep 17 00:00:00 2001 From: "G. Dylan Dickerson" Date: Wed, 7 May 2025 16:44:49 -0600 Subject: [PATCH 132/146] Change to simple integers to access the buffers and the field arrays This should make the dependency analysis easier on the compiler. NOTE: The last commit succeeded and had no diffs after 1 timestep compared to a reference run! 
--- src/framework/mpas_halo.F | 35 +++++++++++++++++++++-------------- 1 file changed, 21 insertions(+), 14 deletions(-) diff --git a/src/framework/mpas_halo.F b/src/framework/mpas_halo.F index f84dfeb1dc..2244f20fb8 100644 --- a/src/framework/mpas_halo.F +++ b/src/framework/mpas_halo.F @@ -564,6 +564,7 @@ subroutine mpas_halo_exch_group_full_halo_exch(domain, groupName, iErr) ! Local variables integer :: i, bufstart, bufend + integer :: idxBuf, idxArr integer :: dim1, dim2 integer :: i1, i2, j, iNeighbor, iReq integer :: iHalo, iEndp @@ -681,8 +682,9 @@ subroutine mpas_halo_exch_group_full_halo_exch(domain, groupName, iErr) do iHalo = 1, nHalos do j = 1, maxNSendList if (j <= nSendLists(iHalo,iEndp)) then - group % sendBuf(packOffsets(iEndp) + sendListDst(j,iHalo,iEndp)) = & - group % fields(i) % r1arr(sendListSrc(j,iHalo,iEndp)) + bufIdx = packOffsets(iEndp) + sendListDst(j,iHalo,iEndp) + arrIdx = sendListSrc(j,iHalo,iEndp) + group % sendBuf(bufIdx) = group % fields(i) % r1arr(arrIdx) end if end do end do @@ -713,8 +715,9 @@ subroutine mpas_halo_exch_group_full_halo_exch(domain, groupName, iErr) do j = 1, maxNSendList do i1 = 1, dim1 if (j <= nSendLists(iHalo,iEndp)) then - group % sendBuf(packOffsets(iEndp) + dim1 * (sendListDst(j,iHalo,iEndp) - 1) + i1) = & - group % fields(i) % r2arr(i1, sendListSrc(j,iHalo,iEndp)) + bufIdx = packOffsets(iEndp) + dim1 * (sendListDst(j,iHalo,iEndp) - 1) + i1 + arrIdx = sendListSrc(j,iHalo,iEndp) + group % sendBuf(bufIdx) = group % fields(i) % r2arr(i1,arrIdx) end if end do end do @@ -742,9 +745,10 @@ subroutine mpas_halo_exch_group_full_halo_exch(domain, groupName, iErr) do i2 = 1, dim2 do i1 = 1, dim1 if (j <= nSendLists(iHalo,iEndp)) then - group % sendBuf(packOffsets(iEndp) + dim1*dim2*(sendListDst(j,iHalo,iEndp) - 1) & - + dim1*(i2-1) + i1) = & - group % fields(i) % r3arr(i1, i2, sendListSrc(j,iHalo,iEndp)) + bufIdx = packOffsets(iEndp) + dim1*dim2*(sendListDst(j,iHalo,iEndp) - 1) & + + dim1*(i2-1) + i1 + arrIdx = 
sendListSrc(j,iHalo,iEndp) + group % sendBuf(bufIdx) = group % fields(i) % r3arr(i1,i2,arrIdx) end if end do end do @@ -827,8 +831,9 @@ subroutine mpas_halo_exch_group_full_halo_exch(domain, groupName, iErr) do iHalo = 1, nHalos do j = 1, maxNRecvList if (j <= nRecvLists(iHalo,iEndp)) then - group % fields(i) % r1arr(recvListDst(j,iHalo,iEndp)) = & - group % recvBuf(unpackOffsets(iEndp) + recvListSrc(j,iHalo,iEndp)) + arrIdx = recvListDst(j,iHalo,iEndp) + bufIdx = unpackOffsets(iEndp) + recvListSrc(j,iHalo,iEndp) + group % fields(i) % r1arr(arrIdx) = group % recvBuf(bufIdx) end if end do end do @@ -844,8 +849,9 @@ subroutine mpas_halo_exch_group_full_halo_exch(domain, groupName, iErr) do j = 1, maxNRecvList do i1 = 1, dim1 if (j <= nRecvLists(iHalo,iEndp)) then - group % fields(i) % r2arr(i1, recvListDst(j,iHalo,iEndp)) = & - group % recvBuf(unpackOffsets(iEndp) + dim1 * (recvListSrc(j,iHalo,iEndp) - 1) + i1) + arrIdx = recvListDst(j,iHalo,iEndp) + bufIdx = unpackOffsets(iEndp) + dim1 * (recvListSrc(j,iHalo,iEndp) - 1) + i1 + group % fields(i) % r2arr(i1, arrIdx) = group % recvBuf(bufIdx) end if end do end do @@ -863,9 +869,10 @@ subroutine mpas_halo_exch_group_full_halo_exch(domain, groupName, iErr) do i2 = 1, dim2 do i1 = 1, dim1 if (j <= nRecvLists(iHalo,iEndp)) then - group % fields(i) % r3arr(i1, i2, recvListDst(j,iHalo,iEndp)) = & - group % recvBuf(unpackOffsets(iEndp) + dim1*dim2*(recvListSrc(j,iHalo,iEndp) - 1) & - + dim1*(i2-1) + i1) + arrIdx = recvListDst(j,iHalo,iEndp) + bufIdx = unpackOffsets(iEndp) + dim1*dim2*(recvListSrc(j,iHalo,iEndp) - 1) & + + dim1*(i2-1) + i1 + group % fields(i) % r3arr(i1, i2, arrIdx) = group % recvBuf(bufIdx) end if end do end do From 14fbe88ffeb0078ab2a86cf392a0d33f7786b3d9 Mon Sep 17 00:00:00 2001 From: "G. 
Dylan Dickerson" Date: Wed, 7 May 2025 18:29:38 -0600 Subject: [PATCH 133/146] Add kernels to unpacking loops and use a data present region to try to force GPUDirect MPI NOTE: The last commit ran successfully and matched previous 1 step results --- src/framework/mpas_halo.F | 57 ++++++++++++++++++++++++--------------- 1 file changed, 35 insertions(+), 22 deletions(-) diff --git a/src/framework/mpas_halo.F b/src/framework/mpas_halo.F index 2244f20fb8..4fc6ca0e76 100644 --- a/src/framework/mpas_halo.F +++ b/src/framework/mpas_halo.F @@ -283,8 +283,8 @@ subroutine mpas_halo_exch_group_complete(domain, groupName, iErr) ! Always copy in the main data member first !$acc enter data copyin(newGroup) ! Then the data in the members of the type - ! !$acc enter data copyin(newGroup % recvBuf(:), newGroup % sendBuf(:)) - !$acc enter data copyin(newGroup % sendBuf(:)) + !$acc enter data copyin(newGroup % recvBuf(:), newGroup % sendBuf(:)) + ! !$acc enter data copyin(newGroup % sendBuf(:)) !$acc enter data copyin(newGroup % fields(:)) do i = 1, newGroup % nFields !$acc enter data copyin(newGroup % fields(i)) @@ -399,7 +399,7 @@ subroutine mpas_halo_exch_group_destroy(domain, groupName, iErr) deallocate(cursor % groupRecvCounts) !$acc exit data delete(cursor % sendBuf(:)) deallocate(cursor % sendBuf) - ! !$acc exit data delete(cursor % recvBuf(:)) + !$acc exit data delete(cursor % recvBuf(:)) deallocate(cursor % recvBuf) deallocate(cursor % sendRequests) deallocate(cursor % recvRequests) @@ -623,6 +623,7 @@ subroutine mpas_halo_exch_group_full_halo_exch(domain, groupName, iErr) #endif rank = group % fields(1) % compactHaloInfo(8) + !$acc data present(group % recvBuf(:), group % sendBuf(:)) ! ! 
Initiate non-blocking MPI receives for all neighbors @@ -682,9 +683,9 @@ subroutine mpas_halo_exch_group_full_halo_exch(domain, groupName, iErr) do iHalo = 1, nHalos do j = 1, maxNSendList if (j <= nSendLists(iHalo,iEndp)) then - bufIdx = packOffsets(iEndp) + sendListDst(j,iHalo,iEndp) - arrIdx = sendListSrc(j,iHalo,iEndp) - group % sendBuf(bufIdx) = group % fields(i) % r1arr(arrIdx) + idxBuf = packOffsets(iEndp) + sendListDst(j,iHalo,iEndp) + idxArr = sendListSrc(j,iHalo,iEndp) + group % sendBuf(idxBuf) = group % fields(i) % r1arr(idxArr) end if end do end do @@ -715,9 +716,9 @@ subroutine mpas_halo_exch_group_full_halo_exch(domain, groupName, iErr) do j = 1, maxNSendList do i1 = 1, dim1 if (j <= nSendLists(iHalo,iEndp)) then - bufIdx = packOffsets(iEndp) + dim1 * (sendListDst(j,iHalo,iEndp) - 1) + i1 - arrIdx = sendListSrc(j,iHalo,iEndp) - group % sendBuf(bufIdx) = group % fields(i) % r2arr(i1,arrIdx) + idxBuf = packOffsets(iEndp) + dim1 * (sendListDst(j,iHalo,iEndp) - 1) + i1 + idxArr = sendListSrc(j,iHalo,iEndp) + group % sendBuf(idxBuf) = group % fields(i) % r2arr(i1,idxArr) end if end do end do @@ -745,10 +746,10 @@ subroutine mpas_halo_exch_group_full_halo_exch(domain, groupName, iErr) do i2 = 1, dim2 do i1 = 1, dim1 if (j <= nSendLists(iHalo,iEndp)) then - bufIdx = packOffsets(iEndp) + dim1*dim2*(sendListDst(j,iHalo,iEndp) - 1) & + idxBuf = packOffsets(iEndp) + dim1*dim2*(sendListDst(j,iHalo,iEndp) - 1) & + dim1*(i2-1) + i1 - arrIdx = sendListSrc(j,iHalo,iEndp) - group % sendBuf(bufIdx) = group % fields(i) % r3arr(i1,i2,arrIdx) + idxArr = sendListSrc(j,iHalo,iEndp) + group % sendBuf(idxBuf) = group % fields(i) % r3arr(i1,i2,idxArr) end if end do end do @@ -761,7 +762,7 @@ subroutine mpas_halo_exch_group_full_halo_exch(domain, groupName, iErr) end select end if end do - !$acc update host(group % sendBuf(:)) + ! !$acc update host(group % sendBuf(:)) ! ! 
Initiate non-blocking sends to all neighbors @@ -828,15 +829,18 @@ subroutine mpas_halo_exch_group_full_halo_exch(domain, groupName, iErr) ! ! Unpack recv buffer from all neighbors for current field ! + !$acc kernels default(present) do iHalo = 1, nHalos do j = 1, maxNRecvList if (j <= nRecvLists(iHalo,iEndp)) then - arrIdx = recvListDst(j,iHalo,iEndp) - bufIdx = unpackOffsets(iEndp) + recvListSrc(j,iHalo,iEndp) - group % fields(i) % r1arr(arrIdx) = group % recvBuf(bufIdx) + idxArr = recvListDst(j,iHalo,iEndp) + idxBuf = unpackOffsets(iEndp) + recvListSrc(j,iHalo,iEndp) + group % fields(i) % r1arr(idxArr) = group % recvBuf(idxBuf) end if end do end do + !$acc end kernels + !$acc exit data copyout(group % fields(i) % r1arr(:)) ! ! Unpacking code for 2-d real-valued fields @@ -845,17 +849,20 @@ subroutine mpas_halo_exch_group_full_halo_exch(domain, groupName, iErr) ! ! Unpack recv buffer from all neighbors for current field ! + !$acc kernels default(present) do iHalo = 1, nHalos do j = 1, maxNRecvList do i1 = 1, dim1 if (j <= nRecvLists(iHalo,iEndp)) then - arrIdx = recvListDst(j,iHalo,iEndp) - bufIdx = unpackOffsets(iEndp) + dim1 * (recvListSrc(j,iHalo,iEndp) - 1) + i1 - group % fields(i) % r2arr(i1, arrIdx) = group % recvBuf(bufIdx) + idxArr = recvListDst(j,iHalo,iEndp) + idxBuf = unpackOffsets(iEndp) + dim1 * (recvListSrc(j,iHalo,iEndp) - 1) + i1 + group % fields(i) % r2arr(i1, idxArr) = group % recvBuf(idxBuf) end if end do end do end do + !$acc end kernels + !$acc exit data copyout(group % fields(i) % r2arr(:,:)) ! ! Unpacking code for 3-d real-valued fields @@ -864,26 +871,32 @@ subroutine mpas_halo_exch_group_full_halo_exch(domain, groupName, iErr) ! ! Unpack recv buffer from all neighbors for current field ! 
+ !$acc kernels default(present) do iHalo = 1, nHalos do j = 1, maxNRecvList do i2 = 1, dim2 do i1 = 1, dim1 if (j <= nRecvLists(iHalo,iEndp)) then - arrIdx = recvListDst(j,iHalo,iEndp) - bufIdx = unpackOffsets(iEndp) + dim1*dim2*(recvListSrc(j,iHalo,iEndp) - 1) & + idxArr = recvListDst(j,iHalo,iEndp) + idxBuf = unpackOffsets(iEndp) + dim1*dim2*(recvListSrc(j,iHalo,iEndp) - 1) & + dim1*(i2-1) + i1 - group % fields(i) % r3arr(i1, i2, arrIdx) = group % recvBuf(bufIdx) + group % fields(i) % r3arr(i1, i2, idxArr) = group % recvBuf(idxBuf) end if end do end do end do end do + !$acc end kernels + !$acc exit data copyout(group % fields(i) % r3arr(:,:,:)) end select end if end do end do + ! For the present(group % recvBuf(:), group % sendBuf(:)) + !$acc end data + ! ! Nullify array pointers - not necessary for correctness, but helpful when debugging ! to not leave pointers to what might later be incorrect targets From 36ebbe565350052934b30d51cdc4bd2fdf7872f6 Mon Sep 17 00:00:00 2001 From: "G. Dylan Dickerson" Date: Wed, 7 May 2025 19:05:10 -0600 Subject: [PATCH 134/146] Change from data copyin regions to enter/exit directives for the r?arr variables Last run failed with CUDA_ERROR_ILLEGAL_ADDRESS, I think keeping these on the GPU would help! --- src/framework/mpas_halo.F | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/src/framework/mpas_halo.F b/src/framework/mpas_halo.F index 4fc6ca0e76..fb51f3a9db 100644 --- a/src/framework/mpas_halo.F +++ b/src/framework/mpas_halo.F @@ -674,7 +674,8 @@ subroutine mpas_halo_exch_group_full_halo_exch(domain, groupName, iErr) call mpas_pool_get_array(domain % blocklist % allFields, trim(group % fields(i) % fieldName), & group % fields(i) % r1arr, timeLevel=group % fields(i) % timeLevel) - !$acc data copyin(group % fields(i) % r1arr(:)) + ! !$acc data copyin(group % fields(i) % r1arr(:)) + !$acc enter data copyin(group % fields(i) % r1arr(:)) ! ! Pack send buffer for all neighbors for current field ! 
@@ -691,7 +692,7 @@ subroutine mpas_halo_exch_group_full_halo_exch(domain, groupName, iErr) end do end do !$acc end kernels - !$acc end data + ! !$acc end data ! ! Packing code for 2-d real-valued fields @@ -707,7 +708,8 @@ subroutine mpas_halo_exch_group_full_halo_exch(domain, groupName, iErr) ! Use data regions for specificity and so the reference or attachment counters are easier to make sense of ! Present should also cause an attach action. OpenACC Spec2.7 Section 2.7.2 describes 'attach action' ! !$acc data present(group) present(group % fields(i)) present(group % sendBuf(:), group % fields(i) % sendListSrc(:,:,:)) - !$acc data copyin(group % fields(i) % r2arr(:,:)) + ! !$acc data copyin(group % fields(i) % r2arr(:,:)) + !$acc enter data copyin(group % fields(i) % r2arr(:,:)) ! Kernels is good enough, use default present to force a run-time error if programmer forgot something !$acc kernels default(present) @@ -725,7 +727,7 @@ subroutine mpas_halo_exch_group_full_halo_exch(domain, groupName, iErr) end do end do !$acc end kernels - !$acc end data + ! !$acc end data ! !$acc end data ! @@ -734,7 +736,8 @@ subroutine mpas_halo_exch_group_full_halo_exch(domain, groupName, iErr) case (3) call mpas_pool_get_array(domain % blocklist % allFields, trim(group % fields(i) % fieldName), & group % fields(i) % r3arr, group % fields(i) % timeLevel) - !$acc data copyin(group % fields(i) % r3arr(:,:,:)) + ! !$acc data copyin(group % fields(i) % r3arr(:,:,:)) + !$acc enter data copyin(group % fields(i) % r3arr(:,:,:)) ! ! Pack send buffer for all neighbors for current field @@ -757,7 +760,7 @@ subroutine mpas_halo_exch_group_full_halo_exch(domain, groupName, iErr) end do end do !$acc end kernels - !$acc end data + ! !$acc end data end select end if From c7454a5bafa658b7892f5bb07a05a431439e57ed Mon Sep 17 00:00:00 2001 From: "G. 
Dylan Dickerson" Date: Wed, 7 May 2025 19:50:34 -0600 Subject: [PATCH 135/146] Re-enable update host for sendBuf, add update device recvBuf Last commit gave me some big differences, let's see if this helps. If this helps, then that means I wasn't using GPU-aware MPI routines like I thought... --- src/framework/mpas_halo.F | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/framework/mpas_halo.F b/src/framework/mpas_halo.F index fb51f3a9db..e576e6c13f 100644 --- a/src/framework/mpas_halo.F +++ b/src/framework/mpas_halo.F @@ -765,7 +765,7 @@ subroutine mpas_halo_exch_group_full_halo_exch(domain, groupName, iErr) end select end if end do - ! !$acc update host(group % sendBuf(:)) + !$acc update host(group % sendBuf(:)) ! ! Initiate non-blocking sends to all neighbors From b29e3f04d33f8c22212129f54e5fed7361de2cfd Mon Sep 17 00:00:00 2001 From: "G. Dylan Dickerson" Date: Wed, 7 May 2025 20:02:30 -0600 Subject: [PATCH 136/146] Remove update directives, use acc host_data use_device(...) near MPI calls instead Last commit still had answer differences. NOTE: This commit does too --- src/framework/mpas_halo.F | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/framework/mpas_halo.F b/src/framework/mpas_halo.F index e576e6c13f..d3b77c780b 100644 --- a/src/framework/mpas_halo.F +++ b/src/framework/mpas_halo.F @@ -633,9 +633,11 @@ subroutine mpas_halo_exch_group_full_halo_exch(domain, groupName, iErr) bufstart = group % groupRecvOffsets(i) bufend = group % groupRecvOffsets(i) + group % groupRecvCounts(i) - 1 !TO DO: how do we determine appropriate type here? 
+ !$acc host_data use_device(group % recvBuf) call MPI_Irecv(group % recvBuf(bufstart:bufend), group % groupRecvCounts(i), MPI_REALKIND, & group % groupRecvNeighbors(i), group % groupRecvNeighbors(i), comm, & group % recvRequests(i), mpi_ierr) + !$acc end host_data else group % recvRequests(i) = MPI_REQUEST_NULL end if @@ -765,7 +767,6 @@ subroutine mpas_halo_exch_group_full_halo_exch(domain, groupName, iErr) end select end if end do - !$acc update host(group % sendBuf(:)) ! ! Initiate non-blocking sends to all neighbors @@ -775,9 +776,11 @@ subroutine mpas_halo_exch_group_full_halo_exch(domain, groupName, iErr) bufstart = group % groupSendOffsets(i) bufend = group % groupSendOffsets(i) + group % groupSendCounts(i) - 1 !TO DO: how do we determine appropriate type here? + !$acc host_data use_device(group % sendBuf) call MPI_Isend(group % sendBuf(bufstart:bufend), group % groupSendCounts(i), MPI_REALKIND, & group % groupSendNeighbors(i), rank, comm, & group % sendRequests(i), mpi_ierr) + !$acc end host_data else group % sendRequests(i) = MPI_REQUEST_NULL end if From 5b227db4a74d4e66abaf99d691b2cb9bfcc079f4 Mon Sep 17 00:00:00 2001 From: Abishek Gopal Date: Wed, 6 Aug 2025 16:33:26 -0600 Subject: [PATCH 137/146] checkpoints: acc pack + cuda aware mpi working --- src/framework/mpas_halo.F | 91 +++++++++++++++++++++++++++++++-------- 1 file changed, 74 insertions(+), 17 deletions(-) diff --git a/src/framework/mpas_halo.F b/src/framework/mpas_halo.F index d3b77c780b..cb432b05ef 100644 --- a/src/framework/mpas_halo.F +++ b/src/framework/mpas_halo.F @@ -280,6 +280,12 @@ subroutine mpas_halo_exch_group_complete(domain, groupName, iErr) call refactor_lists(domain, groupName, iErr) + if ( newGroup% nGroupSendNeighbors <=0 ) then + !call mpas_log_write('No send neighbors for halo exchange group '//trim(groupName)) + return + end if + + ! Always copy in the main data member first !$acc enter data copyin(newGroup) ! 
Then the data in the members of the type @@ -541,6 +547,7 @@ subroutine mpas_halo_exch_group_full_halo_exch(domain, groupName, iErr) use mpas_derived_types, only : domain_type, mpas_halo_group, MPAS_HALO_REAL, MPAS_LOG_CRIT use mpas_pool_routines, only : mpas_pool_get_array use mpas_log, only : mpas_log_write + use mpas_kind_types, only : RKIND ! Parameters #ifdef MPAS_USE_MPI_F08 @@ -588,7 +595,7 @@ subroutine mpas_halo_exch_group_full_halo_exch(domain, groupName, iErr) integer :: maxNRecvList integer, dimension(:,:,:), CONTIGUOUS pointer :: recvListSrc, recvListDst integer, dimension(:), CONTIGUOUS pointer :: unpackOffsets - + real (kind=RKIND), dimension(:), pointer :: sendBufptr, recvBufptr if (present(iErr)) then iErr = 0 @@ -611,6 +618,10 @@ subroutine mpas_halo_exch_group_full_halo_exch(domain, groupName, iErr) messageType=MPAS_LOG_CRIT) end if + if ( group% nGroupSendNeighbors <=0 ) then + !call mpas_log_write('group has no halo exchanges: '//trim(groupName)) + return + end if ! ! Get the rank of this task and the MPI communicator to use from the first field in ! the group; all fields should be using the same communicator, so this should not @@ -623,7 +634,11 @@ subroutine mpas_halo_exch_group_full_halo_exch(domain, groupName, iErr) #endif rank = group % fields(1) % compactHaloInfo(8) - !$acc data present(group % recvBuf(:), group % sendBuf(:)) + sendBufptr => group % sendBuf + recvBufptr => group % recvBuf + + !!!$acc data present(group % recvBuf(:), group % sendBuf(:)) + !$acc data present(sendBufptr,recvBufptr) ! ! Initiate non-blocking MPI receives for all neighbors @@ -633,8 +648,12 @@ subroutine mpas_halo_exch_group_full_halo_exch(domain, groupName, iErr) bufstart = group % groupRecvOffsets(i) bufend = group % groupRecvOffsets(i) + group % groupRecvCounts(i) - 1 !TO DO: how do we determine appropriate type here? 
- !$acc host_data use_device(group % recvBuf) - call MPI_Irecv(group % recvBuf(bufstart:bufend), group % groupRecvCounts(i), MPI_REALKIND, & + ! !$acc host_data use_device(group % recvBuf) + ! call MPI_Irecv(group % recvBuf(bufstart:bufend), group % groupRecvCounts(i), MPI_REALKIND, & + ! group % groupRecvNeighbors(i), group % groupRecvNeighbors(i), comm, & + ! group % recvRequests(i), mpi_ierr) + !$acc host_data use_device(recvBufptr) + call MPI_Irecv(recvBufptr(bufstart:bufend), group % groupRecvCounts(i), MPI_REALKIND, & group % groupRecvNeighbors(i), group % groupRecvNeighbors(i), comm, & group % recvRequests(i), mpi_ierr) !$acc end host_data @@ -695,7 +714,7 @@ subroutine mpas_halo_exch_group_full_halo_exch(domain, groupName, iErr) end do !$acc end kernels ! !$acc end data - + !!$acc update device(group % sendBuf(:)) ! ! Packing code for 2-d real-valued fields ! @@ -731,7 +750,7 @@ subroutine mpas_halo_exch_group_full_halo_exch(domain, groupName, iErr) !$acc end kernels ! !$acc end data ! !$acc end data - + !!$acc update device(group % sendBuf(:)) ! ! Packing code for 3-d real-valued fields ! @@ -763,11 +782,25 @@ subroutine mpas_halo_exch_group_full_halo_exch(domain, groupName, iErr) end do !$acc end kernels ! !$acc end data + !!$acc update device(group % sendBuf(:)) end select end if end do + do i = 1, group % nFields + if (group % fields(i) % fieldType == MPAS_HALO_REAL) then + select case (group % fields(i) % nDims) + case (1) + !$acc exit data delete(group % fields(i) % r1arr(:)) + case (2) + !$acc exit data delete(group % fields(i) % r2arr(:,:)) + case (3) + !$acc exit data delete(group % fields(i) % r3arr(:,:,:)) + end select + end if + end do + ! ! Initiate non-blocking sends to all neighbors ! @@ -776,8 +809,12 @@ subroutine mpas_halo_exch_group_full_halo_exch(domain, groupName, iErr) bufstart = group % groupSendOffsets(i) bufend = group % groupSendOffsets(i) + group % groupSendCounts(i) - 1 !TO DO: how do we determine appropriate type here? 
- !$acc host_data use_device(group % sendBuf) - call MPI_Isend(group % sendBuf(bufstart:bufend), group % groupSendCounts(i), MPI_REALKIND, & + ! !$acc host_data use_device(group % sendBuf) + ! call MPI_Isend(group % sendBuf(bufstart:bufend), group % groupSendCounts(i), MPI_REALKIND, & + ! group % groupSendNeighbors(i), rank, comm, & + ! group % sendRequests(i), mpi_ierr) + !$acc host_data use_device(sendBufptr) + call MPI_Isend(sendBufptr(bufstart:bufend), group % groupSendCounts(i), MPI_REALKIND, & group % groupSendNeighbors(i), rank, comm, & group % sendRequests(i), mpi_ierr) !$acc end host_data @@ -835,7 +872,9 @@ subroutine mpas_halo_exch_group_full_halo_exch(domain, groupName, iErr) ! ! Unpack recv buffer from all neighbors for current field ! - !$acc kernels default(present) + !$acc update host(group % recvBuf(:)) + !$acc wait + !!$acc kernels default(present) do iHalo = 1, nHalos do j = 1, maxNRecvList if (j <= nRecvLists(iHalo,iEndp)) then @@ -845,8 +884,8 @@ subroutine mpas_halo_exch_group_full_halo_exch(domain, groupName, iErr) end if end do end do - !$acc end kernels - !$acc exit data copyout(group % fields(i) % r1arr(:)) + !!$acc end kernels + !!$acc exit data copyout(group % fields(i) % r1arr(:)) ! ! Unpacking code for 2-d real-valued fields @@ -855,7 +894,9 @@ subroutine mpas_halo_exch_group_full_halo_exch(domain, groupName, iErr) ! ! Unpack recv buffer from all neighbors for current field ! - !$acc kernels default(present) + !$acc update host(group % recvBuf(:)) + !$acc wait + !!$acc kernels default(present) do iHalo = 1, nHalos do j = 1, maxNRecvList do i1 = 1, dim1 @@ -867,8 +908,8 @@ subroutine mpas_halo_exch_group_full_halo_exch(domain, groupName, iErr) end do end do end do - !$acc end kernels - !$acc exit data copyout(group % fields(i) % r2arr(:,:)) + !!$acc end kernels + !!$acc exit data copyout(group % fields(i) % r2arr(:,:)) ! ! 
Unpacking code for 3-d real-valued fields @@ -877,7 +918,9 @@ subroutine mpas_halo_exch_group_full_halo_exch(domain, groupName, iErr) ! ! Unpack recv buffer from all neighbors for current field ! - !$acc kernels default(present) + !$acc update host(group % recvBuf(:)) + !$acc wait + !!$acc kernels default(present) do iHalo = 1, nHalos do j = 1, maxNRecvList do i2 = 1, dim2 @@ -892,8 +935,8 @@ subroutine mpas_halo_exch_group_full_halo_exch(domain, groupName, iErr) end do end do end do - !$acc end kernels - !$acc exit data copyout(group % fields(i) % r3arr(:,:,:)) + !!$acc end kernels + !!$acc exit data copyout(group % fields(i) % r3arr(:,:,:)) end select end if @@ -903,6 +946,20 @@ subroutine mpas_halo_exch_group_full_halo_exch(domain, groupName, iErr) ! For the present(group % recvBuf(:), group % sendBuf(:)) !$acc end data + ! do i = 1, group % nFields + ! if (group % fields(i) % fieldType == MPAS_HALO_REAL) then + ! select case (group % fields(i) % nDims) + ! case (1) + ! !$acc exit data copyout(group % fields(i) % r1arr(:)) + ! case (2) + ! !$acc exit data copyout(group % fields(i) % r2arr(:,:)) + ! case (3) + ! !$acc exit data copyout(group % fields(i) % r3arr(:,:,:)) + ! end select + ! end if + ! end do + + ! ! Nullify array pointers - not necessary for correctness, but helpful when debugging ! 
to not leave pointers to what might later be incorrect targets From df7c5fb052e3fbf3b4dae8ac7d86c9887425f069 Mon Sep 17 00:00:00 2001 From: Abishek Gopal Date: Thu, 7 Aug 2025 10:10:55 -0600 Subject: [PATCH 138/146] seems to be working --- src/framework/mpas_halo.F | 67 +++++++++++++++++++++++---------------- 1 file changed, 40 insertions(+), 27 deletions(-) diff --git a/src/framework/mpas_halo.F b/src/framework/mpas_halo.F index cb432b05ef..9062bc4537 100644 --- a/src/framework/mpas_halo.F +++ b/src/framework/mpas_halo.F @@ -788,19 +788,6 @@ subroutine mpas_halo_exch_group_full_halo_exch(domain, groupName, iErr) end if end do - do i = 1, group % nFields - if (group % fields(i) % fieldType == MPAS_HALO_REAL) then - select case (group % fields(i) % nDims) - case (1) - !$acc exit data delete(group % fields(i) % r1arr(:)) - case (2) - !$acc exit data delete(group % fields(i) % r2arr(:,:)) - case (3) - !$acc exit data delete(group % fields(i) % r3arr(:,:,:)) - end select - end if - end do - ! ! Initiate non-blocking sends to all neighbors ! @@ -872,9 +859,9 @@ subroutine mpas_halo_exch_group_full_halo_exch(domain, groupName, iErr) ! ! Unpack recv buffer from all neighbors for current field ! - !$acc update host(group % recvBuf(:)) - !$acc wait - !!$acc kernels default(present) + !!$acc update host(group % recvBuf(:)) + !!$acc wait + !$acc kernels default(present) do iHalo = 1, nHalos do j = 1, maxNRecvList if (j <= nRecvLists(iHalo,iEndp)) then @@ -884,7 +871,7 @@ subroutine mpas_halo_exch_group_full_halo_exch(domain, groupName, iErr) end if end do end do - !!$acc end kernels + !$acc end kernels !!$acc exit data copyout(group % fields(i) % r1arr(:)) ! @@ -894,9 +881,9 @@ subroutine mpas_halo_exch_group_full_halo_exch(domain, groupName, iErr) ! ! Unpack recv buffer from all neighbors for current field ! 
- !$acc update host(group % recvBuf(:)) - !$acc wait - !!$acc kernels default(present) + !!$acc update host(group % recvBuf(:)) + !!$acc wait + !$acc kernels default(present) do iHalo = 1, nHalos do j = 1, maxNRecvList do i1 = 1, dim1 @@ -908,7 +895,7 @@ subroutine mpas_halo_exch_group_full_halo_exch(domain, groupName, iErr) end do end do end do - !!$acc end kernels + !$acc end kernels !!$acc exit data copyout(group % fields(i) % r2arr(:,:)) ! @@ -918,9 +905,9 @@ subroutine mpas_halo_exch_group_full_halo_exch(domain, groupName, iErr) ! ! Unpack recv buffer from all neighbors for current field ! - !$acc update host(group % recvBuf(:)) - !$acc wait - !!$acc kernels default(present) + !!$acc update host(group % recvBuf(:)) + !!$acc wait + !$acc kernels default(present) do iHalo = 1, nHalos do j = 1, maxNRecvList do i2 = 1, dim2 @@ -935,7 +922,7 @@ subroutine mpas_halo_exch_group_full_halo_exch(domain, groupName, iErr) end do end do end do - !!$acc end kernels + !$acc end kernels !!$acc exit data copyout(group % fields(i) % r3arr(:,:,:)) end select @@ -943,9 +930,35 @@ subroutine mpas_halo_exch_group_full_halo_exch(domain, groupName, iErr) end do end do + do i = 1, group % nFields + if (group % fields(i) % fieldType == MPAS_HALO_REAL) then + select case (group % fields(i) % nDims) + case (1) + !$acc update self(group % fields(i) % r1arr(:)) + case (2) + !$acc update self(group % fields(i) % r2arr(:,:)) + case (3) + !$acc update self(group % fields(i) % r3arr(:,:,:)) + end select + end if + end do + + do i = 1, group % nFields + if (group % fields(i) % fieldType == MPAS_HALO_REAL) then + select case (group % fields(i) % nDims) + case (1) + !$acc exit data delete(group % fields(i) % r1arr(:)) + case (2) + !$acc exit data delete(group % fields(i) % r2arr(:,:)) + case (3) + !$acc exit data delete(group % fields(i) % r3arr(:,:,:)) + end select + end if + end do + ! For the present(group % recvBuf(:), group % sendBuf(:)) !$acc end data - + ! !$acc wait ! 
do i = 1, group % nFields ! if (group % fields(i) % fieldType == MPAS_HALO_REAL) then ! select case (group % fields(i) % nDims) @@ -958,7 +971,7 @@ subroutine mpas_halo_exch_group_full_halo_exch(domain, groupName, iErr) ! end select ! end if ! end do - + ! !$acc wait ! ! Nullify array pointers - not necessary for correctness, but helpful when debugging From b00e437a7e7da57961c91f83f64c9f4091ef2335 Mon Sep 17 00:00:00 2001 From: Abishek Gopal Date: Thu, 7 Aug 2025 17:17:45 -0600 Subject: [PATCH 139/146] Optimized packing and unpacking loops. Adding timers and other cleanup --- src/framework/mpas_halo.F | 142 ++++++++++++++++++++------------------ 1 file changed, 73 insertions(+), 69 deletions(-) diff --git a/src/framework/mpas_halo.F b/src/framework/mpas_halo.F index 9062bc4537..7f96e0d397 100644 --- a/src/framework/mpas_halo.F +++ b/src/framework/mpas_halo.F @@ -17,6 +17,15 @@ !> communicating the halos of all fields in a group. ! !----------------------------------------------------------------------- + +#ifdef MPAS_OPENACC +#define MPAS_ACC_TIMER_START(X) call mpas_timer_start(X) +#define MPAS_ACC_TIMER_STOP(X) call mpas_timer_stop(X) +#else +#define MPAS_ACC_TIMER_START(X) +#define MPAS_ACC_TIMER_STOP(X) +#endif + module mpas_halo implicit none @@ -281,9 +290,8 @@ subroutine mpas_halo_exch_group_complete(domain, groupName, iErr) call refactor_lists(domain, groupName, iErr) if ( newGroup% nGroupSendNeighbors <=0 ) then - !call mpas_log_write('No send neighbors for halo exchange group '//trim(groupName)) return - end if + end if ! Always copy in the main data member first @@ -547,7 +555,8 @@ subroutine mpas_halo_exch_group_full_halo_exch(domain, groupName, iErr) use mpas_derived_types, only : domain_type, mpas_halo_group, MPAS_HALO_REAL, MPAS_LOG_CRIT use mpas_pool_routines, only : mpas_pool_get_array use mpas_log, only : mpas_log_write - use mpas_kind_types, only : RKIND + use mpas_timer, only : mpas_timer_start, mpas_timer_stop + ! 
Parameters #ifdef MPAS_USE_MPI_F08 @@ -595,12 +604,13 @@ subroutine mpas_halo_exch_group_full_halo_exch(domain, groupName, iErr) integer :: maxNRecvList integer, dimension(:,:,:), CONTIGUOUS pointer :: recvListSrc, recvListDst integer, dimension(:), CONTIGUOUS pointer :: unpackOffsets - real (kind=RKIND), dimension(:), pointer :: sendBufptr, recvBufptr + if (present(iErr)) then iErr = 0 end if + ! ! Find this halo exhange group in the list of groups ! @@ -618,10 +628,11 @@ subroutine mpas_halo_exch_group_full_halo_exch(domain, groupName, iErr) messageType=MPAS_LOG_CRIT) end if - if ( group% nGroupSendNeighbors <=0 ) then - !call mpas_log_write('group has no halo exchanges: '//trim(groupName)) + if ( group% nGroupSendNeighbors <= 0 ) then return - end if + end if + + call mpas_timer_start('full_halo_exch') ! ! Get the rank of this task and the MPI communicator to use from the first field in ! the group; all fields should be using the same communicator, so this should not @@ -634,11 +645,7 @@ subroutine mpas_halo_exch_group_full_halo_exch(domain, groupName, iErr) #endif rank = group % fields(1) % compactHaloInfo(8) - sendBufptr => group % sendBuf - recvBufptr => group % recvBuf - - !!!$acc data present(group % recvBuf(:), group % sendBuf(:)) - !$acc data present(sendBufptr,recvBufptr) + !$acc data present(group % recvBuf(:), group % sendBuf(:)) ! ! Initiate non-blocking MPI receives for all neighbors @@ -648,12 +655,8 @@ subroutine mpas_halo_exch_group_full_halo_exch(domain, groupName, iErr) bufstart = group % groupRecvOffsets(i) bufend = group % groupRecvOffsets(i) + group % groupRecvCounts(i) - 1 !TO DO: how do we determine appropriate type here? - ! !$acc host_data use_device(group % recvBuf) - ! call MPI_Irecv(group % recvBuf(bufstart:bufend), group % groupRecvCounts(i), MPI_REALKIND, & - ! group % groupRecvNeighbors(i), group % groupRecvNeighbors(i), comm, & - ! 
group % recvRequests(i), mpi_ierr) - !$acc host_data use_device(recvBufptr) - call MPI_Irecv(recvBufptr(bufstart:bufend), group % groupRecvCounts(i), MPI_REALKIND, & + !$acc host_data use_device(group % recvBuf) + call MPI_Irecv(group % recvBuf(bufstart:bufend), group % groupRecvCounts(i), MPI_REALKIND, & group % groupRecvNeighbors(i), group % groupRecvNeighbors(i), comm, & group % recvRequests(i), mpi_ierr) !$acc end host_data @@ -695,14 +698,18 @@ subroutine mpas_halo_exch_group_full_halo_exch(domain, groupName, iErr) call mpas_pool_get_array(domain % blocklist % allFields, trim(group % fields(i) % fieldName), & group % fields(i) % r1arr, timeLevel=group % fields(i) % timeLevel) - ! !$acc data copyin(group % fields(i) % r1arr(:)) + MPAS_ACC_TIMER_START('halo_exch [ACC_data_xfer]') !$acc enter data copyin(group % fields(i) % r1arr(:)) + MPAS_ACC_TIMER_STOP('halo_exch [ACC_data_xfer]') ! ! Pack send buffer for all neighbors for current field ! - !$acc kernels default(present) + call mpas_timer_start('packing_halo_exch') + !$acc parallel default(present) + !$acc loop gang collapse(2) do iEndp = 1, nSendEndpts do iHalo = 1, nHalos + !$acc loop vector do j = 1, maxNSendList if (j <= nSendLists(iHalo,iEndp)) then idxBuf = packOffsets(iEndp) + sendListDst(j,iHalo,iEndp) @@ -712,9 +719,9 @@ subroutine mpas_halo_exch_group_full_halo_exch(domain, groupName, iErr) end do end do end do - !$acc end kernels - ! !$acc end data - !!$acc update device(group % sendBuf(:)) + !$acc end parallel + call mpas_timer_stop('packing_halo_exch') + ! ! Packing code for 2-d real-valued fields ! @@ -725,18 +732,23 @@ subroutine mpas_halo_exch_group_full_halo_exch(domain, groupName, iErr) ! ! Pack send buffer for all neighbors for current field ! - + ! Use data regions for specificity and so the reference or attachment counters are easier to make sense of ! Present should also cause an attach action. OpenACC Spec2.7 Section 2.7.2 describes 'attach action' ! 
!$acc data present(group) present(group % fields(i)) present(group % sendBuf(:), group % fields(i) % sendListSrc(:,:,:)) ! !$acc data copyin(group % fields(i) % r2arr(:,:)) + MPAS_ACC_TIMER_START('halo_exch [ACC_data_xfer]') !$acc enter data copyin(group % fields(i) % r2arr(:,:)) + MPAS_ACC_TIMER_STOP('halo_exch [ACC_data_xfer]') + call mpas_timer_start('packing_halo_exch') ! Kernels is good enough, use default present to force a run-time error if programmer forgot something - !$acc kernels default(present) + !$acc parallel default(present) + !$acc loop gang collapse(3) do iEndp = 1, nSendEndpts do iHalo = 1, nHalos do j = 1, maxNSendList + !$acc loop vector do i1 = 1, dim1 if (j <= nSendLists(iHalo,iEndp)) then idxBuf = packOffsets(iEndp) + dim1 * (sendListDst(j,iHalo,iEndp) - 1) + i1 @@ -747,27 +759,30 @@ subroutine mpas_halo_exch_group_full_halo_exch(domain, groupName, iErr) end do end do end do - !$acc end kernels - ! !$acc end data - ! !$acc end data - !!$acc update device(group % sendBuf(:)) + !$acc end parallel + call mpas_timer_stop('packing_halo_exch') + ! ! Packing code for 3-d real-valued fields ! case (3) call mpas_pool_get_array(domain % blocklist % allFields, trim(group % fields(i) % fieldName), & group % fields(i) % r3arr, group % fields(i) % timeLevel) - ! !$acc data copyin(group % fields(i) % r3arr(:,:,:)) + MPAS_ACC_TIMER_START('halo_exch [ACC_data_xfer]') !$acc enter data copyin(group % fields(i) % r3arr(:,:,:)) + MPAS_ACC_TIMER_STOP('halo_exch [ACC_data_xfer]') ! ! Pack send buffer for all neighbors for current field ! 
- !$acc kernels default(present) + call mpas_timer_start('packing_halo_exch') + !$acc parallel default(present) + !$acc loop gang collapse(4) do iEndp = 1, nSendEndpts do iHalo = 1, nHalos do j = 1, maxNSendList do i2 = 1, dim2 + !$acc loop vector do i1 = 1, dim1 if (j <= nSendLists(iHalo,iEndp)) then idxBuf = packOffsets(iEndp) + dim1*dim2*(sendListDst(j,iHalo,iEndp) - 1) & @@ -780,9 +795,8 @@ subroutine mpas_halo_exch_group_full_halo_exch(domain, groupName, iErr) end do end do end do - !$acc end kernels - ! !$acc end data - !!$acc update device(group % sendBuf(:)) + !$acc end parallel + call mpas_timer_stop('packing_halo_exch') end select end if @@ -796,12 +810,8 @@ subroutine mpas_halo_exch_group_full_halo_exch(domain, groupName, iErr) bufstart = group % groupSendOffsets(i) bufend = group % groupSendOffsets(i) + group % groupSendCounts(i) - 1 !TO DO: how do we determine appropriate type here? - ! !$acc host_data use_device(group % sendBuf) - ! call MPI_Isend(group % sendBuf(bufstart:bufend), group % groupSendCounts(i), MPI_REALKIND, & - ! group % groupSendNeighbors(i), rank, comm, & - ! group % sendRequests(i), mpi_ierr) - !$acc host_data use_device(sendBufptr) - call MPI_Isend(sendBufptr(bufstart:bufend), group % groupSendCounts(i), MPI_REALKIND, & + !$acc host_data use_device(group % sendBuf) + call MPI_Isend(group % sendBuf(bufstart:bufend), group % groupSendCounts(i), MPI_REALKIND, & group % groupSendNeighbors(i), rank, comm, & group % sendRequests(i), mpi_ierr) !$acc end host_data @@ -859,10 +869,11 @@ subroutine mpas_halo_exch_group_full_halo_exch(domain, groupName, iErr) ! ! Unpack recv buffer from all neighbors for current field ! 
- !!$acc update host(group % recvBuf(:)) - !!$acc wait - !$acc kernels default(present) + call mpas_timer_start('unpacking_halo_exch') + !$acc parallel default(present) + !$acc loop gang do iHalo = 1, nHalos + !$acc loop vector do j = 1, maxNRecvList if (j <= nRecvLists(iHalo,iEndp)) then idxArr = recvListDst(j,iHalo,iEndp) @@ -871,8 +882,8 @@ subroutine mpas_halo_exch_group_full_halo_exch(domain, groupName, iErr) end if end do end do - !$acc end kernels - !!$acc exit data copyout(group % fields(i) % r1arr(:)) + !$acc end parallel + call mpas_timer_stop('unpacking_halo_exch') ! ! Unpacking code for 2-d real-valued fields @@ -881,11 +892,13 @@ subroutine mpas_halo_exch_group_full_halo_exch(domain, groupName, iErr) ! ! Unpack recv buffer from all neighbors for current field ! - !!$acc update host(group % recvBuf(:)) - !!$acc wait - !$acc kernels default(present) + call mpas_timer_start('unpacking_halo_exch') + !$acc parallel default(present) + !$acc loop gang do iHalo = 1, nHalos + !$acc loop worker do j = 1, maxNRecvList + !$acc loop vector do i1 = 1, dim1 if (j <= nRecvLists(iHalo,iEndp)) then idxArr = recvListDst(j,iHalo,iEndp) @@ -895,8 +908,8 @@ subroutine mpas_halo_exch_group_full_halo_exch(domain, groupName, iErr) end do end do end do - !$acc end kernels - !!$acc exit data copyout(group % fields(i) % r2arr(:,:)) + !$acc end parallel + call mpas_timer_stop('unpacking_halo_exch') ! ! Unpacking code for 3-d real-valued fields @@ -905,11 +918,12 @@ subroutine mpas_halo_exch_group_full_halo_exch(domain, groupName, iErr) ! ! Unpack recv buffer from all neighbors for current field ! 
- !!$acc update host(group % recvBuf(:)) - !!$acc wait - !$acc kernels default(present) + call mpas_timer_start('unpacking_halo_exch') + !$acc parallel default(present) + !$acc loop gang collapse(2) do iHalo = 1, nHalos do j = 1, maxNRecvList + !$acc loop vector collapse(2) do i2 = 1, dim2 do i1 = 1, dim1 if (j <= nRecvLists(iHalo,iEndp)) then @@ -922,14 +936,15 @@ subroutine mpas_halo_exch_group_full_halo_exch(domain, groupName, iErr) end do end do end do - !$acc end kernels - !!$acc exit data copyout(group % fields(i) % r3arr(:,:,:)) + !$acc end parallel + call mpas_timer_stop('unpacking_halo_exch') end select end if end do end do + MPAS_ACC_TIMER_START('halo_exch [ACC_data_xfer]') do i = 1, group % nFields if (group % fields(i) % fieldType == MPAS_HALO_REAL) then select case (group % fields(i) % nDims) @@ -958,20 +973,7 @@ subroutine mpas_halo_exch_group_full_halo_exch(domain, groupName, iErr) ! For the present(group % recvBuf(:), group % sendBuf(:)) !$acc end data - ! !$acc wait - ! do i = 1, group % nFields - ! if (group % fields(i) % fieldType == MPAS_HALO_REAL) then - ! select case (group % fields(i) % nDims) - ! case (1) - ! !$acc exit data copyout(group % fields(i) % r1arr(:)) - ! case (2) - ! !$acc exit data copyout(group % fields(i) % r2arr(:,:)) - ! case (3) - ! !$acc exit data copyout(group % fields(i) % r3arr(:,:,:)) - ! end select - ! end if - ! end do - ! !$acc wait + MPAS_ACC_TIMER_STOP('halo_exch [ACC_data_xfer]') ! ! Nullify array pointers - not necessary for correctness, but helpful when debugging @@ -992,6 +994,8 @@ subroutine mpas_halo_exch_group_full_halo_exch(domain, groupName, iErr) ! 
call MPI_Waitall(group % nGroupSendNeighbors, group % sendRequests, MPI_STATUSES_IGNORE, mpi_ierr) + call mpas_timer_stop('full_halo_exch') + end subroutine mpas_halo_exch_group_full_halo_exch From 354c91a1d1ef7026d933768a6edb39bddb36ad42 Mon Sep 17 00:00:00 2001 From: Abishek Gopal Date: Tue, 12 Aug 2025 17:13:52 -0600 Subject: [PATCH 140/146] Working savepoint --- .../dynamics/mpas_atm_time_integration.F | 107 +++++++++--------- src/core_atmosphere/mpas_atm_core.F | 3 +- src/core_atmosphere/mpas_atm_halos.F | 3 +- .../physics/mpas_atmphys_todynamics.F | 3 +- src/framework/mpas_dmpar.F | 11 +- src/framework/mpas_halo.F | 67 +++++++---- 6 files changed, 112 insertions(+), 82 deletions(-) diff --git a/src/core_atmosphere/dynamics/mpas_atm_time_integration.F b/src/core_atmosphere/dynamics/mpas_atm_time_integration.F index 6296130382..8181c2ad84 100644 --- a/src/core_atmosphere/dynamics/mpas_atm_time_integration.F +++ b/src/core_atmosphere/dynamics/mpas_atm_time_integration.F @@ -43,12 +43,13 @@ module atm_time_integration ! in a named group ! 
abstract interface - subroutine halo_exchange_routine(domain, halo_group, ierr) + subroutine halo_exchange_routine(domain, halo_group, withGPUAwareMPI, ierr) use mpas_derived_types, only : domain_type type (domain_type), intent(inout) :: domain character(len=*), intent(in) :: halo_group + logical, intent(in), optional :: withGPUAwareMPI integer, intent(out), optional :: ierr end subroutine halo_exchange_routine @@ -2041,9 +2042,9 @@ subroutine atm_srk3(domain, dt, itimestep, exchange_halo_group) call mpas_pool_get_array(state, 'scalars', scalars_1, 1) call mpas_pool_get_array(diag, 'pressure_p', pressure_p) call mpas_pool_get_array(diag, 'rtheta_p', rtheta_p) - !$acc update self(theta_m,scalars_1,pressure_p,rtheta_p) - call exchange_halo_group(domain, 'dynamics:theta_m,scalars,pressure_p,rtheta_p') - !$acc update device(theta_m,scalars_1,pressure_p,rtheta_p) + !!$acc update self(theta_m,scalars_1,pressure_p,rtheta_p) + call exchange_halo_group(domain, 'dynamics:theta_m,scalars,pressure_p,rtheta_p', .true.) + !!$acc update device(theta_m,scalars_1,pressure_p,rtheta_p) MPAS_ACC_TIMER_STOP('atm_srk3: halo_exchanges + ACC_data_xfer') call mpas_timer_start('atm_rk_integration_setup') @@ -2132,9 +2133,9 @@ subroutine atm_srk3(domain, dt, itimestep, exchange_halo_group) MPAS_ACC_TIMER_START('atm_srk3: halo_exchanges + ACC_data_xfer') call mpas_pool_get_array(diag, 'exner', exner) - !$acc update self(exner) - call exchange_halo_group(domain, 'dynamics:exner') - !$acc update device(exner) + !!$acc update self(exner) + call exchange_halo_group(domain, 'dynamics:exner', .true.) + !!$acc update device(exner) MPAS_ACC_TIMER_STOP('atm_srk3: halo_exchanges + ACC_data_xfer') !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! @@ -2216,9 +2217,9 @@ subroutine atm_srk3(domain, dt, itimestep, exchange_halo_group) ! 
tend_u MPAS_ACC_TIMER_START('atm_srk3: halo_exchanges + ACC_data_xfer') call mpas_pool_get_array(tend, 'u', tend_u) - !$acc update self(tend_u) - call exchange_halo_group(domain, 'dynamics:tend_u') - !$acc update device(tend_u) + !!$acc update self(tend_u) + call exchange_halo_group(domain, 'dynamics:tend_u', .true.) + !!$acc update device(tend_u) MPAS_ACC_TIMER_STOP('atm_srk3: halo_exchanges + ACC_data_xfer') call mpas_timer_start('small_step_prep') @@ -2297,9 +2298,9 @@ subroutine atm_srk3(domain, dt, itimestep, exchange_halo_group) MPAS_ACC_TIMER_START('atm_srk3: halo_exchanges + ACC_data_xfer') call mpas_pool_get_array(diag, 'rho_pp', rho_pp) - !$acc update self(rho_pp) - call exchange_halo_group(domain, 'dynamics:rho_pp') - !$acc update device(rho_pp) + !!$acc update self(rho_pp) + call exchange_halo_group(domain, 'dynamics:rho_pp', .true.) + !!$acc update device(rho_pp) MPAS_ACC_TIMER_STOP('atm_srk3: halo_exchanges + ACC_data_xfer') call mpas_timer_start('atm_advance_acoustic_step') @@ -2324,9 +2325,9 @@ subroutine atm_srk3(domain, dt, itimestep, exchange_halo_group) ! This is the only communications needed during the acoustic steps because we solve for u on all edges of owned cells MPAS_ACC_TIMER_START('atm_srk3: halo_exchanges + ACC_data_xfer') call mpas_pool_get_array(diag, 'rtheta_pp', rtheta_pp) - !$acc update self(rtheta_pp) - call exchange_halo_group(domain, 'dynamics:rtheta_pp') - !$acc update device(rtheta_pp) + !!$acc update self(rtheta_pp) + call exchange_halo_group(domain, 'dynamics:rtheta_pp', .true.) + !!$acc update device(rtheta_pp) MPAS_ACC_TIMER_STOP('atm_srk3: halo_exchanges + ACC_data_xfer') ! 
complete update of horizontal momentum by including 3d divergence damping at the end of the acoustic step @@ -2352,9 +2353,9 @@ subroutine atm_srk3(domain, dt, itimestep, exchange_halo_group) call mpas_pool_get_array(diag, 'rw_p', rw_p) call mpas_pool_get_array(diag, 'rho_pp', rho_pp) call mpas_pool_get_array(diag, 'rtheta_pp', rtheta_pp) - !$acc update self(rw_p,ru_p,rho_pp,rtheta_pp) - call exchange_halo_group(domain, 'dynamics:rw_p,ru_p,rho_pp,rtheta_pp') - !$acc update device(rw_p,ru_p,rho_pp,rtheta_pp) + !!$acc update self(rw_p,ru_p,rho_pp,rtheta_pp) + call exchange_halo_group(domain, 'dynamics:rw_p,ru_p,rho_pp,rtheta_pp', .true.) + !!$acc update device(rw_p,ru_p,rho_pp,rtheta_pp) MPAS_ACC_TIMER_STOP('atm_srk3: halo_exchanges + ACC_data_xfer') call mpas_timer_start('atm_recover_large_step_variables') @@ -2425,14 +2426,14 @@ subroutine atm_srk3(domain, dt, itimestep, exchange_halo_group) MPAS_ACC_TIMER_START('atm_srk3: halo_exchanges + ACC_data_xfer') call mpas_pool_get_array(state, 'u', u, 2) - !$acc update self(u) + !!$acc update self(u) ! u if (config_apply_lbcs) then - call exchange_halo_group(domain, 'dynamics:u_123') + call exchange_halo_group(domain, 'dynamics:u_123', .true.) else - call exchange_halo_group(domain, 'dynamics:u_3') + call exchange_halo_group(domain, 'dynamics:u_3', .true.) end if - !$acc update device(u) + !!$acc update device(u) MPAS_ACC_TIMER_STOP('atm_srk3: halo_exchanges + ACC_data_xfer') ! scalar advection: RK3 scheme of Skamarock and Gassmann (2011). @@ -2447,9 +2448,9 @@ subroutine atm_srk3(domain, dt, itimestep, exchange_halo_group) MPAS_ACC_TIMER_START('atm_srk3: halo_exchanges + ACC_data_xfer') call mpas_pool_get_array(state, 'scalars', scalars_2, 2) - !$acc update self(scalars_2) - call exchange_halo_group(domain, 'dynamics:scalars') - !$acc update device(scalars_2) + !!$acc update self(scalars_2) + call exchange_halo_group(domain, 'dynamics:scalars', .true.) 
+ !!$acc update device(scalars_2) MPAS_ACC_TIMER_STOP('atm_srk3: halo_exchanges + ACC_data_xfer') allocate(scalars_driving(num_scalars,nVertLevels,nCells+1)) @@ -2506,22 +2507,22 @@ subroutine atm_srk3(domain, dt, itimestep, exchange_halo_group) call mpas_pool_get_array(state, 'w', w, 2) call mpas_pool_get_array(diag, 'pv_edge', pv_edge) call mpas_pool_get_array(diag, 'rho_edge', rho_edge) - !$acc update self(w,pv_edge,rho_edge) + !!$acc update self(w,pv_edge,rho_edge) if (config_scalar_advection .and. (.not. config_split_dynamics_transport) ) then ! ! Communicate halos for w[1,2], pv_edge[1,2], rho_edge[1,2], scalars[1,2] ! call mpas_pool_get_array(state, 'scalars', scalars_2, 2) - !$acc update self(scalars_2) - call exchange_halo_group(domain, 'dynamics:w,pv_edge,rho_edge,scalars') - !$acc update device(scalars_2) + !!$acc update self(scalars_2) + call exchange_halo_group(domain, 'dynamics:w,pv_edge,rho_edge,scalars', .true.) + !!$acc update device(scalars_2) else ! ! Communicate halos for w[1,2], pv_edge[1,2], rho_edge[1,2] ! - call exchange_halo_group(domain, 'dynamics:w,pv_edge,rho_edge') + call exchange_halo_group(domain, 'dynamics:w,pv_edge,rho_edge', .true.) end if - !$acc update device(w,pv_edge,rho_edge) + !!$acc update device(w,pv_edge,rho_edge) MPAS_ACC_TIMER_STOP('atm_srk3: halo_exchanges + ACC_data_xfer') ! set the zero-gradient condition on w for regional_MPAS @@ -2538,9 +2539,9 @@ subroutine atm_srk3(domain, dt, itimestep, exchange_halo_group) MPAS_ACC_TIMER_START('atm_srk3: halo_exchanges + ACC_data_xfer') ! w halo values needs resetting after regional boundary update call mpas_pool_get_array(state, 'w', w, 2) - !$acc update self(w) - call exchange_halo_group(domain, 'dynamics:w') - !$acc update device(w) + !!$acc update self(w) + call exchange_halo_group(domain, 'dynamics:w', .true.) + !!$acc update device(w) MPAS_ACC_TIMER_STOP('atm_srk3: halo_exchanges + ACC_data_xfer') end if ! 
end of regional_MPAS addition @@ -2556,9 +2557,9 @@ subroutine atm_srk3(domain, dt, itimestep, exchange_halo_group) call mpas_pool_get_array(state, 'theta_m', theta_m, 2) call mpas_pool_get_array(diag, 'pressure_p', pressure_p) call mpas_pool_get_array(diag, 'rtheta_p', rtheta_p) - !$acc update self(theta_m,pressure_p,rtheta_p) - call exchange_halo_group(domain, 'dynamics:theta_m,pressure_p,rtheta_p') - !$acc update device(theta_m,pressure_p,rtheta_p) + !!$acc update self(theta_m,pressure_p,rtheta_p) + call exchange_halo_group(domain, 'dynamics:theta_m,pressure_p,rtheta_p', .true.) + !!$acc update device(theta_m,pressure_p,rtheta_p) MPAS_ACC_TIMER_STOP('atm_srk3: halo_exchanges + ACC_data_xfer') ! @@ -2627,9 +2628,9 @@ subroutine atm_srk3(domain, dt, itimestep, exchange_halo_group) MPAS_ACC_TIMER_START('atm_srk3: halo_exchanges + ACC_data_xfer') ! need to fill halo for horizontal filter call mpas_pool_get_array(state, 'scalars', scalars_2, 2) - !$acc update self(scalars_2) - call exchange_halo_group(domain, 'dynamics:scalars') - !$acc update device(scalars_2) + !!$acc update self(scalars_2) + call exchange_halo_group(domain, 'dynamics:scalars', .true.) + !!$acc update device(scalars_2) MPAS_ACC_TIMER_STOP('atm_srk3: halo_exchanges + ACC_data_xfer') allocate(scalars_driving(num_scalars,nVertLevels,nCells+1)) @@ -2658,9 +2659,9 @@ subroutine atm_srk3(domain, dt, itimestep, exchange_halo_group) if (rk_step < 3) then MPAS_ACC_TIMER_START('atm_srk3: halo_exchanges + ACC_data_xfer') call mpas_pool_get_array(state, 'scalars', scalars_2, 2) - !$acc update self(scalars_2) - call exchange_halo_group(domain, 'dynamics:scalars') - !$acc update device(scalars_2) + !!$acc update self(scalars_2) + call exchange_halo_group(domain, 'dynamics:scalars', .true.) 
+ !!$acc update device(scalars_2) MPAS_ACC_TIMER_STOP('atm_srk3: halo_exchanges + ACC_data_xfer') end if @@ -2790,9 +2791,9 @@ subroutine atm_srk3(domain, dt, itimestep, exchange_halo_group) MPAS_ACC_TIMER_START('atm_srk3: halo_exchanges + ACC_data_xfer') call mpas_pool_get_array(state, 'scalars', scalars_2, 2) - !$acc update self(scalars_2) - call exchange_halo_group(domain, 'dynamics:scalars') - !$acc update device(scalars_2) + !!$acc update self(scalars_2) + call exchange_halo_group(domain, 'dynamics:scalars', .true.) + !!$acc update device(scalars_2) MPAS_ACC_TIMER_STOP('atm_srk3: halo_exchanges + ACC_data_xfer') allocate(scalars_driving(num_scalars,nVertLevels,nCells+1)) @@ -5128,17 +5129,17 @@ subroutine atm_advance_scalars_mono_work(field_name, block, state, nCells, nEdge !$acc end parallel MPAS_ACC_TIMER_START('atm_advance_scalars_mono [ACC_data_xfer]') - !$acc update self(scalars_old) + !!$acc update self(scalars_old) MPAS_ACC_TIMER_STOP('atm_advance_scalars_mono [ACC_data_xfer]') !$OMP BARRIER !$OMP MASTER - call exchange_halo_group(block % domain, 'dynamics:'//trim(field_name)//'_old') + call exchange_halo_group(block % domain, 'dynamics:'//trim(field_name)//'_old', .true.) !$OMP END MASTER !$OMP BARRIER MPAS_ACC_TIMER_START('atm_advance_scalars_mono [ACC_data_xfer]') - !$acc update device(scalars_old) + !!$acc update device(scalars_old) MPAS_ACC_TIMER_STOP('atm_advance_scalars_mono [ACC_data_xfer]') ! @@ -5535,17 +5536,17 @@ subroutine atm_advance_scalars_mono_work(field_name, block, state, nCells, nEdge ! MPAS_ACC_TIMER_START('atm_advance_scalars_mono [ACC_data_xfer]') - !$acc update self(scale_arr) + !!$acc update self(scale_arr) MPAS_ACC_TIMER_STOP('atm_advance_scalars_mono [ACC_data_xfer]') !$OMP BARRIER !$OMP MASTER - call exchange_halo_group(block % domain, 'dynamics:scale') + call exchange_halo_group(block % domain, 'dynamics:scale', .true.) 
!$OMP END MASTER !$OMP BARRIER MPAS_ACC_TIMER_START('atm_advance_scalars_mono [ACC_data_xfer]') - !$acc update device(scale_arr) + !!$acc update device(scale_arr) MPAS_ACC_TIMER_STOP('atm_advance_scalars_mono [ACC_data_xfer]') !$acc parallel diff --git a/src/core_atmosphere/mpas_atm_core.F b/src/core_atmosphere/mpas_atm_core.F index 248cdf2393..bae45193d5 100644 --- a/src/core_atmosphere/mpas_atm_core.F +++ b/src/core_atmosphere/mpas_atm_core.F @@ -18,12 +18,13 @@ module atm_core ! in a named group ! abstract interface - subroutine halo_exchange_routine(domain, halo_group, ierr) + subroutine halo_exchange_routine(domain, halo_group, withGPUAwareMPI, ierr) use mpas_derived_types, only : domain_type type (domain_type), intent(inout) :: domain character(len=*), intent(in) :: halo_group + logical, intent(in), optional :: withGPUAwareMPI integer, intent(out), optional :: ierr end subroutine halo_exchange_routine diff --git a/src/core_atmosphere/mpas_atm_halos.F b/src/core_atmosphere/mpas_atm_halos.F index df02ee30a2..955f9b5ea0 100644 --- a/src/core_atmosphere/mpas_atm_halos.F +++ b/src/core_atmosphere/mpas_atm_halos.F @@ -15,12 +15,13 @@ module mpas_atm_halos ! in a named group ! abstract interface - subroutine halo_exchange_routine(domain, halo_group, ierr) + subroutine halo_exchange_routine(domain, halo_group, withGPUAwareMPI, ierr) use mpas_derived_types, only : domain_type type (domain_type), intent(inout) :: domain character(len=*), intent(in) :: halo_group + logical, intent(in), optional :: withGPUAwareMPI integer, intent(out), optional :: ierr end subroutine halo_exchange_routine diff --git a/src/core_atmosphere/physics/mpas_atmphys_todynamics.F b/src/core_atmosphere/physics/mpas_atmphys_todynamics.F index 2cb94a7ba5..71f37eb550 100644 --- a/src/core_atmosphere/physics/mpas_atmphys_todynamics.F +++ b/src/core_atmosphere/physics/mpas_atmphys_todynamics.F @@ -56,12 +56,13 @@ module mpas_atmphys_todynamics ! in a named group ! 
abstract interface - subroutine halo_exchange_routine(domain, halo_group, ierr) + subroutine halo_exchange_routine(domain, halo_group, withGPUAwareMPI, ierr) use mpas_derived_types, only : domain_type type (domain_type), intent(inout) :: domain character(len=*), intent(in) :: halo_group + logical, intent(in), optional :: withGPUAwareMPI integer, intent(out), optional :: ierr end subroutine halo_exchange_routine diff --git a/src/framework/mpas_dmpar.F b/src/framework/mpas_dmpar.F index 6d68c0c656..5d9b48d53b 100644 --- a/src/framework/mpas_dmpar.F +++ b/src/framework/mpas_dmpar.F @@ -7448,19 +7448,28 @@ end subroutine mpas_dmpar_exch_group_end_halo_exch!}}} !> exchange is complete. ! !----------------------------------------------------------------------- - subroutine mpas_dmpar_exch_group_full_halo_exch(domain, groupName, iErr)!{{{ + subroutine mpas_dmpar_exch_group_full_halo_exch(domain, groupName, withGPUAwareMPI, iErr)!{{{ type (domain_type), intent(inout) :: domain character (len=*), intent(in) :: groupName + logical, optional, intent(in) :: withGPUAwareMPI integer, optional, intent(out) :: iErr type (mpas_exchange_group), pointer :: exchGroupPtr integer :: nLen + logical :: useGPUAwareMPI if ( present(iErr) ) then iErr = MPAS_DMPAR_NOERR end if + useGPUAwareMPI = .false. + if (present(withGPUAwareMPI)) then + if (withGPUAwareMPI) then + call mpas_log_write(' GPU-aware MPI not implemented in this module', MPAS_LOG_CRIT) + end if + end if + nLen = len_trim(groupName) DMPAR_DEBUG_WRITE(' -- Trying to perform a full exchange for group ' // trim(groupName)) diff --git a/src/framework/mpas_halo.F b/src/framework/mpas_halo.F index 7f96e0d397..ead5e42b35 100644 --- a/src/framework/mpas_halo.F +++ b/src/framework/mpas_halo.F @@ -542,7 +542,7 @@ end subroutine mpas_halo_exch_group_add_field !> exchange group. ! 
!----------------------------------------------------------------------- - subroutine mpas_halo_exch_group_full_halo_exch(domain, groupName, iErr) + subroutine mpas_halo_exch_group_full_halo_exch(domain, groupName, withGPUAwareMPI, iErr) #ifdef MPAS_USE_MPI_F08 use mpi_f08, only : MPI_Datatype, MPI_Comm @@ -576,6 +576,7 @@ subroutine mpas_halo_exch_group_full_halo_exch(domain, groupName, iErr) ! Arguments type (domain_type), intent(inout) :: domain character (len=*), intent(in) :: groupName + logical, optional, intent(in) :: withGPUAwareMPI integer, optional, intent(out) :: iErr ! Local variables @@ -592,6 +593,7 @@ subroutine mpas_halo_exch_group_full_halo_exch(domain, groupName, iErr) integer :: comm #endif integer :: mpi_ierr + logical:: useGPUAwareMPI type (mpas_halo_group), pointer :: group integer, dimension(:), pointer :: compactHaloInfo integer, dimension(:), pointer :: compactSendLists @@ -605,11 +607,17 @@ subroutine mpas_halo_exch_group_full_halo_exch(domain, groupName, iErr) integer, dimension(:,:,:), CONTIGUOUS pointer :: recvListSrc, recvListDst integer, dimension(:), CONTIGUOUS pointer :: unpackOffsets + if (present(iErr)) then iErr = 0 end if + useGPUAwareMPI = .false. + if (present(withGPUAwareMPI)) then + useGPUAwareMPI = withGPUAwareMPI + end if + ! ! Find this halo exhange group in the list of groups @@ -645,7 +653,7 @@ subroutine mpas_halo_exch_group_full_halo_exch(domain, groupName, iErr) #endif rank = group % fields(1) % compactHaloInfo(8) - !$acc data present(group % recvBuf(:), group % sendBuf(:)) + !$acc data present(group % recvBuf(:), group % sendBuf(:)) if(useGPUAwareMPI) ! ! 
Initiate non-blocking MPI receives for all neighbors @@ -654,8 +662,11 @@ subroutine mpas_halo_exch_group_full_halo_exch(domain, groupName, iErr) if (group % groupRecvCounts(i) > 0) then bufstart = group % groupRecvOffsets(i) bufend = group % groupRecvOffsets(i) + group % groupRecvCounts(i) - 1 + + !!$acc update self(group % recvBuf(bufstart:bufend)) if(useGPUAwareMPI) + !TO DO: how do we determine appropriate type here? - !$acc host_data use_device(group % recvBuf) + !$acc host_data use_device(group % recvBuf) if(useGPUAwareMPI) call MPI_Irecv(group % recvBuf(bufstart:bufend), group % groupRecvCounts(i), MPI_REALKIND, & group % groupRecvNeighbors(i), group % groupRecvNeighbors(i), comm, & group % recvRequests(i), mpi_ierr) @@ -698,14 +709,14 @@ subroutine mpas_halo_exch_group_full_halo_exch(domain, groupName, iErr) call mpas_pool_get_array(domain % blocklist % allFields, trim(group % fields(i) % fieldName), & group % fields(i) % r1arr, timeLevel=group % fields(i) % timeLevel) - MPAS_ACC_TIMER_START('halo_exch [ACC_data_xfer]') - !$acc enter data copyin(group % fields(i) % r1arr(:)) - MPAS_ACC_TIMER_STOP('halo_exch [ACC_data_xfer]') + !MPAS_ACC_TIMER_START('halo_exch [ACC_data_xfer]') + !$acc enter data copyin(group % fields(i) % r1arr(:)) if(useGPUAwareMPI) + !MPAS_ACC_TIMER_STOP('halo_exch [ACC_data_xfer]') ! ! Pack send buffer for all neighbors for current field ! call mpas_timer_start('packing_halo_exch') - !$acc parallel default(present) + !$acc parallel default(present) if(useGPUAwareMPI) !$acc loop gang collapse(2) do iEndp = 1, nSendEndpts do iHalo = 1, nHalos @@ -737,13 +748,13 @@ subroutine mpas_halo_exch_group_full_halo_exch(domain, groupName, iErr) ! Present should also cause an attach action. OpenACC Spec2.7 Section 2.7.2 describes 'attach action' ! !$acc data present(group) present(group % fields(i)) present(group % sendBuf(:), group % fields(i) % sendListSrc(:,:,:)) ! 
!$acc data copyin(group % fields(i) % r2arr(:,:)) - MPAS_ACC_TIMER_START('halo_exch [ACC_data_xfer]') - !$acc enter data copyin(group % fields(i) % r2arr(:,:)) - MPAS_ACC_TIMER_STOP('halo_exch [ACC_data_xfer]') + ! MPAS_ACC_TIMER_START('halo_exch [ACC_data_xfer]') + !$acc enter data copyin(group % fields(i) % r2arr(:,:)) if (useGPUAwareMPI) + ! MPAS_ACC_TIMER_STOP('halo_exch [ACC_data_xfer]') call mpas_timer_start('packing_halo_exch') ! Kernels is good enough, use default present to force a run-time error if programmer forgot something - !$acc parallel default(present) + !$acc parallel default(present) if(useGPUAwareMPI) !$acc loop gang collapse(3) do iEndp = 1, nSendEndpts do iHalo = 1, nHalos @@ -768,15 +779,15 @@ subroutine mpas_halo_exch_group_full_halo_exch(domain, groupName, iErr) case (3) call mpas_pool_get_array(domain % blocklist % allFields, trim(group % fields(i) % fieldName), & group % fields(i) % r3arr, group % fields(i) % timeLevel) - MPAS_ACC_TIMER_START('halo_exch [ACC_data_xfer]') - !$acc enter data copyin(group % fields(i) % r3arr(:,:,:)) - MPAS_ACC_TIMER_STOP('halo_exch [ACC_data_xfer]') + ! MPAS_ACC_TIMER_START('halo_exch [ACC_data_xfer]') + !$acc enter data copyin(group % fields(i) % r3arr(:,:,:)) if (useGPUAwareMPI) + ! MPAS_ACC_TIMER_STOP('halo_exch [ACC_data_xfer]') ! ! Pack send buffer for all neighbors for current field ! call mpas_timer_start('packing_halo_exch') - !$acc parallel default(present) + !$acc parallel default(present) if(useGPUAwareMPI) !$acc loop gang collapse(4) do iEndp = 1, nSendEndpts do iHalo = 1, nHalos @@ -809,8 +820,10 @@ subroutine mpas_halo_exch_group_full_halo_exch(domain, groupName, iErr) if (group % groupSendCounts(i) > 0) then bufstart = group % groupSendOffsets(i) bufend = group % groupSendOffsets(i) + group % groupSendCounts(i) - 1 + + !!$acc update self(group % sendBuf(bufstart:bufend)) if(.not. useGPUAwareMPI) !TO DO: how do we determine appropriate type here? 
- !$acc host_data use_device(group % sendBuf) + !$acc host_data use_device(group % sendBuf) if(useGPUAwareMPI) call MPI_Isend(group % sendBuf(bufstart:bufend), group % groupSendCounts(i), MPI_REALKIND, & group % groupSendNeighbors(i), rank, comm, & group % sendRequests(i), mpi_ierr) @@ -860,6 +873,10 @@ subroutine mpas_halo_exch_group_full_halo_exch(domain, groupName, iErr) dim1 = compactHaloInfo(2) dim2 = compactHaloInfo(3) + bufstart = group % groupSendOffsets(iNeighbor) + bufend = group % groupSendOffsets(iNeighbor) + group % groupSendCounts(iNeighbor) - 1 + !!!$acc update device(group % recvBuf(bufstart:bufend)) if(.not. useGPUAwareMPI) + select case (group % fields(i) % nDims) ! @@ -870,7 +887,7 @@ subroutine mpas_halo_exch_group_full_halo_exch(domain, groupName, iErr) ! Unpack recv buffer from all neighbors for current field ! call mpas_timer_start('unpacking_halo_exch') - !$acc parallel default(present) + !$acc parallel default(present) if(useGPUAwareMPI) !$acc loop gang do iHalo = 1, nHalos !$acc loop vector @@ -893,7 +910,7 @@ subroutine mpas_halo_exch_group_full_halo_exch(domain, groupName, iErr) ! Unpack recv buffer from all neighbors for current field ! call mpas_timer_start('unpacking_halo_exch') - !$acc parallel default(present) + !$acc parallel default(present) if(useGPUAwareMPI) !$acc loop gang do iHalo = 1, nHalos !$acc loop worker @@ -919,7 +936,7 @@ subroutine mpas_halo_exch_group_full_halo_exch(domain, groupName, iErr) ! Unpack recv buffer from all neighbors for current field ! 
call mpas_timer_start('unpacking_halo_exch') - !$acc parallel default(present) + !$acc parallel default(present) if(useGPUAwareMPI) !$acc loop gang collapse(2) do iHalo = 1, nHalos do j = 1, maxNRecvList @@ -949,11 +966,11 @@ subroutine mpas_halo_exch_group_full_halo_exch(domain, groupName, iErr) if (group % fields(i) % fieldType == MPAS_HALO_REAL) then select case (group % fields(i) % nDims) case (1) - !$acc update self(group % fields(i) % r1arr(:)) + !$acc update self(group % fields(i) % r1arr(:)) if (useGPUAwareMPI) case (2) - !$acc update self(group % fields(i) % r2arr(:,:)) + !$acc update self(group % fields(i) % r2arr(:,:)) if (useGPUAwareMPI) case (3) - !$acc update self(group % fields(i) % r3arr(:,:,:)) + !$acc update self(group % fields(i) % r3arr(:,:,:)) if (useGPUAwareMPI) end select end if end do @@ -962,11 +979,11 @@ subroutine mpas_halo_exch_group_full_halo_exch(domain, groupName, iErr) if (group % fields(i) % fieldType == MPAS_HALO_REAL) then select case (group % fields(i) % nDims) case (1) - !$acc exit data delete(group % fields(i) % r1arr(:)) + !$acc exit data delete(group % fields(i) % r1arr(:)) if (useGPUAwareMPI) case (2) - !$acc exit data delete(group % fields(i) % r2arr(:,:)) + !$acc exit data delete(group % fields(i) % r2arr(:,:)) if (useGPUAwareMPI) case (3) - !$acc exit data delete(group % fields(i) % r3arr(:,:,:)) + !$acc exit data delete(group % fields(i) % r3arr(:,:,:)) if (useGPUAwareMPI) end select end if end do From 5847e999d0bc7b3b3670de0255f558929f410526 Mon Sep 17 00:00:00 2001 From: Abishek Gopal Date: Wed, 13 Aug 2025 09:27:06 -0600 Subject: [PATCH 141/146] u_2 and w_2 need to be copied out after dynamics + cleanup --- src/framework/mpas_halo.F | 68 +++++++++------------------------------ 1 file changed, 16 insertions(+), 52 deletions(-) diff --git a/src/framework/mpas_halo.F b/src/framework/mpas_halo.F index ead5e42b35..034e222576 100644 --- a/src/framework/mpas_halo.F +++ b/src/framework/mpas_halo.F @@ -556,6 +556,7 @@ 
subroutine mpas_halo_exch_group_full_halo_exch(domain, groupName, withGPUAwareMP use mpas_pool_routines, only : mpas_pool_get_array use mpas_log, only : mpas_log_write use mpas_timer, only : mpas_timer_start, mpas_timer_stop + use openacc ! Parameters @@ -636,6 +637,7 @@ subroutine mpas_halo_exch_group_full_halo_exch(domain, groupName, withGPUAwareMP messageType=MPAS_LOG_CRIT) end if + ! Logic to return early if there no neighbors to send to if ( group% nGroupSendNeighbors <= 0 ) then return end if @@ -662,9 +664,6 @@ subroutine mpas_halo_exch_group_full_halo_exch(domain, groupName, withGPUAwareMP if (group % groupRecvCounts(i) > 0) then bufstart = group % groupRecvOffsets(i) bufend = group % groupRecvOffsets(i) + group % groupRecvCounts(i) - 1 - - !!$acc update self(group % recvBuf(bufstart:bufend)) if(useGPUAwareMPI) - !TO DO: how do we determine appropriate type here? !$acc host_data use_device(group % recvBuf) if(useGPUAwareMPI) call MPI_Irecv(group % recvBuf(bufstart:bufend), group % groupRecvCounts(i), MPI_REALKIND, & @@ -708,10 +707,10 @@ subroutine mpas_halo_exch_group_full_halo_exch(domain, groupName, withGPUAwareMP case (1) call mpas_pool_get_array(domain % blocklist % allFields, trim(group % fields(i) % fieldName), & group % fields(i) % r1arr, timeLevel=group % fields(i) % timeLevel) - - !MPAS_ACC_TIMER_START('halo_exch [ACC_data_xfer]') - !$acc enter data copyin(group % fields(i) % r1arr(:)) if(useGPUAwareMPI) - !MPAS_ACC_TIMER_STOP('halo_exch [ACC_data_xfer]') + + if( useGPUAwareMPI ) then + call acc_attach(group % fields(i) % r1arr) + end if ! ! Pack send buffer for all neighbors for current field ! @@ -742,15 +741,15 @@ subroutine mpas_halo_exch_group_full_halo_exch(domain, groupName, withGPUAwareMP ! ! Pack send buffer for all neighbors for current field - ! - + ! ! Use data regions for specificity and so the reference or attachment counters are easier to make sense of ! Present should also cause an attach action. 
OpenACC Spec2.7 Section 2.7.2 describes 'attach action' ! !$acc data present(group) present(group % fields(i)) present(group % sendBuf(:), group % fields(i) % sendListSrc(:,:,:)) - ! !$acc data copyin(group % fields(i) % r2arr(:,:)) - ! MPAS_ACC_TIMER_START('halo_exch [ACC_data_xfer]') - !$acc enter data copyin(group % fields(i) % r2arr(:,:)) if (useGPUAwareMPI) - ! MPAS_ACC_TIMER_STOP('halo_exch [ACC_data_xfer]') + + if( useGPUAwareMPI ) then + call acc_attach(group % fields(i) % r2arr) + end if + call mpas_timer_start('packing_halo_exch') ! Kernels is good enough, use default present to force a run-time error if programmer forgot something @@ -778,10 +777,10 @@ subroutine mpas_halo_exch_group_full_halo_exch(domain, groupName, withGPUAwareMP ! case (3) call mpas_pool_get_array(domain % blocklist % allFields, trim(group % fields(i) % fieldName), & - group % fields(i) % r3arr, group % fields(i) % timeLevel) - ! MPAS_ACC_TIMER_START('halo_exch [ACC_data_xfer]') - !$acc enter data copyin(group % fields(i) % r3arr(:,:,:)) if (useGPUAwareMPI) - ! MPAS_ACC_TIMER_STOP('halo_exch [ACC_data_xfer]') + group % fields(i) % r3arr, group % fields(i) % timeLevel) + if( useGPUAwareMPI ) then + call acc_attach(group % fields(i) % r3arr) + end if ! ! Pack send buffer for all neighbors for current field @@ -820,8 +819,6 @@ subroutine mpas_halo_exch_group_full_halo_exch(domain, groupName, withGPUAwareMP if (group % groupSendCounts(i) > 0) then bufstart = group % groupSendOffsets(i) bufend = group % groupSendOffsets(i) + group % groupSendCounts(i) - 1 - - !!$acc update self(group % sendBuf(bufstart:bufend)) if(.not. useGPUAwareMPI) !TO DO: how do we determine appropriate type here? 
!$acc host_data use_device(group % sendBuf) if(useGPUAwareMPI) call MPI_Isend(group % sendBuf(bufstart:bufend), group % groupSendCounts(i), MPI_REALKIND, & @@ -873,10 +870,6 @@ subroutine mpas_halo_exch_group_full_halo_exch(domain, groupName, withGPUAwareMP dim1 = compactHaloInfo(2) dim2 = compactHaloInfo(3) - bufstart = group % groupSendOffsets(iNeighbor) - bufend = group % groupSendOffsets(iNeighbor) + group % groupSendCounts(iNeighbor) - 1 - !!!$acc update device(group % recvBuf(bufstart:bufend)) if(.not. useGPUAwareMPI) - select case (group % fields(i) % nDims) ! @@ -960,37 +953,8 @@ subroutine mpas_halo_exch_group_full_halo_exch(domain, groupName, withGPUAwareMP end if end do end do - - MPAS_ACC_TIMER_START('halo_exch [ACC_data_xfer]') - do i = 1, group % nFields - if (group % fields(i) % fieldType == MPAS_HALO_REAL) then - select case (group % fields(i) % nDims) - case (1) - !$acc update self(group % fields(i) % r1arr(:)) if (useGPUAwareMPI) - case (2) - !$acc update self(group % fields(i) % r2arr(:,:)) if (useGPUAwareMPI) - case (3) - !$acc update self(group % fields(i) % r3arr(:,:,:)) if (useGPUAwareMPI) - end select - end if - end do - - do i = 1, group % nFields - if (group % fields(i) % fieldType == MPAS_HALO_REAL) then - select case (group % fields(i) % nDims) - case (1) - !$acc exit data delete(group % fields(i) % r1arr(:)) if (useGPUAwareMPI) - case (2) - !$acc exit data delete(group % fields(i) % r2arr(:,:)) if (useGPUAwareMPI) - case (3) - !$acc exit data delete(group % fields(i) % r3arr(:,:,:)) if (useGPUAwareMPI) - end select - end if - end do - ! For the present(group % recvBuf(:), group % sendBuf(:)) !$acc end data - MPAS_ACC_TIMER_STOP('halo_exch [ACC_data_xfer]') ! ! 
Nullify array pointers - not necessary for correctness, but helpful when debugging From 321c2608ab9bc7f16112fee47aa165ef0bfbe122 Mon Sep 17 00:00:00 2001 From: Abishek Gopal Date: Wed, 13 Aug 2025 10:05:02 -0600 Subject: [PATCH 142/146] using attach in a directive instead of the acc_attach library call --- src/framework/mpas_halo.F | 28 ++++------------------------ 1 file changed, 4 insertions(+), 24 deletions(-) diff --git a/src/framework/mpas_halo.F b/src/framework/mpas_halo.F index 034e222576..8048661acb 100644 --- a/src/framework/mpas_halo.F +++ b/src/framework/mpas_halo.F @@ -17,15 +17,6 @@ !> communicating the halos of all fields in a group. ! !----------------------------------------------------------------------- - -#ifdef MPAS_OPENACC -#define MPAS_ACC_TIMER_START(X) call mpas_timer_start(X) -#define MPAS_ACC_TIMER_STOP(X) call mpas_timer_stop(X) -#else -#define MPAS_ACC_TIMER_START(X) -#define MPAS_ACC_TIMER_STOP(X) -#endif - module mpas_halo implicit none @@ -556,8 +547,6 @@ subroutine mpas_halo_exch_group_full_halo_exch(domain, groupName, withGPUAwareMP use mpas_pool_routines, only : mpas_pool_get_array use mpas_log, only : mpas_log_write use mpas_timer, only : mpas_timer_start, mpas_timer_stop - use openacc - ! Parameters #ifdef MPAS_USE_MPI_F08 @@ -607,7 +596,6 @@ subroutine mpas_halo_exch_group_full_halo_exch(domain, groupName, withGPUAwareMP integer :: maxNRecvList integer, dimension(:,:,:), CONTIGUOUS pointer :: recvListSrc, recvListDst integer, dimension(:), CONTIGUOUS pointer :: unpackOffsets - if (present(iErr)) then @@ -618,7 +606,6 @@ subroutine mpas_halo_exch_group_full_halo_exch(domain, groupName, withGPUAwareMP if (present(withGPUAwareMPI)) then useGPUAwareMPI = withGPUAwareMPI end if - ! ! 
Find this halo exhange group in the list of groups @@ -708,9 +695,7 @@ subroutine mpas_halo_exch_group_full_halo_exch(domain, groupName, withGPUAwareMP call mpas_pool_get_array(domain % blocklist % allFields, trim(group % fields(i) % fieldName), & group % fields(i) % r1arr, timeLevel=group % fields(i) % timeLevel) - if( useGPUAwareMPI ) then - call acc_attach(group % fields(i) % r1arr) - end if + !$acc enter data attach(group % fields(i) % r1arr) if(useGPUAwareMPI) ! ! Pack send buffer for all neighbors for current field ! @@ -738,7 +723,8 @@ subroutine mpas_halo_exch_group_full_halo_exch(domain, groupName, withGPUAwareMP case (2) call mpas_pool_get_array(domain % blocklist % allFields, trim(group % fields(i) % fieldName), & group % fields(i) % r2arr, timeLevel=group % fields(i) % timeLevel) - + + !$acc enter data attach(group % fields(i) % r2arr) if(useGPUAwareMPI) ! ! Pack send buffer for all neighbors for current field ! @@ -746,10 +732,6 @@ subroutine mpas_halo_exch_group_full_halo_exch(domain, groupName, withGPUAwareMP ! Present should also cause an attach action. OpenACC Spec2.7 Section 2.7.2 describes 'attach action' ! !$acc data present(group) present(group % fields(i)) present(group % sendBuf(:), group % fields(i) % sendListSrc(:,:,:)) - if( useGPUAwareMPI ) then - call acc_attach(group % fields(i) % r2arr) - end if - call mpas_timer_start('packing_halo_exch') ! Kernels is good enough, use default present to force a run-time error if programmer forgot something @@ -778,10 +760,8 @@ subroutine mpas_halo_exch_group_full_halo_exch(domain, groupName, withGPUAwareMP case (3) call mpas_pool_get_array(domain % blocklist % allFields, trim(group % fields(i) % fieldName), & group % fields(i) % r3arr, group % fields(i) % timeLevel) - if( useGPUAwareMPI ) then - call acc_attach(group % fields(i) % r3arr) - end if + !$acc enter data attach(group % fields(i) % r3arr) if(useGPUAwareMPI) ! ! Pack send buffer for all neighbors for current field ! 
From 28da82b5e9e4bc0422411445578638941bddfba2 Mon Sep 17 00:00:00 2001 From: Abishek Gopal Date: Wed, 13 Aug 2025 10:45:13 -0600 Subject: [PATCH 143/146] Using attach clause in parallel region will also auto detach at end of region --- src/framework/mpas_halo.F | 17 +++++++---------- 1 file changed, 7 insertions(+), 10 deletions(-) diff --git a/src/framework/mpas_halo.F b/src/framework/mpas_halo.F index 8048661acb..e88dd5d021 100644 --- a/src/framework/mpas_halo.F +++ b/src/framework/mpas_halo.F @@ -695,12 +695,11 @@ subroutine mpas_halo_exch_group_full_halo_exch(domain, groupName, withGPUAwareMP call mpas_pool_get_array(domain % blocklist % allFields, trim(group % fields(i) % fieldName), & group % fields(i) % r1arr, timeLevel=group % fields(i) % timeLevel) - !$acc enter data attach(group % fields(i) % r1arr) if(useGPUAwareMPI) ! ! Pack send buffer for all neighbors for current field ! call mpas_timer_start('packing_halo_exch') - !$acc parallel default(present) if(useGPUAwareMPI) + !$acc parallel default(present) attach(group % fields(i) % r1arr) if(useGPUAwareMPI) !$acc loop gang collapse(2) do iEndp = 1, nSendEndpts do iHalo = 1, nHalos @@ -724,7 +723,6 @@ subroutine mpas_halo_exch_group_full_halo_exch(domain, groupName, withGPUAwareMP call mpas_pool_get_array(domain % blocklist % allFields, trim(group % fields(i) % fieldName), & group % fields(i) % r2arr, timeLevel=group % fields(i) % timeLevel) - !$acc enter data attach(group % fields(i) % r2arr) if(useGPUAwareMPI) ! ! Pack send buffer for all neighbors for current field ! @@ -735,7 +733,7 @@ subroutine mpas_halo_exch_group_full_halo_exch(domain, groupName, withGPUAwareMP call mpas_timer_start('packing_halo_exch') ! 
Kernels is good enough, use default present to force a run-time error if programmer forgot something - !$acc parallel default(present) if(useGPUAwareMPI) + !$acc parallel default(present) attach(group % fields(i) % r2arr) if(useGPUAwareMPI) !$acc loop gang collapse(3) do iEndp = 1, nSendEndpts do iHalo = 1, nHalos @@ -760,13 +758,12 @@ subroutine mpas_halo_exch_group_full_halo_exch(domain, groupName, withGPUAwareMP case (3) call mpas_pool_get_array(domain % blocklist % allFields, trim(group % fields(i) % fieldName), & group % fields(i) % r3arr, group % fields(i) % timeLevel) - - !$acc enter data attach(group % fields(i) % r3arr) if(useGPUAwareMPI) + ! ! Pack send buffer for all neighbors for current field ! call mpas_timer_start('packing_halo_exch') - !$acc parallel default(present) if(useGPUAwareMPI) + !$acc parallel default(present) attach(group % fields(i) % r3arr) if(useGPUAwareMPI) !$acc loop gang collapse(4) do iEndp = 1, nSendEndpts do iHalo = 1, nHalos @@ -860,7 +857,7 @@ subroutine mpas_halo_exch_group_full_halo_exch(domain, groupName, withGPUAwareMP ! Unpack recv buffer from all neighbors for current field ! call mpas_timer_start('unpacking_halo_exch') - !$acc parallel default(present) if(useGPUAwareMPI) + !$acc parallel default(present) attach(group % fields(i) % r1arr) if(useGPUAwareMPI) !$acc loop gang do iHalo = 1, nHalos !$acc loop vector @@ -883,7 +880,7 @@ subroutine mpas_halo_exch_group_full_halo_exch(domain, groupName, withGPUAwareMP ! Unpack recv buffer from all neighbors for current field ! call mpas_timer_start('unpacking_halo_exch') - !$acc parallel default(present) if(useGPUAwareMPI) + !$acc parallel default(present) attach(group % fields(i) % r2arr) if(useGPUAwareMPI) !$acc loop gang do iHalo = 1, nHalos !$acc loop worker @@ -909,7 +906,7 @@ subroutine mpas_halo_exch_group_full_halo_exch(domain, groupName, withGPUAwareMP ! Unpack recv buffer from all neighbors for current field ! 
call mpas_timer_start('unpacking_halo_exch') - !$acc parallel default(present) if(useGPUAwareMPI) + !$acc parallel default(present) attach(group % fields(i) % r3arr) if(useGPUAwareMPI) !$acc loop gang collapse(2) do iHalo = 1, nHalos do j = 1, maxNRecvList From 9c344d74774737320ee7731b6bfb0489ef4a93af Mon Sep 17 00:00:00 2001 From: Abishek Gopal Date: Wed, 13 Aug 2025 11:43:26 -0600 Subject: [PATCH 144/146] Reverting the indexing in loops and comment cleanup --- src/framework/mpas_halo.F | 39 ++++++++++++++++----------------------- 1 file changed, 16 insertions(+), 23 deletions(-) diff --git a/src/framework/mpas_halo.F b/src/framework/mpas_halo.F index e88dd5d021..8877d6330d 100644 --- a/src/framework/mpas_halo.F +++ b/src/framework/mpas_halo.F @@ -280,6 +280,7 @@ subroutine mpas_halo_exch_group_complete(domain, groupName, iErr) call refactor_lists(domain, groupName, iErr) + ! Logic to return early if there are no neighbors to send to if ( newGroup% nGroupSendNeighbors <=0 ) then return end if @@ -571,7 +572,6 @@ subroutine mpas_halo_exch_group_full_halo_exch(domain, groupName, withGPUAwareMP ! Local variables integer :: i, bufstart, bufend - integer :: idxBuf, idxArr integer :: dim1, dim2 integer :: i1, i2, j, iNeighbor, iReq integer :: iHalo, iEndp @@ -624,7 +624,7 @@ subroutine mpas_halo_exch_group_full_halo_exch(domain, groupName, withGPUAwareMP messageType=MPAS_LOG_CRIT) end if - ! Logic to return early if there no neighbors to send to + ! 
Logic to return early if there are no neighbors to send to if ( group% nGroupSendNeighbors <= 0 ) then return end if @@ -706,9 +706,8 @@ subroutine mpas_halo_exch_group_full_halo_exch(domain, groupName, withGPUAwareMP !$acc loop vector do j = 1, maxNSendList if (j <= nSendLists(iHalo,iEndp)) then - idxBuf = packOffsets(iEndp) + sendListDst(j,iHalo,iEndp) - idxArr = sendListSrc(j,iHalo,iEndp) - group % sendBuf(idxBuf) = group % fields(i) % r1arr(idxArr) + group % sendBuf(packOffsets(iEndp) + sendListDst(j,iHalo,iEndp)) = & + group % fields(i) % r1arr(sendListSrc(j,iHalo,iEndp)) end if end do end do @@ -732,7 +731,6 @@ subroutine mpas_halo_exch_group_full_halo_exch(domain, groupName, withGPUAwareMP call mpas_timer_start('packing_halo_exch') - ! Kernels is good enough, use default present to force a run-time error if programmer forgot something !$acc parallel default(present) attach(group % fields(i) % r2arr) if(useGPUAwareMPI) !$acc loop gang collapse(3) do iEndp = 1, nSendEndpts @@ -741,9 +739,8 @@ subroutine mpas_halo_exch_group_full_halo_exch(domain, groupName, withGPUAwareMP !$acc loop vector do i1 = 1, dim1 if (j <= nSendLists(iHalo,iEndp)) then - idxBuf = packOffsets(iEndp) + dim1 * (sendListDst(j,iHalo,iEndp) - 1) + i1 - idxArr = sendListSrc(j,iHalo,iEndp) - group % sendBuf(idxBuf) = group % fields(i) % r2arr(i1,idxArr) + group % sendBuf(packOffsets(iEndp) + dim1 * (sendListDst(j,iHalo,iEndp) - 1) + i1) = & + group % fields(i) % r2arr(i1, sendListSrc(j,iHalo,iEndp)) end if end do end do @@ -772,10 +769,9 @@ subroutine mpas_halo_exch_group_full_halo_exch(domain, groupName, withGPUAwareMP !$acc loop vector do i1 = 1, dim1 if (j <= nSendLists(iHalo,iEndp)) then - idxBuf = packOffsets(iEndp) + dim1*dim2*(sendListDst(j,iHalo,iEndp) - 1) & - + dim1*(i2-1) + i1 - idxArr = sendListSrc(j,iHalo,iEndp) - group % sendBuf(idxBuf) = group % fields(i) % r3arr(i1,i2,idxArr) + group % sendBuf(packOffsets(iEndp) + dim1*dim2*(sendListDst(j,iHalo,iEndp) - 1) & + + dim1*(i2-1) + 
i1) = & + group % fields(i) % r3arr(i1, i2, sendListSrc(j,iHalo,iEndp)) end if end do end do @@ -863,9 +859,8 @@ subroutine mpas_halo_exch_group_full_halo_exch(domain, groupName, withGPUAwareMP !$acc loop vector do j = 1, maxNRecvList if (j <= nRecvLists(iHalo,iEndp)) then - idxArr = recvListDst(j,iHalo,iEndp) - idxBuf = unpackOffsets(iEndp) + recvListSrc(j,iHalo,iEndp) - group % fields(i) % r1arr(idxArr) = group % recvBuf(idxBuf) + group % fields(i) % r1arr(recvListDst(j,iHalo,iEndp)) = & + group % recvBuf(unpackOffsets(iEndp) + recvListSrc(j,iHalo,iEndp)) end if end do end do @@ -888,9 +883,8 @@ subroutine mpas_halo_exch_group_full_halo_exch(domain, groupName, withGPUAwareMP !$acc loop vector do i1 = 1, dim1 if (j <= nRecvLists(iHalo,iEndp)) then - idxArr = recvListDst(j,iHalo,iEndp) - idxBuf = unpackOffsets(iEndp) + dim1 * (recvListSrc(j,iHalo,iEndp) - 1) + i1 - group % fields(i) % r2arr(i1, idxArr) = group % recvBuf(idxBuf) + group % fields(i) % r2arr(i1, recvListDst(j,iHalo,iEndp)) = & + group % recvBuf(unpackOffsets(iEndp) + dim1 * (recvListSrc(j,iHalo,iEndp) - 1) + i1) end if end do end do @@ -914,10 +908,9 @@ subroutine mpas_halo_exch_group_full_halo_exch(domain, groupName, withGPUAwareMP do i2 = 1, dim2 do i1 = 1, dim1 if (j <= nRecvLists(iHalo,iEndp)) then - idxArr = recvListDst(j,iHalo,iEndp) - idxBuf = unpackOffsets(iEndp) + dim1*dim2*(recvListSrc(j,iHalo,iEndp) - 1) & - + dim1*(i2-1) + i1 - group % fields(i) % r3arr(i1, i2, idxArr) = group % recvBuf(idxBuf) + group % fields(i) % r3arr(i1, i2, recvListDst(j,iHalo,iEndp)) = & + group % recvBuf(unpackOffsets(iEndp) + dim1*dim2*(recvListSrc(j,iHalo,iEndp) - 1) & + + dim1*(i2-1) + i1) end if end do end do From b17da1582cb7adc0e724bf5efacfbd5f78e4403d Mon Sep 17 00:00:00 2001 From: Abishek Gopal Date: Wed, 13 Aug 2025 15:48:42 -0600 Subject: [PATCH 145/146] New namelist option to switch on or off GPU-Aware MPI Introducing a new namelist option under development, config_gpu_aware_mpi, which will control 
whether the OpenACC run of MPAS on GPUs will use GPU-aware MPI or do a device<->host update of variables around the call to a purely CPU- based halo exchange. Note: This feature is not available to use when config_halo_exch_method is set to 'mpas_dmpar' --- src/core_atmosphere/Registry.xml | 4 + .../dynamics/mpas_atm_time_integration.F | 123 +++++++++--------- src/core_atmosphere/mpas_atm_halos.F | 5 + 3 files changed, 73 insertions(+), 59 deletions(-) diff --git a/src/core_atmosphere/Registry.xml b/src/core_atmosphere/Registry.xml index a0a94ad03c..9bfc99f5c8 100644 --- a/src/core_atmosphere/Registry.xml +++ b/src/core_atmosphere/Registry.xml @@ -422,6 +422,10 @@ units="-" description="Method to use for exchanging halos" possible_values="`mpas_dmpar', `mpas_halo'"/> + diff --git a/src/core_atmosphere/dynamics/mpas_atm_time_integration.F b/src/core_atmosphere/dynamics/mpas_atm_time_integration.F index 8181c2ad84..786f3a3416 100644 --- a/src/core_atmosphere/dynamics/mpas_atm_time_integration.F +++ b/src/core_atmosphere/dynamics/mpas_atm_time_integration.F @@ -1845,6 +1845,7 @@ subroutine atm_srk3(domain, dt, itimestep, exchange_halo_group) logical, pointer :: config_scalar_advection logical, pointer :: config_positive_definite logical, pointer :: config_monotonic + logical, pointer :: config_gpu_aware_mpi character (len=StrKIND), pointer :: config_microp_scheme character (len=StrKIND), pointer :: config_convection_scheme @@ -1889,6 +1890,7 @@ subroutine atm_srk3(domain, dt, itimestep, exchange_halo_group) call mpas_pool_get_config(block % configs, 'config_scalar_advection', config_scalar_advection) call mpas_pool_get_config(block % configs, 'config_positive_definite', config_positive_definite) call mpas_pool_get_config(block % configs, 'config_monotonic', config_monotonic) + call mpas_pool_get_config(block % configs, 'config_gpu_aware_mpi', config_gpu_aware_mpi) call mpas_pool_get_config(block % configs, 'config_IAU_option', config_IAU_option) ! 
config variables for dynamics-transport splitting, WCS 18 November 2014 call mpas_pool_get_config(block % configs, 'config_split_dynamics_transport', config_split_dynamics_transport) @@ -2042,9 +2044,9 @@ subroutine atm_srk3(domain, dt, itimestep, exchange_halo_group) call mpas_pool_get_array(state, 'scalars', scalars_1, 1) call mpas_pool_get_array(diag, 'pressure_p', pressure_p) call mpas_pool_get_array(diag, 'rtheta_p', rtheta_p) - !!$acc update self(theta_m,scalars_1,pressure_p,rtheta_p) - call exchange_halo_group(domain, 'dynamics:theta_m,scalars,pressure_p,rtheta_p', .true.) - !!$acc update device(theta_m,scalars_1,pressure_p,rtheta_p) + !$acc update self(theta_m,scalars_1,pressure_p,rtheta_p) if (.not. config_gpu_aware_mpi) + call exchange_halo_group(domain, 'dynamics:theta_m,scalars,pressure_p,rtheta_p', config_gpu_aware_mpi) + !$acc update device(theta_m,scalars_1,pressure_p,rtheta_p) if (.not. config_gpu_aware_mpi) MPAS_ACC_TIMER_STOP('atm_srk3: halo_exchanges + ACC_data_xfer') call mpas_timer_start('atm_rk_integration_setup') @@ -2133,9 +2135,9 @@ subroutine atm_srk3(domain, dt, itimestep, exchange_halo_group) MPAS_ACC_TIMER_START('atm_srk3: halo_exchanges + ACC_data_xfer') call mpas_pool_get_array(diag, 'exner', exner) - !!$acc update self(exner) - call exchange_halo_group(domain, 'dynamics:exner', .true.) - !!$acc update device(exner) + !$acc update self(exner) if (.not. config_gpu_aware_mpi) + call exchange_halo_group(domain, 'dynamics:exner', config_gpu_aware_mpi) + !$acc update device(exner) if (.not. config_gpu_aware_mpi) MPAS_ACC_TIMER_STOP('atm_srk3: halo_exchanges + ACC_data_xfer') !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! @@ -2217,9 +2219,9 @@ subroutine atm_srk3(domain, dt, itimestep, exchange_halo_group) ! 
tend_u MPAS_ACC_TIMER_START('atm_srk3: halo_exchanges + ACC_data_xfer') call mpas_pool_get_array(tend, 'u', tend_u) - !!$acc update self(tend_u) - call exchange_halo_group(domain, 'dynamics:tend_u', .true.) - !!$acc update device(tend_u) + !$acc update self(tend_u) if (.not. config_gpu_aware_mpi) + call exchange_halo_group(domain, 'dynamics:tend_u', config_gpu_aware_mpi) + !$acc update device(tend_u) if (.not. config_gpu_aware_mpi) MPAS_ACC_TIMER_STOP('atm_srk3: halo_exchanges + ACC_data_xfer') call mpas_timer_start('small_step_prep') @@ -2298,9 +2300,9 @@ subroutine atm_srk3(domain, dt, itimestep, exchange_halo_group) MPAS_ACC_TIMER_START('atm_srk3: halo_exchanges + ACC_data_xfer') call mpas_pool_get_array(diag, 'rho_pp', rho_pp) - !!$acc update self(rho_pp) - call exchange_halo_group(domain, 'dynamics:rho_pp', .true.) - !!$acc update device(rho_pp) + !$acc update self(rho_pp) if (.not. config_gpu_aware_mpi) + call exchange_halo_group(domain, 'dynamics:rho_pp', config_gpu_aware_mpi) + !$acc update device(rho_pp) if (.not. config_gpu_aware_mpi) MPAS_ACC_TIMER_STOP('atm_srk3: halo_exchanges + ACC_data_xfer') call mpas_timer_start('atm_advance_acoustic_step') @@ -2325,9 +2327,9 @@ subroutine atm_srk3(domain, dt, itimestep, exchange_halo_group) ! This is the only communications needed during the acoustic steps because we solve for u on all edges of owned cells MPAS_ACC_TIMER_START('atm_srk3: halo_exchanges + ACC_data_xfer') call mpas_pool_get_array(diag, 'rtheta_pp', rtheta_pp) - !!$acc update self(rtheta_pp) - call exchange_halo_group(domain, 'dynamics:rtheta_pp', .true.) - !!$acc update device(rtheta_pp) + !$acc update self(rtheta_pp) if (.not. config_gpu_aware_mpi) + call exchange_halo_group(domain, 'dynamics:rtheta_pp', config_gpu_aware_mpi) + !$acc update device(rtheta_pp) if (.not. config_gpu_aware_mpi) MPAS_ACC_TIMER_STOP('atm_srk3: halo_exchanges + ACC_data_xfer') ! 
complete update of horizontal momentum by including 3d divergence damping at the end of the acoustic step @@ -2353,9 +2355,9 @@ subroutine atm_srk3(domain, dt, itimestep, exchange_halo_group) call mpas_pool_get_array(diag, 'rw_p', rw_p) call mpas_pool_get_array(diag, 'rho_pp', rho_pp) call mpas_pool_get_array(diag, 'rtheta_pp', rtheta_pp) - !!$acc update self(rw_p,ru_p,rho_pp,rtheta_pp) - call exchange_halo_group(domain, 'dynamics:rw_p,ru_p,rho_pp,rtheta_pp', .true.) - !!$acc update device(rw_p,ru_p,rho_pp,rtheta_pp) + !$acc update self(rw_p,ru_p,rho_pp,rtheta_pp) if (.not. config_gpu_aware_mpi) + call exchange_halo_group(domain, 'dynamics:rw_p,ru_p,rho_pp,rtheta_pp', config_gpu_aware_mpi) + !$acc update device(rw_p,ru_p,rho_pp,rtheta_pp) if (.not. config_gpu_aware_mpi) MPAS_ACC_TIMER_STOP('atm_srk3: halo_exchanges + ACC_data_xfer') call mpas_timer_start('atm_recover_large_step_variables') @@ -2426,14 +2428,14 @@ subroutine atm_srk3(domain, dt, itimestep, exchange_halo_group) MPAS_ACC_TIMER_START('atm_srk3: halo_exchanges + ACC_data_xfer') call mpas_pool_get_array(state, 'u', u, 2) - !!$acc update self(u) + !$acc update self(u) if (.not. config_gpu_aware_mpi) ! u if (config_apply_lbcs) then - call exchange_halo_group(domain, 'dynamics:u_123', .true.) + call exchange_halo_group(domain, 'dynamics:u_123', config_gpu_aware_mpi) else - call exchange_halo_group(domain, 'dynamics:u_3', .true.) + call exchange_halo_group(domain, 'dynamics:u_3', config_gpu_aware_mpi) end if - !!$acc update device(u) + !$acc update device(u) if (.not. config_gpu_aware_mpi) MPAS_ACC_TIMER_STOP('atm_srk3: halo_exchanges + ACC_data_xfer') ! scalar advection: RK3 scheme of Skamarock and Gassmann (2011). @@ -2442,15 +2444,15 @@ subroutine atm_srk3(domain, dt, itimestep, exchange_halo_group) if (config_scalar_advection .and. (.not. 
config_split_dynamics_transport) ) then call advance_scalars('scalars', domain, rk_step, rk_timestep, config_monotonic, config_positive_definite, & - config_time_integration_order, config_split_dynamics_transport, exchange_halo_group) + config_time_integration_order, config_split_dynamics_transport, config_gpu_aware_mpi, exchange_halo_group) if (config_apply_lbcs) then ! adjust boundary tendencies for regional_MPAS scalar transport MPAS_ACC_TIMER_START('atm_srk3: halo_exchanges + ACC_data_xfer') call mpas_pool_get_array(state, 'scalars', scalars_2, 2) - !!$acc update self(scalars_2) - call exchange_halo_group(domain, 'dynamics:scalars', .true.) - !!$acc update device(scalars_2) + !$acc update self(scalars_2) if (.not. config_gpu_aware_mpi) + call exchange_halo_group(domain, 'dynamics:scalars', config_gpu_aware_mpi) + !$acc update device(scalars_2) if (.not. config_gpu_aware_mpi) MPAS_ACC_TIMER_STOP('atm_srk3: halo_exchanges + ACC_data_xfer') allocate(scalars_driving(num_scalars,nVertLevels,nCells+1)) @@ -2507,22 +2509,22 @@ subroutine atm_srk3(domain, dt, itimestep, exchange_halo_group) call mpas_pool_get_array(state, 'w', w, 2) call mpas_pool_get_array(diag, 'pv_edge', pv_edge) call mpas_pool_get_array(diag, 'rho_edge', rho_edge) - !!$acc update self(w,pv_edge,rho_edge) + !$acc update self(w,pv_edge,rho_edge) if (.not. config_gpu_aware_mpi) if (config_scalar_advection .and. (.not. config_split_dynamics_transport) ) then ! ! Communicate halos for w[1,2], pv_edge[1,2], rho_edge[1,2], scalars[1,2] ! call mpas_pool_get_array(state, 'scalars', scalars_2, 2) - !!$acc update self(scalars_2) - call exchange_halo_group(domain, 'dynamics:w,pv_edge,rho_edge,scalars', .true.) - !!$acc update device(scalars_2) + !$acc update self(scalars_2) if (.not. config_gpu_aware_mpi) + call exchange_halo_group(domain, 'dynamics:w,pv_edge,rho_edge,scalars', config_gpu_aware_mpi) + !$acc update device(scalars_2) if (.not. config_gpu_aware_mpi) else ! ! 
Communicate halos for w[1,2], pv_edge[1,2], rho_edge[1,2] ! - call exchange_halo_group(domain, 'dynamics:w,pv_edge,rho_edge', .true.) + call exchange_halo_group(domain, 'dynamics:w,pv_edge,rho_edge', config_gpu_aware_mpi) end if - !!$acc update device(w,pv_edge,rho_edge) + !$acc update device(w,pv_edge,rho_edge) if (.not. config_gpu_aware_mpi) MPAS_ACC_TIMER_STOP('atm_srk3: halo_exchanges + ACC_data_xfer') ! set the zero-gradient condition on w for regional_MPAS @@ -2539,9 +2541,9 @@ subroutine atm_srk3(domain, dt, itimestep, exchange_halo_group) MPAS_ACC_TIMER_START('atm_srk3: halo_exchanges + ACC_data_xfer') ! w halo values needs resetting after regional boundary update call mpas_pool_get_array(state, 'w', w, 2) - !!$acc update self(w) - call exchange_halo_group(domain, 'dynamics:w', .true.) - !!$acc update device(w) + !$acc update self(w) if (.not. config_gpu_aware_mpi) + call exchange_halo_group(domain, 'dynamics:w', config_gpu_aware_mpi) + !$acc update device(w) if (.not. config_gpu_aware_mpi) MPAS_ACC_TIMER_STOP('atm_srk3: halo_exchanges + ACC_data_xfer') end if ! end of regional_MPAS addition @@ -2557,9 +2559,9 @@ subroutine atm_srk3(domain, dt, itimestep, exchange_halo_group) call mpas_pool_get_array(state, 'theta_m', theta_m, 2) call mpas_pool_get_array(diag, 'pressure_p', pressure_p) call mpas_pool_get_array(diag, 'rtheta_p', rtheta_p) - !!$acc update self(theta_m,pressure_p,rtheta_p) - call exchange_halo_group(domain, 'dynamics:theta_m,pressure_p,rtheta_p', .true.) - !!$acc update device(theta_m,pressure_p,rtheta_p) + !$acc update self(theta_m,pressure_p,rtheta_p) if (.not. config_gpu_aware_mpi) + call exchange_halo_group(domain, 'dynamics:theta_m,pressure_p,rtheta_p', config_gpu_aware_mpi) + !$acc update device(theta_m,pressure_p,rtheta_p) if (.not. config_gpu_aware_mpi) MPAS_ACC_TIMER_STOP('atm_srk3: halo_exchanges + ACC_data_xfer') ! 
@@ -2621,16 +2623,16 @@ subroutine atm_srk3(domain, dt, itimestep, exchange_halo_group) call advance_scalars('scalars', domain, rk_step, rk_timestep, config_monotonic, config_positive_definite, & - config_time_integration_order, config_split_dynamics_transport, exchange_halo_group) + config_time_integration_order, config_split_dynamics_transport, config_gpu_aware_mpi, exchange_halo_group) if (config_apply_lbcs) then ! adjust boundary tendencies for regional_MPAS scalar transport MPAS_ACC_TIMER_START('atm_srk3: halo_exchanges + ACC_data_xfer') ! need to fill halo for horizontal filter call mpas_pool_get_array(state, 'scalars', scalars_2, 2) - !!$acc update self(scalars_2) - call exchange_halo_group(domain, 'dynamics:scalars', .true.) - !!$acc update device(scalars_2) + !$acc update self(scalars_2) if (.not. config_gpu_aware_mpi) + call exchange_halo_group(domain, 'dynamics:scalars', config_gpu_aware_mpi) + !$acc update device(scalars_2) if (.not. config_gpu_aware_mpi) MPAS_ACC_TIMER_STOP('atm_srk3: halo_exchanges + ACC_data_xfer') allocate(scalars_driving(num_scalars,nVertLevels,nCells+1)) @@ -2659,9 +2661,9 @@ subroutine atm_srk3(domain, dt, itimestep, exchange_halo_group) if (rk_step < 3) then MPAS_ACC_TIMER_START('atm_srk3: halo_exchanges + ACC_data_xfer') call mpas_pool_get_array(state, 'scalars', scalars_2, 2) - !!$acc update self(scalars_2) - call exchange_halo_group(domain, 'dynamics:scalars', .true.) - !!$acc update device(scalars_2) + !$acc update self(scalars_2) if (.not. config_gpu_aware_mpi) + call exchange_halo_group(domain, 'dynamics:scalars', config_gpu_aware_mpi) + !$acc update device(scalars_2) if (.not. 
config_gpu_aware_mpi) MPAS_ACC_TIMER_STOP('atm_srk3: halo_exchanges + ACC_data_xfer') end if @@ -2791,9 +2793,9 @@ subroutine atm_srk3(domain, dt, itimestep, exchange_halo_group) MPAS_ACC_TIMER_START('atm_srk3: halo_exchanges + ACC_data_xfer') call mpas_pool_get_array(state, 'scalars', scalars_2, 2) - !!$acc update self(scalars_2) - call exchange_halo_group(domain, 'dynamics:scalars', .true.) - !!$acc update device(scalars_2) + !$acc update self(scalars_2) if (.not. config_gpu_aware_mpi) + call exchange_halo_group(domain, 'dynamics:scalars', config_gpu_aware_mpi) + !$acc update device(scalars_2) if (.not. config_gpu_aware_mpi) MPAS_ACC_TIMER_STOP('atm_srk3: halo_exchanges + ACC_data_xfer') allocate(scalars_driving(num_scalars,nVertLevels,nCells+1)) @@ -2847,7 +2849,7 @@ end subroutine atm_srk3 ! !----------------------------------------------------------------------- subroutine advance_scalars(field_name, domain, rk_step, rk_timestep, config_monotonic, config_positive_definite, & - config_time_integration_order, config_split_dynamics_transport, exchange_halo_group) + config_time_integration_order, config_split_dynamics_transport, config_gpu_aware_mpi, exchange_halo_group) implicit none @@ -2860,6 +2862,7 @@ subroutine advance_scalars(field_name, domain, rk_step, rk_timestep, config_mono logical, intent(in) :: config_positive_definite integer, intent(in) :: config_time_integration_order logical, intent(in) :: config_split_dynamics_transport + logical, intent(in) :: config_gpu_aware_mpi procedure (halo_exchange_routine) :: exchange_halo_group ! 
Local variables @@ -2991,7 +2994,7 @@ subroutine advance_scalars(field_name, domain, rk_step, rk_timestep, config_mono edgeThreadStart(thread), edgeThreadEnd(thread), & cellSolveThreadStart(thread), cellSolveThreadEnd(thread), & scalar_old_arr, scalar_new_arr, s_max_arr, s_min_arr, wdtn_arr, & - flux_array, flux_upwind_tmp_arr, flux_tmp_arr, & + flux_array, flux_upwind_tmp_arr, flux_tmp_arr, config_gpu_aware_mpi, & exchange_halo_group, & advance_density=config_split_dynamics_transport, rho_zz_int=rho_zz_int) end if @@ -4870,7 +4873,7 @@ subroutine atm_advance_scalars_mono(field_name, block, tend, state, diag, mesh, cellStart, cellEnd, edgeStart, edgeEnd, & cellSolveStart, cellSolveEnd, & scalar_old, scalar_new, s_max, s_min, wdtn, flux_arr, & - flux_upwind_tmp, flux_tmp, exchange_halo_group, advance_density, rho_zz_int) + flux_upwind_tmp, flux_tmp, config_gpu_aware_mpi, exchange_halo_group, advance_density, rho_zz_int) implicit none @@ -4891,6 +4894,7 @@ subroutine atm_advance_scalars_mono(field_name, block, tend, state, diag, mesh, real (kind=RKIND), dimension(:,:), intent(inout) :: wdtn real (kind=RKIND), dimension(:,:), intent(inout) :: flux_arr real (kind=RKIND), dimension(:,:), intent(inout) :: flux_upwind_tmp, flux_tmp + logical, intent(in) :: config_gpu_aware_mpi procedure (halo_exchange_routine) :: exchange_halo_group logical, intent(in), optional :: advance_density real (kind=RKIND), dimension(:,:), intent(inout), optional :: rho_zz_int @@ -4969,7 +4973,7 @@ subroutine atm_advance_scalars_mono(field_name, block, tend, state, diag, mesh, edgesOnCell, edgesOnCell_sign, nEdgesOnCell, fnm, fnp, rdnw, nAdvCellsForEdge, & advCellsForEdge, adv_coefs, adv_coefs_3rd, scalar_old, scalar_new, s_max, s_min, & wdtn, scale_arr, flux_arr, flux_upwind_tmp, flux_tmp, & - bdyMaskCell, bdyMaskEdge, & + bdyMaskCell, bdyMaskEdge, config_gpu_aware_mpi, & exchange_halo_group, advance_density, rho_zz_int) call mpas_deallocate_scratch_field(scale) @@ -5017,7 +5021,7 @@ subroutine 
atm_advance_scalars_mono_work(field_name, block, state, nCells, nEdge edgesOnCell, edgesOnCell_sign, nEdgesOnCell, fnm, fnp, rdnw, nAdvCellsForEdge, & advCellsForEdge, adv_coefs, adv_coefs_3rd, scalar_old, scalar_new, s_max, s_min, & wdtn, scale_arr, flux_arr, flux_upwind_tmp, flux_tmp, & - bdyMaskCell, bdyMaskEdge, & + bdyMaskCell, bdyMaskEdge, config_gpu_aware_mpi, & exchange_halo_group, advance_density, rho_zz_int) use mpas_atm_dimensions, only : nVertLevels @@ -5033,6 +5037,7 @@ subroutine atm_advance_scalars_mono_work(field_name, block, state, nCells, nEdge real (kind=RKIND), intent(in) :: dt integer, intent(in) :: cellStart, cellEnd, edgeStart, edgeEnd integer, intent(in) :: cellSolveStart, cellSolveEnd + logical, intent(in) :: config_gpu_aware_mpi procedure (halo_exchange_routine) :: exchange_halo_group logical, intent(in), optional :: advance_density real (kind=RKIND), dimension(:,:), intent(inout), optional :: rho_zz_int @@ -5129,17 +5134,17 @@ subroutine atm_advance_scalars_mono_work(field_name, block, state, nCells, nEdge !$acc end parallel MPAS_ACC_TIMER_START('atm_advance_scalars_mono [ACC_data_xfer]') - !!$acc update self(scalars_old) + !$acc update self(scalars_old) if (.not. config_gpu_aware_mpi) MPAS_ACC_TIMER_STOP('atm_advance_scalars_mono [ACC_data_xfer]') !$OMP BARRIER !$OMP MASTER - call exchange_halo_group(block % domain, 'dynamics:'//trim(field_name)//'_old', .true.) + call exchange_halo_group(block % domain, 'dynamics:'//trim(field_name)//'_old', config_gpu_aware_mpi) !$OMP END MASTER !$OMP BARRIER MPAS_ACC_TIMER_START('atm_advance_scalars_mono [ACC_data_xfer]') - !!$acc update device(scalars_old) + !$acc update device(scalars_old) if (.not. config_gpu_aware_mpi) MPAS_ACC_TIMER_STOP('atm_advance_scalars_mono [ACC_data_xfer]') ! @@ -5536,17 +5541,17 @@ subroutine atm_advance_scalars_mono_work(field_name, block, state, nCells, nEdge ! 
MPAS_ACC_TIMER_START('atm_advance_scalars_mono [ACC_data_xfer]') - !!$acc update self(scale_arr) + !$acc update self(scale_arr) if (.not. config_gpu_aware_mpi) MPAS_ACC_TIMER_STOP('atm_advance_scalars_mono [ACC_data_xfer]') !$OMP BARRIER !$OMP MASTER - call exchange_halo_group(block % domain, 'dynamics:scale', .true.) + call exchange_halo_group(block % domain, 'dynamics:scale', config_gpu_aware_mpi) !$OMP END MASTER !$OMP BARRIER MPAS_ACC_TIMER_START('atm_advance_scalars_mono [ACC_data_xfer]') - !!$acc update device(scale_arr) + !$acc update device(scale_arr) if (.not. config_gpu_aware_mpi) MPAS_ACC_TIMER_STOP('atm_advance_scalars_mono [ACC_data_xfer]') !$acc parallel diff --git a/src/core_atmosphere/mpas_atm_halos.F b/src/core_atmosphere/mpas_atm_halos.F index 955f9b5ea0..19c4a5be46 100644 --- a/src/core_atmosphere/mpas_atm_halos.F +++ b/src/core_atmosphere/mpas_atm_halos.F @@ -62,18 +62,23 @@ subroutine atm_build_halo_groups(domain, ierr) ! Local variables character(len=StrKIND), pointer :: config_halo_exch_method + logical, pointer :: config_gpu_aware_mpi ! ! Determine from the namelist option config_halo_exch_method which halo exchange method to employ ! call mpas_pool_get_config(domain % blocklist % configs, 'config_halo_exch_method', config_halo_exch_method) + call mpas_pool_get_config(domain % blocklist % configs, 'config_gpu_aware_mpi', config_gpu_aware_mpi) if (trim(config_halo_exch_method) == 'mpas_dmpar') then call mpas_log_write('') call mpas_log_write('*** Using ''mpas_dmpar'' routines for exchanging halos') call mpas_log_write('') + if (config_gpu_aware_mpi) then + call mpas_log_write('GPU-aware MPI is not presently supported with config_halo_exch_method = mpas_dmpar',MPAS_LOG_CRIT) + end if ! ! Set up halo exchange groups used during atmosphere core initialization ! 
From eff87cf19248980cdba794e8bbec4c3d6c8128e2 Mon Sep 17 00:00:00 2001 From: Abishek Gopal Date: Thu, 25 Sep 2025 13:06:05 -0600 Subject: [PATCH 146/146] Adding a dependency to mpas_timer.o in mpas_halo.o --- src/framework/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/framework/Makefile b/src/framework/Makefile index 2d8e7dc92b..64d3d6faf4 100644 --- a/src/framework/Makefile +++ b/src/framework/Makefile @@ -110,7 +110,7 @@ mpas_c_interfacing.o: xml_stream_parser.o: xml_stream_parser.c $(CC) $(CFLAGS) $(CPPFLAGS) $(CPPINCLUDES) -I../external/ezxml -c xml_stream_parser.c -mpas_halo.o: mpas_derived_types.o mpas_pool_routines.o mpas_log.o +mpas_halo.o: mpas_derived_types.o mpas_pool_routines.o mpas_log.o mpas_timer.o mpas_stream_inquiry.o : mpas_derived_types.o mpas_log.o mpas_c_interfacing.o