diff --git a/src/core_atmosphere/Registry.xml b/src/core_atmosphere/Registry.xml
index 4281c40bba..9bfc99f5c8 100644
--- a/src/core_atmosphere/Registry.xml
+++ b/src/core_atmosphere/Registry.xml
@@ -146,6 +146,31 @@
description="Formulation of horizontal mixing"
possible_values="`2d_fixed' or `2d_smagorinsky'"/>
+
+
+
+
+
+
+
+
+
+
+
+
+
@@ -494,6 +528,11 @@
+
+
+
+
+
#ifdef MPAS_CAM_DYCORE
@@ -1561,6 +1600,20 @@
#endif
+
+
+
+
+
+
+
+
+
+
+
#endif
@@ -1921,8 +1977,20 @@
-
+
+
+
+
+
+
+
+
+
@@ -2024,6 +2092,9 @@
+
+
#endif
@@ -2102,6 +2173,10 @@
+
+
diff --git a/src/core_atmosphere/dynamics/Makefile b/src/core_atmosphere/dynamics/Makefile
index 6892633c68..1bf21fff9d 100644
--- a/src/core_atmosphere/dynamics/Makefile
+++ b/src/core_atmosphere/dynamics/Makefile
@@ -1,11 +1,12 @@
.SUFFIXES: .F .o
OBJS = mpas_atm_time_integration.o \
- mpas_atm_boundaries.o
+ mpas_atm_boundaries.o \
+ mpas_atm_dissipation_models.o
all: $(OBJS)
-mpas_atm_time_integration.o: mpas_atm_boundaries.o mpas_atm_iau.o
+mpas_atm_time_integration.o: mpas_atm_boundaries.o mpas_atm_iau.o mpas_atm_dissipation_models.o
mpas_atm_boundaries.o:
diff --git a/src/core_atmosphere/dynamics/mpas_atm_boundaries.F b/src/core_atmosphere/dynamics/mpas_atm_boundaries.F
index 787e7719a1..6c19ed7931 100644
--- a/src/core_atmosphere/dynamics/mpas_atm_boundaries.F
+++ b/src/core_atmosphere/dynamics/mpas_atm_boundaries.F
@@ -395,18 +395,14 @@ subroutine mpas_atm_get_bdy_tend(clock, block, vertDim, horizDim, field, delta_t
nullify(tend)
call mpas_pool_get_array(lbc, 'lbc_'//trim(field), tend, 1)
- MPAS_ACC_TIMER_START('mpas_atm_get_bdy_tend [ACC_data_xfer]')
if (associated(tend)) then
- !$acc enter data copyin(tend)
else
call mpas_pool_get_array(lbc, 'lbc_scalars', tend_scalars, 1)
- !$acc enter data copyin(tend_scalars)
! Ensure the integer pointed to by idx_ptr is copied to the gpu device
call mpas_pool_get_dimension(lbc, 'index_'//trim(field), idx_ptr)
idx = idx_ptr
end if
- MPAS_ACC_TIMER_STOP('mpas_atm_get_bdy_tend [ACC_data_xfer]')
!$acc parallel default(present)
if (associated(tend)) then
@@ -426,13 +422,6 @@ subroutine mpas_atm_get_bdy_tend(clock, block, vertDim, horizDim, field, delta_t
end if
!$acc end parallel
- MPAS_ACC_TIMER_START('mpas_atm_get_bdy_tend [ACC_data_xfer]')
- if (associated(tend)) then
- !$acc exit data delete(tend)
- else
- !$acc exit data delete(tend_scalars)
- end if
- MPAS_ACC_TIMER_STOP('mpas_atm_get_bdy_tend [ACC_data_xfer]')
end subroutine mpas_atm_get_bdy_tend
@@ -533,9 +522,6 @@ subroutine mpas_atm_get_bdy_state_2d(clock, block, vertDim, horizDim, field, del
! query the field as a scalar constituent
!
if (associated(tend) .and. associated(state)) then
- MPAS_ACC_TIMER_START('mpas_atm_get_bdy_state_2d [ACC_data_xfer]')
- !$acc enter data copyin(tend, state)
- MPAS_ACC_TIMER_STOP('mpas_atm_get_bdy_state_2d [ACC_data_xfer]')
!$acc parallel default(present)
!$acc loop gang vector collapse(2)
@@ -546,9 +532,6 @@ subroutine mpas_atm_get_bdy_state_2d(clock, block, vertDim, horizDim, field, del
end do
!$acc end parallel
- MPAS_ACC_TIMER_START('mpas_atm_get_bdy_state_2d [ACC_data_xfer]')
- !$acc exit data delete(tend, state)
- MPAS_ACC_TIMER_STOP('mpas_atm_get_bdy_state_2d [ACC_data_xfer]')
else
call mpas_pool_get_array(lbc, 'lbc_scalars', tend_scalars, 1)
call mpas_pool_get_array(lbc, 'lbc_scalars', state_scalars, 2)
@@ -556,10 +539,6 @@ subroutine mpas_atm_get_bdy_state_2d(clock, block, vertDim, horizDim, field, del
idx=idx_ptr ! Avoid non-array pointer for OpenACC
- MPAS_ACC_TIMER_START('mpas_atm_get_bdy_state_2d [ACC_data_xfer]')
- !$acc enter data copyin(tend_scalars, state_scalars)
- MPAS_ACC_TIMER_STOP('mpas_atm_get_bdy_state_2d [ACC_data_xfer]')
-
!$acc parallel default(present)
!$acc loop gang vector collapse(2)
do i=1, horizDim+1
@@ -569,9 +548,6 @@ subroutine mpas_atm_get_bdy_state_2d(clock, block, vertDim, horizDim, field, del
end do
!$acc end parallel
- MPAS_ACC_TIMER_START('mpas_atm_get_bdy_state_2d [ACC_data_xfer]')
- !$acc exit data delete(tend_scalars, state_scalars)
- MPAS_ACC_TIMER_STOP('mpas_atm_get_bdy_state_2d [ACC_data_xfer]')
end if
end subroutine mpas_atm_get_bdy_state_2d
@@ -652,10 +628,6 @@ subroutine mpas_atm_get_bdy_state_3d(clock, block, innerDim, vertDim, horizDim,
call mpas_pool_get_array(lbc, 'lbc_'//trim(field), tend, 1)
call mpas_pool_get_array(lbc, 'lbc_'//trim(field), state, 2)
- MPAS_ACC_TIMER_START('mpas_atm_get_bdy_state_3d [ACC_data_xfer]')
- !$acc enter data copyin(tend, state)
- MPAS_ACC_TIMER_STOP('mpas_atm_get_bdy_state_3d [ACC_data_xfer]')
-
!$acc parallel default(present)
!$acc loop gang vector collapse(3)
do i=1, horizDim+1
@@ -667,10 +639,6 @@ subroutine mpas_atm_get_bdy_state_3d(clock, block, innerDim, vertDim, horizDim,
end do
!$acc end parallel
- MPAS_ACC_TIMER_START('mpas_atm_get_bdy_state_3d [ACC_data_xfer]')
- !$acc exit data delete(tend, state)
- MPAS_ACC_TIMER_STOP('mpas_atm_get_bdy_state_3d [ACC_data_xfer]')
-
end subroutine mpas_atm_get_bdy_state_3d
diff --git a/src/core_atmosphere/dynamics/mpas_atm_dissipation_models.F b/src/core_atmosphere/dynamics/mpas_atm_dissipation_models.F
new file mode 100644
index 0000000000..450ed045c1
--- /dev/null
+++ b/src/core_atmosphere/dynamics/mpas_atm_dissipation_models.F
@@ -0,0 +1,1622 @@
+! Copyright (c) 2026, The University Corporation for Atmospheric Research (UCAR).
+!
+! Unless noted otherwise source code is licensed under the BSD license.
+! Additional copyright and license information can be found in the LICENSE file
+! distributed with this code, or at http://mpas-dev.github.com/license.html
+!
+
+#define COMMA ,
+#define DEBUG_WRITE(M) ! call mpas_log_write(M)
+
+module mpas_atm_dissipation_models
+
+ use mpas_kind_types, only : RKIND
+ use mpas_atmphys_constants
+ use mpas_constants
+ use mpas_log
+ use mpas_timekeeping, only : mpas_get_clock_time, mpas_get_time
+ use mpas_derived_types, only : MPAS_Clock_type, MPAS_Time_type, MPAS_NOW, MPAS_LOG_CRIT
+
+ logical, parameter :: les_test = .true., les_sas_test = .false.
+ !! real (kind=RKIND), parameter :: tke_heat_flux = 0.03 ! shear case from Moeng et al., first hour
+ ! real (kind=RKIND), parameter :: tke_heat_flux = 0.03
+ ! real (kind=RKIND), parameter :: tke_heat_flux = 0.0
+ !! real (kind=RKIND), parameter :: tke_drag_coefficient = 0.0013 ! ocean roughness length
+ ! real (kind=RKIND), parameter :: tke_drag_coefficient = 0.006
+ ! real (kind=RKIND), parameter :: tke_drag_coefficient = 0.0
+ real (kind=RKIND), parameter :: epsilon_bv = 1.e-06
+ ! real (kind=RKIND), parameter :: c_k = 0.1
+ real (kind=RKIND), parameter :: c_k = 0.25
+
+ integer, parameter :: LES_INVALID_OPT = -1
+
+ integer, parameter :: LES_MODEL_NONE = 0, &
+ LES_MODEL_3D_SMAGORINSKY = 1, &
+ LES_MODEL_PROGNOSTIC_15_ORDER = 2
+
+ integer, parameter :: LES_SURFACE_NONE = 0, &
+ LES_SURFACE_SPECIFIED = 1, &
+ LES_SURFACE_VARYING = 2
+
+contains
+
+
+ !-----------------------------------------------------------------------
+ ! routine les_model_from_string
+ !
+ !> \brief Converts an LES model option from a string to an integer parameter
+ !> \author Michael Duda
+ !> \date 13 February 2026
+ !> \details
+ !> Given a string that contains the name of a valid LES model option, this
+ !> routine returns an integer parameter corresponding to that option.
+ !>
+ !> If the given string is not recognized as a valid LES model option, the
+ !> integer parameter LES_INVALID_OPT is returned.
+ !
+ !-----------------------------------------------------------------------
+ pure function les_model_from_string(les_model_str) result(les_model_opt)
+
+ implicit none
+
+ ! Arguments
+ character(len=*), intent(in) :: les_model_str
+
+ ! Return value
+ integer :: les_model_opt
+
+
+ if (trim(les_model_str) == 'none') then
+ les_model_opt = LES_MODEL_NONE
+ else if (trim(les_model_str) == '3d_smagorinsky') then
+ les_model_opt = LES_MODEL_3D_SMAGORINSKY
+ else if (trim(les_model_str) == 'prognostic_1.5_order') then
+ les_model_opt = LES_MODEL_PROGNOSTIC_15_ORDER
+ else
+ les_model_opt = LES_INVALID_OPT
+ end if
+
+ end function les_model_from_string
+
+
+ !-----------------------------------------------------------------------
+ ! routine les_surface_from_string
+ !
+ !> \brief Converts an LES surface option from a string to an integer parameter
+ !> \author Michael Duda
+ !> \date 13 February 2026
+ !> \details
+ !> Given a string that contains the name of a valid LES surface option, this
+ !> routine returns an integer parameter corresponding to that option.
+ !>
+ !> If the given string is not recognized as a valid LES surface option, the
+ !> integer parameter LES_INVALID_OPT is returned.
+ !
+ !-----------------------------------------------------------------------
+ pure function les_surface_from_string(les_surface_str) result(les_surface_opt)
+
+ implicit none
+
+ ! Arguments
+ character(len=*), intent(in) :: les_surface_str
+
+ ! Return value
+ integer :: les_surface_opt
+
+
+ if (trim(les_surface_str) == 'none') then
+ les_surface_opt = LES_SURFACE_NONE
+ else if (trim(les_surface_str) == 'specified') then
+ les_surface_opt = LES_SURFACE_SPECIFIED
+ else if (trim(les_surface_str) == 'varying') then
+ les_surface_opt = LES_SURFACE_VARYING
+ else
+ les_surface_opt = LES_INVALID_OPT
+ end if
+
+ end function les_surface_from_string
+
+
+ subroutine smagorinsky_2d( kdiff, u, v, c_s, config_len_disp, &
+ deformation_coef_c2, deformation_coef_s2, deformation_coef_cs, &
+ invDt, h_mom_eddy_visc4, config_visc4_2dsmag, h_theta_eddy_visc4, &
+ cellStart, cellEnd, nEdgesOnCell, edgesOnCell, &
+ nCells, nEdges )
+
+ use mpas_atm_dimensions ! pull nVertLevels and maxEdges from here
+
+ implicit none
+
+ integer, intent(in) :: cellStart, cellEnd, nCells, nEdges
+ real (kind=RKIND), dimension(nVertLevels,nEdges+1), intent(in) :: u
+ real (kind=RKIND), dimension(nVertLevels,nEdges+1), intent(in) :: v
+ real (kind=RKIND), dimension(maxEdges,nCells+1), intent(in) :: deformation_coef_c2
+ real (kind=RKIND), dimension(maxEdges,nCells+1), intent(in) :: deformation_coef_s2
+ real (kind=RKIND), dimension(maxEdges,nCells+1), intent(in) :: deformation_coef_cs
+ real (kind=RKIND), intent(in) :: c_s, config_len_disp, invDt, config_visc4_2dsmag
+ integer, dimension(nCells+1), intent(in) :: nEdgesOnCell
+ integer, dimension(maxEdges,nCells+1), intent(in) :: edgesOnCell
+
+ real (kind=RKIND), dimension(nVertLevels,nCells+1), intent(out) :: kdiff
+ real (kind=RKIND), intent(out) :: h_mom_eddy_visc4, h_theta_eddy_visc4
+
+ ! local variables
+
+ integer :: iCell, iEdge, k
+ real (kind=RKIND), dimension(nVertLevels) :: d_11, d_22, d_12, dudx, dudy, dvdx, dvdy
+
+
+ DEBUG_WRITE(' begin smagorinsky_2d ')
+
+ !$acc enter data create(dudx, dudy, dvdx, dvdy)
+ !$acc enter data create(d_11, d_22, d_12)
+
+ !$acc parallel default(present)
+
+ !$acc loop gang worker private(dudx, dudy, dvdx, dvdy, d_11, d_22, d_12)
+ do iCell = cellStart,cellEnd
+
+ !$acc loop vector
+ do k = 1, nVertLevels
+ dudx(k) = 0.0_RKIND
+ dudy(k) = 0.0_RKIND
+ dvdx(k) = 0.0_RKIND
+ dvdy(k) = 0.0_RKIND
+ end do
+
+ !$acc loop seq
+ do iEdge=1,nEdgesOnCell(iCell)
+ !$acc loop vector
+ do k=1,nVertLevels
+ dudx(k) = dudx(k) + deformation_coef_c2(iEdge,iCell)*u(k,EdgesOnCell(iEdge,iCell)) &
+ - deformation_coef_cs(iEdge,iCell)*v(k,EdgesOnCell(iEdge,iCell))
+ dudy(k) = dudy(k) + deformation_coef_cs(iEdge,iCell)*u(k,EdgesOnCell(iEdge,iCell)) &
+ - deformation_coef_s2(iEdge,iCell)*v(k,EdgesOnCell(iEdge,iCell))
+ dvdx(k) = dvdx(k) + deformation_coef_cs(iEdge,iCell)*u(k,EdgesOnCell(iEdge,iCell)) &
+ + deformation_coef_c2(iEdge,iCell)*v(k,EdgesOnCell(iEdge,iCell))
+ dvdy(k) = dvdy(k) + deformation_coef_s2(iEdge,iCell)*u(k,EdgesOnCell(iEdge,iCell)) &
+ + deformation_coef_cs(iEdge,iCell)*v(k,EdgesOnCell(iEdge,iCell))
+ end do
+ end do
+
+!DIR$ IVDEP
+ !$acc loop vector
+ do k=1, nVertLevels
+ ! here is the Smagorinsky formulation,
+ ! followed by imposition of an upper bound on the eddy viscosity
+ d_11(k) = 2*dudx(k)
+ d_22(k) = 2*dvdy(k)
+ d_12(k) = dudy(k) + dvdx(k)
+ kdiff(k,iCell) = (c_s * config_len_disp)**2 * sqrt(0.25*(d_11(k)-d_22(k))**2 + d_12(k)**2)
+ kdiff(k,iCell) = min(kdiff(k,iCell),(0.01*config_len_disp**2) * invDt)
+ end do
+ end do
+
+ !$acc end parallel
+
+ !$acc exit data delete(dudx, dudy, dvdx, dvdy)
+ !$acc exit data delete(d_11, d_22, d_12)
+
+ h_mom_eddy_visc4 = config_visc4_2dsmag * config_len_disp**3
+ h_theta_eddy_visc4 = h_mom_eddy_visc4
+
+ DEBUG_WRITE(' exiting smagorinsky_2d ')
+
+ end subroutine smagorinsky_2d
+
+!---------------------------------------
+
+ subroutine les_models( les_model_opt, les_surface_opt, dynamics_substep, eddy_visc_horz, eddy_visc_vert, &
+ u, v, uCell, vCell, &
+ w, c_s, bv_freq2, zgrid, config_len_disp, &
+ deformation_coef_c2, deformation_coef_s2, deformation_coef_cs, &
+ deformation_coef_c, deformation_coef_s, prandtl_3d_inv, &
+ invDt, h_mom_eddy_visc4, config_visc4_2dsmag, h_theta_eddy_visc4, &
+ scalars, tend_scalars, index_tke, rho_zz, meshScalingDel2, &
+ cellStart, cellEnd, nEdgesOnCell, edgesOnCell, cellsOnEdge, &
+ nCells, nEdges, nVertLevels, maxEdges, num_scalars )
+
+ implicit none
+
+ integer, intent(in) :: les_model_opt
+ integer, intent(in) :: les_surface_opt
+
+ integer, intent(in) :: cellStart, cellEnd, nCells, nEdges, nVertLevels, maxEdges, index_tke, num_scalars
+ integer, intent(in) :: dynamics_substep
+ real (kind=RKIND), dimension(nVertLevels,nEdges+1), intent(in) :: u
+ real (kind=RKIND), dimension(nVertLevels,nEdges+1), intent(in) :: v
+ real (kind=RKIND), dimension(nVertLevels,nCells+1), intent(in) :: uCell
+ real (kind=RKIND), dimension(nVertLevels,nCells+1), intent(in) :: vCell
+ real (kind=RKIND), dimension(nVertLevels,nCells+1), intent(in) :: bv_freq2
+ real (kind=RKIND), dimension(nVertLevels,nCells+1), intent(out) :: prandtl_3d_inv
+ real (kind=RKIND), dimension(nVertLevels,nCells+1), intent(in) :: rho_zz
+ real (kind=RKIND), dimension(num_scalars,nVertLevels,nCells+1), intent(inout) :: scalars
+ real (kind=RKIND), dimension(num_scalars,nVertLevels,nCells+1) :: tend_scalars
+ real (kind=RKIND), dimension(nVertLevels+1,nCells+1), intent(in) :: w
+ real (kind=RKIND), dimension(nVertLevels+1,nCells+1), intent(in) :: zgrid
+ real (kind=RKIND), dimension(maxEdges,nCells+1), intent(in) :: deformation_coef_c2
+ real (kind=RKIND), dimension(maxEdges,nCells+1), intent(in) :: deformation_coef_s2
+ real (kind=RKIND), dimension(maxEdges,nCells+1), intent(in) :: deformation_coef_cs
+ real (kind=RKIND), dimension(maxEdges,nCells+1), intent(in) :: deformation_coef_c
+ real (kind=RKIND), dimension(maxEdges,nCells+1), intent(in) :: deformation_coef_s
+ real (kind=RKIND), dimension(nEdges+1), intent(in) :: meshScalingDel2
+ real (kind=RKIND), intent(in) :: c_s, config_len_disp, invDt, config_visc4_2dsmag
+ integer, dimension(nCells+1), intent(in) :: nEdgesOnCell
+ integer, dimension(maxEdges,nCells+1), intent(in) :: edgesOnCell
+ integer, dimension(2,nEdges+1), intent(in) :: cellsOnEdge
+
+ real (kind=RKIND), dimension(nVertLevels,nCells+1), intent(out) :: eddy_visc_horz, eddy_visc_vert
+ real (kind=RKIND), intent(out) :: h_mom_eddy_visc4, h_theta_eddy_visc4
+
+ ! local variables
+
+ integer :: iCell, iEdge, k, ie, cell1, cell2
+ real (kind=RKIND), dimension(nVertLevels) :: d_11, d_22, d_33, d_12, d_13, d_23
+ real (kind=RKIND), dimension(nVertLevels) :: dudx, dudy, dvdx, dvdy
+ real (kind=RKIND), dimension(nVertLevels+1) :: dwdx, dwdy
+ real (kind=RKIND), dimension(nVertLevels) :: dudz, dvdz, dwdz
+ real (kind=RKIND) :: rdz, def2, pr_inv, wk
+ real (kind=RKIND) :: shear_production, buoyancy, dissipation, delta_z, delta_s, bv, tke_length, diss_length
+ real (kind=RKIND) :: l_horizontal, l_vertical, c_dissipation
+ real (kind=RKIND) :: prandtl_horizontal_inv
+ real (kind=RKIND) :: eddy_visc_h, eddy_visc_v
+
+ logical, parameter :: test_tke=.true.
+ ! real (kind=RKIND), parameter :: epsilon_bv = 1.e-06
+
+
+ !$acc enter data create(dudx, dudy, dvdx, dvdy, dwdx, dwdy, dudz, dvdz, dwdz)
+ !$acc enter data create(d_11, d_22, d_33, d_12, d_13, d_23)
+
+ pr_inv = 1./prandtl
+
+ ! set up coefficients for 4th-order horizontal background filter
+
+ h_mom_eddy_visc4 = config_visc4_2dsmag * config_len_disp**3
+ h_theta_eddy_visc4 = h_mom_eddy_visc4
+
+ !$acc parallel default(present)
+
+ !$acc loop gang worker private(dudx, dudy, dvdx, dvdy, dwdx, dwdy, dudz, dvdz, dwdz, d_11, d_22, d_33, d_12, d_13, d_23)
+ do iCell = cellStart,cellEnd
+
+ !$acc loop vector
+ do k = 1, nVertLevels
+ dudx(k) = 0.0_RKIND
+ dudy(k) = 0.0_RKIND
+ dvdx(k) = 0.0_RKIND
+ dvdy(k) = 0.0_RKIND
+
+ dudz(k) = 0.0_RKIND
+ dvdz(k) = 0.0_RKIND
+ dwdz(k) = 0.0_RKIND
+ end do
+
+ !$acc loop vector
+ do k = 1, nVertLevels+1
+ dwdx(k) = 0.0_RKIND
+ dwdy(k) = 0.0_RKIND
+ end do
+
+ !$acc loop seq
+ do iEdge=1,nEdgesOnCell(iCell)
+
+ ie = EdgesOnCell(iEdge,iCell)
+ cell1 = cellsOnEdge(1,ie)
+ cell2 = cellsOnEdge(2,ie)
+
+ !$acc loop vector
+ do k=1,nVertLevels
+ dudx(k) = dudx(k) + deformation_coef_c2(iEdge,iCell)*u(k,ie) &
+ - deformation_coef_cs(iEdge,iCell)*v(k,ie)
+ dudy(k) = dudy(k) + deformation_coef_cs(iEdge,iCell)*u(k,ie) &
+ - deformation_coef_s2(iEdge,iCell)*v(k,ie)
+ dvdx(k) = dvdx(k) + deformation_coef_cs(iEdge,iCell)*u(k,ie) &
+ + deformation_coef_c2(iEdge,iCell)*v(k,ie)
+ dvdy(k) = dvdy(k) + deformation_coef_s2(iEdge,iCell)*u(k,ie) &
+ + deformation_coef_cs(iEdge,iCell)*v(k,ie)
+ end do
+
+ !$acc loop vector
+ do k=1,nVertLevels+1
+ wk = 0.5*(w(k,cell1)+w(k,cell2))
+ dwdx(k) = dwdx(k) + deformation_coef_c(iEdge,iCell)*wk
+ dwdy(k) = dwdy(k) + deformation_coef_s(iEdge,iCell)*wk
+ end do
+
+ end do
+
+ !$acc loop vector
+ do k=1,nVertLevels
+ rdz = 1./(zgrid(k+1,iCell)-zgrid(k,iCell))
+ dwdz(k) = (w(k+1,iCell)-w(k,iCell))*rdz
+ end do
+
+ !$acc loop vector
+ do k=2,nVertLevels-1
+ rdz = 1./(zgrid(k+2,iCell)+zgrid(k+1,iCell)-zgrid(k,iCell)-zgrid(k-1,iCell))
+ dudz(k) = (uCell(k+1,iCell)-uCell(k-1,iCell))*rdz
+ dvdz(k) = (vCell(k+1,iCell)-vCell(k-1,iCell))*rdz
+ end do
+
+ k = 1
+ rdz = 1./(zgrid(k+1,iCell)-zgrid(k,iCell))
+ dudz(k) = (uCell(k+1,iCell)-uCell(k,iCell))*rdz
+ dvdz(k) = (vCell(k+1,iCell)-vCell(k,iCell))*rdz
+
+ k = nVertLevels-1
+ rdz = 1./(zgrid(k+1,iCell)-zgrid(k,iCell))
+ dudz(k+1) = (uCell(k+1,iCell)-uCell(k,iCell))*rdz
+ dvdz(k+1) = (vCell(k+1,iCell)-vCell(k,iCell))*rdz
+
+ !$acc loop vector
+ do k=1, nVertLevels
+ d_11(k) = 2.*dudx(k)
+ d_22(k) = 2.*dvdy(k)
+ d_33(k) = 2.*dwdz(k)
+ d_12(k) = dudy(k) + dvdx(k)
+ d_13(k) = dwdx(k) + dudz(k)
+ d_23(k) = dwdy(k) + dvdz(k)
+ end do
+
+ if (les_model_opt == LES_MODEL_3D_SMAGORINSKY) then
+
+ !$acc loop vector
+ do k=1, nVertLevels
+ def2 = 0.5*(d_11(k)**2 + d_22(k)**2 + d_33(k)**2) + d_12(k)**2 + d_13(k)**2 + d_23(k)**2
+ eddy_visc_horz(k,iCell) = (c_s * config_len_disp)**2 * sqrt(max(0.,def2 - pr_inv*bv_freq2(k,iCell)))
+ eddy_visc_horz(k,iCell) = min(eddy_visc_horz(k,iCell),(0.01*config_len_disp**2) * invDt)
+ delta_z = zgrid(k+1,iCell)-zgrid(k,iCell)
+ eddy_visc_vert(k,iCell) = (c_s * delta_z)**2 * sqrt(max(0.,def2 - pr_inv*bv_freq2(k,iCell)))
+ ! eddy_visc_vert(k,iCell) = eddy_visc_horz(k,iCell)
+ end do
+
+ else if (les_model_opt == LES_MODEL_PROGNOSTIC_15_ORDER) then
+
+ !$acc loop vector
+ do k=1,nVertLevels ! bound the tke here, currently hardwired
+ ! scalars(index_tke,k,iCell) = max(0.,min(100.,scalars(index_tke,k,iCell)))
+ scalars(index_tke,k,iCell) = max(0.,scalars(index_tke,k,iCell))
+ end do
+
+ !$acc loop vector
+ do k=1,nVertLevels
+
+ delta_z = zgrid(k+1,iCell)-zgrid(k,iCell)
+ delta_s = ((config_len_disp**2)*delta_z)**(1./3.)
+ bv = max( sqrt(abs(bv_freq2(k,iCell))), epsilon_bv )
+ tke_length = delta_s
+ ! isentropic mixing formulation
+ if(bv_freq2(k,iCell) .gt. 1.e-06) &
+ tke_length = 0.76*sqrt(scalars(index_tke,k,iCell))/bv
+ tke_length = min(tke_length, delta_z)
+ diss_length = min(delta_s,max(tke_length,0.01*delta_s))
+ if(bv_freq2(k,iCell) <= 0) diss_length = delta_s
+
+ ! non-isotropic mixing
+
+ l_horizontal = config_len_disp
+ l_vertical = min(delta_z,tke_length)
+ if(bv_freq2(k,iCell) <= 0) diss_length = delta_z
+
+ ! isotropic mixing
+
+ ! l_horizontal = min(delta_s,tke_length)
+ ! if(bv_freq2(k,iCell) <= 0) diss_length = delta_s
+ ! l_vertical = l_horizontal
+
+ ! eddy viscocities set here if we are running the 1.5 order prognostic tke scheme
+ eddy_visc_h = c_k*l_horizontal*sqrt(scalars(index_tke,k,iCell))
+ eddy_visc_h = min(eddy_visc_h,(0.01*config_len_disp**2) * invDt)
+ eddy_visc_v = c_k*l_vertical*sqrt(scalars(index_tke,k,iCell))
+ eddy_visc_v = min(eddy_visc_v,(0.01*delta_z**2) * invDt)
+
+ eddy_visc_horz(k,iCell) = eddy_visc_h
+ eddy_visc_vert(k,iCell) = eddy_visc_v
+
+ ! terms for the prognostic tke integration
+
+ shear_production = eddy_visc_h*(d_11(k)**2 + d_22(k)**2 + d_12(k)**2) &
+ +eddy_visc_v*(d_33(k)**2 + d_13(k)**2 + d_23(k)**2)
+
+ buoyancy = -eddy_visc_v*bv_freq2(k,iCell)
+
+ ! dissipation
+
+ c_dissipation = 1.9*c_k + max( 0.0, 0.93 - 1.9*c_k )*diss_length/delta_s
+ ! if( (k.eq. 1) .or. (k.eq.nVertLevels) ) c_dissipation = 3.9
+
+ dissipation = -c_dissipation*(scalars(index_tke,k,iCell)**(1.5))/diss_length
+
+ ! computing eddy viscosities *********
+
+ prandtl_horizontal_inv = 3.
+ prandtl_3d_inv(k,iCell) = 1.0+(2.0*l_vertical/delta_z)
+
+
+ ! RHS term for the subgrid ke.
+
+ if(dynamics_substep == 1) &
+ tend_scalars(index_tke,k,iCell) = rho_zz(k,iCell)*( shear_production + buoyancy + dissipation )
+
+ end do
+
+ else
+
+!MGD call mpas_log_write(' in les_models, no les scheme for '//trim(config_les_model), messageType=MPAS_LOG_CRIT)
+
+ end if ! end of les_model_opt test
+
+ end do ! loop over all owned cells (columns)
+
+ !$acc end parallel
+
+ !$acc exit data delete(dudx, dudy, dvdx, dvdy, dwdx, dwdy, dudz, dvdz, dwdz)
+ !$acc exit data delete(d_11, d_22, d_33, d_12, d_13, d_23)
+
+ DEBUG_WRITE(' les_models ')
+
+ end subroutine les_models
+
+!---------------------------------------
+
+ subroutine calculate_n2( bn2, theta_m, exner, pressure_b, pp, zgrid, scalars, index_qv, index_qc, qtot, &
+ cellStart, cellEnd, nCells)
+
+ use mpas_atm_dimensions ! pull nVertLevels and num_scalars from here
+
+ integer, intent(in) :: cellStart, cellEnd, nCells
+ integer, intent(in) :: index_qv, index_qc
+ real (kind=RKIND), dimension(nVertLevels,nCells+1), intent(out) :: bn2
+ real (kind=RKIND), dimension(nVertLevels,nCells+1), intent(in) :: theta_m, exner, pressure_b, pp, qtot
+ real (kind=RKIND), dimension(nVertLevels+1,nCells+1), intent(in) :: zgrid
+ real (kind=RKIND), dimension(num_scalars,nVertLevels,nCells+1), intent(in) :: scalars
+! local
+ integer :: iCell, k
+ real (kind=RKIND) :: dz, rdz, esw, p
+ real (kind=RKIND), parameter :: qc_cr = 0.00001 ! in kg/kg
+ real (kind=RKIND), dimension(nVertLevels) :: theta, qvsw, temp, coefa
+ logical :: dry_bv_frequency
+
+
+ DEBUG_WRITE(' begin BV frequency calculations ')
+
+ !$acc enter data create(theta, temp, qvsw, coefa)
+
+ !$acc parallel default(present)
+
+ !$acc loop gang worker private(theta, temp, qvsw, coefa)
+ do iCell = cellStart,cellEnd
+
+!DIR$ IVDEP
+ !$acc loop vector
+ do k=1, nVertLevels
+
+ theta(k) = theta_m(k,iCell) / (1._RKIND + rvord * scalars(index_qv,k,iCell))
+
+ temp(k) = exner(k,iCell) * theta(k)
+
+ p = pressure_b(k,iCell) + pp(k,iCell)
+ esw = 1000. * svp1 * exp(svp2 * (temp(k) - svpt0) / (temp(k) - svp3))
+ if (p < esw) esw = p * 0.99 ! fix for pressure < esw
+ qvsw(k) = ep_2 * esw / (p - esw)
+
+ coefa(k) = ( 1.0 + xlv * qvsw(k)/ R_d / temp(k) ) / &
+ ( 1.0 + xlv * xlv *qvsw(k) / Cp / R_v / temp(k) / temp(k) )
+
+ end do
+
+ !$acc loop vector
+ do k=2, nVertLevels-1
+ dz = 0.5 * (zgrid(k+2,iCell)+zgrid(k+1,iCell)) - 0.5 * (zgrid(k,iCell)+zgrid(k-1,iCell))
+ rdz = 1.0/dz
+
+ ! if ( scalars(index_qc,k,iCell) < qc_cr ) then
+ ! ! Dry Brunt-Vaisala frequency
+ ! bn2(k,iCell) = gravity * ((theta(k+1) - theta(k-1) ) / theta(k) / dz &
+ ! + rvord * (scalars(index_qv,k+1,iCell) - scalars(index_qv,k-1,iCell)) / dz &
+ ! - ( qtot(k+1, iCell) - qtot(k-1, iCell) ) / dz )
+ ! else
+ ! ! Moist Brunt-Vaisala frequency according to Durran and Klemp (1982) Eq. 36
+ ! bn2(k,iCell) = gravity * ( coefa(k) * ((theta(k+1) - theta(k-1) ) / theta(k) / dz &
+ ! + xlv / cp / temp(k) * ( qvsw(k+1) - qvsw(k-1)) / dz ) &
+ ! - ( qtot(k+1, iCell) - qtot(k-1, iCell) ) / dz )
+ ! endif
+
+ dry_bv_frequency = .true.
+ if(index_qc .gt. 0) then ! if moist simulation, qc exists
+ if ( scalars(index_qc,k,iCell) .ge. qc_cr ) dry_bv_frequency = .false.
+ end if
+
+ if (dry_bv_frequency) then
+ ! Dry Brunt-Vaisala frequency
+ bn2(k,iCell) = gravity * ((theta(k+1) - theta(k-1) ) / theta(k) * rdz &
+ + rvord * (scalars(index_qv,k+1,iCell) - scalars(index_qv,k-1,iCell)) * rdz &
+ - ( qtot(k+1, iCell) - qtot(k-1, iCell) ) * rdz )
+ else
+ ! Moist Brunt-Vaisala frequency according to Durran and Klemp (1982) Eq. 36
+ bn2(k,iCell) = gravity * ( coefa(k) * ((theta(k+1) - theta(k-1) ) / theta(k) * rdz &
+ + xlv / cp / temp(k) * ( qvsw(k+1) - qvsw(k-1)) * rdz ) &
+ - ( qtot(k+1, iCell) - qtot(k-1, iCell) ) * rdz )
+ endif
+
+ end do
+
+ bn2(1,iCell) = bn2(2,iCell)
+ bn2(nVertLevels,iCell) = bn2(nVertLevels-1,iCell)
+
+ end do
+
+ !$acc end parallel
+
+ !$acc exit data delete(theta, temp, qvsw, coefa)
+
+ DEBUG_WRITE(' exiting BV frequency calculations ')
+
+ end subroutine calculate_n2
+
+!---------------------------------------
+
+ subroutine u_dissipation_3d( edgeStart, edgeEnd, edgeSolveStart, edgeSolveEnd, vertexStart, vertexEnd, &
+ cellStart, cellEnd, nCells, nEdges, nVertices, vertexDegree, &
+ cellsOnEdge, verticesOnEdge, edgesOnCell, edgesOnVertex, &
+ nEdgesOnCell, edgesOnCell_sign, edgesOnVertex_sign, &
+ invAreaCell, invAreaTriangle, invDvEdge, invDcEdge, &
+ angleEdge, dcEdge, dvEdge, meshScalingDel2, meshScalingDel4, &
+ config_mix_full, h_mom_eddy_visc4, v_mom_eddy_visc2, &
+ config_del4u_div_factor, zgrid, &
+ eddy_visc_horz, eddy_visc_vert, zz, rdzu, rdzw, &
+ fzm, fzp, les_model_opt, les_surface_opt, &
+ config_surface_drag_coefficient, &
+ delsq_u, delsq_vorticity, delsq_divergence, &
+ u, v, divergence, vorticity, rho_edge, rho_zz, u_init, v_init, ustm, &
+ tend_u_euler )
+
+ use mpas_atm_dimensions ! pull nVertLevels and maxEdges from here
+
+ implicit none
+
+ integer, intent(in) :: edgeStart, edgeEnd, edgeSolveStart, edgeSolveEnd
+ integer, intent(in) :: vertexStart, vertexEnd, vertexDegree
+ integer, intent(in) :: cellStart, cellEnd
+ integer, intent(in) :: nCells, nEdges, nVertices
+ logical, intent(in) :: config_mix_full
+
+ integer, intent(in) :: les_model_opt
+ integer, intent(in) :: les_surface_opt
+
+ integer, dimension(2,nEdges+1), intent(in) :: cellsOnEdge
+ integer, dimension(2,nEdges+1), intent(in) :: verticesOnEdge
+ integer, dimension(maxEdges,nCells+1), intent(in) :: edgesOnCell
+ integer, dimension(nCells+1), intent(in) :: nEdgesOnCell
+ integer, dimension(vertexDegree,nVertices+1), intent(in) :: edgesOnVertex
+
+ real (kind=RKIND), intent(in) :: h_mom_eddy_visc4
+ real (kind=RKIND), intent(in) :: v_mom_eddy_visc2
+ real (kind=RKIND), intent(in) :: config_del4u_div_factor
+ real (kind=RKIND), intent(in) :: config_surface_drag_coefficient
+
+ real (kind=RKIND), dimension(maxEdges,nCells+1), intent(in) :: edgesOnCell_sign
+ real (kind=RKIND), dimension(vertexDegree,nVertices+1), intent(in) :: edgesOnVertex_sign
+ real (kind=RKIND), dimension(nVertices+1), intent(in) :: invAreaTriangle
+ real (kind=RKIND), dimension(nCells+1), intent(in) :: invAreaCell
+ real (kind=RKIND), dimension(nEdges+1), intent(in) :: invDcEdge
+ real (kind=RKIND), dimension(nEdges+1), intent(in) :: invDvEdge
+ real (kind=RKIND), dimension(nEdges+1), intent(in) :: angleEdge
+ real (kind=RKIND), dimension(nEdges+1), intent(in) :: dcEdge
+ real (kind=RKIND), dimension(nEdges+1), intent(in) :: dvEdge
+ real (kind=RKIND), dimension(nEdges+1), intent(in) :: meshScalingDel2
+ real (kind=RKIND), dimension(nEdges+1), intent(in) :: meshScalingDel4
+ real (kind=RKIND), dimension(nVertLevels+1,nCells+1), intent(in) :: zgrid
+
+ real (kind=RKIND), dimension(nVertLevels,nEdges+1), intent(in) :: u
+ real (kind=RKIND), dimension(nVertLevels,nEdges+1), intent(in) :: v
+ real (kind=RKIND), dimension(nVertLevels,nCells+1), intent(in) :: divergence
+ real (kind=RKIND), dimension(nVertLevels,nVertices+1), intent(in) :: vorticity
+ real (kind=RKIND), dimension(nVertLevels,nEdges+1), intent(in) :: rho_edge
+ real (kind=RKIND), dimension(nVertLevels,nCells+1), intent(in) :: eddy_visc_horz
+ real (kind=RKIND), dimension(nVertLevels,nCells+1), intent(in) :: eddy_visc_vert
+ real (kind=RKIND), dimension(nVertLevels,nCells+1), intent(in) :: zz
+ real (kind=RKIND), dimension(nVertLevels,nCells+1), intent(in) :: rho_zz
+ real (kind=RKIND), dimension(nVertLevels), intent(in) :: rdzu
+ real (kind=RKIND), dimension(nVertLevels), intent(in) :: rdzw
+ real (kind=RKIND), dimension(nVertLevels), intent(in) :: fzm
+ real (kind=RKIND), dimension(nVertLevels), intent(in) :: fzp
+
+
+ ! scratch space from calling routine
+ real (kind=RKIND), dimension(nVertLevels,nEdges+1) :: delsq_u
+ real (kind=RKIND), dimension(nVertLevels,nVertices+1) :: delsq_vorticity
+ real (kind=RKIND), dimension(nVertLevels,nCells+1) :: delsq_divergence
+
+ real (kind=RKIND), dimension(nVertLevels), intent(in) :: u_init, v_init
+ real (kind=RKIND), dimension(:), intent(in) :: ustm
+
+ real (kind=RKIND), dimension(nVertLevels,nEdges+1), intent(out) :: tend_u_euler
+
+ ! local variables
+
+ integer :: iEdge, cell1, cell2, vertex1, vertex2, iVertex, iCell, i, k
+ real (kind=RKIND) :: r_dc, r_dv, u_diffusion, u_diffusion_les, kdiffu, r, edge_sign, u_mix_scale
+ real (kind=RKIND) :: z1, z2, z3, z4, zm, z0, zp
+ real (kind=RKIND), dimension(nVertLevels) :: u_mix
+
+ real (kind=RKIND), dimension(nVertLevels+1) :: turb_vflux
+ real (kind=RKIND) :: rho_k_cell1, rho_k_cell2, rho_k_at_w
+ real (kind=RKIND) :: zz_cell1, zz_cell2, zz_at_w
+ real (kind=RKIND) :: ust_edge
+
+ real (kind=RKIND) :: velocity_magnitude
+ real (kind=RKIND) :: tau_12_factor
+
+
+ DEBUG_WRITE(' begin u_dissipation_3d ')
+ DEBUG_WRITE(' 4th order hyperviscosity is $r ' COMMA realArgs=(/h_mom_eddy_visc4/))
+ DEBUG_WRITE(' 4th order divergence factor is $r ' COMMA realArgs=(/config_del4u_div_factor/))
+
+!$OMP BARRIER
+
+ ! del^4 horizontal filter. We compute this as del^2 ( del^2 (u) ).
+ ! First, storage to hold the result from the first del^2 computation.
+
+ !$acc enter data create(u_mix)
+ !$acc enter data create(turb_vflux)
+
+ !$acc parallel default(present)
+
+ tau_12_factor = 0.0
+ if(les_model_opt /= LES_MODEL_NONE) tau_12_factor = 1.0
+
+ !$acc loop gang worker
+ do iEdge=edgeStart,edgeEnd
+ cell1 = cellsOnEdge(1,iEdge)
+ cell2 = cellsOnEdge(2,iEdge)
+ vertex1 = verticesOnEdge(1,iEdge)
+ vertex2 = verticesOnEdge(2,iEdge)
+ r_dc = invDcEdge(iEdge)
+ r_dv = min(invDvEdge(iEdge), 4*invDcEdge(iEdge))
+
+ !$acc loop vector
+ do k = 1, nVertLevels
+ delsq_u(k,iEdge) = 0.0_RKIND
+ end do
+
+!DIR$ IVDEP
+ !$acc loop vector
+ do k=1,nVertLevels
+
+ ! Compute diffusion, computed as \nabla divergence - k \times \nabla vorticity
+ ! only valid for h_mom_eddy_visc4 == constant
+ u_diffusion = ( divergence(k,cell2) - divergence(k,cell1) ) * r_dc &
+ -( vorticity(k,vertex2) - vorticity(k,vertex1) ) * r_dv
+ ! for LES models we need 2 times the gradient of divergence, in contrast to what is
+ ! saved and used to calculate the 4th-order horizontal filter
+ u_diffusion_les = u_diffusion + tau_12_factor * ( divergence(k,cell2) - divergence(k,cell1) ) * r_dc
+
+ delsq_u(k,iEdge) = delsq_u(k,iEdge) + u_diffusion
+
+ kdiffu = 0.5*(eddy_visc_horz(k,cell1)+eddy_visc_horz(k,cell2))
+
+ ! include 2nd-order diffusion here
+ tend_u_euler(k,iEdge) = tend_u_euler(k,iEdge) &
+ + rho_edge(k,iEdge)* kdiffu * u_diffusion_les * meshScalingDel2(iEdge)
+
+ end do
+ end do
+
+ !$acc end parallel
+
+ if (h_mom_eddy_visc4 > 0.0) then ! 4th order mixing is active
+
+!$OMP BARRIER
+
+ !$acc parallel default(present)
+
+ !$acc loop gang worker
+ do iVertex=vertexStart,vertexEnd
+ !$acc loop vector
+ do k = 1, nVertLevels
+ delsq_vorticity(k,iVertex) = 0.0_RKIND
+ end do
+
+ !$acc loop seq
+ do i=1,vertexDegree
+ iEdge = edgesOnVertex(i,iVertex)
+ edge_sign = invAreaTriangle(iVertex) * dcEdge(iEdge) * edgesOnVertex_sign(i,iVertex)
+
+ !$acc loop vector
+ do k=1,nVertLevels
+ delsq_vorticity(k,iVertex) = delsq_vorticity(k,iVertex) + edge_sign * delsq_u(k,iEdge)
+ end do
+ end do
+ end do
+
+ !$acc loop gang worker
+ do iCell=cellStart,cellEnd
+ !$acc loop vector
+ do k = 1, nVertLevels
+ delsq_divergence(k,iCell) = 0.0_RKIND
+ end do
+
+ r = invAreaCell(iCell)
+
+ !$acc loop seq
+ do i=1,nEdgesOnCell(iCell)
+ iEdge = edgesOnCell(i,iCell)
+ edge_sign = r * dvEdge(iEdge) * edgesOnCell_sign(i,iCell)
+
+ !$acc loop vector
+ do k=1,nVertLevels
+ delsq_divergence(k,iCell) = delsq_divergence(k,iCell) + edge_sign * delsq_u(k,iEdge)
+ end do
+ end do
+ end do
+
+ !$acc end parallel
+
+!$OMP BARRIER
+
+ !$acc parallel default(present)
+
+ !$acc loop gang worker
+ do iEdge=edgeSolveStart,edgeSolveEnd
+ cell1 = cellsOnEdge(1,iEdge)
+ cell2 = cellsOnEdge(2,iEdge)
+ vertex1 = verticesOnEdge(1,iEdge)
+ vertex2 = verticesOnEdge(2,iEdge)
+
+ u_mix_scale = meshScalingDel4(iEdge)*h_mom_eddy_visc4
+ r_dc = u_mix_scale * config_del4u_div_factor * invDcEdge(iEdge)
+ r_dv = u_mix_scale * min(invDvEdge(iEdge), 4*invDcEdge(iEdge))
+
+!DIR$ IVDEP
+ !$acc loop vector
+ do k=1,nVertLevels
+
+ ! Compute diffusion, computed as \nabla divergence - k \times \nabla vorticity
+ ! only valid for h_mom_eddy_visc4 == constant
+ !
+ ! Here, we scale the diffusion on the divergence part a factor of config_del4u_div_factor
+ ! relative to the rotational part. The stability constraint on the divergence component is much less
+ ! stringent than the rotational part, and this flexibility may be useful.
+ !
+ u_diffusion = rho_edge(k,iEdge) * ( ( delsq_divergence(k,cell2) - delsq_divergence(k,cell1) ) * r_dc &
+ -( delsq_vorticity(k,vertex2) - delsq_vorticity(k,vertex1) ) * r_dv )
+ tend_u_euler(k,iEdge) = tend_u_euler(k,iEdge) - u_diffusion
+
+ end do
+ end do
+
+ !$acc end parallel
+
+ end if ! 4th order mixing is active
+
+ !
+ ! vertical mixing for u - 2nd order filter in physical (z) space
+ !
+ if ( v_mom_eddy_visc2 > 0.0 ) then
+
+ if (config_mix_full) then ! mix full state
+
+ !$acc parallel default(present)
+
+ !$acc loop gang worker
+ do iEdge=edgeSolveStart,edgeSolveEnd
+
+ cell1 = cellsOnEdge(1,iEdge)
+ cell2 = cellsOnEdge(2,iEdge)
+
+ !$acc loop vector
+ do k=2,nVertLevels-1
+
+ z1 = 0.5*(zgrid(k-1,cell1)+zgrid(k-1,cell2))
+ z2 = 0.5*(zgrid(k ,cell1)+zgrid(k ,cell2))
+ z3 = 0.5*(zgrid(k+1,cell1)+zgrid(k+1,cell2))
+ z4 = 0.5*(zgrid(k+2,cell1)+zgrid(k+2,cell2))
+
+ zm = 0.5*(z1+z2)
+ z0 = 0.5*(z2+z3)
+ zp = 0.5*(z3+z4)
+
+ tend_u_euler(k,iEdge) = tend_u_euler(k,iEdge) + rho_edge(k,iEdge) * v_mom_eddy_visc2*( &
+ (u(k+1,iEdge)-u(k ,iEdge))/(zp-z0) &
+ -(u(k ,iEdge)-u(k-1,iEdge))/(z0-zm) )/(0.5*(zp-zm))
+ end do
+ end do
+
+ !$acc end parallel
+
+ else ! idealized cases where we mix on the perturbation from the initial 1-D state
+
+ !$acc parallel default(present)
+
+ !$acc loop gang worker private(u_mix)
+ do iEdge=edgeSolveStart,edgeSolveEnd
+
+ cell1 = cellsOnEdge(1,iEdge)
+ cell2 = cellsOnEdge(2,iEdge)
+
+ !$acc loop vector
+ do k=1,nVertLevels
+ u_mix(k) = u(k,iEdge) - u_init(k) * cos( angleEdge(iEdge) ) &
+ + v_init(k) * sin( angleEdge(iEdge) )
+ end do
+
+ !$acc loop vector
+ do k=2,nVertLevels-1
+
+ z1 = 0.5*(zgrid(k-1,cell1)+zgrid(k-1,cell2))
+ z2 = 0.5*(zgrid(k ,cell1)+zgrid(k ,cell2))
+ z3 = 0.5*(zgrid(k+1,cell1)+zgrid(k+1,cell2))
+ z4 = 0.5*(zgrid(k+2,cell1)+zgrid(k+2,cell2))
+
+ zm = 0.5*(z1+z2)
+ z0 = 0.5*(z2+z3)
+ zp = 0.5*(z3+z4)
+
+ tend_u_euler(k,iEdge) = tend_u_euler(k,iEdge) + rho_edge(k,iEdge) * v_mom_eddy_visc2*( &
+ (u_mix(k+1)-u_mix(k ))/(zp-z0) &
+ -(u_mix(k )-u_mix(k-1))/(z0-zm) )/(0.5*(zp-zm))
+ end do
+ end do
+
+ !$acc end parallel
+
+ end if ! mix perturbation state
+
+ end if ! vertical mixing of horizontal momentum for les formulation
+
+ if ( les_model_opt /= LES_MODEL_NONE ) then
+
+ !$acc parallel default(present)
+
+ !$acc loop gang worker private(turb_vflux)
+ do iEdge=edgeSolveStart,edgeSolveEnd
+
+ cell1 = cellsOnEdge(1,iEdge)
+ cell2 = cellsOnEdge(2,iEdge)
+ turb_vflux(nVertlevels+1) = 0.0_RKIND ! no turbulent flux out of the domain
+ turb_vflux(1) = 0.0_RKIND ! lower bc flux handled where ???
+
+ !$acc loop vector
+ do k=2,nVertLevels
+ rho_k_cell1 = fzm(k)*rho_zz(k ,cell1)*zz(k ,cell1)*eddy_visc_vert(k ,cell1) &
+ +fzp(k)*rho_zz(k-1,cell1)*zz(k-1,cell1)*eddy_visc_vert(k-1,cell1)
+ rho_k_cell2 = fzm(k)*rho_zz(k ,cell2)*zz(k ,cell2)*eddy_visc_vert(k ,cell2) &
+ +fzp(k)*rho_zz(k-1,cell2)*zz(k-1,cell2)*eddy_visc_vert(k-1,cell2)
+ rho_k_at_w = 0.5*(rho_k_cell1+rho_k_cell2)
+
+ zz_cell1 = fzm(k)*zz(k,cell1)+fzp(k)*zz(k-1,cell1)
+ zz_cell2 = fzm(k)*zz(k,cell2)+fzp(k)*zz(k-1,cell2)
+ zz_at_w = 0.5*(zz_cell1+zz_cell2)
+ turb_vflux(k) = - rho_k_at_w*zz_at_w*rdzu(k)*(u(k,iEdge)-u(k-1,iEdge))
+ end do
+
+ if( les_surface_opt == LES_SURFACE_SPECIFIED ) then
+ velocity_magnitude = sqrt(u(1,iEdge)**2 + v(1,iEdge)**2)
+ turb_vflux(1) = -rho_edge(1,iEdge)*config_surface_drag_coefficient*u(1,iEdge)*velocity_magnitude
+ turb_vflux(nVertLevels+1) = turb_vflux(nVertLevels)
+ else if ( les_surface_opt == LES_SURFACE_VARYING ) then
+ ust_edge = 0.5*(ustm(cell1) + ustm(cell2))
+ velocity_magnitude = max(sqrt(u(1,iEdge)**2 + v(1,iEdge)**2),0.1)
+ turb_vflux(1) = -rho_edge(1,iEdge)*ust_edge*ust_edge*(u(1,iEdge)/velocity_magnitude)
+ turb_vflux(nVertLevels+1) = turb_vflux(nVertLevels)
+ ! end test conditions
+ else
+ ! test conditions for supercell case
+ turb_vflux(1) = turb_vflux(2)
+ turb_vflux(nVertLevels+1) = turb_vflux(nVertLevels)
+ ! end test conditions
+ end if
+
+ !$acc loop vector
+ do k=1,nVertLevels
+ tend_u_euler(k,iEdge) = tend_u_euler(k,iEdge) - rdzw(k)*(turb_vflux(k+1)-turb_vflux(k))
+ end do
+
+ end do
+
+ !$acc end parallel
+
+ end if
+
+ !$acc exit data delete(turb_vflux)
+ !$acc exit data delete(u_mix)
+
+ DEBUG_WRITE(' exiting u_dissipation_3d ')
+
+ end subroutine u_dissipation_3d
+
+!------------------------
+
+ subroutine w_dissipation_3d( cellStart, cellEnd, cellSolveStart, cellSolveEnd, &
+ nCells, nEdges, &
+ nEdgesOnCell, edgesOnCell, cellsOnEdge, edgesOnCell_sign, &
+ invAreaCell, invDcEdge, dvEdge, &
+ meshScalingDel2, meshScalingDel4, &
+ rdzw, rdzu, &
+ v_mom_eddy_visc2, h_mom_eddy_visc4, &
+ delsq_w, &
+ w, rho_edge, rho_zz, divergence, zz, &
+ eddy_visc_horz, eddy_visc_vert, &
+ les_model_opt, les_surface_opt, &
+ tend_w_euler )
+
+
+ ! 3D w dissipation using the 3D smagorinsky eddy viscosities.
+ ! This routine also includes the simpler mixing models, and the 4th-order horizontal filter
+
+ use mpas_atm_dimensions ! pull nVertLevels and maxEdges from here
+
+ implicit none
+
+ integer, intent(in) :: cellStart, cellEnd
+ integer, intent(in) :: cellSolveStart, cellSolveEnd
+ integer, intent(in) :: nCells, nEdges
+
+ integer, dimension(nCells+1), intent(in) :: nEdgesOnCell
+ integer, dimension(maxEdges,nCells+1), intent(in) :: EdgesOnCell
+
+ integer, dimension(2,nEdges+1), intent(in) :: cellsOnEdge
+
+ integer, intent(in) :: les_model_opt
+ integer, intent(in) :: les_surface_opt
+
+ real (kind=RKIND), intent(in) :: h_mom_eddy_visc4
+ real (kind=RKIND), intent(in) :: v_mom_eddy_visc2
+
+ real (kind=RKIND), dimension(maxEdges,nCells+1), intent(in) :: edgesOnCell_sign
+ real (kind=RKIND), dimension(nCells+1), intent(in) :: invAreaCell
+ real (kind=RKIND), dimension(nEdges+1), intent(in) :: dvEdge
+ real (kind=RKIND), dimension(nEdges+1), intent(in) :: invDcEdge
+ real (kind=RKIND), dimension(nEdges+1), intent(in) :: meshScalingDel2
+ real (kind=RKIND), dimension(nEdges+1), intent(in) :: meshScalingDel4
+ real (kind=RKIND), dimension(nVertLevels), intent(in) :: rdzw
+ real (kind=RKIND), dimension(nVertLevels), intent(in) :: rdzu
+
+ real (kind=RKIND), dimension(nVertLevels+1,nCells+1), intent(in) :: w
+ real (kind=RKIND), dimension(nVertLevels,nCells+1), intent(in) :: eddy_visc_horz
+ real (kind=RKIND), dimension(nVertLevels,nCells+1), intent(in) :: eddy_visc_vert
+ real (kind=RKIND), dimension(nVertLevels,nCells+1), intent(in) :: rho_zz
+ real (kind=RKIND), dimension(nVertLevels,nCells+1), intent(in) :: divergence
+ real (kind=RKIND), dimension(nVertLevels,nCells+1), intent(in) :: zz
+ real (kind=RKIND), dimension(nVertLevels,nEdges+1), intent(in) :: rho_edge
+
+ real (kind=RKIND), dimension(nVertLevels+1,nCells+1), intent(inout) :: tend_w_euler
+
+ ! storage passed in from calling routine
+ real (kind=RKIND), dimension(nVertLevels,nCells+1) :: delsq_w
+ real (kind=RKIND), dimension(nVertLevels+1) :: turb_vflux
+
+ ! local variables
+
+ integer :: cell1, cell2, iEdge, iCell, i, k
+ real (kind=RKIND) :: r_areaCell, edge_sign, w_turb_flux
+
+
+! !OMP BARRIER why is this openmp barrier here???
+
+ ! del^4 horizontal filter. We compute this as del^2 ( del^2 (w) ).
+ !
+ ! First, storage to hold the result from the first del^2 computation.
+ ! we copied code from the theta mixing, hence the theta* names.
+
+
+ DEBUG_WRITE(' begin w_dissipation_3d ')
+ DEBUG_WRITE(' 4th order hyperviscosity is $r ' COMMA realArgs=(/h_mom_eddy_visc4/))
+
+ !$acc enter data create(turb_vflux)
+
+ !$acc parallel default(present)
+
+ !$acc loop gang worker
+ do iCell=cellStart,cellEnd
+
+ !$acc loop vector
+ do k = 1, nVertLevels
+ delsq_w(k,iCell) = 0.0_RKIND
+ end do
+
+ !$acc loop vector
+ do k = 1, nVertLevels+1
+ tend_w_euler(k,iCell) = 0.0_RKIND
+ end do
+
+ r_areaCell = invAreaCell(iCell)
+
+ !$acc loop seq
+ do i=1,nEdgesOnCell(iCell)
+ iEdge = edgesOnCell(i,iCell)
+
+ edge_sign = 0.5 * r_areaCell*edgesOnCell_sign(i,iCell) * dvEdge(iEdge) * invDcEdge(iEdge)
+
+ cell1 = cellsOnEdge(1,iEdge)
+ cell2 = cellsOnEdge(2,iEdge)
+
+!DIR$ IVDEP
+ !$acc loop vector
+ do k=2,nVertLevels
+
+ w_turb_flux = edge_sign*(rho_edge(k,iEdge)+rho_edge(k-1,iEdge))*(w(k,cell2) - w(k,cell1))
+ delsq_w(k,iCell) = delsq_w(k,iCell) + w_turb_flux
+ w_turb_flux = w_turb_flux * meshScalingDel2(iEdge) * 0.25 * &
+ ( eddy_visc_horz(k ,cell1)+eddy_visc_horz(k ,cell2) &
+ +eddy_visc_horz(k-1,cell1)+eddy_visc_horz(k-1,cell2) )
+ tend_w_euler(k,iCell) = tend_w_euler(k,iCell) + w_turb_flux
+ end do
+ end do
+ end do
+
+ !$acc end parallel
+
+!$OMP BARRIER
+
+ if (h_mom_eddy_visc4 > 0.0) then ! 4th order mixing is active
+
+ !$acc parallel default(present)
+
+ !$acc loop gang worker
+ do iCell=cellSolveStart,cellSolveEnd ! Technically updating fewer cells than before...
+
+ r_areaCell = h_mom_eddy_visc4 * invAreaCell(iCell)
+
+ !$acc loop seq
+ do i=1,nEdgesOnCell(iCell)
+ iEdge = edgesOnCell(i,iCell)
+ cell1 = cellsOnEdge(1,iEdge)
+ cell2 = cellsOnEdge(2,iEdge)
+
+ edge_sign = meshScalingDel4(iEdge)*r_areaCell*dvEdge(iEdge)*edgesOnCell_sign(i,iCell) * invDcEdge(iEdge)
+
+ !$acc loop vector
+ do k=2,nVertLevels
+ tend_w_euler(k,iCell) = tend_w_euler(k,iCell) - edge_sign * (delsq_w(k,cell2) - delsq_w(k,cell1))
+ end do
+
+ end do
+ end do
+
+ !$acc end parallel
+
+ end if ! 4th order mixing is active
+
+ if ( v_mom_eddy_visc2 > 0.0 ) then ! vertical mixing
+
+ !$acc parallel default(present)
+
+ !$acc loop gang worker
+ do iCell=cellSolveStart,cellSolveEnd
+!DIR$ IVDEP
+ !$acc loop vector
+ do k=2,nVertLevels
+ tend_w_euler(k,iCell) = tend_w_euler(k,iCell) + v_mom_eddy_visc2*0.5*(rho_zz(k,iCell)+rho_zz(k-1,iCell))*( &
+ (w(k+1,iCell)-w(k ,iCell))*rdzw(k) &
+ -(w(k ,iCell)-w(k-1,iCell))*rdzw(k-1) )*rdzu(k)
+ end do
+ end do
+
+ !$acc end parallel
+
+ end if
+
+ if ( les_model_opt /= LES_MODEL_NONE ) then
+
+ !$acc parallel default(present)
+
+ !$acc loop gang worker private(turb_vflux)
+ do iCell = cellSolveStart,cellSolveEnd ! vertical mixing for each column
+ ! compute turbulent fluxes
+
+ !$acc loop vector
+ do k=1,nVertLevels
+ turb_vflux(k) = - rho_zz(k,iCell)*eddy_visc_vert(k,iCell)*zz(k,iCell)*( &
+ 2.0*zz(k,iCell)*rdzw(k)*(w(k+1,iCell)-w(k,iCell)) &
+ + divergence(k,iCell) )
+ end do
+
+ turb_vflux(nVertLevels+1) = 0.0
+
+ !$acc loop vector
+ do k=2,nVertLevels
+ tend_w_euler(k,iCell) = tend_w_euler(k,iCell) &
+ - rdzu(k)*(turb_vflux(k)-turb_vflux(k-1))
+ end do
+ end do
+
+ !$acc end parallel
+
+ end if
+
+ !$acc exit data delete(turb_vflux)
+
+ DEBUG_WRITE(' exiting w_dissipation_3d ')
+
+ end subroutine w_dissipation_3d
+
+!-----------------------------------------------------
+
+ subroutine scalar_dissipation_3d_les( cellStart, cellEnd, cellSolveStart, cellSolveEnd, &
+ nCells, nEdges, &
+ nEdgesOnCell, edgesOnCell, cellsOnEdge, edgesOnCell_sign, &
+ invAreaCell, invDcEdge, dvEdge, &
+ meshScalingDel2, meshScalingDel4, &
+ config_mix_full, t_init, zgrid, &
+ rdzw, rdzu, fzm, fzp, &
+ v_theta_eddy_visc2, h_theta_eddy_visc4, prandtl_inv, &
+ prandtl_3d_inv, &
+ delsq_theta, &
+ theta_m, rho_edge, rho_zz, zz, &
+ eddy_visc_horz, eddy_visc_vert, &
+ bv_freq2, config_len_disp, scalars, tend_scalars, &
+ index_tke, index_qv, num_scalars_dummy, mix_scalars, &
+ les_model_opt, les_surface_opt, clock, dt, &
+ config_surface_heat_flux, config_surface_moisture_flux, &
+ uReconstructZonal, uReconstructMeridional, &
+ hfx, qfx, &
+ tend_theta_euler, dynamics_substep )
+
+
+ ! 3D theta_m dissipation using the 3D smagorinsky eddy viscosities.
+ ! This routine also includes the simpler mixing models, and the 4th-order horizontal filter
+
+ use mpas_atm_dimensions ! pull nVertLevels and maxEdges from here
+
+ implicit none
+
+ integer, intent(in) :: cellStart, cellEnd
+ integer, intent(in) :: cellSolveStart, cellSolveEnd
+ integer, intent(in) :: nCells, nEdges
+ integer, intent(in) :: num_scalars_dummy
+ integer, intent(in) :: index_tke, index_qv
+ integer, intent(in) :: dynamics_substep
+
+ real (kind=RKIND), intent(in) :: config_surface_heat_flux
+ real (kind=RKIND), intent(in) :: config_surface_moisture_flux
+
+ logical, intent(in) :: config_mix_full, mix_scalars
+
+ integer, intent(in) :: les_model_opt
+ integer, intent(in) :: les_surface_opt
+
+ type (MPAS_Clock_type), intent(in) :: clock
+ real (kind=RKIND), intent(in) :: dt
+
+ integer, dimension(nCells+1), intent(in) :: nEdgesOnCell
+ integer, dimension(maxEdges,nCells+1), intent(in) :: EdgesOnCell
+
+ integer, dimension(2,nEdges+1), intent(in) :: cellsOnEdge
+
+ real (kind=RKIND), intent(in) :: h_theta_eddy_visc4
+ real (kind=RKIND), intent(in) :: v_theta_eddy_visc2
+ real (kind=RKIND), intent(in) :: prandtl_inv
+ real (kind=RKIND), intent(in) :: config_len_disp
+
+ real (kind=RKIND), dimension(maxEdges,nCells+1), intent(in) :: edgesOnCell_sign
+ real (kind=RKIND), dimension(nCells+1), intent(in) :: invAreaCell
+ real (kind=RKIND), dimension(nEdges+1), intent(in) :: dvEdge
+ real (kind=RKIND), dimension(nEdges+1), intent(in) :: invDcEdge
+ real (kind=RKIND), dimension(nEdges+1), intent(in) :: meshScalingDel2
+ real (kind=RKIND), dimension(nEdges+1), intent(in) :: meshScalingDel4
+ real (kind=RKIND), dimension(nVertLevels), intent(in) :: rdzw
+ real (kind=RKIND), dimension(nVertLevels), intent(in) :: rdzu
+ real (kind=RKIND), dimension(nVertLevels), intent(in) :: fzm
+ real (kind=RKIND), dimension(nVertLevels), intent(in) :: fzp
+ real (kind=RKIND), dimension(nVertLevels+1, nCells+1), intent(in) :: zgrid
+ real (kind=RKIND), dimension(nVertLevels,nCells+1), intent(in) :: zz
+ real (kind=RKIND), dimension(nVertLevels,nCells+1), intent(in) :: uReconstructZonal, uReconstructMeridional
+
+ real (kind=RKIND), dimension(nVertLevels,nCells+1), intent(in) :: t_init
+
+ real (kind=RKIND), dimension(nVertLevels,nCells+1), intent(in) :: bv_freq2
+ real (kind=RKIND), dimension(nVertLevels,nCells+1), intent(in) :: prandtl_3d_inv
+ real (kind=RKIND), dimension(num_scalars,nVertLevels,nCells+1), intent(in) :: scalars
+ real (kind=RKIND), dimension(num_scalars,nVertLevels,nCells+1), intent(inout) :: tend_scalars
+ real (kind=RKIND), dimension(nVertLevels,nCells+1), intent(in) :: theta_m
+ real (kind=RKIND), dimension(nVertLevels,nCells+1), intent(in) :: eddy_visc_horz
+ real (kind=RKIND), dimension(nVertLevels,nCells+1), intent(in) :: eddy_visc_vert
+ real (kind=RKIND), dimension(nVertLevels,nCells+1), intent(in) :: rho_zz
+ real (kind=RKIND), dimension(nVertLevels,nEdges+1), intent(in) :: rho_edge
+ real (kind=RKIND), dimension(:), intent(in) :: hfx, qfx
+
+ real (kind=RKIND), dimension(nVertLevels,nCells+1), intent(inout) :: tend_theta_euler
+
+ ! storage passed in from calling routine
+ real (kind=RKIND), dimension(nVertLevels,nCells+1) :: delsq_theta
+
+ ! local variables
+ integer :: cell1, cell2, iEdge, iCell, i, k, iScalar
+ real (kind=RKIND) :: r_areaCell, edge_sign, theta_turb_flux, pr_scale
+ real (kind=RKIND) :: z1, z2, z3, z4, zm, z0, zp
+ real (kind=RKIND), dimension(nVertLevels+1) :: turb_vflux, prandtl_1d_inverse
+ real (kind=RKIND), dimension(num_scalars,nVertLevels+1) :: turb_vflux_scalars
+ real (kind=RKIND), dimension(nVertLevels) :: rho_k_at_w, zz_at_w
+
+ real (kind=RKIND) :: moisture_flux, heat_flux, theta_m_flux
+ real (kind=RKIND) :: qv_cell, theta_m_cell, theta_cell
+
+
+ DEBUG_WRITE(' begin scalar_dissipation_3d ')
+ DEBUG_WRITE(' 4th order hyperviscosity is $r ' COMMA realArgs=(/h_theta_eddy_visc4/))
+
+ if( mix_scalars .and. (dynamics_substep == 1)) call mpas_log_write(' scalar mixing on ')
+
+ !$acc enter data create(turb_vflux_scalars)
+ !$acc enter data create(turb_vflux, prandtl_1d_inverse)
+
+ !$acc parallel default(present)
+
+ !$acc loop gang worker
+ do iCell=cellStart,cellEnd
+
+ !$acc loop vector
+ do k = 1, nVertLevels
+ delsq_theta(k,iCell) = 0.0_RKIND
+ tend_theta_euler(k,iCell) = 0.0_RKIND
+ end do
+
+ r_areaCell = invAreaCell(iCell)
+
+ !$acc loop seq
+ do i=1,nEdgesOnCell(iCell)
+ iEdge = edgesOnCell(i,iCell)
+ edge_sign = r_areaCell*edgesOnCell_sign(i,iCell) * dvEdge(iEdge) * invDcEdge(iEdge)
+ pr_scale = prandtl_inv * meshScalingDel2(iEdge)
+ cell1 = cellsOnEdge(1,iEdge)
+ cell2 = cellsOnEdge(2,iEdge)
+
+!DIR$ IVDEP
+ !$acc loop vector
+ do k=1,nVertLevels
+
+! we are computing the Smagorinsky filter at more points than needed here so as to pick up the delsq_theta for 4th order filter below.
+! This is in conservative form.
+
+ theta_turb_flux = edge_sign*(theta_m(k,cell2) - theta_m(k,cell1))*rho_edge(k,iEdge)
+ delsq_theta(k,iCell) = delsq_theta(k,iCell) + theta_turb_flux
+ theta_turb_flux = theta_turb_flux*0.5*(eddy_visc_horz(k,cell1)+eddy_visc_horz(k,cell2)) * pr_scale
+ tend_theta_euler(k,iCell) = tend_theta_euler(k,iCell) + theta_turb_flux
+
+ end do
+ end do
+ end do
+
+ !$acc end parallel
+
+!$OMP BARRIER
+
+ if (h_theta_eddy_visc4 > 0.0) then ! 4th order mixing is active
+
+ !$acc parallel default(present)
+
+ !$acc loop gang worker
+ do iCell=cellSolveStart,cellSolveEnd ! Technically updating fewer cells than before...
+ r_areaCell = h_theta_eddy_visc4 * prandtl_inv * invAreaCell(iCell)
+
+ !$acc loop seq
+ do i=1,nEdgesOnCell(iCell)
+
+ iEdge = edgesOnCell(i,iCell)
+ edge_sign = meshScalingDel4(iEdge)*r_areaCell*dvEdge(iEdge)*edgesOnCell_sign(i,iCell)*invDcEdge(iEdge)
+
+ cell1 = cellsOnEdge(1,iEdge)
+ cell2 = cellsOnEdge(2,iEdge)
+
+ !$acc loop vector
+ do k=1,nVertLevels
+ tend_theta_euler(k,iCell) = tend_theta_euler(k,iCell) - edge_sign*(delsq_theta(k,cell2) - delsq_theta(k,cell1))
+ end do
+ end do
+ end do
+
+ !$acc end parallel
+
+ end if ! 4th order mixing is active
+
+ if(mix_scalars .and. (dynamics_substep == 1)) then ! dissipation for scalars, including 4th-order filter. Likely needs optimization
+
+ do iScalar=1,num_scalars
+
+ !$acc parallel default(present)
+
+ !$acc loop gang worker
+ do iCell=cellStart,cellEnd
+ !$acc loop vector
+ do k = 1, nVertLevels
+ delsq_theta(k,iCell) = 0.0_RKIND
+ end do
+
+ ! tend_theta_euler(1:nVertLevels,iCell) = 0.0
+ r_areaCell = invAreaCell(iCell)
+
+ !$acc loop seq
+ do i=1,nEdgesOnCell(iCell)
+ iEdge = edgesOnCell(i,iCell)
+ edge_sign = r_areaCell*edgesOnCell_sign(i,iCell) * dvEdge(iEdge) * invDcEdge(iEdge)
+ pr_scale = prandtl_inv * meshScalingDel2(iEdge)
+ cell1 = cellsOnEdge(1,iEdge)
+ cell2 = cellsOnEdge(2,iEdge)
+
+!DIR$ IVDEP
+ !$acc loop vector
+ do k=1,nVertLevels
+
+! we are computing the Smagorinsky filter at more points than needed here so as to pick up the delsq_theta for 4th order filter below.
+! This is in conservative form.
+
+ theta_turb_flux = edge_sign*(scalars(iScalar,k,cell2) - scalars(iScalar,k,cell1))*rho_edge(k,iEdge)
+ delsq_theta(k,iCell) = delsq_theta(k,iCell) + theta_turb_flux
+ theta_turb_flux = theta_turb_flux*0.5*(eddy_visc_horz(k,cell1)+eddy_visc_horz(k,cell2)) * pr_scale
+ tend_scalars(iScalar,k,iCell) = tend_scalars(iScalar,k,iCell) + theta_turb_flux
+
+ end do
+ end do
+ end do
+
+ !$acc end parallel
+
+!$OMP BARRIER
+
+ if (h_theta_eddy_visc4 > 0.0) then ! 4th order mixing is active
+
+ !$acc parallel default(present)
+
+ !$acc loop gang worker
+ do iCell=cellSolveStart,cellSolveEnd ! Technically updating fewer cells than before...
+
+ r_areaCell = h_theta_eddy_visc4 * prandtl_inv * invAreaCell(iCell)
+
+ !$acc loop seq
+ do i=1,nEdgesOnCell(iCell)
+
+ iEdge = edgesOnCell(i,iCell)
+ edge_sign = meshScalingDel4(iEdge)*r_areaCell*dvEdge(iEdge)*edgesOnCell_sign(i,iCell)*invDcEdge(iEdge)
+
+ cell1 = cellsOnEdge(1,iEdge)
+ cell2 = cellsOnEdge(2,iEdge)
+
+ !$acc loop vector
+ do k=1,nVertLevels
+ tend_scalars(iScalar,k,iCell) = tend_scalars(iScalar,k,iCell) - edge_sign*(delsq_theta(k,cell2) - delsq_theta(k,cell1))
+ end do
+ end do
+ end do
+
+ !$acc end parallel
+
+ end if ! 4th order mixing is active
+
+ end do ! loop over scalars for horizontal mixing
+
+ end if ! horizontal scalar mixing
+
+
+ ! idealized case vertical mixing. No scalar mixing here.
+
+ if ( v_theta_eddy_visc2 > 0.0 ) then ! vertical mixing for theta_m
+
+ if (config_mix_full) then
+
+ !$acc parallel default(present)
+
+ !$acc loop gang worker
+ do iCell = cellSolveStart,cellSolveEnd
+
+ !$acc loop vector
+ do k=2,nVertLevels-1
+ z1 = zgrid(k-1,iCell)
+ z2 = zgrid(k ,iCell)
+ z3 = zgrid(k+1,iCell)
+ z4 = zgrid(k+2,iCell)
+
+ zm = 0.5*(z1+z2)
+ z0 = 0.5*(z2+z3)
+ zp = 0.5*(z3+z4)
+
+ tend_theta_euler(k,iCell) = tend_theta_euler(k,iCell) + v_theta_eddy_visc2*prandtl_inv*rho_zz(k,iCell)*(&
+ (theta_m(k+1,iCell)-theta_m(k ,iCell))/(zp-z0) &
+ -(theta_m(k ,iCell)-theta_m(k-1,iCell))/(z0-zm) )/(0.5*(zp-zm))
+ end do
+ end do
+
+ !$acc end parallel
+
+ else ! idealized cases where we mix on the perturbation from the initial 1-D state
+
+ !$acc parallel default(present)
+
+ !$acc loop gang worker
+ do iCell = cellSolveStart,cellSolveEnd
+
+ !$acc loop vector
+ do k=2,nVertLevels-1
+ z1 = zgrid(k-1,iCell)
+ z2 = zgrid(k ,iCell)
+ z3 = zgrid(k+1,iCell)
+ z4 = zgrid(k+2,iCell)
+
+ zm = 0.5*(z1+z2)
+ z0 = 0.5*(z2+z3)
+ zp = 0.5*(z3+z4)
+
+ tend_theta_euler(k,iCell) = tend_theta_euler(k,iCell) + v_theta_eddy_visc2*prandtl_inv*rho_zz(k,iCell)*(&
+ ((theta_m(k+1,iCell)-t_init(k+1,iCell))-(theta_m(k ,iCell)-t_init(k,iCell)))/(zp-z0) &
+ -((theta_m(k ,iCell)-t_init(k,iCell))-(theta_m(k-1,iCell)-t_init(k-1,iCell)))/(z0-zm) )/(0.5*(zp-zm))
+ end do
+ end do
+
+ !$acc end parallel
+
+ end if
+
+ end if
+
+ if ( les_model_opt /= LES_MODEL_NONE ) then
+
+ !$acc parallel default(present)
+
+ !$acc loop gang worker private(turb_vflux, turb_vflux_scalars, prandtl_1d_inverse, rho_k_at_w, zz_at_w)
+ do iCell = cellSolveStart,cellSolveEnd ! vertical mixing for each column
+ ! compute turbulent fluxes
+
+ turb_vflux(nVertlevels+1) = 0. ! no turbulent flux out of the domain
+ turb_vflux(1) = 0. ! lower bc flux handled where ???
+
+ if ( les_model_opt == LES_MODEL_3D_SMAGORINSKY ) then
+ !$acc loop vector
+ do k=2,nVertLevels
+ prandtl_1d_inverse(k) = prandtl_inv
+ end do
+ else ! prognostic_1.5_order, isentropic mixing length
+ ! do k=2,nVertLevels
+ ! delta_z = 0.5*(zgrid(k+1,iCell)-zgrid(k-1,iCell))
+ ! delta_s = ((config_len_disp**2)*delta_z)**(1./3.)
+ ! bv_frequency2 = 0.5*(bv_freq2(k,iCell)+bv_freq2(k-1,iCell))
+ ! tke_length = delta_s
+ ! if(bv_frequency2 .gt. 1.e-06) &
+ ! tke_length = 0.76*sqrt(scalars(index_tke,k,iCell))/sqrt(bv_frequency2)
+ ! tke_length = min(delta_z,tke_length)
+ ! prandtl_inverse(k) = 1. + 2.*tke_length/delta_z
+ ! end do
+
+ !$acc loop vector
+ do k=2,nVertLevels
+ ! prandtl_1d_inverse(k) = 0.5*(prandtl_3d_inv(k,iCell)+prandtl_3d_inv(k-1,iCell))
+ prandtl_1d_inverse(k) = fzm(k)*prandtl_3d_inv(k,iCell)+fzp(k)*prandtl_3d_inv(k-1,iCell)
+ end do
+
+ end if
+
+ !$acc loop vector
+ do k=2,nVertLevels
+
+ ! delta_z = 0.5*(zgrid(k+1,iCell)-zgrid(k-1,iCell))
+ ! delta_s = ((config_len_disp**2)*delta_z)**(1./3.)
+ ! bv_frequency2 = 0.5*(bv_freq2(k)+bv_freq(k-1))
+ ! bv = max( sqrt(abs(bv_frequency2)), epsilon_bv )
+ rho_k_at_w(k) = fzm(k)*rho_zz(k ,iCell)*zz(k ,iCell)*zz(k ,iCell)*eddy_visc_vert(k ,iCell) &
+ +fzp(k)*rho_zz(k-1,iCell)*zz(k-1,iCell)*zz(k-1,iCell)*eddy_visc_vert(k-1,iCell)
+ zz_at_w(k) = fzm(k)*zz(k,iCell)+fzp(k)*zz(k-1,iCell)
+ turb_vflux(k) = - prandtl_1d_inverse(k)*rho_k_at_w(k)*zz_at_w(k)*rdzu(k)*(theta_m(k,iCell)-theta_m(k-1,iCell))
+ end do
+
+ ! test boundary conditions for supercell and les test cases
+
+ if( les_surface_opt == LES_SURFACE_SPECIFIED .or. les_surface_opt == LES_SURFACE_VARYING ) then
+
+ if( les_surface_opt == LES_SURFACE_SPECIFIED ) then
+ moisture_flux = config_surface_moisture_flux
+ heat_flux = config_surface_heat_flux
+
+! place holder routine for time-varying specified
+! call flux_les_sas( heat_flux, moisture_flux, clock, dt )
+
+ else if ( les_surface_opt == LES_SURFACE_VARYING ) then
+ heat_flux = hfx(iCell)/rho_zz(1,iCell)/cp
+ moisture_flux = qfx(iCell)/rho_zz(1,iCell)
+ endif
+
+ qv_cell = scalars(index_qv,1,iCell)
+ theta_m_cell = theta_m(1,iCell)
+ theta_cell = theta_m_cell/(1.0+(rv/rgas)*qv_cell)
+
+ theta_m_flux = heat_flux*(1.0+(rv/rgas)*qv_cell)+(rv/rgas)*theta_cell*moisture_flux
+ turb_vflux(1) = theta_m_flux*rho_zz(1,iCell)
+ moisture_flux = moisture_flux*rho_zz(1,iCell)
+ turb_vflux(nVertLevels+1) = turb_vflux(nVertLevels)
+
+ else
+
+ turb_vflux(1) = turb_vflux(2)
+ turb_vflux(nVertLevels+1) = turb_vflux(nVertLevels)
+
+ end if
+
+ !$acc loop vector
+ do k=1,nVertLevels
+ tend_theta_euler(k,iCell) = tend_theta_euler(k,iCell) &
+ - rdzw(k)*(turb_vflux(k+1)-turb_vflux(k))
+ end do
+
+ if (mix_scalars ) then
+
+ ! compute turbulent fluxes
+ !$acc loop vector
+ do iScalar=1,num_scalars
+ turb_vflux_scalars(iScalar,nVertlevels+1) = 0.0_RKIND ! no turbulent flux out of the domain
+ turb_vflux_scalars(iScalar,1) = 0.0_RKIND ! lower bc flux handled where ???
+ end do
+
+ !$acc loop vector
+ do k=2,nVertLevels
+ rho_k_at_w(k) = fzm(k)*rho_zz(k ,iCell)*zz(k ,iCell)*zz(k ,iCell)*eddy_visc_vert(k ,iCell) &
+ + fzp(k)*rho_zz(k-1,iCell)*zz(k-1,iCell)*zz(k-1,iCell)*eddy_visc_vert(k-1,iCell)
+ zz_at_w(k) = fzm(k)*zz(k,iCell)+fzp(k)*zz(k-1,iCell)
+ end do
+
+ !$acc loop vector collapse(2)
+ do k=2,nVertLevels
+ do iScalar=1,num_scalars
+ turb_vflux_scalars(iScalar,k) = - prandtl_1d_inverse(k)*rho_k_at_w(k)*zz_at_w(k)*rdzu(k)* &
+ (scalars(iScalar,k,iCell)-scalars(iScalar,k-1,iCell))
+ end do
+ end do
+
+ if( les_surface_opt == LES_SURFACE_SPECIFIED .or. les_surface_opt == LES_SURFACE_VARYING ) turb_vflux_scalars(index_qv,1) = moisture_flux ! lower b.c. for qv
+
+ !$acc loop vector collapse(2)
+ do k=1,nVertLevels
+ do iScalar=1,num_scalars
+ tend_scalars(iScalar,k,iCell) = tend_scalars(iScalar,k,iCell) &
+ - rdzw(k)*(turb_vflux_scalars(iScalar,k+1)-turb_vflux_scalars(iScalar,k))
+ end do
+ end do
+
+ end if ! mix scalars
+
+ end do ! loop over cells (columns)
+
+ !$acc end parallel
+
+ end if
+
+ !$acc exit data delete(turb_vflux_scalars)
+ !$acc exit data delete(turb_vflux, prandtl_1d_inverse)
+
+ DEBUG_WRITE(' exiting scalar_dissipation_3d ')
+
+ end subroutine scalar_dissipation_3d_les
+
+!-----------
+
+! subroutine flux_les_sas(heat_flux, moisture_flux, clock, dt)
+
+! implicit none
+
+! real (kind=RKIND), intent(out) :: heat_flux, moisture_flux
+! type (MPAS_Clock_type), intent(in) :: clock
+! real (kind=RKIND), intent(in) :: dt
+
+! real (kind=RKIND), parameter:: t_start_t_flux = 3600.*6.0
+! real (kind=RKIND), parameter:: t_end_t_flux = 3600.*19.50
+! real (kind=RKIND), parameter:: t_start_q_flux = 3600.*7.0
+! real (kind=RKIND), parameter:: t_end_q_flux = 3600.*19.50
+! real (kind=RKIND) :: rel_time_t_flux, rel_time_q_flux
+! real (kind=RKIND) :: time_of_day_seconds
+! type (MPAS_Time_type) :: currTime
+! integer :: H, M, S, S_n, S_d
+! integer :: ierr
+
+! currTime = mpas_get_clock_time(clock, MPAS_NOW, ierr)
+! call mpas_get_time(curr_time=currTime, H=H, M=M, S=S, S_n=S_n, S_d=S_d)
+! time_of_day_seconds = real(H)*3600. + real(M)*60. + real(S) + real(S_n)/real(S_d) + 0.5*dt
+! call mpas_log_write(' les integration, timestep midpoint time of day in seconds, $r ', realArgs=(/time_of_day_seconds/))
+
+! rel_time_t_flux = max(0.,(time_of_day_seconds - t_start_t_flux)/(t_end_t_flux - t_start_t_flux))
+! rel_time_q_flux = max(0.,(time_of_day_seconds - t_start_q_flux)/(t_end_q_flux - t_start_q_flux))
+
+! heat_flux = max(0., 0.1*sin(pii*rel_time_t_flux))
+! moisture_flux = max(0., 0.15*sin(pii*rel_time_q_flux))/1000.
+
+! end subroutine flux_les_sas
+
+end module mpas_atm_dissipation_models
diff --git a/src/core_atmosphere/dynamics/mpas_atm_iau.F b/src/core_atmosphere/dynamics/mpas_atm_iau.F
index 654fd3ae82..7459de89b4 100644
--- a/src/core_atmosphere/dynamics/mpas_atm_iau.F
+++ b/src/core_atmosphere/dynamics/mpas_atm_iau.F
@@ -5,6 +5,15 @@
! Additional copyright and license information can be found in the LICENSE file
! distributed with this code, or at http://mpas-dev.github.com/license.html
!
+
+#ifdef MPAS_OPENACC
+#define MPAS_ACC_TIMER_START(X) call mpas_timer_start(X)
+#define MPAS_ACC_TIMER_STOP(X) call mpas_timer_stop(X)
+#else
+#define MPAS_ACC_TIMER_START(X)
+#define MPAS_ACC_TIMER_STOP(X)
+#endif
+
module mpas_atm_iau
use mpas_derived_types
@@ -13,9 +22,10 @@ module mpas_atm_iau
use mpas_dmpar
use mpas_constants
use mpas_log, only : mpas_log_write
+ use mpas_timer
+
+ !public :: atm_compute_iau_coef, atm_add_tend_anal_incr
- !public :: atm_compute_iau_coef, atm_add_tend_anal_incr
-
contains
!==================================================================================================
@@ -76,6 +86,39 @@ real (kind=RKIND) function atm_iau_coef(configs, itimestep, dt) result(wgt_iau)
end if
end function atm_iau_coef
+
+!==================================================================================================
+ subroutine update_d2h_pre_add_tend_anal_incr(configs,structs)
+!==================================================================================================
+
+ implicit none
+
+ type (mpas_pool_type), intent(in) :: configs
+ type (mpas_pool_type), intent(inout) :: structs
+
+ type (mpas_pool_type), pointer :: tend
+ type (mpas_pool_type), pointer :: state
+ type (mpas_pool_type), pointer :: diag
+
+ real (kind=RKIND), dimension(:,:), pointer :: rho_edge, rho_zz, theta_m
+ real(kind=RKIND),dimension(:,:,:), pointer :: scalars, tend_scalars
+
+ call mpas_pool_get_subpool(structs, 'tend', tend)
+ call mpas_pool_get_subpool(structs, 'state', state)
+ call mpas_pool_get_subpool(structs, 'diag', diag)
+
+ MPAS_ACC_TIMER_START('atm_srk3: physics ACC_data_xfer')
+ call mpas_pool_get_array(state, 'theta_m', theta_m, 1)
+ call mpas_pool_get_array(state, 'scalars', scalars, 1)
+ call mpas_pool_get_array(state, 'rho_zz', rho_zz, 2)
+ call mpas_pool_get_array(diag , 'rho_edge', rho_edge)
+ !$acc update self(theta_m, scalars, rho_zz, rho_edge)
+
+ call mpas_pool_get_array(tend, 'scalars_tend', tend_scalars)
+ !$acc update self(tend_scalars)
+ MPAS_ACC_TIMER_STOP('atm_srk3: physics ACC_data_xfer')
+
+ end subroutine update_d2h_pre_add_tend_anal_incr
!==================================================================================================
subroutine atm_add_tend_anal_incr (configs, structs, itimestep, dt, tend_ru, tend_rtheta, tend_rho)
diff --git a/src/core_atmosphere/dynamics/mpas_atm_time_integration.F b/src/core_atmosphere/dynamics/mpas_atm_time_integration.F
index 4fe2faefc4..786f3a3416 100644
--- a/src/core_atmosphere/dynamics/mpas_atm_time_integration.F
+++ b/src/core_atmosphere/dynamics/mpas_atm_time_integration.F
@@ -16,7 +16,6 @@
module atm_time_integration
- use mpas_derived_types
use mpas_pool_routines
use mpas_kind_types
use mpas_constants
@@ -29,6 +28,7 @@ module atm_time_integration
#ifdef DO_PHYSICS
use mpas_atmphys_driver_microphysics
+ use mpas_atmphys_interface, only: update_d2h_pre_microphysics, update_h2d_post_microphysics
use mpas_atmphys_todynamics
use mpas_atmphys_utilities
#endif
@@ -36,18 +36,20 @@ module atm_time_integration
use mpas_atm_boundaries, only : nSpecZone, nRelaxZone, nBdyZone, mpas_atm_get_bdy_state, mpas_atm_get_bdy_tend ! regional_MPAS addition
use mpas_atm_iau
+ use mpas_atm_dissipation_models
!
! Abstract interface for routine used to communicate halos of fields
! in a named group
!
abstract interface
- subroutine halo_exchange_routine(domain, halo_group, ierr)
+ subroutine halo_exchange_routine(domain, halo_group, withGPUAwareMPI, ierr)
use mpas_derived_types, only : domain_type
type (domain_type), intent(inout) :: domain
character(len=*), intent(in) :: halo_group
+ logical, intent(in), optional :: withGPUAwareMPI
integer, intent(out), optional :: ierr
end subroutine halo_exchange_routine
@@ -85,6 +87,7 @@ end subroutine halo_exchange_routine
!$acc declare create(s_max_arr, s_min_arr)
!$acc declare create(flux_array, flux_upwind_tmp_arr)
!$acc declare create(flux_tmp_arr, wdtn_arr)
+ !$acc declare create(rho_zz_int)
real (kind=RKIND), dimension(:,:), allocatable :: ru_driving_tend ! regional_MPAS addition
real (kind=RKIND), dimension(:,:), allocatable :: rt_driving_tend ! regional_MPAS addition
@@ -272,6 +275,13 @@ subroutine mpas_atm_dynamics_init(domain)
real (kind=RKIND), dimension(:), pointer :: angleEdge
real (kind=RKIND), dimension(:), pointer :: meshScalingDel2
real (kind=RKIND), dimension(:), pointer :: meshScalingDel4
+ real (kind=RKIND), dimension(:), pointer :: u_init, v_init, qv_init
+ real (kind=RKIND), dimension(:,:), pointer :: t_init
+ real (kind=RKIND), dimension(:,:), pointer :: deformation_coef_c2
+ real (kind=RKIND), dimension(:,:), pointer :: deformation_coef_s2
+ real (kind=RKIND), dimension(:,:), pointer :: deformation_coef_cs
+ real (kind=RKIND), dimension(:,:), pointer :: deformation_coef_c
+ real (kind=RKIND), dimension(:,:), pointer :: deformation_coef_s
#endif
#ifdef MPAS_CAM_DYCORE
@@ -292,6 +302,7 @@ subroutine mpas_atm_dynamics_init(domain)
nullify(mesh)
call mpas_pool_get_subpool(domain % blocklist % structs, 'mesh', mesh)
+ MPAS_ACC_TIMER_START('mpas_dynamics_init [ACC_data_xfer]')
call mpas_pool_get_array(mesh, 'dvEdge', dvEdge)
!$acc enter data copyin(dvEdge)
@@ -456,9 +467,971 @@ subroutine mpas_atm_dynamics_init(domain)
call mpas_pool_get_array(mesh, 'meshScalingDel4', meshScalingDel4)
!$acc enter data copyin(meshScalingDel4)
+
+ call mpas_pool_get_array(mesh, 'u_init', u_init)
+ !$acc enter data copyin(u_init)
+ call mpas_pool_get_array(mesh, 'v_init', v_init)
+ !$acc enter data copyin(v_init)
+ call mpas_pool_get_array(mesh, 't_init', t_init)
+ !$acc enter data copyin(t_init)
+ call mpas_pool_get_array(mesh, 'qv_init', qv_init)
+ !$acc enter data copyin(qv_init)
+
+ call mpas_pool_get_array(mesh, 'deformation_coef_c2', deformation_coef_c2)
+ !$acc enter data copyin(deformation_coef_c2)
+
+ call mpas_pool_get_array(mesh, 'deformation_coef_s2', deformation_coef_s2)
+ !$acc enter data copyin(deformation_coef_s2)
+
+ call mpas_pool_get_array(mesh, 'deformation_coef_cs', deformation_coef_cs)
+ !$acc enter data copyin(deformation_coef_cs)
+
+ call mpas_pool_get_array(mesh, 'deformation_coef_c', deformation_coef_c)
+ !$acc enter data copyin(deformation_coef_c)
+
+ call mpas_pool_get_array(mesh, 'deformation_coef_s', deformation_coef_s)
+ !$acc enter data copyin(deformation_coef_s)
+
+ MPAS_ACC_TIMER_STOP('mpas_dynamics_init [ACC_data_xfer]')
+#endif
+
+ end subroutine mpas_atm_dynamics_init
+
+ subroutine mpas_atm_pre_computesolvediag_h2d(block)
+
+ implicit none
+
+ type (block_type), intent(inout) :: block
+
+
+#ifdef MPAS_OPENACC
+ type (mpas_pool_type), pointer :: mesh
+ type (mpas_pool_type), pointer :: diag
+ type (mpas_pool_type), pointer :: state
+ type (mpas_pool_type), pointer :: tend_physics
+ real (kind=RKIND), dimension(:,:), pointer :: rthdynten
+
+ real (kind=RKIND), dimension(:,:), pointer :: h_edge, v, vorticity, ke, pv_edge, &
+ pv_vertex, pv_cell, gradPVn, gradPVt, divergence
+ real (kind=RKIND), dimension(:,:), pointer :: u, h
+
+ real (kind=RKIND), dimension(:,:), pointer :: zz
+ real (kind=RKIND), dimension(:,:,:), pointer :: zb_cell
+ real (kind=RKIND), dimension(:,:,:), pointer :: zb3_cell
+ real (kind=RKIND), dimension(:), pointer :: fzm
+ real (kind=RKIND), dimension(:), pointer :: fzp
+ real (kind=RKIND), dimension(:,:,:), pointer :: zb
+ real (kind=RKIND), dimension(:,:,:), pointer :: zb3
+
+
+ real (kind=RKIND), dimension(:), pointer :: dvEdge
+ integer, dimension(:,:), pointer :: cellsOnCell
+ integer, dimension(:,:), pointer :: cellsOnEdge
+ integer, dimension(:,:), pointer :: advCellsForEdge
+ integer, dimension(:,:), pointer :: edgesOnCell
+ integer, dimension(:), pointer :: nAdvCellsForEdge
+ integer, dimension(:), pointer :: nEdgesOnCell
+ real (kind=RKIND), dimension(:,:), pointer :: adv_coefs
+ real (kind=RKIND), dimension(:,:), pointer :: adv_coefs_3rd
+ real (kind=RKIND), dimension(:,:), pointer :: edgesOnCell_sign
+ real (kind=RKIND), dimension(:), pointer :: invAreaCell
+ integer, dimension(:), pointer :: bdyMaskCell
+ integer, dimension(:), pointer :: bdyMaskEdge
+ real (kind=RKIND), dimension(:), pointer :: specZoneMaskEdge
+ real (kind=RKIND), dimension(:), pointer :: invDvEdge
+ real (kind=RKIND), dimension(:), pointer :: dcEdge
+ real (kind=RKIND), dimension(:), pointer :: invDcEdge
+ integer, dimension(:,:), pointer :: edgesOnEdge
+ integer, dimension(:,:), pointer :: edgesOnVertex
+ real (kind=RKIND), dimension(:,:), pointer :: edgesOnVertex_sign
+ integer, dimension(:), pointer :: nEdgesOnEdge
+ real (kind=RKIND), dimension(:,:), pointer :: weightsOnEdge
+ integer, dimension(:,:), pointer :: cellsOnVertex
+ integer, dimension(:,:), pointer :: verticesOnCell
+ integer, dimension(:,:), pointer :: verticesOnEdge
+ real (kind=RKIND), dimension(:), pointer :: invAreaTriangle
+ integer, dimension(:,:), pointer :: kiteForCell
+ real (kind=RKIND), dimension(:,:), pointer :: kiteAreasOnVertex
+ real (kind=RKIND), dimension(:), pointer :: fEdge
+ real (kind=RKIND), dimension(:), pointer :: fVertex
+
+ nullify(mesh)
+ call mpas_pool_get_subpool(block % structs, 'mesh', mesh)
+ nullify(state)
+ call mpas_pool_get_subpool(block % structs, 'state', state)
+ nullify(diag)
+ call mpas_pool_get_subpool(block % structs, 'diag', diag)
+
+ MPAS_ACC_TIMER_START('first_compute_solve_diagnostics [ACC_data_xfer]')
+ call mpas_pool_get_array(state, 'rho_zz', h, 1)
+ !$acc enter data create(h)
+ call mpas_pool_get_array(state, 'u', u, 1)
+ !$acc enter data copyin(u)
+
+ call mpas_pool_get_array(diag, 'v', v)
+ !$acc enter data copyin(v)
+ call mpas_pool_get_array(diag, 'rho_edge', h_edge)
+ !$acc enter data copyin(h_edge)
+ call mpas_pool_get_array(diag, 'vorticity', vorticity)
+ !$acc enter data copyin(vorticity)
+ call mpas_pool_get_array(diag, 'divergence', divergence)
+ !$acc enter data copyin(divergence)
+ call mpas_pool_get_array(diag, 'ke', ke)
+ !$acc enter data copyin(ke)
+ call mpas_pool_get_array(diag, 'pv_edge', pv_edge)
+ !$acc enter data copyin(pv_edge)
+ call mpas_pool_get_array(diag, 'pv_vertex', pv_vertex)
+ !$acc enter data copyin(pv_vertex)
+ call mpas_pool_get_array(diag, 'pv_cell', pv_cell)
+ !$acc enter data copyin(pv_cell)
+ call mpas_pool_get_array(diag, 'gradPVn', gradPVn)
+ !$acc enter data copyin(gradPVn)
+ call mpas_pool_get_array(diag, 'gradPVt', gradPVt)
+ !$acc enter data copyin(gradPVt)
+
+ ! Required by atm_init_coupled_diagnostics
+ call mpas_pool_get_array(mesh, 'zz', zz)
+ !$acc enter data copyin(zz)
+
+ call mpas_pool_get_array(mesh, 'zb_cell', zb_cell)
+ !$acc enter data copyin(zb_cell)
+
+ call mpas_pool_get_array(mesh, 'zb3_cell', zb3_cell)
+ !$acc enter data copyin(zb3_cell)
+
+ call mpas_pool_get_array(mesh, 'fzm', fzm)
+ !$acc enter data copyin(fzm)
+
+ call mpas_pool_get_array(mesh, 'fzp', fzp)
+ !$acc enter data copyin(fzp)
+
+ call mpas_pool_get_array(mesh, 'zb', zb)
+ !$acc enter data copyin(zb)
+
+ call mpas_pool_get_array(mesh, 'zb3', zb3)
+ !$acc enter data copyin(zb3)
+
+ ! Required by atm_compute_solve_diagnostics
+ call mpas_pool_get_array(mesh, 'dvEdge', dvEdge)
+ !$acc enter data copyin(dvEdge)
+
+ call mpas_pool_get_array(mesh, 'cellsOnEdge', cellsOnEdge)
+ !$acc enter data copyin(cellsOnEdge)
+
+ call mpas_pool_get_array(mesh, 'edgesOnCell', edgesOnCell)
+ !$acc enter data copyin(edgesOnCell)
+
+ call mpas_pool_get_array(mesh, 'nEdgesOnCell', nEdgesOnCell)
+ !$acc enter data copyin(nEdgesOnCell)
+
+ call mpas_pool_get_array(mesh, 'edgesOnCell_sign', edgesOnCell_sign)
+ !$acc enter data copyin(edgesOnCell_sign)
+
+ call mpas_pool_get_array(mesh, 'invAreaCell', invAreaCell)
+ !$acc enter data copyin(invAreaCell)
+
+ call mpas_pool_get_array(mesh, 'invDvEdge', invDvEdge)
+ !$acc enter data copyin(invDvEdge)
+
+ call mpas_pool_get_array(mesh, 'dcEdge', dcEdge)
+ !$acc enter data copyin(dcEdge)
+
+ call mpas_pool_get_array(mesh, 'invDcEdge', invDcEdge)
+ !$acc enter data copyin(invDcEdge)
+
+ call mpas_pool_get_array(mesh, 'edgesOnEdge', edgesOnEdge)
+ !$acc enter data copyin(edgesOnEdge)
+
+ call mpas_pool_get_array(mesh, 'edgesOnVertex', edgesOnVertex)
+ !$acc enter data copyin(edgesOnVertex)
+
+ call mpas_pool_get_array(mesh, 'edgesOnVertex_sign', edgesOnVertex_sign)
+ !$acc enter data copyin(edgesOnVertex_sign)
+
+ call mpas_pool_get_array(mesh, 'nEdgesOnEdge', nEdgesOnEdge)
+ !$acc enter data copyin(nEdgesOnEdge)
+
+ call mpas_pool_get_array(mesh, 'weightsOnEdge', weightsOnEdge)
+ !$acc enter data copyin(weightsOnEdge)
+
+ call mpas_pool_get_array(mesh, 'verticesOnCell', verticesOnCell)
+ !$acc enter data copyin(verticesOnCell)
+
+ call mpas_pool_get_array(mesh, 'verticesOnEdge', verticesOnEdge)
+ !$acc enter data copyin(verticesOnEdge)
+
+ call mpas_pool_get_array(mesh, 'invAreaTriangle', invAreaTriangle)
+ !$acc enter data copyin(invAreaTriangle)
+
+ call mpas_pool_get_array(mesh, 'kiteForCell', kiteForCell)
+ !$acc enter data copyin(kiteForCell)
+
+ call mpas_pool_get_array(mesh, 'kiteAreasOnVertex', kiteAreasOnVertex)
+ !$acc enter data copyin(kiteAreasOnVertex)
+
+ call mpas_pool_get_array(mesh, 'fVertex', fVertex)
+ !$acc enter data copyin(fVertex)
+
+ MPAS_ACC_TIMER_STOP('first_compute_solve_diagnostics [ACC_data_xfer]')
+#endif
+
+ end subroutine mpas_atm_pre_computesolvediag_h2d
+
+
+ subroutine mpas_atm_post_computesolvediag_d2h(block)
+
+ implicit none
+
+ type (block_type), intent(inout) :: block
+
+
+#ifdef MPAS_OPENACC
+ type (mpas_pool_type), pointer :: mesh
+ type (mpas_pool_type), pointer :: diag
+ type (mpas_pool_type), pointer :: state
+ type (mpas_pool_type), pointer :: tend_physics
+ real (kind=RKIND), dimension(:,:), pointer :: rthdynten
+
+ real (kind=RKIND), dimension(:,:), pointer :: h_edge, v, vorticity, ke, pv_edge, &
+ pv_vertex, pv_cell, gradPVn, gradPVt, divergence
+ real (kind=RKIND), dimension(:,:), pointer :: u, h
+
+ real (kind=RKIND), dimension(:,:), pointer :: zz
+ real (kind=RKIND), dimension(:,:,:), pointer :: zb_cell
+ real (kind=RKIND), dimension(:,:,:), pointer :: zb3_cell
+ real (kind=RKIND), dimension(:), pointer :: fzm
+ real (kind=RKIND), dimension(:), pointer :: fzp
+ real (kind=RKIND), dimension(:,:,:), pointer :: zb
+ real (kind=RKIND), dimension(:,:,:), pointer :: zb3
+
+
+ real (kind=RKIND), dimension(:), pointer :: dvEdge
+ integer, dimension(:,:), pointer :: cellsOnCell
+ integer, dimension(:,:), pointer :: cellsOnEdge
+ integer, dimension(:,:), pointer :: advCellsForEdge
+ integer, dimension(:,:), pointer :: edgesOnCell
+ integer, dimension(:), pointer :: nAdvCellsForEdge
+ integer, dimension(:), pointer :: nEdgesOnCell
+ real (kind=RKIND), dimension(:,:), pointer :: adv_coefs
+ real (kind=RKIND), dimension(:,:), pointer :: adv_coefs_3rd
+ real (kind=RKIND), dimension(:,:), pointer :: edgesOnCell_sign
+ real (kind=RKIND), dimension(:), pointer :: invAreaCell
+ integer, dimension(:), pointer :: bdyMaskCell
+ integer, dimension(:), pointer :: bdyMaskEdge
+ real (kind=RKIND), dimension(:), pointer :: specZoneMaskEdge
+ real (kind=RKIND), dimension(:), pointer :: invDvEdge
+ real (kind=RKIND), dimension(:), pointer :: dcEdge
+ real (kind=RKIND), dimension(:), pointer :: invDcEdge
+ integer, dimension(:,:), pointer :: edgesOnEdge
+ integer, dimension(:,:), pointer :: edgesOnVertex
+ real (kind=RKIND), dimension(:,:), pointer :: edgesOnVertex_sign
+ integer, dimension(:), pointer :: nEdgesOnEdge
+ real (kind=RKIND), dimension(:,:), pointer :: weightsOnEdge
+ integer, dimension(:,:), pointer :: cellsOnVertex
+ integer, dimension(:,:), pointer :: verticesOnCell
+ integer, dimension(:,:), pointer :: verticesOnEdge
+ real (kind=RKIND), dimension(:), pointer :: invAreaTriangle
+ integer, dimension(:,:), pointer :: kiteForCell
+ real (kind=RKIND), dimension(:,:), pointer :: kiteAreasOnVertex
+ real (kind=RKIND), dimension(:), pointer :: fEdge
+ real (kind=RKIND), dimension(:), pointer :: fVertex
+
+ nullify(mesh)
+ call mpas_pool_get_subpool(block % structs, 'mesh', mesh)
+ nullify(state)
+ call mpas_pool_get_subpool(block % structs, 'state', state)
+ nullify(diag)
+ call mpas_pool_get_subpool(block % structs, 'diag', diag)
+
+ MPAS_ACC_TIMER_START('first_compute_solve_diagnostics [ACC_data_xfer]')
+
+ call mpas_pool_get_array(state, 'rho_zz', h, 1)
+ !$acc exit data copyout(h)
+ call mpas_pool_get_array(state, 'u', u, 1)
+ !$acc exit data copyout(u)
+
+ call mpas_pool_get_array(diag, 'v', v)
+ !$acc exit data copyout(v)
+ call mpas_pool_get_array(diag, 'rho_edge', h_edge)
+ !$acc exit data copyout(h_edge)
+ call mpas_pool_get_array(diag, 'vorticity', vorticity)
+ !$acc exit data copyout(vorticity)
+ call mpas_pool_get_array(diag, 'divergence', divergence)
+ !$acc exit data copyout(divergence)
+ call mpas_pool_get_array(diag, 'ke', ke)
+ !$acc exit data copyout(ke)
+ call mpas_pool_get_array(diag, 'pv_edge', pv_edge)
+ !$acc exit data copyout(pv_edge)
+ call mpas_pool_get_array(diag, 'pv_vertex', pv_vertex)
+ !$acc exit data copyout(pv_vertex)
+ call mpas_pool_get_array(diag, 'pv_cell', pv_cell)
+ !$acc exit data copyout(pv_cell)
+ call mpas_pool_get_array(diag, 'gradPVn', gradPVn)
+ !$acc exit data copyout(gradPVn)
+ call mpas_pool_get_array(diag, 'gradPVt', gradPVt)
+ !$acc exit data copyout(gradPVt)
+
+ ! Required by atm_init_coupled_diagnostics
+ call mpas_pool_get_array(mesh, 'zz', zz)
+ !$acc exit data delete(zz)
+
+ call mpas_pool_get_array(mesh, 'zb_cell', zb_cell)
+ !$acc exit data delete(zb_cell)
+
+ call mpas_pool_get_array(mesh, 'zb3_cell', zb3_cell)
+ !$acc exit data delete(zb3_cell)
+
+ call mpas_pool_get_array(mesh, 'fzm', fzm)
+ !$acc exit data delete(fzm)
+
+ call mpas_pool_get_array(mesh, 'fzp', fzp)
+ !$acc exit data delete(fzp)
+
+ call mpas_pool_get_array(mesh, 'zb', zb)
+ !$acc exit data delete(zb)
+
+ call mpas_pool_get_array(mesh, 'zb3', zb3)
+ !$acc exit data delete(zb3)
+
+
+ call mpas_pool_get_array(mesh, 'dvEdge', dvEdge)
+ !$acc exit data delete(dvEdge)
+
+ call mpas_pool_get_array(mesh, 'cellsOnEdge', cellsOnEdge)
+ !$acc exit data delete(cellsOnEdge)
+
+ call mpas_pool_get_array(mesh, 'edgesOnCell', edgesOnCell)
+ !$acc exit data delete(edgesOnCell)
+
+ call mpas_pool_get_array(mesh, 'nEdgesOnCell', nEdgesOnCell)
+ !$acc exit data delete(nEdgesOnCell)
+
+ call mpas_pool_get_array(mesh, 'edgesOnCell_sign', edgesOnCell_sign)
+ !$acc exit data delete(edgesOnCell_sign)
+
+ call mpas_pool_get_array(mesh, 'invAreaCell', invAreaCell)
+ !$acc exit data delete(invAreaCell)
+
+ call mpas_pool_get_array(mesh, 'invDvEdge', invDvEdge)
+ !$acc exit data delete(invDvEdge)
+
+ call mpas_pool_get_array(mesh, 'dcEdge', dcEdge)
+ !$acc exit data delete(dcEdge)
+
+ call mpas_pool_get_array(mesh, 'invDcEdge', invDcEdge)
+ !$acc exit data delete(invDcEdge)
+
+ call mpas_pool_get_array(mesh, 'edgesOnEdge', edgesOnEdge)
+ !$acc exit data delete(edgesOnEdge)
+
+ call mpas_pool_get_array(mesh, 'edgesOnVertex', edgesOnVertex)
+ !$acc exit data delete(edgesOnVertex)
+
+ call mpas_pool_get_array(mesh, 'edgesOnVertex_sign', edgesOnVertex_sign)
+ !$acc exit data delete(edgesOnVertex_sign)
+
+ call mpas_pool_get_array(mesh, 'nEdgesOnEdge', nEdgesOnEdge)
+ !$acc exit data delete(nEdgesOnEdge)
+
+ call mpas_pool_get_array(mesh, 'weightsOnEdge', weightsOnEdge)
+ !$acc exit data delete(weightsOnEdge)
+
+ call mpas_pool_get_array(mesh, 'verticesOnCell', verticesOnCell)
+ !$acc exit data delete(verticesOnCell)
+
+ call mpas_pool_get_array(mesh, 'verticesOnEdge', verticesOnEdge)
+ !$acc exit data delete(verticesOnEdge)
+
+ call mpas_pool_get_array(mesh, 'invAreaTriangle', invAreaTriangle)
+ !$acc exit data delete(invAreaTriangle)
+
+ call mpas_pool_get_array(mesh, 'kiteForCell', kiteForCell)
+ !$acc exit data delete(kiteForCell)
+
+ call mpas_pool_get_array(mesh, 'kiteAreasOnVertex', kiteAreasOnVertex)
+ !$acc exit data delete(kiteAreasOnVertex)
+
+ call mpas_pool_get_array(mesh, 'fVertex', fVertex)
+ !$acc exit data delete(fVertex)
+
+ MPAS_ACC_TIMER_STOP('first_compute_solve_diagnostics [ACC_data_xfer]')
+#endif
+
+ end subroutine mpas_atm_post_computesolvediag_d2h
+
+ subroutine mpas_atm_pre_dynamics_h2d(domain)
+
+ implicit none
+
+ type (domain_type), intent(inout) :: domain
+
+
+#ifdef MPAS_OPENACC
+ type (mpas_pool_type), pointer :: mesh
+ type (mpas_pool_type), pointer :: state
+ type (mpas_pool_type), pointer :: diag
+ type (mpas_pool_type), pointer :: tend
+ type (mpas_pool_type), pointer :: tend_physics
+ type (mpas_pool_type), pointer :: lbc
+
+ logical, pointer :: config_apply_lbcs_ptr
+ logical :: config_apply_lbcs
+
+ real (kind=RKIND), dimension(:,:), pointer :: ru, ru_p
+ real (kind=RKIND), dimension(:,:), pointer :: ru_save
+ real (kind=RKIND), dimension(:,:), pointer :: rw, rw_p
+ real (kind=RKIND), dimension(:,:), pointer :: rw_save
+ real (kind=RKIND), dimension(:,:), pointer :: rtheta_p
+ real (kind=RKIND), dimension(:,:), pointer :: exner, exner_base
+ real (kind=RKIND), dimension(:,:), pointer :: rtheta_base, rho_base
+ real (kind=RKIND), dimension(:,:), pointer :: rtheta_p_save
+ real (kind=RKIND), dimension(:,:), pointer :: rho_p, rho_pp, rho, theta, theta_base
+ real (kind=RKIND), dimension(:,:), pointer :: rho_p_save
+ real (kind=RKIND), dimension(:,:), pointer :: rho_zz_old_split
+ real (kind=RKIND), dimension(:,:), pointer :: cqw, rtheta_pp_old, rtheta_pp
+ real (kind=RKIND), dimension(:,:), pointer :: cqu, pressure_base, pressure_p, pressure, v
+ real (kind=RKIND), dimension(:,:), pointer :: kdiff, pv_edge, pv_vertex, pv_cell, rho_edge, h_divergence, ke
+ real (kind=RKIND), dimension(:,:), pointer :: cofwr, cofwz, coftz, cofwt, a_tri, alpha_tri, gamma_tri
+ real (kind=RKIND), dimension(:), pointer :: cofrz
+ real (kind=RKIND), dimension(:,:), pointer :: gradPVn, gradPVt
+
+
+ real (kind=RKIND), dimension(:,:), pointer :: u_1, u_2
+ real (kind=RKIND), dimension(:,:), pointer :: w_1, w_2
+ real (kind=RKIND), dimension(:,:), pointer :: theta_m_1, theta_m_2
+ real (kind=RKIND), dimension(:,:), pointer :: rho_zz_1, rho_zz_2
+ real (kind=RKIND), dimension(:,:,:), pointer :: scalars_1, scalars_2
+ real (kind=RKIND), dimension(:,:), pointer :: ruAvg, wwAvg, ruAvg_split, wwAvg_split
+
+ integer, pointer :: nCells_ptr
+ integer :: nCells
+ real (kind=RKIND), dimension(:,:), pointer :: uReconstructZonal, uReconstructMeridional, uReconstructX, uReconstructY, uReconstructZ
+
+ real (kind=RKIND), dimension(:,:), pointer :: tend_ru, tend_rt, tend_rho, tend_rw, rt_diabatic_tend
+ real (kind=RKIND), dimension(:,:), pointer :: tend_u_euler, tend_w_euler, tend_theta_euler
+ real(kind=RKIND), dimension(:,:), pointer :: tend_w_pgf, tend_w_buoy
+ real(kind=RKIND), dimension(:,:,:), pointer :: scalar_tend_save
+
+ real (kind=RKIND), dimension(:,:), pointer :: rthdynten, divergence, vorticity
+
+ real (kind=RKIND), dimension(:,:), pointer :: lbc_u, lbc_w, lbc_ru, lbc_rho_edge, lbc_rho, lbc_rtheta_m, lbc_rho_zz, lbc_theta
+ real (kind=RKIND), dimension(:,:), pointer :: lbc_tend_u, lbc_tend_w, lbc_tend_ru, lbc_tend_rho_edge, lbc_tend_rho
+ real (kind=RKIND), dimension(:,:), pointer :: lbc_tend_rtheta_m, lbc_tend_rho_zz, lbc_tend_theta
+
+ real (kind=RKIND), dimension(:,:,:), pointer :: lbc_scalars, lbc_tend_scalars
+
+ nullify(mesh)
+ nullify(state)
+ nullify(diag)
+ nullify(tend)
+ nullify(tend_physics)
+ nullify(lbc)
+ call mpas_pool_get_subpool(domain % blocklist % structs, 'mesh', mesh)
+ call mpas_pool_get_subpool(domain % blocklist % structs, 'state', state)
+ call mpas_pool_get_subpool(domain % blocklist % structs, 'diag', diag)
+ call mpas_pool_get_subpool(domain % blocklist % structs, 'tend', tend)
+ call mpas_pool_get_subpool(domain % blocklist % structs, 'tend_physics', tend_physics)
+ call mpas_pool_get_subpool(domain % blocklist % structs, 'lbc', lbc)
+
+ call mpas_pool_get_config(domain % blocklist % configs, 'config_apply_lbcs', config_apply_lbcs_ptr)
+ config_apply_lbcs = config_apply_lbcs_ptr
+
+ MPAS_ACC_TIMER_START('atm_srk3 [ACC_data_xfer]')
+ call mpas_pool_get_array(diag, 'ru', ru)
+ !$acc enter data copyin(ru) ! use values from atm_init_coupled_diagnostics
+ call mpas_pool_get_array(diag, 'ru_p', ru_p)
+ !$acc enter data copyin(ru_p)
+ call mpas_pool_get_array(diag, 'ru_save', ru_save)
+ !$acc enter data copyin(ru_save)
+ call mpas_pool_get_array(diag, 'rw', rw)
+ !$acc enter data copyin(rw) ! use values from atm_init_coupled_diagnostics
+ call mpas_pool_get_array(diag, 'rw_p', rw_p)
+ !$acc enter data copyin(rw_p)
+ call mpas_pool_get_array(diag, 'rw_save', rw_save)
+ !$acc enter data copyin(rw_save)
+ call mpas_pool_get_array(diag, 'rtheta_p', rtheta_p)
+ !$acc enter data copyin(rtheta_p) ! use values from atm_init_coupled_diagnostics
+ call mpas_pool_get_array(diag, 'rtheta_p_save', rtheta_p_save)
+ !$acc enter data copyin(rtheta_p_save)
+ call mpas_pool_get_array(diag, 'exner', exner)
+ !$acc enter data copyin(exner) ! use values from atm_init_coupled_diagnostics
+ call mpas_pool_get_array(diag, 'exner_base', exner_base)
+ !$acc enter data copyin(exner_base) ! use values from atm_init_coupled_diagnostics
+ call mpas_pool_get_array(diag, 'rtheta_base', rtheta_base)
+ !$acc enter data copyin(rtheta_base) ! use values from atm_init_coupled_diagnostics
+ call mpas_pool_get_array(diag, 'rho_base', rho_base)
+ !$acc enter data copyin(rho_base)
+ call mpas_pool_get_array(diag, 'rho', rho)
+ !$acc enter data copyin(rho)
+ call mpas_pool_get_array(diag, 'theta', theta)
+ !$acc enter data copyin(theta)
+ call mpas_pool_get_array(diag, 'theta_base', theta_base)
+ !$acc enter data copyin(theta_base)
+ call mpas_pool_get_array(diag, 'rho_p', rho_p)
+ !$acc enter data copyin(rho_p) ! use values from atm_init_coupled_diagnostics
+ call mpas_pool_get_array(diag, 'rho_p_save', rho_p_save)
+ !$acc enter data copyin(rho_p_save)
+ call mpas_pool_get_array(diag, 'rho_pp', rho_pp)
+ !$acc enter data copyin(rho_pp)
+ call mpas_pool_get_array(diag, 'rho_zz_old_split', rho_zz_old_split)
+ !$acc enter data copyin(rho_zz_old_split)
+ call mpas_pool_get_array(diag, 'cqw', cqw)
+ !$acc enter data copyin(cqw)
+ call mpas_pool_get_array(diag, 'cqu', cqu)
+ !$acc enter data copyin(cqu)
+ call mpas_pool_get_array(diag, 'pressure_p', pressure_p)
+ !$acc enter data copyin(pressure_p) ! use values from atm_init_coupled_diagnostics
+ call mpas_pool_get_array(diag, 'pressure_base', pressure_base)
+ !$acc enter data copyin(pressure_base) ! use values from atm_init_coupled_diagnostics
+ call mpas_pool_get_array(diag, 'pressure', pressure)
+ !$acc enter data copyin(pressure)
+ call mpas_pool_get_array(diag, 'v', v)
+ !$acc enter data copyin(v) ! use values from atm_compute_solve_diagnostics
+ call mpas_pool_get_array(diag, 'rtheta_pp', rtheta_pp)
+ !$acc enter data copyin(rtheta_pp)
+ call mpas_pool_get_array(diag, 'rtheta_pp_old', rtheta_pp_old)
+ !$acc enter data copyin(rtheta_pp_old)
+ call mpas_pool_get_array(diag, 'kdiff', kdiff)
+ !$acc enter data copyin(kdiff)
+ call mpas_pool_get_array(diag, 'pv_edge', pv_edge)
+ !$acc enter data copyin(pv_edge) ! use values from atm_compute_solve_diagnostics
+ call mpas_pool_get_array(diag, 'pv_vertex', pv_vertex)
+ !$acc enter data copyin(pv_vertex)
+ call mpas_pool_get_array(diag, 'pv_cell', pv_cell)
+ !$acc enter data copyin(pv_cell)
+ call mpas_pool_get_array(diag, 'rho_edge', rho_edge)
+ !$acc enter data copyin(rho_edge) ! use values from atm_compute_solve_diagnostics
+ call mpas_pool_get_array(diag, 'h_divergence', h_divergence)
+ !$acc enter data copyin(h_divergence)
+ call mpas_pool_get_array(diag, 'ke', ke)
+ !$acc enter data copyin(ke) ! use values from atm_compute_solve_diagnostics
+ call mpas_pool_get_array(diag, 'gradPVn', gradPVn)
+ !$acc enter data copyin(gradPVn)
+ call mpas_pool_get_array(diag, 'gradPVt', gradPVt)
+ !$acc enter data copyin(gradPVt)
+
+ call mpas_pool_get_array(diag, 'alpha_tri', alpha_tri)
+ !$acc enter data copyin(alpha_tri)
+ call mpas_pool_get_array(diag, 'gamma_tri', gamma_tri)
+ !$acc enter data copyin(gamma_tri)
+ call mpas_pool_get_array(diag, 'a_tri', a_tri)
+ !$acc enter data copyin(a_tri)
+ call mpas_pool_get_array(diag, 'cofwr', cofwr)
+ !$acc enter data copyin(cofwr)
+ call mpas_pool_get_array(diag, 'cofwz', cofwz)
+ !$acc enter data copyin(cofwz)
+ call mpas_pool_get_array(diag, 'coftz', coftz)
+ !$acc enter data copyin(coftz)
+ call mpas_pool_get_array(diag, 'cofwt', cofwt)
+ !$acc enter data copyin(cofwt)
+ call mpas_pool_get_array(diag, 'cofrz', cofrz)
+ !$acc enter data copyin(cofrz)
+ call mpas_pool_get_array(diag, 'vorticity', vorticity)
+ !$acc enter data copyin(vorticity)
+ call mpas_pool_get_array(diag, 'divergence', divergence)
+ !$acc enter data copyin(divergence)
+ call mpas_pool_get_array(diag, 'ruAvg', ruAvg)
+ !$acc enter data copyin(ruAvg)
+ call mpas_pool_get_array(diag, 'ruAvg_split', ruAvg_split)
+ !$acc enter data copyin(ruAvg_split)
+ call mpas_pool_get_array(diag, 'wwAvg', wwAvg)
+ !$acc enter data copyin(wwAvg)
+ call mpas_pool_get_array(diag, 'wwAvg_split', wwAvg_split)
+ !$acc enter data copyin(wwAvg_split)
+
+ call mpas_pool_get_dimension(mesh, 'nCellsSolve', nCells_ptr)
+ nCells = nCells_ptr
+ call mpas_pool_get_array(diag, 'uReconstructX', uReconstructX)
+ !$acc enter data create(uReconstructX(:,1:nCells))
+ call mpas_pool_get_array(diag, 'uReconstructY', uReconstructY)
+ !$acc enter data create(uReconstructY(:,1:nCells))
+ call mpas_pool_get_array(diag, 'uReconstructZ', uReconstructZ)
+ !$acc enter data create(uReconstructZ(:,1:nCells))
+ call mpas_pool_get_array(diag, 'uReconstructZonal', uReconstructZonal)
+ !$acc enter data copyin(uReconstructZonal(:,1:nCells))
+ call mpas_pool_get_array(diag, 'uReconstructMeridional', uReconstructMeridional)
+ !$acc enter data copyin(uReconstructMeridional(:,1:nCells))
+
+ call mpas_pool_get_array(state, 'u', u_1, 1)
+ !$acc enter data copyin(u_1)
+ call mpas_pool_get_array(state, 'u', u_2, 2)
+ !$acc enter data copyin(u_2)
+ call mpas_pool_get_array(state, 'w', w_1, 1)
+ !$acc enter data copyin(w_1)
+ call mpas_pool_get_array(state, 'w', w_2, 2)
+ !$acc enter data copyin(w_2)
+ call mpas_pool_get_array(state, 'theta_m', theta_m_1, 1)
+ !$acc enter data copyin(theta_m_1) ! use values from atm_init_coupled_diagnostics
+ call mpas_pool_get_array(state, 'theta_m', theta_m_2, 2)
+ !$acc enter data copyin(theta_m_2)
+ call mpas_pool_get_array(state, 'rho_zz', rho_zz_1, 1)
+ !$acc enter data copyin(rho_zz_1)
+ call mpas_pool_get_array(state, 'rho_zz', rho_zz_2, 2)
+ !$acc enter data copyin(rho_zz_2)
+ call mpas_pool_get_array(state, 'scalars', scalars_1, 1)
+ !$acc enter data copyin(scalars_1)
+ call mpas_pool_get_array(state, 'scalars', scalars_2, 2)
+ !$acc enter data copyin(scalars_2)
+
+
+ call mpas_pool_get_array(tend, 'u', tend_ru)
+ !$acc enter data copyin(tend_ru)
+ call mpas_pool_get_array(tend, 'rho_zz', tend_rho)
+ !$acc enter data copyin(tend_rho)
+ call mpas_pool_get_array(tend, 'theta_m', tend_rt)
+ !$acc enter data copyin(tend_rt)
+ call mpas_pool_get_array(tend, 'w', tend_rw)
+ !$acc enter data copyin(tend_rw)
+ call mpas_pool_get_array(tend, 'rt_diabatic_tend', rt_diabatic_tend)
+ !$acc enter data copyin(rt_diabatic_tend)
+ call mpas_pool_get_array(tend, 'u_euler', tend_u_euler)
+ !$acc enter data copyin(tend_u_euler)
+ call mpas_pool_get_array(tend, 'theta_euler', tend_theta_euler)
+ !$acc enter data copyin(tend_theta_euler)
+ call mpas_pool_get_array(tend, 'w_euler', tend_w_euler)
+ !$acc enter data copyin(tend_w_euler)
+ call mpas_pool_get_array(tend, 'w_pgf', tend_w_pgf)
+ !$acc enter data copyin(tend_w_pgf)
+ call mpas_pool_get_array(tend, 'w_buoy', tend_w_buoy)
+ !$acc enter data copyin(tend_w_buoy)
+ call mpas_pool_get_array(tend, 'scalars_tend', scalar_tend_save)
+ !$acc enter data copyin(scalar_tend_save)
+
+
+ if(config_apply_lbcs) then
+ call mpas_pool_get_array(lbc, 'lbc_u', lbc_u, 2)
+ !$acc enter data copyin(lbc_u)
+ call mpas_pool_get_array(lbc, 'lbc_w', lbc_w, 2)
+ !$acc enter data copyin(lbc_w)
+ call mpas_pool_get_array(lbc, 'lbc_ru', lbc_ru, 2)
+ !$acc enter data copyin(lbc_ru)
+ call mpas_pool_get_array(lbc, 'lbc_rho_edge', lbc_rho_edge, 2)
+ !$acc enter data copyin(lbc_rho_edge)
+ call mpas_pool_get_array(lbc, 'lbc_theta', lbc_theta, 2)
+ !$acc enter data copyin(lbc_theta)
+ call mpas_pool_get_array(lbc, 'lbc_rtheta_m', lbc_rtheta_m, 2)
+ !$acc enter data copyin(lbc_rtheta_m)
+ call mpas_pool_get_array(lbc, 'lbc_rho_zz', lbc_rho_zz, 2)
+ !$acc enter data copyin(lbc_rho_zz)
+ call mpas_pool_get_array(lbc, 'lbc_rho', lbc_rho, 2)
+ !$acc enter data copyin(lbc_rho)
+ call mpas_pool_get_array(lbc, 'lbc_scalars', lbc_scalars, 2)
+ !$acc enter data copyin(lbc_scalars)
+
+
+ call mpas_pool_get_array(lbc, 'lbc_u', lbc_tend_u, 1)
+ !$acc enter data copyin(lbc_tend_u)
+ call mpas_pool_get_array(lbc, 'lbc_ru', lbc_tend_ru, 1)
+ !$acc enter data copyin(lbc_tend_ru)
+ call mpas_pool_get_array(lbc, 'lbc_rho_edge', lbc_tend_rho_edge, 1)
+ !$acc enter data copyin(lbc_tend_rho_edge)
+ call mpas_pool_get_array(lbc, 'lbc_w', lbc_tend_w, 1)
+ !$acc enter data copyin(lbc_tend_w)
+ call mpas_pool_get_array(lbc, 'lbc_theta', lbc_tend_theta, 1)
+ !$acc enter data copyin(lbc_tend_theta)
+ call mpas_pool_get_array(lbc, 'lbc_rtheta_m', lbc_tend_rtheta_m, 1)
+ !$acc enter data copyin(lbc_tend_rtheta_m)
+ call mpas_pool_get_array(lbc, 'lbc_rho_zz', lbc_tend_rho_zz, 1)
+ !$acc enter data copyin(lbc_tend_rho_zz)
+ call mpas_pool_get_array(lbc, 'lbc_rho', lbc_tend_rho, 1)
+ !$acc enter data copyin(lbc_tend_rho)
+ call mpas_pool_get_array(lbc, 'lbc_scalars', lbc_tend_scalars, 1)
+ !$acc enter data copyin(lbc_tend_scalars)
+ end if
+
+ call mpas_pool_get_array(tend_physics, 'rthdynten', rthdynten)
+ !$acc enter data copyin(rthdynten)
+
+ MPAS_ACC_TIMER_STOP('atm_srk3 [ACC_data_xfer]')
+#endif
+
+ end subroutine mpas_atm_pre_dynamics_h2d
+
+
+ subroutine mpas_atm_post_dynamics_d2h(domain)
+
+ implicit none
+
+ type (domain_type), intent(inout) :: domain
+
+
+#ifdef MPAS_OPENACC
+ type (mpas_pool_type), pointer :: mesh
+ type (mpas_pool_type), pointer :: state
+ type (mpas_pool_type), pointer :: diag
+ type (mpas_pool_type), pointer :: tend
+ type (mpas_pool_type), pointer :: tend_physics
+ type (mpas_pool_type), pointer :: lbc
+
+ logical, pointer :: config_apply_lbcs_ptr
+ logical :: config_apply_lbcs
+
+ real (kind=RKIND), dimension(:,:), pointer :: ru, ru_p
+ real (kind=RKIND), dimension(:,:), pointer :: ru_save
+ real (kind=RKIND), dimension(:,:), pointer :: rw, rw_p
+ real (kind=RKIND), dimension(:,:), pointer :: rw_save
+ real (kind=RKIND), dimension(:,:), pointer :: rtheta_p
+ real (kind=RKIND), dimension(:,:), pointer :: exner, exner_base
+ real (kind=RKIND), dimension(:,:), pointer :: rtheta_base, rho_base
+ real (kind=RKIND), dimension(:,:), pointer :: rtheta_p_save
+ real (kind=RKIND), dimension(:,:), pointer :: rho_p, rho_pp, rho, theta, theta_base
+ real (kind=RKIND), dimension(:,:), pointer :: rho_p_save
+ real (kind=RKIND), dimension(:,:), pointer :: rho_zz_old_split
+ real (kind=RKIND), dimension(:,:), pointer :: cqw, rtheta_pp_old, rtheta_pp
+ real (kind=RKIND), dimension(:,:), pointer :: cqu, pressure_base, pressure_p, pressure, v
+ real (kind=RKIND), dimension(:,:), pointer :: kdiff, pv_edge, pv_vertex, pv_cell, rho_edge, h_divergence, ke
+ real (kind=RKIND), dimension(:,:), pointer :: cofwr, cofwz, coftz, cofwt, a_tri, alpha_tri, gamma_tri
+ real (kind=RKIND), dimension(:), pointer :: cofrz
+ real (kind=RKIND), dimension(:,:), pointer :: gradPVn, gradPVt
+
+
+ real (kind=RKIND), dimension(:,:), pointer :: u_1, u_2
+ real (kind=RKIND), dimension(:,:), pointer :: w_1, w_2
+ real (kind=RKIND), dimension(:,:), pointer :: theta_m_1, theta_m_2
+ real (kind=RKIND), dimension(:,:), pointer :: rho_zz_1, rho_zz_2
+ real (kind=RKIND), dimension(:,:,:), pointer :: scalars_1, scalars_2
+ real (kind=RKIND), dimension(:,:), pointer :: ruAvg, wwAvg, ruAvg_split, wwAvg_split
+
+ integer, pointer :: nCells_ptr
+ integer :: nCells
+ real (kind=RKIND), dimension(:,:), pointer :: uReconstructZonal, uReconstructMeridional, uReconstructX, uReconstructY, uReconstructZ
+
+ real (kind=RKIND), dimension(:,:), pointer :: tend_ru, tend_rt, tend_rho, tend_rw, rt_diabatic_tend
+ real (kind=RKIND), dimension(:,:), pointer :: tend_u_euler, tend_w_euler, tend_theta_euler
+ real(kind=RKIND), dimension(:,:), pointer :: tend_w_pgf, tend_w_buoy
+ real(kind=RKIND), dimension(:,:,:), pointer :: scalar_tend_save
+
+ real (kind=RKIND), dimension(:,:), pointer :: rthdynten, divergence, vorticity
+
+ real (kind=RKIND), dimension(:,:), pointer :: lbc_u, lbc_w, lbc_ru, lbc_rho_edge, lbc_rho, lbc_rtheta_m, lbc_rho_zz, lbc_theta
+ real (kind=RKIND), dimension(:,:), pointer :: lbc_tend_u, lbc_tend_w, lbc_tend_ru, lbc_tend_rho_edge, lbc_tend_rho
+ real (kind=RKIND), dimension(:,:), pointer :: lbc_tend_rtheta_m, lbc_tend_rho_zz, lbc_tend_theta
+
+ real (kind=RKIND), dimension(:,:,:), pointer :: lbc_scalars, lbc_tend_scalars
+
+ nullify(mesh)
+ nullify(state)
+ nullify(diag)
+ nullify(tend)
+ nullify(tend_physics)
+ nullify(lbc)
+ call mpas_pool_get_subpool(domain % blocklist % structs, 'mesh', mesh)
+ call mpas_pool_get_subpool(domain % blocklist % structs, 'state', state)
+ call mpas_pool_get_subpool(domain % blocklist % structs, 'diag', diag)
+ call mpas_pool_get_subpool(domain % blocklist % structs, 'tend', tend)
+ call mpas_pool_get_subpool(domain % blocklist % structs, 'tend_physics', tend_physics)
+ call mpas_pool_get_subpool(domain % blocklist % structs, 'lbc', lbc)
+
+ call mpas_pool_get_config(domain % blocklist % configs, 'config_apply_lbcs', config_apply_lbcs_ptr)
+ config_apply_lbcs = config_apply_lbcs_ptr
+
+ MPAS_ACC_TIMER_START('atm_srk3 [ACC_data_xfer]')
+ call mpas_pool_get_array(diag, 'ru', ru)
+ !$acc exit data copyout(ru) ! use values from atm_init_coupled_diagnostics
+ call mpas_pool_get_array(diag, 'ru_p', ru_p)
+ !$acc exit data copyout(ru_p)
+ call mpas_pool_get_array(diag, 'ru_save', ru_save)
+ !$acc exit data delete(ru_save)
+ call mpas_pool_get_array(diag, 'rw', rw)
+ !$acc exit data copyout(rw) ! use values from atm_init_coupled_diagnostics
+ call mpas_pool_get_array(diag, 'rw_p', rw_p)
+ !$acc exit data copyout(rw_p)
+ call mpas_pool_get_array(diag, 'rw_save', rw_save)
+ !$acc exit data delete(rw_save)
+ call mpas_pool_get_array(diag, 'rtheta_p', rtheta_p)
+ !$acc exit data copyout(rtheta_p) ! use values from atm_init_coupled_diagnostics
+ call mpas_pool_get_array(diag, 'rtheta_p_save', rtheta_p_save)
+ !$acc exit data delete(rtheta_p_save)
+ call mpas_pool_get_array(diag, 'exner', exner)
+ !$acc exit data copyout(exner) ! use values from atm_init_coupled_diagnostics
+ call mpas_pool_get_array(diag, 'exner_base', exner_base)
+ !$acc exit data copyout(exner_base) ! use values from atm_init_coupled_diagnostics
+ call mpas_pool_get_array(diag, 'rtheta_base', rtheta_base)
+ !$acc exit data copyout(rtheta_base) ! use values from atm_init_coupled_diagnostics
+ call mpas_pool_get_array(diag, 'rho_base', rho_base)
+ !$acc exit data copyout(rho_base)
+ call mpas_pool_get_array(diag, 'rho', rho)
+ !$acc exit data copyout(rho)
+ call mpas_pool_get_array(diag, 'theta', theta)
+ !$acc exit data copyout(theta)
+ call mpas_pool_get_array(diag, 'theta_base', theta_base)
+ !$acc exit data copyout(theta_base)
+ call mpas_pool_get_array(diag, 'rho_p', rho_p)
+ !$acc exit data copyout(rho_p) ! use values from atm_init_coupled_diagnostics
+ call mpas_pool_get_array(diag, 'rho_p_save', rho_p_save)
+ !$acc exit data delete(rho_p_save)
+ call mpas_pool_get_array(diag, 'rho_pp', rho_pp)
+ !$acc exit data copyout(rho_pp)
+ call mpas_pool_get_array(diag, 'rho_zz_old_split', rho_zz_old_split)
+ !$acc exit data delete(rho_zz_old_split)
+ call mpas_pool_get_array(diag, 'cqw', cqw)
+ !$acc exit data delete(cqw)
+ call mpas_pool_get_array(diag, 'cqu', cqu)
+ !$acc exit data copyout(cqu)
+ call mpas_pool_get_array(diag, 'pressure_p', pressure_p)
+ !$acc exit data copyout(pressure_p) ! use values from atm_init_coupled_diagnostics
+ call mpas_pool_get_array(diag, 'pressure_base', pressure_base)
+ !$acc exit data copyout(pressure_base) ! use values from atm_init_coupled_diagnostics
+ call mpas_pool_get_array(diag, 'pressure', pressure)
+ !$acc exit data copyout(pressure)
+ call mpas_pool_get_array(diag, 'v', v)
+ !$acc exit data copyout(v) ! use values from atm_compute_solve_diagnostics
+ call mpas_pool_get_array(diag, 'rtheta_pp', rtheta_pp)
+ !$acc exit data copyout(rtheta_pp)
+ call mpas_pool_get_array(diag, 'rtheta_pp_old', rtheta_pp_old)
+ !$acc exit data copyout(rtheta_pp_old)
+ call mpas_pool_get_array(diag, 'kdiff', kdiff)
+ !$acc exit data copyout(kdiff)
+ call mpas_pool_get_array(diag, 'pv_edge', pv_edge)
+ !$acc exit data copyout(pv_edge) ! use values from atm_compute_solve_diagnostics
+ call mpas_pool_get_array(diag, 'pv_vertex', pv_vertex)
+ !$acc exit data copyout(pv_vertex)
+ call mpas_pool_get_array(diag, 'pv_cell', pv_cell)
+ !$acc exit data delete(pv_cell)
+ call mpas_pool_get_array(diag, 'rho_edge', rho_edge)
+ !$acc exit data copyout(rho_edge) ! use values from atm_compute_solve_diagnostics
+ call mpas_pool_get_array(diag, 'h_divergence', h_divergence)
+ !$acc exit data copyout(h_divergence)
+ call mpas_pool_get_array(diag, 'ke', ke)
+ !$acc exit data copyout(ke) ! use values from atm_compute_solve_diagnostics
+ call mpas_pool_get_array(diag, 'gradPVn', gradPVn)
+ !$acc exit data delete(gradPVn)
+ call mpas_pool_get_array(diag, 'gradPVt', gradPVt)
+ !$acc exit data delete(gradPVt)
+
+ call mpas_pool_get_array(diag, 'alpha_tri', alpha_tri)
+ !$acc exit data delete(alpha_tri)
+ call mpas_pool_get_array(diag, 'gamma_tri', gamma_tri)
+ !$acc exit data delete(gamma_tri)
+ call mpas_pool_get_array(diag, 'a_tri', a_tri)
+ !$acc exit data delete(a_tri)
+ call mpas_pool_get_array(diag, 'cofwr', cofwr)
+ !$acc exit data delete(cofwr)
+ call mpas_pool_get_array(diag, 'cofwz', cofwz)
+ !$acc exit data delete(cofwz)
+ call mpas_pool_get_array(diag, 'coftz', coftz)
+ !$acc exit data delete(coftz)
+ call mpas_pool_get_array(diag, 'cofwt', cofwt)
+ !$acc exit data delete(cofwt)
+ call mpas_pool_get_array(diag, 'cofrz', cofrz)
+ !$acc exit data delete(cofrz)
+ call mpas_pool_get_array(diag, 'vorticity', vorticity)
+ !$acc exit data copyout(vorticity)
+ call mpas_pool_get_array(diag, 'divergence', divergence)
+ !$acc exit data copyout(divergence)
+ call mpas_pool_get_array(diag, 'ruAvg', ruAvg)
+ !$acc exit data copyout(ruAvg)
+ call mpas_pool_get_array(diag, 'ruAvg_split', ruAvg_split)
+ !$acc exit data copyout(ruAvg_split)
+ call mpas_pool_get_array(diag, 'wwAvg', wwAvg)
+ !$acc exit data copyout(wwAvg)
+ call mpas_pool_get_array(diag, 'wwAvg_split', wwAvg_split)
+ !$acc exit data copyout(wwAvg_split)
+
+ call mpas_pool_get_dimension(mesh, 'nCellsSolve', nCells_ptr)
+ nCells = nCells_ptr
+ call mpas_pool_get_array(diag, 'uReconstructX', uReconstructX)
+ !$acc exit data copyout(uReconstructX(:,1:nCells))
+ call mpas_pool_get_array(diag, 'uReconstructY', uReconstructY)
+ !$acc exit data copyout(uReconstructY(:,1:nCells))
+ call mpas_pool_get_array(diag, 'uReconstructZ', uReconstructZ)
+ !$acc exit data copyout(uReconstructZ(:,1:nCells))
+ call mpas_pool_get_array(diag, 'uReconstructZonal', uReconstructZonal)
+ !$acc exit data copyout(uReconstructZonal(:,1:nCells))
+ call mpas_pool_get_array(diag, 'uReconstructMeridional', uReconstructMeridional)
+ !$acc exit data copyout(uReconstructMeridional(:,1:nCells))
+
+ call mpas_pool_get_array(state, 'u', u_1, 1)
+ !$acc exit data copyout(u_1)
+ call mpas_pool_get_array(state, 'u', u_2, 2)
+ !$acc exit data copyout(u_2)
+ call mpas_pool_get_array(state, 'w', w_1, 1)
+ !$acc exit data copyout(w_1)
+ call mpas_pool_get_array(state, 'w', w_2, 2)
+ !$acc exit data copyout(w_2)
+ call mpas_pool_get_array(state, 'theta_m', theta_m_1, 1)
+ !$acc exit data copyout(theta_m_1) ! use values from atm_init_coupled_diagnostics
+ call mpas_pool_get_array(state, 'theta_m', theta_m_2, 2)
+ !$acc exit data copyout(theta_m_2) ! Delete gives incorrect results
+ call mpas_pool_get_array(state, 'rho_zz', rho_zz_1, 1)
+ !$acc exit data copyout(rho_zz_1)
+ call mpas_pool_get_array(state, 'rho_zz', rho_zz_2, 2)
+ !$acc exit data copyout(rho_zz_2)
+ call mpas_pool_get_array(state, 'scalars', scalars_1, 1)
+ !$acc exit data copyout(scalars_1)
+ call mpas_pool_get_array(state, 'scalars', scalars_2, 2)
+ !$acc exit data copyout(scalars_2) ! Delete gives incorrect results
+
+
+ call mpas_pool_get_array(tend, 'u', tend_ru)
+ !$acc exit data copyout(tend_ru)
+ call mpas_pool_get_array(tend, 'rho_zz', tend_rho)
+ !$acc exit data copyout(tend_rho)
+ call mpas_pool_get_array(tend, 'theta_m', tend_rt)
+ !$acc exit data copyout(tend_rt)
+ call mpas_pool_get_array(tend, 'w', tend_rw)
+ !$acc exit data copyout(tend_rw)
+ call mpas_pool_get_array(tend, 'rt_diabatic_tend', rt_diabatic_tend)
+ !$acc exit data copyout(rt_diabatic_tend)
+ call mpas_pool_get_array(tend, 'u_euler', tend_u_euler)
+ !$acc exit data copyout(tend_u_euler)
+ call mpas_pool_get_array(tend, 'theta_euler', tend_theta_euler)
+ !$acc exit data copyout(tend_theta_euler)
+ call mpas_pool_get_array(tend, 'w_euler', tend_w_euler)
+ !$acc exit data copyout(tend_w_euler)
+ call mpas_pool_get_array(tend, 'w_pgf', tend_w_pgf)
+ !$acc exit data copyout(tend_w_pgf)
+ call mpas_pool_get_array(tend, 'w_buoy', tend_w_buoy)
+ !$acc exit data copyout(tend_w_buoy)
+ call mpas_pool_get_array(tend, 'scalars_tend', scalar_tend_save)
+ !$acc exit data copyout(scalar_tend_save)
+
+ if(config_apply_lbcs) then
+ call mpas_pool_get_array(lbc, 'lbc_u', lbc_u, 2)
+ !$acc exit data delete(lbc_u)
+ call mpas_pool_get_array(lbc, 'lbc_w', lbc_w, 2)
+ !$acc exit data delete(lbc_w)
+ call mpas_pool_get_array(lbc, 'lbc_ru', lbc_ru, 2)
+ !$acc exit data delete(lbc_ru)
+ call mpas_pool_get_array(lbc, 'lbc_rho_edge', lbc_rho_edge, 2)
+ !$acc exit data delete(lbc_rho_edge)
+ call mpas_pool_get_array(lbc, 'lbc_theta', lbc_theta, 2)
+ !$acc exit data delete(lbc_theta)
+ call mpas_pool_get_array(lbc, 'lbc_rtheta_m', lbc_rtheta_m, 2)
+ !$acc exit data delete(lbc_rtheta_m)
+ call mpas_pool_get_array(lbc, 'lbc_rho_zz', lbc_rho_zz, 2)
+ !$acc exit data delete(lbc_rho_zz)
+ call mpas_pool_get_array(lbc, 'lbc_rho', lbc_rho, 2)
+ !$acc exit data delete(lbc_rho)
+ call mpas_pool_get_array(lbc, 'lbc_scalars', lbc_scalars, 2)
+ !$acc exit data delete(lbc_scalars)
+
+ call mpas_pool_get_array(lbc, 'lbc_u', lbc_tend_u, 1)
+ !$acc exit data delete(lbc_tend_u)
+ call mpas_pool_get_array(lbc, 'lbc_ru', lbc_tend_ru, 1)
+ !$acc exit data delete(lbc_tend_ru)
+ call mpas_pool_get_array(lbc, 'lbc_rho_edge', lbc_tend_rho_edge, 1)
+ !$acc exit data delete(lbc_tend_rho_edge)
+ call mpas_pool_get_array(lbc, 'lbc_w', lbc_tend_w, 1)
+ !$acc exit data delete(lbc_tend_w)
+ call mpas_pool_get_array(lbc, 'lbc_theta', lbc_tend_theta, 1)
+ !$acc exit data delete(lbc_tend_theta)
+ call mpas_pool_get_array(lbc, 'lbc_rtheta_m', lbc_tend_rtheta_m, 1)
+ !$acc exit data delete(lbc_tend_rtheta_m)
+ call mpas_pool_get_array(lbc, 'lbc_rho_zz', lbc_tend_rho_zz, 1)
+ !$acc exit data delete(lbc_tend_rho_zz)
+ call mpas_pool_get_array(lbc, 'lbc_rho', lbc_tend_rho, 1)
+ !$acc exit data delete(lbc_tend_rho)
+ call mpas_pool_get_array(lbc, 'lbc_scalars', lbc_tend_scalars, 1)
+ !$acc exit data delete(lbc_tend_scalars)
+ end if
+
+ call mpas_pool_get_array(tend_physics, 'rthdynten', rthdynten)
+ !$acc exit data copyout(rthdynten)
+ MPAS_ACC_TIMER_STOP('atm_srk3 [ACC_data_xfer]')
#endif
- end subroutine mpas_atm_dynamics_init
+ end subroutine mpas_atm_post_dynamics_d2h
!----------------------------------------------------------------------------
@@ -546,6 +1519,11 @@ subroutine mpas_atm_dynamics_finalize(domain)
real (kind=RKIND), dimension(:), pointer :: angleEdge
real (kind=RKIND), dimension(:), pointer :: meshScalingDel2
real (kind=RKIND), dimension(:), pointer :: meshScalingDel4
+ real (kind=RKIND), dimension(:,:), pointer :: deformation_coef_c2
+ real (kind=RKIND), dimension(:,:), pointer :: deformation_coef_s2
+ real (kind=RKIND), dimension(:,:), pointer :: deformation_coef_cs
+ real (kind=RKIND), dimension(:,:), pointer :: deformation_coef_c
+ real (kind=RKIND), dimension(:,:), pointer :: deformation_coef_s
#endif
@@ -731,6 +1709,21 @@ subroutine mpas_atm_dynamics_finalize(domain)
call mpas_pool_get_array(mesh, 'meshScalingDel4', meshScalingDel4)
!$acc exit data delete(meshScalingDel4)
+
+ call mpas_pool_get_array(mesh, 'deformation_coef_c2', deformation_coef_c2)
+ !$acc exit data delete(deformation_coef_c2)
+
+ call mpas_pool_get_array(mesh, 'deformation_coef_s2', deformation_coef_s2)
+ !$acc exit data delete(deformation_coef_s2)
+
+ call mpas_pool_get_array(mesh, 'deformation_coef_cs', deformation_coef_cs)
+ !$acc exit data delete(deformation_coef_cs)
+
+ call mpas_pool_get_array(mesh, 'deformation_coef_c', deformation_coef_c)
+ !$acc exit data delete(deformation_coef_c)
+
+ call mpas_pool_get_array(mesh, 'deformation_coef_s', deformation_coef_s)
+ !$acc exit data delete(deformation_coef_s)
#endif
end subroutine mpas_atm_dynamics_finalize
@@ -774,12 +1767,14 @@ subroutine atm_timestep(domain, dt, nowTime, itimestep, exchange_halo_group)
config_apply_lbcs = config_apply_lbcs_ptr
+ call mpas_atm_pre_dynamics_h2d(domain)
if (trim(config_time_integration) == 'SRK3') then
call atm_srk3(domain, dt, itimestep, exchange_halo_group)
else
call mpas_log_write('Unknown time integration option '//trim(config_time_integration), messageType=MPAS_LOG_ERR)
call mpas_log_write('Currently, only ''SRK3'' is supported.', messageType=MPAS_LOG_CRIT)
end if
+ call mpas_atm_post_dynamics_d2h(domain)
call mpas_set_timeInterval(dtInterval, dt=dt)
currTime = nowTime + dtInterval
@@ -850,6 +1845,7 @@ subroutine atm_srk3(domain, dt, itimestep, exchange_halo_group)
logical, pointer :: config_scalar_advection
logical, pointer :: config_positive_definite
logical, pointer :: config_monotonic
+ logical, pointer :: config_gpu_aware_mpi
character (len=StrKIND), pointer :: config_microp_scheme
character (len=StrKIND), pointer :: config_convection_scheme
@@ -873,6 +1869,8 @@ subroutine atm_srk3(domain, dt, itimestep, exchange_halo_group)
real (kind=RKIND), dimension(:,:,:), pointer :: scalars, scalars_1, scalars_2
real (kind=RKIND), dimension(:,:), pointer :: rqvdynten, rthdynten, theta_m
+ real (kind=RKIND), dimension(:,:), pointer :: pressure_p, rtheta_p, exner, tend_u
+ real (kind=RKIND), dimension(:,:), pointer :: rho_pp, rtheta_pp, ru_p, rw_p, pv_edge, rho_edge
real (kind=RKIND) :: theta_local, fac_m
#ifndef MPAS_CAM_DYCORE
@@ -892,6 +1890,7 @@ subroutine atm_srk3(domain, dt, itimestep, exchange_halo_group)
call mpas_pool_get_config(block % configs, 'config_scalar_advection', config_scalar_advection)
call mpas_pool_get_config(block % configs, 'config_positive_definite', config_positive_definite)
call mpas_pool_get_config(block % configs, 'config_monotonic', config_monotonic)
+ call mpas_pool_get_config(block % configs, 'config_gpu_aware_mpi', config_gpu_aware_mpi)
call mpas_pool_get_config(block % configs, 'config_IAU_option', config_IAU_option)
! config variables for dynamics-transport splitting, WCS 18 November 2014
call mpas_pool_get_config(block % configs, 'config_split_dynamics_transport', config_split_dynamics_transport)
@@ -1040,7 +2039,15 @@ subroutine atm_srk3(domain, dt, itimestep, exchange_halo_group)
!
! Communicate halos for theta_m, scalars, pressure_p, and rtheta_p
!
- call exchange_halo_group(domain, 'dynamics:theta_m,scalars,pressure_p,rtheta_p')
+ MPAS_ACC_TIMER_START('atm_srk3: halo_exchanges + ACC_data_xfer')
+ call mpas_pool_get_array(state, 'theta_m', theta_m, 1)
+ call mpas_pool_get_array(state, 'scalars', scalars_1, 1)
+ call mpas_pool_get_array(diag, 'pressure_p', pressure_p)
+ call mpas_pool_get_array(diag, 'rtheta_p', rtheta_p)
+ !$acc update self(theta_m,scalars_1,pressure_p,rtheta_p) if (.not. config_gpu_aware_mpi)
+ call exchange_halo_group(domain, 'dynamics:theta_m,scalars,pressure_p,rtheta_p', config_gpu_aware_mpi)
+ !$acc update device(theta_m,scalars_1,pressure_p,rtheta_p) if (.not. config_gpu_aware_mpi)
+ MPAS_ACC_TIMER_STOP('atm_srk3: halo_exchanges + ACC_data_xfer')
call mpas_timer_start('atm_rk_integration_setup')
@@ -1075,6 +2082,7 @@ subroutine atm_srk3(domain, dt, itimestep, exchange_halo_group)
call mpas_timer_stop('atm_compute_moist_coefficients')
#ifdef DO_PHYSICS
+ call update_d2h_pre_physics_get_tend(block % configs, state, diag, tend)
call mpas_timer_start('physics_get_tend')
rk_step = 1
dynamics_substep = 1
@@ -1083,6 +2091,7 @@ subroutine atm_srk3(domain, dt, itimestep, exchange_halo_group)
tend_ru_physics, tend_rtheta_physics, tend_rho_physics, &
exchange_halo_group )
call mpas_timer_stop('physics_get_tend')
+ call update_h2d_post_physics_get_tend(block % configs, state, diag, tend)
#else
#ifndef MPAS_CAM_DYCORE
!
@@ -1098,10 +2107,13 @@ subroutine atm_srk3(domain, dt, itimestep, exchange_halo_group)
! IAU - Incremental Analysis Update
!
if (trim(config_IAU_option) /= 'off') then
+ call update_d2h_pre_add_tend_anal_incr(block % configs, block % structs)
call atm_add_tend_anal_incr(block % configs, block % structs, itimestep, dt, &
tend_ru_physics, tend_rtheta_physics, tend_rho_physics)
end if
+ !$acc enter data copyin(tend_rtheta_physics,tend_rho_physics,tend_ru_physics)
+
DYNAMICS_SUBSTEPS : do dynamics_substep = 1, dynamics_split
@@ -1121,8 +2133,12 @@ subroutine atm_srk3(domain, dt, itimestep, exchange_halo_group)
!$OMP END PARALLEL DO
call mpas_timer_stop('atm_compute_vert_imp_coefs')
- call exchange_halo_group(domain, 'dynamics:exner')
-
+ MPAS_ACC_TIMER_START('atm_srk3: halo_exchanges + ACC_data_xfer')
+ call mpas_pool_get_array(diag, 'exner', exner)
+ !$acc update self(exner) if (.not. config_gpu_aware_mpi)
+ call exchange_halo_group(domain, 'dynamics:exner', config_gpu_aware_mpi)
+ !$acc update device(exner) if (.not. config_gpu_aware_mpi)
+ MPAS_ACC_TIMER_STOP('atm_srk3: halo_exchanges + ACC_data_xfer')
!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
! BEGIN Runge-Kutta loop
@@ -1171,7 +2187,8 @@ subroutine atm_srk3(domain, dt, itimestep, exchange_halo_group)
!$OMP PARALLEL DO
do thread=1,nThreads
- call atm_compute_dyn_tend( tend, tend_physics, state, diag, mesh, block % configs, nVertLevels, rk_step, dt, &
+ call atm_compute_dyn_tend( tend, tend_physics, state, diag, mesh, diag_physics, &
+ block % configs, nVertLevels, rk_step, dynamics_substep, dt, &
cellThreadStart(thread), cellThreadEnd(thread), &
vertexThreadStart(thread), vertexThreadEnd(thread), &
edgeThreadStart(thread), edgeThreadEnd(thread), &
@@ -1200,7 +2217,12 @@ subroutine atm_srk3(domain, dt, itimestep, exchange_halo_group)
!***********************************
! tend_u
- call exchange_halo_group(domain, 'dynamics:tend_u')
+ MPAS_ACC_TIMER_START('atm_srk3: halo_exchanges + ACC_data_xfer')
+ call mpas_pool_get_array(tend, 'u', tend_u)
+ !$acc update self(tend_u) if (.not. config_gpu_aware_mpi)
+ call exchange_halo_group(domain, 'dynamics:tend_u', config_gpu_aware_mpi)
+ !$acc update device(tend_u) if (.not. config_gpu_aware_mpi)
+ MPAS_ACC_TIMER_STOP('atm_srk3: halo_exchanges + ACC_data_xfer')
call mpas_timer_start('small_step_prep')
@@ -1276,7 +2298,12 @@ subroutine atm_srk3(domain, dt, itimestep, exchange_halo_group)
do small_step = 1, number_sub_steps(rk_step)
- call exchange_halo_group(domain, 'dynamics:rho_pp')
+ MPAS_ACC_TIMER_START('atm_srk3: halo_exchanges + ACC_data_xfer')
+ call mpas_pool_get_array(diag, 'rho_pp', rho_pp)
+ !$acc update self(rho_pp) if (.not. config_gpu_aware_mpi)
+ call exchange_halo_group(domain, 'dynamics:rho_pp', config_gpu_aware_mpi)
+ !$acc update device(rho_pp) if (.not. config_gpu_aware_mpi)
+ MPAS_ACC_TIMER_STOP('atm_srk3: halo_exchanges + ACC_data_xfer')
call mpas_timer_start('atm_advance_acoustic_step')
@@ -1298,8 +2325,12 @@ subroutine atm_srk3(domain, dt, itimestep, exchange_halo_group)
! rtheta_pp
! This is the only communications needed during the acoustic steps because we solve for u on all edges of owned cells
-
- call exchange_halo_group(domain, 'dynamics:rtheta_pp')
+ MPAS_ACC_TIMER_START('atm_srk3: halo_exchanges + ACC_data_xfer')
+ call mpas_pool_get_array(diag, 'rtheta_pp', rtheta_pp)
+ !$acc update self(rtheta_pp) if (.not. config_gpu_aware_mpi)
+ call exchange_halo_group(domain, 'dynamics:rtheta_pp', config_gpu_aware_mpi)
+ !$acc update device(rtheta_pp) if (.not. config_gpu_aware_mpi)
+ MPAS_ACC_TIMER_STOP('atm_srk3: halo_exchanges + ACC_data_xfer')
! complete update of horizontal momentum by including 3d divergence damping at the end of the acoustic step
@@ -1319,7 +2350,15 @@ subroutine atm_srk3(domain, dt, itimestep, exchange_halo_group)
!
! Communicate halos for rw_p[1,2], ru_p[1,2], rho_pp[1,2], rtheta_pp[2]
!
- call exchange_halo_group(domain, 'dynamics:rw_p,ru_p,rho_pp,rtheta_pp')
+ MPAS_ACC_TIMER_START('atm_srk3: halo_exchanges + ACC_data_xfer')
+ call mpas_pool_get_array(diag, 'ru_p', ru_p)
+ call mpas_pool_get_array(diag, 'rw_p', rw_p)
+ call mpas_pool_get_array(diag, 'rho_pp', rho_pp)
+ call mpas_pool_get_array(diag, 'rtheta_pp', rtheta_pp)
+ !$acc update self(rw_p,ru_p,rho_pp,rtheta_pp) if (.not. config_gpu_aware_mpi)
+ call exchange_halo_group(domain, 'dynamics:rw_p,ru_p,rho_pp,rtheta_pp', config_gpu_aware_mpi)
+ !$acc update device(rw_p,ru_p,rho_pp,rtheta_pp) if (.not. config_gpu_aware_mpi)
+ MPAS_ACC_TIMER_STOP('atm_srk3: halo_exchanges + ACC_data_xfer')
call mpas_timer_start('atm_recover_large_step_variables')
@@ -1354,7 +2393,6 @@ subroutine atm_srk3(domain, dt, itimestep, exchange_halo_group)
call mpas_atm_get_bdy_state(clock, block, nVertLevels, nEdges, 'u', time_dyn_step, ru_driving_values)
! do this inline at present - it is simple enough
- !$acc enter data copyin(u)
!$acc parallel default(present)
!$acc loop gang worker
do iEdge = 1, nEdgesSolve
@@ -1366,12 +2404,10 @@ subroutine atm_srk3(domain, dt, itimestep, exchange_halo_group)
end if
end do
!$acc end parallel
- !$acc exit data copyout(u)
call mpas_atm_get_bdy_state(clock, block, nVertLevels, nEdges, 'ru', time_dyn_step, ru_driving_values)
call mpas_pool_get_array(diag, 'ru', u)
! do this inline at present - it is simple enough
- !$acc enter data copyin(u)
!$acc parallel default(present)
!$acc loop gang worker
do iEdge = 1, nEdges
@@ -1383,7 +2419,6 @@ subroutine atm_srk3(domain, dt, itimestep, exchange_halo_group)
end if
end do
!$acc end parallel
- !$acc exit data copyout(u)
deallocate(ru_driving_values)
@@ -1391,12 +2426,17 @@ subroutine atm_srk3(domain, dt, itimestep, exchange_halo_group)
!-------------------------------------------------------------------
+ MPAS_ACC_TIMER_START('atm_srk3: halo_exchanges + ACC_data_xfer')
+ call mpas_pool_get_array(state, 'u', u, 2)
+ !$acc update self(u) if (.not. config_gpu_aware_mpi)
! u
if (config_apply_lbcs) then
- call exchange_halo_group(domain, 'dynamics:u_123')
+ call exchange_halo_group(domain, 'dynamics:u_123', config_gpu_aware_mpi)
else
- call exchange_halo_group(domain, 'dynamics:u_3')
+ call exchange_halo_group(domain, 'dynamics:u_3', config_gpu_aware_mpi)
end if
+ !$acc update device(u) if (.not. config_gpu_aware_mpi)
+ MPAS_ACC_TIMER_STOP('atm_srk3: halo_exchanges + ACC_data_xfer')
! scalar advection: RK3 scheme of Skamarock and Gassmann (2011).
! PD or monotonicity constraints applied only on the final Runge-Kutta substep.
@@ -1404,11 +2444,16 @@ subroutine atm_srk3(domain, dt, itimestep, exchange_halo_group)
if (config_scalar_advection .and. (.not. config_split_dynamics_transport) ) then
call advance_scalars('scalars', domain, rk_step, rk_timestep, config_monotonic, config_positive_definite, &
- config_time_integration_order, config_split_dynamics_transport, exchange_halo_group)
+ config_time_integration_order, config_split_dynamics_transport, config_gpu_aware_mpi, exchange_halo_group)
if (config_apply_lbcs) then ! adjust boundary tendencies for regional_MPAS scalar transport
- call exchange_halo_group(domain, 'dynamics:scalars')
+ MPAS_ACC_TIMER_START('atm_srk3: halo_exchanges + ACC_data_xfer')
+ call mpas_pool_get_array(state, 'scalars', scalars_2, 2)
+ !$acc update self(scalars_2) if (.not. config_gpu_aware_mpi)
+ call exchange_halo_group(domain, 'dynamics:scalars', config_gpu_aware_mpi)
+ !$acc update device(scalars_2) if (.not. config_gpu_aware_mpi)
+ MPAS_ACC_TIMER_STOP('atm_srk3: halo_exchanges + ACC_data_xfer')
allocate(scalars_driving(num_scalars,nVertLevels,nCells+1))
@@ -1460,17 +2505,27 @@ subroutine atm_srk3(domain, dt, itimestep, exchange_halo_group)
call mpas_timer_stop('atm_compute_solve_diagnostics')
+ MPAS_ACC_TIMER_START('atm_srk3: halo_exchanges + ACC_data_xfer')
+ call mpas_pool_get_array(state, 'w', w, 2)
+ call mpas_pool_get_array(diag, 'pv_edge', pv_edge)
+ call mpas_pool_get_array(diag, 'rho_edge', rho_edge)
+ !$acc update self(w,pv_edge,rho_edge) if (.not. config_gpu_aware_mpi)
if (config_scalar_advection .and. (.not. config_split_dynamics_transport) ) then
!
! Communicate halos for w[1,2], pv_edge[1,2], rho_edge[1,2], scalars[1,2]
!
- call exchange_halo_group(domain, 'dynamics:w,pv_edge,rho_edge,scalars')
+ call mpas_pool_get_array(state, 'scalars', scalars_2, 2)
+ !$acc update self(scalars_2) if (.not. config_gpu_aware_mpi)
+ call exchange_halo_group(domain, 'dynamics:w,pv_edge,rho_edge,scalars', config_gpu_aware_mpi)
+ !$acc update device(scalars_2) if (.not. config_gpu_aware_mpi)
else
!
! Communicate halos for w[1,2], pv_edge[1,2], rho_edge[1,2]
!
- call exchange_halo_group(domain, 'dynamics:w,pv_edge,rho_edge')
+ call exchange_halo_group(domain, 'dynamics:w,pv_edge,rho_edge', config_gpu_aware_mpi)
end if
+ !$acc update device(w,pv_edge,rho_edge) if (.not. config_gpu_aware_mpi)
+ MPAS_ACC_TIMER_STOP('atm_srk3: halo_exchanges + ACC_data_xfer')
! set the zero-gradient condition on w for regional_MPAS
@@ -1483,8 +2538,13 @@ subroutine atm_srk3(domain, dt, itimestep, exchange_halo_group)
end do
!$OMP END PARALLEL DO
+ MPAS_ACC_TIMER_START('atm_srk3: halo_exchanges + ACC_data_xfer')
! w halo values needs resetting after regional boundary update
- call exchange_halo_group(domain, 'dynamics:w')
+ call mpas_pool_get_array(state, 'w', w, 2)
+ !$acc update self(w) if (.not. config_gpu_aware_mpi)
+ call exchange_halo_group(domain, 'dynamics:w', config_gpu_aware_mpi)
+ !$acc update device(w) if (.not. config_gpu_aware_mpi)
+ MPAS_ACC_TIMER_STOP('atm_srk3: halo_exchanges + ACC_data_xfer')
end if ! end of regional_MPAS addition
@@ -1495,7 +2555,14 @@ subroutine atm_srk3(domain, dt, itimestep, exchange_halo_group)
!
! Communicate halos for theta_m[1,2], pressure_p[1,2], and rtheta_p[1,2]
!
- call exchange_halo_group(domain, 'dynamics:theta_m,pressure_p,rtheta_p')
+ MPAS_ACC_TIMER_START('atm_srk3: halo_exchanges + ACC_data_xfer')
+ call mpas_pool_get_array(state, 'theta_m', theta_m, 2)
+ call mpas_pool_get_array(diag, 'pressure_p', pressure_p)
+ call mpas_pool_get_array(diag, 'rtheta_p', rtheta_p)
+ !$acc update self(theta_m,pressure_p,rtheta_p) if (.not. config_gpu_aware_mpi)
+ call exchange_halo_group(domain, 'dynamics:theta_m,pressure_p,rtheta_p', config_gpu_aware_mpi)
+ !$acc update device(theta_m,pressure_p,rtheta_p) if (.not. config_gpu_aware_mpi)
+ MPAS_ACC_TIMER_STOP('atm_srk3: halo_exchanges + ACC_data_xfer')
!
! Note: A halo exchange for 'exner' here as well as after the call
@@ -1532,6 +2599,7 @@ subroutine atm_srk3(domain, dt, itimestep, exchange_halo_group)
deallocate(qtot) ! we are finished with these now
+ !$acc exit data delete(tend_rtheta_physics,tend_rho_physics,tend_ru_physics)
#ifndef MPAS_CAM_DYCORE
call mpas_deallocate_scratch_field(tend_rtheta_physicsField)
call mpas_deallocate_scratch_field(tend_rho_physicsField)
@@ -1555,12 +2623,17 @@ subroutine atm_srk3(domain, dt, itimestep, exchange_halo_group)
call advance_scalars('scalars', domain, rk_step, rk_timestep, config_monotonic, config_positive_definite, &
- config_time_integration_order, config_split_dynamics_transport, exchange_halo_group)
+ config_time_integration_order, config_split_dynamics_transport, config_gpu_aware_mpi, exchange_halo_group)
if (config_apply_lbcs) then ! adjust boundary tendencies for regional_MPAS scalar transport
+ MPAS_ACC_TIMER_START('atm_srk3: halo_exchanges + ACC_data_xfer')
! need to fill halo for horizontal filter
- call exchange_halo_group(domain, 'dynamics:scalars')
+ call mpas_pool_get_array(state, 'scalars', scalars_2, 2)
+ !$acc update self(scalars_2) if (.not. config_gpu_aware_mpi)
+ call exchange_halo_group(domain, 'dynamics:scalars', config_gpu_aware_mpi)
+ !$acc update device(scalars_2) if (.not. config_gpu_aware_mpi)
+ MPAS_ACC_TIMER_STOP('atm_srk3: halo_exchanges + ACC_data_xfer')
allocate(scalars_driving(num_scalars,nVertLevels,nCells+1))
@@ -1586,7 +2659,12 @@ subroutine atm_srk3(domain, dt, itimestep, exchange_halo_group)
!------------------------------------------------------------------------------------------------------------------------
if (rk_step < 3) then
- call exchange_halo_group(domain, 'dynamics:scalars')
+ MPAS_ACC_TIMER_START('atm_srk3: halo_exchanges + ACC_data_xfer')
+ call mpas_pool_get_array(state, 'scalars', scalars_2, 2)
+ !$acc update self(scalars_2) if (.not. config_gpu_aware_mpi)
+ call exchange_halo_group(domain, 'dynamics:scalars', config_gpu_aware_mpi)
+ !$acc update device(scalars_2) if (.not. config_gpu_aware_mpi)
+ MPAS_ACC_TIMER_STOP('atm_srk3: halo_exchanges + ACC_data_xfer')
end if
end do RK3_SPLIT_TRANSPORT
@@ -1608,7 +2686,8 @@ subroutine atm_srk3(domain, dt, itimestep, exchange_halo_group)
uReconstructY, &
uReconstructZ, &
uReconstructZonal, &
- uReconstructMeridional &
+ uReconstructMeridional, &
+ lACC = .true. &
)
@@ -1618,16 +2697,24 @@ subroutine atm_srk3(domain, dt, itimestep, exchange_halo_group)
!
#ifdef DO_PHYSICS
+ MPAS_ACC_TIMER_START('atm_srk3: physics ACC_data_xfer')
call mpas_pool_get_array(state, 'scalars', scalars_1, 1)
+ !$acc update self(scalars_1)
call mpas_pool_get_array(state, 'scalars', scalars_2, 2)
+ !$acc update self(scalars_2)
+ MPAS_ACC_TIMER_STOP('atm_srk3: physics ACC_data_xfer')
if(config_convection_scheme == 'cu_grell_freitas' .or. &
config_convection_scheme == 'cu_ntiedtke') then
+ MPAS_ACC_TIMER_START('atm_srk3: physics ACC_data_xfer')
call mpas_pool_get_array(tend_physics, 'rqvdynten', rqvdynten)
call mpas_pool_get_array(state, 'theta_m', theta_m, 2)
+ !$acc update self(theta_m)
call mpas_pool_get_array(tend_physics, 'rthdynten', rthdynten)
+ !$acc update self(rthdynten)
+ MPAS_ACC_TIMER_STOP('atm_srk3: physics ACC_data_xfer')
!NOTE: The calculation of the tendency due to horizontal and vertical advection for the water vapor mixing ratio
!requires that the subroutine atm_advance_scalars_mono was called on the third Runge Kutta step, so that a halo
@@ -1652,8 +2739,12 @@ subroutine atm_srk3(domain, dt, itimestep, exchange_halo_group)
where ( scalars_2(:,:,:) < 0.0) &
scalars_2(:,:,:) = 0.0
+ MPAS_ACC_TIMER_START('atm_srk3: physics ACC_data_xfer')
+ !$acc update device(scalars_2, rthdynten)
+ MPAS_ACC_TIMER_STOP('atm_srk3: physics ACC_data_xfer')
!call microphysics schemes:
if (trim(config_microp_scheme) /= 'off') then
+ call update_d2h_pre_microphysics( block % configs, state, diag, 2)
call mpas_timer_start('microphysics')
!$OMP PARALLEL DO
do thread=1,nThreads
@@ -1662,6 +2753,7 @@ subroutine atm_srk3(domain, dt, itimestep, exchange_halo_group)
end do
!$OMP END PARALLEL DO
call mpas_timer_stop('microphysics')
+ call update_h2d_post_microphysics( block % configs, state, diag, tend, 2)
end if
!
@@ -1699,7 +2791,12 @@ subroutine atm_srk3(domain, dt, itimestep, exchange_halo_group)
if (config_apply_lbcs) then ! adjust boundary values for regional_MPAS scalar transport
- call exchange_halo_group(domain, 'dynamics:scalars')
+ MPAS_ACC_TIMER_START('atm_srk3: halo_exchanges + ACC_data_xfer')
+ call mpas_pool_get_array(state, 'scalars', scalars_2, 2)
+ !$acc update self(scalars_2) if (.not. config_gpu_aware_mpi)
+ call exchange_halo_group(domain, 'dynamics:scalars', config_gpu_aware_mpi)
+ !$acc update device(scalars_2) if (.not. config_gpu_aware_mpi)
+ MPAS_ACC_TIMER_STOP('atm_srk3: halo_exchanges + ACC_data_xfer')
allocate(scalars_driving(num_scalars,nVertLevels,nCells+1))
@@ -1752,7 +2849,7 @@ end subroutine atm_srk3
!
!-----------------------------------------------------------------------
subroutine advance_scalars(field_name, domain, rk_step, rk_timestep, config_monotonic, config_positive_definite, &
- config_time_integration_order, config_split_dynamics_transport, exchange_halo_group)
+ config_time_integration_order, config_split_dynamics_transport, config_gpu_aware_mpi, exchange_halo_group)
implicit none
@@ -1765,6 +2862,7 @@ subroutine advance_scalars(field_name, domain, rk_step, rk_timestep, config_mono
logical, intent(in) :: config_positive_definite
integer, intent(in) :: config_time_integration_order
logical, intent(in) :: config_split_dynamics_transport
+ logical, intent(in) :: config_gpu_aware_mpi
procedure (halo_exchange_routine) :: exchange_halo_group
! Local variables
@@ -1896,7 +2994,7 @@ subroutine advance_scalars(field_name, domain, rk_step, rk_timestep, config_mono
edgeThreadStart(thread), edgeThreadEnd(thread), &
cellSolveThreadStart(thread), cellSolveThreadEnd(thread), &
scalar_old_arr, scalar_new_arr, s_max_arr, s_min_arr, wdtn_arr, &
- flux_array, flux_upwind_tmp_arr, flux_tmp_arr, &
+ flux_array, flux_upwind_tmp_arr, flux_tmp_arr, config_gpu_aware_mpi, &
exchange_halo_group, &
advance_density=config_split_dynamics_transport, rho_zz_int=rho_zz_int)
end if
@@ -1976,12 +3074,6 @@ subroutine atm_rk_integration_setup( state, diag, nVertLevels, num_scalars, &
call mpas_pool_get_array(state, 'scalars', scalars_1, 1)
call mpas_pool_get_array(state, 'scalars', scalars_2, 2)
- MPAS_ACC_TIMER_START('atm_rk_integration_setup [ACC_data_xfer]')
- !$acc enter data create(ru_save, u_2, rw_save, rtheta_p_save, rho_p_save, &
- !$acc w_2, theta_m_2, rho_zz_2, rho_zz_old_split, scalars_2) &
- !$acc copyin(ru, rw, rtheta_p, rho_p, u_1, w_1, theta_m_1, &
- !$acc rho_zz_1, scalars_1)
- MPAS_ACC_TIMER_STOP('atm_rk_integration_setup [ACC_data_xfer]')
!$acc kernels
theta_m_2(:,cellEnd+1) = 0.0_RKIND
@@ -2029,12 +3121,6 @@ subroutine atm_rk_integration_setup( state, diag, nVertLevels, num_scalars, &
end do
!$acc end parallel
- MPAS_ACC_TIMER_START('atm_rk_integration_setup [ACC_data_xfer]')
- !$acc exit data copyout(ru_save, rw_save, rtheta_p_save, rho_p_save, u_2, &
- !$acc w_2, theta_m_2, rho_zz_2, rho_zz_old_split, scalars_2) &
- !$acc delete(ru, rw, rtheta_p, rho_p, u_1, w_1, theta_m_1, &
- !$acc rho_zz_1, scalars_1)
- MPAS_ACC_TIMER_STOP('atm_rk_integration_setup [ACC_data_xfer]')
end subroutine atm_rk_integration_setup
@@ -2085,11 +3171,6 @@ subroutine atm_compute_moist_coefficients( dims, state, diag, mesh, &
moist_start = moist_start_ptr
moist_end = moist_end_ptr
- MPAS_ACC_TIMER_START('atm_compute_moist_coefficients [ACC_data_xfer]')
- !$acc enter data create(cqw, cqu) &
- !$acc copyin(scalars)
- MPAS_ACC_TIMER_STOP('atm_compute_moist_coefficients [ACC_data_xfer]')
-
!$acc parallel default(present)
!$acc loop gang worker
! do iCell = cellSolveStart,cellSolveEnd
@@ -2138,10 +3219,6 @@ subroutine atm_compute_moist_coefficients( dims, state, diag, mesh, &
end do
!$acc end parallel
- MPAS_ACC_TIMER_START('atm_compute_moist_coefficients [ACC_data_xfer]')
- !$acc exit data copyout(cqw, cqu) &
- !$acc delete(scalars)
- MPAS_ACC_TIMER_STOP('atm_compute_moist_coefficients [ACC_data_xfer]')
end subroutine atm_compute_moist_coefficients
@@ -2273,11 +3350,7 @@ subroutine atm_compute_vert_imp_coefs_work(nCells, moist_start, moist_end, dts,
real (kind=RKIND) :: dtseps, c2, qtotal, rcv
real (kind=RKIND), dimension( nVertLevels ) :: b_tri, c_tri
- MPAS_ACC_TIMER_START('atm_compute_vert_imp_coefs_work [ACC_data_xfer]')
- !$acc enter data copyin(cqw, p, t, rb, rtb, rt, pb)
- !$acc enter data create(cofrz, cofwr, cofwz, coftz, cofwt, a_tri, b_tri, &
- !$acc c_tri, alpha_tri, gamma_tri)
- MPAS_ACC_TIMER_STOP('atm_compute_vert_imp_coefs_work [ACC_data_xfer]')
+ !$acc enter data create(b_tri, c_tri)
! set coefficients
dtseps = .5*dts*(1.+epssm)
@@ -2357,11 +3430,7 @@ subroutine atm_compute_vert_imp_coefs_work(nCells, moist_start, moist_end, dts,
end do ! loop over cells
!$acc end parallel
- MPAS_ACC_TIMER_START('atm_compute_vert_imp_coefs_work [ACC_data_xfer]')
- !$acc exit data copyout(cofrz, cofwr, cofwz, coftz, cofwt, a_tri, b_tri, &
- !$acc c_tri, alpha_tri, gamma_tri)
- !$acc exit data delete(cqw, p, t, rb, rtb, rt, pb)
- MPAS_ACC_TIMER_STOP('atm_compute_vert_imp_coefs_work [ACC_data_xfer]')
+ !$acc exit data delete(b_tri, c_tri)
end subroutine atm_compute_vert_imp_coefs_work
@@ -2465,9 +3534,6 @@ subroutine atm_set_smlstep_pert_variables_work(nCells, nEdges, &
integer :: iCell, iEdge, i, k
real (kind=RKIND) :: flux
- MPAS_ACC_TIMER_START('atm_set_smlstep_pert_variables [ACC_data_xfer]')
- !$acc enter data copyin(u_tend, w_tend)
- MPAS_ACC_TIMER_STOP('atm_set_smlstep_pert_variables [ACC_data_xfer]')
! we solve for omega instead of w (see Klemp et al MWR 2007),
! so here we change the w_p tendency to an omega_p tendency
@@ -2500,10 +3566,6 @@ subroutine atm_set_smlstep_pert_variables_work(nCells, nEdges, &
end do
!$acc end parallel
- MPAS_ACC_TIMER_START('atm_set_smlstep_pert_variables [ACC_data_xfer]')
- !$acc exit data delete(u_tend)
- !$acc exit data copyout(w_tend)
- MPAS_ACC_TIMER_STOP('atm_set_smlstep_pert_variables [ACC_data_xfer]')
end subroutine atm_set_smlstep_pert_variables_work
@@ -2736,17 +3798,6 @@ subroutine atm_advance_acoustic_step_work(nCells, nEdges, nCellsSolve, cellStart
resm = (1.0 - epssm) / (1.0 + epssm)
rdts = 1./dts
- MPAS_ACC_TIMER_START('atm_advance_acoustic_step [ACC_data_xfer]')
- !$acc enter data copyin(exner,cqu,cofwt,coftz,cofrz,cofwr,cofwz, &
- !$acc a_tri,alpha_tri,gamma_tri,rho_zz,theta_m,w, &
- !$acc tend_ru,tend_rho,tend_rt,tend_rw,rw,rw_save)
- !$acc enter data create(rtheta_pp_old)
- if(small_step == 1) then
- !$acc enter data create(ru_p,ruAvg,rho_pp,rtheta_pp,wwAvg,rw_p)
- else
- !$acc enter data copyin(ru_p,ruAvg,rho_pp,rtheta_pp,wwAvg,rw_p)
- end if
- MPAS_ACC_TIMER_STOP('atm_advance_acoustic_step [ACC_data_xfer]')
if(small_step /= 1) then ! not needed on first small step
@@ -2973,13 +4024,6 @@ subroutine atm_advance_acoustic_step_work(nCells, nEdges, nCellsSolve, cellStart
end do ! end of loop over cells
!$acc end parallel
- MPAS_ACC_TIMER_START('atm_advance_acoustic_step [ACC_data_xfer]')
- !$acc exit data delete(exner,cqu,cofwt,coftz,cofrz,cofwr,cofwz, &
- !$acc a_tri,alpha_tri,gamma_tri,rho_zz,theta_m,w, &
- !$acc tend_ru,tend_rho,tend_rt,tend_rw,rw,rw_save)
- !$acc exit data copyout(rtheta_pp_old,ru_p,ruAvg,rho_pp, &
- !$acc rtheta_pp,wwAvg,rw_p)
- MPAS_ACC_TIMER_STOP('atm_advance_acoustic_step [ACC_data_xfer]')
end subroutine atm_advance_acoustic_step_work
@@ -3031,9 +4075,6 @@ subroutine atm_divergence_damping_3d( state, diag, mesh, configs, dts, edgeStart
nCellsSolve = nCellsSolve_ptr
nVertLevels = nVertLevels_ptr
- MPAS_ACC_TIMER_START('atm_divergence_damping_3d [ACC_data_xfer]')
- !$acc enter data copyin(ru_p, rtheta_pp, rtheta_pp_old, theta_m)
- MPAS_ACC_TIMER_STOP('atm_divergence_damping_3d [ACC_data_xfer]')
!$acc parallel default(present)
!$acc loop gang worker
@@ -3066,10 +4107,6 @@ subroutine atm_divergence_damping_3d( state, diag, mesh, configs, dts, edgeStart
end do ! end loop over edges
!$acc end parallel
- MPAS_ACC_TIMER_START('atm_divergence_damping_3d [ACC_data_xfer]')
- !$acc exit data copyout(ru_p) &
- !$acc delete(rtheta_pp, rtheta_pp_old, theta_m)
- MPAS_ACC_TIMER_STOP('atm_divergence_damping_3d [ACC_data_xfer]')
end subroutine atm_divergence_damping_3d
@@ -3260,17 +4297,6 @@ subroutine atm_recover_large_step_variables_work(nCells, nEdges, nCellsSolve, nE
integer :: i, iCell, iEdge, k, cell1, cell2
real (kind=RKIND) :: invNs, rcv, p0, flux
- MPAS_ACC_TIMER_START('atm_recover_large_step_variables [ACC_data_xfer]')
- !$acc enter data copyin(rho_p_save,rho_pp,rho_base,rw_save,rw_p, &
- !$acc rtheta_p_save,rtheta_pp,rtheta_base, &
- !$acc ru_save,ru_p,wwAvg,ruAvg) &
- !$acc create(rho_zz,rho_p,rw,w,rtheta_p,theta_m, &
- !$acc ru,u)
- if (rk_step == 3) then
- !$acc enter data copyin(rt_diabatic_tend,exner_base) &
- !$acc create(exner,pressure_p)
- end if
- MPAS_ACC_TIMER_STOP('atm_recover_large_step_variables [ACC_data_xfer]')
rcv = rgas/(cp-rgas)
p0 = 1.0e+05 ! this should come from somewhere else...
@@ -3416,17 +4442,6 @@ subroutine atm_recover_large_step_variables_work(nCells, nEdges, nCellsSolve, nE
end do
!$acc end parallel
- MPAS_ACC_TIMER_START('atm_recover_large_step_variables [ACC_data_xfer]')
- !$acc exit data delete(rho_p_save,rho_pp,rho_base,rw_save,rw_p, &
- !$acc rtheta_p_save,rtheta_pp,rtheta_base, &
- !$acc ru_save,ru_p) &
- !$acc copyout(rho_zz,rho_p,rw,w,rtheta_p,theta_m, &
- !$acc ru,u,wwAvg,ruAvg)
- if (rk_step == 3) then
- !$acc exit data delete(rt_diabatic_tend,exner_base) &
- !$acc copyout(exner,pressure_p)
- end if
- MPAS_ACC_TIMER_STOP('atm_recover_large_step_variables [ACC_data_xfer]')
end subroutine atm_recover_large_step_variables_work
@@ -3661,10 +4676,6 @@ subroutine atm_advance_scalars_work(nCells, num_scalars, dt, &
weight_time_old = 1. - weight_time_new
- MPAS_ACC_TIMER_START('atm_advance_scalars [ACC_data_xfer]')
- !$acc enter data copyin(uhAvg, scalar_new)
- MPAS_ACC_TIMER_STOP('atm_advance_scalars [ACC_data_xfer]')
-
!$acc parallel async
!$acc loop gang worker private(scalar_weight2, ica)
do iEdge=edgeStart,edgeEnd
@@ -3759,12 +4770,6 @@ subroutine atm_advance_scalars_work(nCells, num_scalars, dt, &
!
MPAS_ACC_TIMER_START('atm_advance_scalars [ACC_data_xfer]')
-#ifndef DO_PHYSICS
- !$acc enter data create(scalar_tend_save)
-#else
- !$acc enter data copyin(scalar_tend_save)
-#endif
- !$acc enter data copyin(scalar_old, fnm, fnp, rdnw, wwAvg, rho_zz_old, rho_zz_new)
!$acc enter data create(scalar_tend_column)
MPAS_ACC_TIMER_STOP('atm_advance_scalars [ACC_data_xfer]')
@@ -3847,9 +4852,7 @@ subroutine atm_advance_scalars_work(nCells, num_scalars, dt, &
!$acc end parallel
MPAS_ACC_TIMER_START('atm_advance_scalars [ACC_data_xfer]')
- !$acc exit data copyout(scalar_new)
- !$acc exit data delete(scalar_tend_column, uhAvg, wwAvg, scalar_old, fnm, fnp, &
- !$acc rdnw, rho_zz_old, rho_zz_new, scalar_tend_save)
+ !$acc exit data delete(scalar_tend_column)
MPAS_ACC_TIMER_STOP('atm_advance_scalars [ACC_data_xfer]')
end subroutine atm_advance_scalars_work
@@ -3870,7 +4873,7 @@ subroutine atm_advance_scalars_mono(field_name, block, tend, state, diag, mesh,
cellStart, cellEnd, edgeStart, edgeEnd, &
cellSolveStart, cellSolveEnd, &
scalar_old, scalar_new, s_max, s_min, wdtn, flux_arr, &
- flux_upwind_tmp, flux_tmp, exchange_halo_group, advance_density, rho_zz_int)
+ flux_upwind_tmp, flux_tmp, config_gpu_aware_mpi, exchange_halo_group, advance_density, rho_zz_int)
implicit none
@@ -3891,6 +4894,7 @@ subroutine atm_advance_scalars_mono(field_name, block, tend, state, diag, mesh,
real (kind=RKIND), dimension(:,:), intent(inout) :: wdtn
real (kind=RKIND), dimension(:,:), intent(inout) :: flux_arr
real (kind=RKIND), dimension(:,:), intent(inout) :: flux_upwind_tmp, flux_tmp
+ logical, intent(in) :: config_gpu_aware_mpi
procedure (halo_exchange_routine) :: exchange_halo_group
logical, intent(in), optional :: advance_density
real (kind=RKIND), dimension(:,:), intent(inout), optional :: rho_zz_int
@@ -3969,7 +4973,7 @@ subroutine atm_advance_scalars_mono(field_name, block, tend, state, diag, mesh,
edgesOnCell, edgesOnCell_sign, nEdgesOnCell, fnm, fnp, rdnw, nAdvCellsForEdge, &
advCellsForEdge, adv_coefs, adv_coefs_3rd, scalar_old, scalar_new, s_max, s_min, &
wdtn, scale_arr, flux_arr, flux_upwind_tmp, flux_tmp, &
- bdyMaskCell, bdyMaskEdge, &
+ bdyMaskCell, bdyMaskEdge, config_gpu_aware_mpi, &
exchange_halo_group, advance_density, rho_zz_int)
call mpas_deallocate_scratch_field(scale)
@@ -4017,7 +5021,7 @@ subroutine atm_advance_scalars_mono_work(field_name, block, state, nCells, nEdge
edgesOnCell, edgesOnCell_sign, nEdgesOnCell, fnm, fnp, rdnw, nAdvCellsForEdge, &
advCellsForEdge, adv_coefs, adv_coefs_3rd, scalar_old, scalar_new, s_max, s_min, &
wdtn, scale_arr, flux_arr, flux_upwind_tmp, flux_tmp, &
- bdyMaskCell, bdyMaskEdge, &
+ bdyMaskCell, bdyMaskEdge, config_gpu_aware_mpi, &
exchange_halo_group, advance_density, rho_zz_int)
use mpas_atm_dimensions, only : nVertLevels
@@ -4033,6 +5037,7 @@ subroutine atm_advance_scalars_mono_work(field_name, block, state, nCells, nEdge
real (kind=RKIND), intent(in) :: dt
integer, intent(in) :: cellStart, cellEnd, edgeStart, edgeEnd
integer, intent(in) :: cellSolveStart, cellSolveEnd
+ logical, intent(in) :: config_gpu_aware_mpi
procedure (halo_exchange_routine) :: exchange_halo_group
logical, intent(in), optional :: advance_density
real (kind=RKIND), dimension(:,:), intent(inout), optional :: rho_zz_int
@@ -4107,22 +5112,6 @@ subroutine atm_advance_scalars_mono_work(field_name, block, state, nCells, nEdge
! The transport will maintain this positive definite solution and optionally, shape preservation (monotonicity).
- MPAS_ACC_TIMER_START('atm_advance_scalars_mono [ACC_data_xfer]')
- !$acc data present(nEdgesOnCell, edgesOnCell, edgesOnCell_sign, &
- !$acc invAreaCell, cellsOnCell, cellsOnEdge, nAdvCellsForEdge, &
- !$acc advCellsForEdge, adv_coefs, adv_coefs_3rd, dvEdge, bdyMaskCell)
-
-#ifdef DO_PHYSICS
- !$acc enter data copyin(scalar_tend)
-#else
- !$acc enter data create(scalar_tend)
-#endif
- if (local_advance_density) then
- !$acc enter data copyin(rho_zz_int)
- end if
- !$acc enter data copyin(scalars_old, rho_zz_old, rdnw, uhAvg, wwAvg)
- MPAS_ACC_TIMER_STOP('atm_advance_scalars_mono [ACC_data_xfer]')
-
!$acc parallel
!$acc loop gang worker
@@ -4145,19 +5134,17 @@ subroutine atm_advance_scalars_mono_work(field_name, block, state, nCells, nEdge
!$acc end parallel
MPAS_ACC_TIMER_START('atm_advance_scalars_mono [ACC_data_xfer]')
- !$acc exit data copyout(scalar_tend)
-
- !$acc update self(scalars_old)
+ !$acc update self(scalars_old) if (.not. config_gpu_aware_mpi)
MPAS_ACC_TIMER_STOP('atm_advance_scalars_mono [ACC_data_xfer]')
!$OMP BARRIER
!$OMP MASTER
- call exchange_halo_group(block % domain, 'dynamics:'//trim(field_name)//'_old')
+ call exchange_halo_group(block % domain, 'dynamics:'//trim(field_name)//'_old', config_gpu_aware_mpi)
!$OMP END MASTER
!$OMP BARRIER
MPAS_ACC_TIMER_START('atm_advance_scalars_mono [ACC_data_xfer]')
- !$acc update device(scalars_old)
+ !$acc update device(scalars_old) if (.not. config_gpu_aware_mpi)
MPAS_ACC_TIMER_STOP('atm_advance_scalars_mono [ACC_data_xfer]')
!
@@ -4209,13 +5196,7 @@ subroutine atm_advance_scalars_mono_work(field_name, block, state, nCells, nEdge
end if
- MPAS_ACC_TIMER_START('atm_advance_scalars_mono [ACC_data_xfer]')
- if (.not. local_advance_density) then
- !$acc enter data copyin(rho_zz_new)
- end if
- !$acc enter data copyin(scalars_new, fnm, fnp)
!$acc enter data create(scale_arr)
- MPAS_ACC_TIMER_STOP('atm_advance_scalars_mono [ACC_data_xfer]')
do iScalar = 1, num_scalars
@@ -4560,17 +5541,17 @@ subroutine atm_advance_scalars_mono_work(field_name, block, state, nCells, nEdge
!
MPAS_ACC_TIMER_START('atm_advance_scalars_mono [ACC_data_xfer]')
- !$acc update self(scale_arr)
+ !$acc update self(scale_arr) if (.not. config_gpu_aware_mpi)
MPAS_ACC_TIMER_STOP('atm_advance_scalars_mono [ACC_data_xfer]')
!$OMP BARRIER
!$OMP MASTER
- call exchange_halo_group(block % domain, 'dynamics:scale')
+ call exchange_halo_group(block % domain, 'dynamics:scale', config_gpu_aware_mpi)
!$OMP END MASTER
!$OMP BARRIER
MPAS_ACC_TIMER_START('atm_advance_scalars_mono [ACC_data_xfer]')
- !$acc update device(scale_arr)
+ !$acc update device(scale_arr) if (.not. config_gpu_aware_mpi)
MPAS_ACC_TIMER_STOP('atm_advance_scalars_mono [ACC_data_xfer]')
!$acc parallel
@@ -4718,23 +5699,12 @@ subroutine atm_advance_scalars_mono_work(field_name, block, state, nCells, nEdge
end do ! loop over scalars
- MPAS_ACC_TIMER_START('atm_advance_scalars_mono [ACC_data_xfer]')
- if (local_advance_density) then
- !$acc exit data copyout(rho_zz_int)
- else
- !$acc exit data delete(rho_zz_new)
- end if
- !$acc exit data copyout(scalars_new)
- !$acc exit data delete(scalars_old, scale_arr, rho_zz_old, wwAvg, &
- !$acc uhAvg, fnm, fnp, rdnw)
-
- !$acc end data
- MPAS_ACC_TIMER_STOP('atm_advance_scalars_mono [ACC_data_xfer]')
+ !$acc exit data delete(scale_arr)
end subroutine atm_advance_scalars_mono_work
- subroutine atm_compute_dyn_tend(tend, tend_physics, state, diag, mesh, configs, nVertLevels, rk_step, dt, &
+ subroutine atm_compute_dyn_tend(tend, tend_physics, state, diag, mesh, diag_physics, configs, nVertLevels, rk_step, dynamics_substep, dt, &
cellStart, cellEnd, vertexStart, vertexEnd, edgeStart, edgeEnd, &
cellSolveStart, cellSolveEnd, vertexSolveStart, vertexSolveEnd, edgeSolveStart, edgeSolveEnd)
!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
@@ -4750,6 +5720,8 @@ subroutine atm_compute_dyn_tend(tend, tend_physics, state, diag, mesh, configs,
!
!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+ use mpas_atm_dissipation_models, only : les_model_from_string, les_surface_from_string
+
implicit none
!
@@ -4760,9 +5732,10 @@ subroutine atm_compute_dyn_tend(tend, tend_physics, state, diag, mesh, configs,
type (mpas_pool_type), intent(in) :: state
type (mpas_pool_type), intent(in) :: diag
type (mpas_pool_type), intent(in) :: mesh
+ type (mpas_pool_type), intent(in) :: diag_physics
type (mpas_pool_type), intent(in) :: configs
integer, intent(in) :: nVertLevels ! for allocating stack variables
- integer, intent(in) :: rk_step
+ integer, intent(in) :: rk_step, dynamics_substep
real (kind=RKIND), intent(in) :: dt
integer, intent(in) :: cellStart, cellEnd, vertexStart, vertexEnd, edgeStart, edgeEnd
integer, intent(in) :: cellSolveStart, cellSolveEnd, vertexSolveStart, vertexSolveEnd, edgeSolveStart, edgeSolveEnd
@@ -4778,7 +5751,9 @@ subroutine atm_compute_dyn_tend(tend, tend_physics, state, diag, mesh, configs,
real (kind=RKIND), dimension(:,:), pointer :: weightsOnEdge, zgrid, rho_edge, rho_zz, ru, u, v, tend_u, &
divergence, vorticity, ke, pv_edge, theta_m, rw, tend_rho, &
rt_diabatic_tend, tend_theta, tend_w, w, cqw, rb, rr, pp, pressure_b, zz, zxu, cqu, &
- h_divergence, kdiff, edgesOnCell_sign, edgesOnVertex_sign, rw_save, ru_save
+ h_divergence, bn2, edgesOnCell_sign, edgesOnVertex_sign, rw_save, ru_save
+
+ real (kind=RKIND), dimension(:,:), pointer :: eddy_visc_horz, eddy_visc_vert
real (kind=RKIND), dimension(:,:), pointer :: theta_m_save
@@ -4788,7 +5763,7 @@ subroutine atm_compute_dyn_tend(tend, tend_physics, state, diag, mesh, configs,
real (kind=RKIND), dimension(:,:), pointer :: rthdynten
- real (kind=RKIND), dimension(:,:,:), pointer :: scalars
+ real (kind=RKIND), dimension(:,:,:), pointer :: scalars, tend_scalars
real (kind=RKIND), dimension(:,:), pointer :: tend_u_euler, tend_w_euler, tend_theta_euler
@@ -4803,6 +5778,7 @@ subroutine atm_compute_dyn_tend(tend, tend_physics, state, diag, mesh, configs,
real (kind=RKIND), dimension(:), pointer :: rdzu, rdzw, fzm, fzp, qv_init
real (kind=RKIND), dimension(:,:), pointer :: t_init
+ real (kind=RKIND), dimension(:), pointer:: ustm, hfx, qfx
real (kind=RKIND), pointer :: cf1, cf2, cf3
@@ -4810,12 +5786,23 @@ subroutine atm_compute_dyn_tend(tend, tend_physics, state, diag, mesh, configs,
real (kind=RKIND), dimension(:,:), pointer :: ur_cell, vr_cell
real (kind=RKIND), dimension(:,:), pointer :: defc_a, defc_b
+ real (kind=RKIND), dimension(:,:), pointer :: deformation_coef_c2, deformation_coef_s2, deformation_coef_cs
+ real (kind=RKIND), dimension(:,:), pointer :: deformation_coef_c, deformation_coef_s
+ real (kind=RKIND), dimension(:,:), pointer :: prandtl_3d_inv
+
real(kind=RKIND), dimension(:,:), pointer :: tend_w_pgf, tend_w_buoy
real (kind=RKIND), pointer :: coef_3rd_order, c_s
logical, pointer :: config_mix_full
+ logical, pointer :: config_mix_scalars
character (len=StrKIND), pointer :: config_horiz_mixing
+ character (len=StrKIND), pointer :: config_les_model
+ character (len=StrKIND), pointer :: config_les_surface
+ integer :: les_model_opt, les_surface_opt
+ real (kind=RKIND), pointer :: config_surface_heat_flux
+ real (kind=RKIND), pointer :: config_surface_moisture_flux
+ real (kind=RKIND), pointer :: config_surface_drag_coefficient
real (kind=RKIND), pointer :: config_del4u_div_factor
real (kind=RKIND), pointer :: config_h_theta_eddy_visc4
real (kind=RKIND), pointer :: config_h_mom_eddy_visc4
@@ -4828,12 +5815,22 @@ subroutine atm_compute_dyn_tend(tend, tend_physics, state, diag, mesh, configs,
logical, pointer :: config_rayleigh_damp_u
real (kind=RKIND), pointer :: config_rayleigh_damp_u_timescale_days
integer, pointer :: config_number_rayleigh_damp_u_levels, config_number_cam_damping_levels
+ integer, pointer :: index_qv, index_qc, index_tke
+
+ logical :: inactive_rthdynten
+ logical :: nopbl
call mpas_pool_get_config(mesh, 'sphere_radius', r_earth)
call mpas_pool_get_config(configs, 'config_coef_3rd_order', coef_3rd_order)
call mpas_pool_get_config(configs, 'config_mix_full', config_mix_full)
+ call mpas_pool_get_config(configs, 'config_mix_scalars', config_mix_scalars)
call mpas_pool_get_config(configs, 'config_horiz_mixing', config_horiz_mixing)
+ call mpas_pool_get_config(configs, 'config_les_model', config_les_model)
+ call mpas_pool_get_config(configs, 'config_les_surface', config_les_surface)
+ call mpas_pool_get_config(configs, 'config_surface_heat_flux', config_surface_heat_flux)
+ call mpas_pool_get_config(configs, 'config_surface_moisture_flux', config_surface_moisture_flux)
+ call mpas_pool_get_config(configs, 'config_surface_drag_coefficient', config_surface_drag_coefficient)
call mpas_pool_get_config(configs, 'config_del4u_div_factor', config_del4u_div_factor)
call mpas_pool_get_config(configs, 'config_h_theta_eddy_visc4', config_h_theta_eddy_visc4)
call mpas_pool_get_config(configs, 'config_h_mom_eddy_visc4', config_h_mom_eddy_visc4)
@@ -4864,7 +5861,9 @@ subroutine atm_compute_dyn_tend(tend, tend_physics, state, diag, mesh, configs,
call mpas_pool_get_array(diag, 'rho_p', rr)
call mpas_pool_get_array(diag, 'rho_p_save', rr_save)
call mpas_pool_get_array(diag, 'v', v)
- call mpas_pool_get_array(diag, 'kdiff', kdiff)
+ call mpas_pool_get_array(diag, 'eddy_visc_horz', eddy_visc_horz)
+ call mpas_pool_get_array(diag, 'eddy_visc_vert', eddy_visc_vert)
+ call mpas_pool_get_array(diag, 'bn2', bn2)
call mpas_pool_get_array(diag, 'ru', ru)
call mpas_pool_get_array(diag, 'ru_save', ru_save)
call mpas_pool_get_array(diag, 'rw', rw)
@@ -4877,9 +5876,29 @@ subroutine atm_compute_dyn_tend(tend, tend_physics, state, diag, mesh, configs,
call mpas_pool_get_array(diag, 'pressure_base', pressure_b)
call mpas_pool_get_array(diag, 'h_divergence', h_divergence)
call mpas_pool_get_array(diag, 'exner', exner)
+ call mpas_pool_get_array(diag, 'prandtl_3d_inv', prandtl_3d_inv)
+
call mpas_pool_get_array(tend_physics, 'rthdynten', rthdynten)
+ nullify(ustm)
+ call mpas_pool_get_array(diag_physics,'ustm',ustm)
+ nullify(hfx)
+ call mpas_pool_get_array(diag_physics,'hfx',hfx)
+ nullify(qfx)
+ call mpas_pool_get_array(diag_physics,'qfx',qfx)
+
+ nopbl = .false.
+ if (.not. associated(ustm) &
+ .or. .not. associated(hfx) &
+ .or. .not. associated(qfx)) then
+
+ allocate(ustm(1))
+ allocate(hfx(1))
+ allocate(qfx(1))
+ nopbl = .true.
+ end if
+
call mpas_pool_get_array(mesh, 'weightsOnEdge', weightsOnEdge)
call mpas_pool_get_array(mesh, 'cellsOnEdge', cellsOnEdge)
call mpas_pool_get_array(mesh, 'cellsOnCell', cellsOnCell)
@@ -4905,6 +5924,11 @@ subroutine atm_compute_dyn_tend(tend, tend_physics, state, diag, mesh, configs,
call mpas_pool_get_array(mesh, 'angleEdge', angleEdge)
call mpas_pool_get_array(mesh, 'defc_a', defc_a)
call mpas_pool_get_array(mesh, 'defc_b', defc_b)
+ call mpas_pool_get_array(mesh, 'deformation_coef_c2', deformation_coef_c2)
+ call mpas_pool_get_array(mesh, 'deformation_coef_s2', deformation_coef_s2)
+ call mpas_pool_get_array(mesh, 'deformation_coef_cs', deformation_coef_cs)
+ call mpas_pool_get_array(mesh, 'deformation_coef_c', deformation_coef_c)
+ call mpas_pool_get_array(mesh, 'deformation_coef_s', deformation_coef_s)
call mpas_pool_get_array(mesh, 'meshScalingDel2', meshScalingDel2)
call mpas_pool_get_array(mesh, 'meshScalingDel4', meshScalingDel4)
call mpas_pool_get_array(mesh, 'u_init', u_init)
@@ -4928,6 +5952,8 @@ subroutine atm_compute_dyn_tend(tend, tend_physics, state, diag, mesh, configs,
call mpas_pool_get_array(tend, 'w_euler', tend_w_euler)
call mpas_pool_get_array(tend, 'w_pgf', tend_w_pgf)
call mpas_pool_get_array(tend, 'w_buoy', tend_w_buoy)
+ call mpas_pool_get_array(tend, 'scalars_tend', tend_scalars)
+
call mpas_pool_get_array(diag, 'cqw', cqw)
call mpas_pool_get_array(diag, 'cqu', cqu)
@@ -4944,6 +5970,9 @@ subroutine atm_compute_dyn_tend(tend, tend_physics, state, diag, mesh, configs,
call mpas_pool_get_dimension(state, 'num_scalars', num_scalars)
call mpas_pool_get_dimension(state, 'moist_start', moist_start)
call mpas_pool_get_dimension(state, 'moist_end', moist_end)
+ call mpas_pool_get_dimension(state, 'index_qv', index_qv)
+ call mpas_pool_get_dimension(state, 'index_qc', index_qc)
+ call mpas_pool_get_dimension(state, 'index_tke', index_tke)
call mpas_pool_get_array(mesh, 'nEdgesOnCell', nEdgesOnCell)
call mpas_pool_get_array(mesh, 'nAdvCellsForEdge', nAdvCellsForEdge)
@@ -4955,53 +5984,73 @@ subroutine atm_compute_dyn_tend(tend, tend_physics, state, diag, mesh, configs,
call mpas_pool_get_array(mesh, 'cf2', cf2)
call mpas_pool_get_array(mesh, 'cf3', cf3)
+ les_model_opt = les_model_from_string(config_les_model)
+ les_surface_opt = les_surface_from_string(config_les_surface)
+
call atm_compute_dyn_tend_work(nCells, nEdges, nVertices, nVertLevels, &
- nCellsSolve, nEdgesSolve, vertexDegree, maxEdges, maxEdges2, num_scalars, moist_start, moist_end, &
+ nCellsSolve, nEdgesSolve, vertexDegree, maxEdges, maxEdges2, num_scalars, index_qv, index_qc, moist_start, moist_end, &
+ tend_scalars, &
fEdge, dvEdge, dcEdge, invDcEdge, invDvEdge, invAreaCell, invAreaTriangle, meshScalingDel2, meshScalingDel4, &
weightsOnEdge, zgrid, rho_edge, rho_zz, ru, u, v, tend_u, &
divergence, vorticity, ke, pv_edge, theta_m, rw, tend_rho, &
rt_diabatic_tend, tend_theta, tend_w, w, cqw, rb, rr, pp, pressure_b, zz, zxu, cqu, &
- h_divergence, kdiff, edgesOnCell_sign, edgesOnVertex_sign, rw_save, ru_save, &
+ h_divergence, bn2, eddy_visc_horz, eddy_visc_vert, index_tke, &
+ edgesOnCell_sign, edgesOnVertex_sign, rw_save, ru_save, &
theta_m_save, exner, rr_save, scalars, tend_u_euler, tend_w_euler, tend_theta_euler, deriv_two, &
cellsOnEdge, verticesOnEdge, edgesOnCell, edgesOnEdge, cellsOnCell, edgesOnVertex, nEdgesOnCell, nEdgesOnEdge, &
latCell, latEdge, angleEdge, u_init, v_init, advCellsForEdge, nAdvCellsForEdge, adv_coefs, adv_coefs_3rd, &
rdzu, rdzw, fzm, fzp, qv_init, t_init, cf1, cf2, cf3, r_earth, ur_cell, vr_cell, defc_a, defc_b, &
- tend_w_pgf, tend_w_buoy, coef_3rd_order, c_s, config_mix_full, config_horiz_mixing, config_del4u_div_factor, &
+ deformation_coef_c2,deformation_coef_s2,deformation_coef_cs,deformation_coef_c,deformation_coef_s, &
+ tend_w_pgf, tend_w_buoy, coef_3rd_order, c_s, config_mix_full, config_mix_scalars, config_horiz_mixing, les_model_opt, &
+ les_surface_opt, prandtl_3d_inv, config_del4u_div_factor, &
config_h_mom_eddy_visc2, config_v_mom_eddy_visc2, config_h_theta_eddy_visc2, config_v_theta_eddy_visc2, &
- config_h_theta_eddy_visc4, config_h_mom_eddy_visc4, config_visc4_2dsmag, config_len_disp, rk_step, dt, &
+ config_h_theta_eddy_visc4, config_h_mom_eddy_visc4, config_visc4_2dsmag, config_len_disp, rk_step, dynamics_substep, dt, &
config_mpas_cam_coef, &
config_rayleigh_damp_u, config_rayleigh_damp_u_timescale_days, &
config_number_rayleigh_damp_u_levels, config_number_cam_damping_levels, &
- rthdynten, &
+ config_surface_heat_flux, config_surface_moisture_flux, config_surface_drag_coefficient, &
+ rthdynten, ustm, hfx, qfx, &
cellStart, cellEnd, vertexStart, vertexEnd, edgeStart, edgeEnd, &
cellSolveStart, cellSolveEnd, vertexSolveStart, vertexSolveEnd, edgeSolveStart, edgeSolveEnd)
+ if (nopbl) then
+ deallocate(ustm)
+ deallocate(hfx)
+ deallocate(qfx)
+ end if
+
end subroutine atm_compute_dyn_tend
subroutine atm_compute_dyn_tend_work(nCells, nEdges, nVertices, nVertLevels_dummy, &
- nCellsSolve, nEdgesSolve, vertexDegree, maxEdges_dummy, maxEdges2_dummy, num_scalars_dummy, moist_start, moist_end, &
+ nCellsSolve, nEdgesSolve, vertexDegree, maxEdges_dummy, maxEdges2_dummy, num_scalars_dummy, index_qv, index_qc, moist_start, moist_end, &
+ tend_scalars, &
fEdge, dvEdge, dcEdge, invDcEdge, invDvEdge, invAreaCell, invAreaTriangle, meshScalingDel2, meshScalingDel4, &
weightsOnEdge, zgrid, rho_edge, rho_zz, ru, u, v, tend_u, &
divergence, vorticity, ke, pv_edge, theta_m, rw, tend_rho, &
rt_diabatic_tend, tend_theta, tend_w, w, cqw, rb, rr, pp, pressure_b, zz, zxu, cqu, &
- h_divergence, kdiff, edgesOnCell_sign, edgesOnVertex_sign, rw_save, ru_save, &
+ h_divergence, bn2, eddy_visc_horz, eddy_visc_vert, index_tke, &
+ edgesOnCell_sign, edgesOnVertex_sign, rw_save, ru_save, &
theta_m_save, exner, rr_save, scalars, tend_u_euler, tend_w_euler, tend_theta_euler, deriv_two, &
cellsOnEdge, verticesOnEdge, edgesOnCell, edgesOnEdge, cellsOnCell, edgesOnVertex, nEdgesOnCell, nEdgesOnEdge, &
latCell, latEdge, angleEdge, u_init, v_init, advCellsForEdge, nAdvCellsForEdge, adv_coefs, adv_coefs_3rd, &
rdzu, rdzw, fzm, fzp, qv_init, t_init, cf1, cf2, cf3, r_earth, ur_cell, vr_cell, defc_a, defc_b, &
- tend_w_pgf, tend_w_buoy, coef_3rd_order, c_s, config_mix_full, config_horiz_mixing, config_del4u_div_factor, &
+ deformation_coef_c2,deformation_coef_s2,deformation_coef_cs,deformation_coef_c,deformation_coef_s, &
+ tend_w_pgf, tend_w_buoy, coef_3rd_order, c_s, config_mix_full, config_mix_scalars, config_horiz_mixing, les_model_opt, &
+ les_surface_opt, prandtl_3d_inv, config_del4u_div_factor, &
config_h_mom_eddy_visc2, config_v_mom_eddy_visc2, config_h_theta_eddy_visc2, config_v_theta_eddy_visc2, &
- config_h_theta_eddy_visc4, config_h_mom_eddy_visc4, config_visc4_2dsmag, config_len_disp, rk_step, dt, &
+ config_h_theta_eddy_visc4, config_h_mom_eddy_visc4, config_visc4_2dsmag, config_len_disp, rk_step, dynamics_substep, dt, &
config_mpas_cam_coef, &
config_rayleigh_damp_u, config_rayleigh_damp_u_timescale_days, &
config_number_rayleigh_damp_u_levels, config_number_cam_damping_levels, &
- rthdynten, &
+ config_surface_heat_flux, config_surface_moisture_flux, config_surface_drag_coefficient, &
+ rthdynten, ustm, hfx, qfx, &
cellStart, cellEnd, vertexStart, vertexEnd, edgeStart, edgeEnd, &
cellSolveStart, cellSolveEnd, vertexSolveStart, vertexSolveEnd, edgeSolveStart, edgeSolveEnd)
use mpas_atm_dimensions
+ use mpas_atm_dissipation_models, only : LES_MODEL_NONE
implicit none
@@ -5011,7 +6060,7 @@ subroutine atm_compute_dyn_tend_work(nCells, nEdges, nVertices, nVertLevels_dumm
! Dummy arguments
!
integer :: nCells, nEdges, nVertices, nVertLevels_dummy, nCellsSolve, nEdgesSolve, vertexDegree, &
- maxEdges_dummy, maxEdges2_dummy, num_scalars_dummy, moist_start, moist_end
+ maxEdges_dummy, maxEdges2_dummy, num_scalars_dummy, index_qv, index_qc, moist_start, moist_end, index_tke
real (kind=RKIND), dimension(nEdges+1) :: fEdge
real (kind=RKIND), dimension(nEdges+1) :: dvEdge
@@ -5050,7 +6099,11 @@ subroutine atm_compute_dyn_tend_work(nCells, nEdges, nVertices, nVertLevels_dumm
real (kind=RKIND), dimension(nVertLevels,nEdges+1) :: zxu
real (kind=RKIND), dimension(nVertLevels,nEdges+1) :: cqu
real (kind=RKIND), dimension(nVertLevels,nCells+1) :: h_divergence
- real (kind=RKIND), dimension(nVertLevels,nCells+1) :: kdiff
+ real (kind=RKIND), dimension(nVertLevels,nCells+1) :: eddy_visc_horz
+ real (kind=RKIND), dimension(nVertLevels,nCells+1) :: eddy_visc_vert
+ real (kind=RKIND), dimension(nVertLevels,nCells+1) :: bn2
+ real (kind=RKIND), dimension(:) :: ustm, hfx, qfx
+ real (kind=RKIND), dimension(nVertLevels,nCells+1) :: prandtl_3d_inv
real (kind=RKIND), dimension(maxEdges,nCells+1) :: edgesOnCell_sign
real (kind=RKIND), dimension(vertexDegree,nVertices+1) :: edgesOnVertex_sign
real (kind=RKIND), dimension(nVertLevels+1,nCells+1) :: rw_save
@@ -5060,6 +6113,7 @@ subroutine atm_compute_dyn_tend_work(nCells, nEdges, nVertices, nVertLevels_dumm
real (kind=RKIND), dimension(nVertLevels,nCells+1) :: exner
real (kind=RKIND), dimension(nVertLevels,nCells+1) :: rr_save
real (kind=RKIND), dimension(num_scalars,nVertLevels,nCells+1) :: scalars
+ real (kind=RKIND), dimension(num_scalars,nVertLevels,nCells+1) :: tend_scalars
real (kind=RKIND), dimension(nVertLevels,nEdges+1) :: tend_u_euler
real (kind=RKIND), dimension(nVertLevels+1,nCells+1) :: tend_w_euler
real (kind=RKIND), dimension(nVertLevels,nCells+1) :: tend_theta_euler
@@ -5098,13 +6152,20 @@ subroutine atm_compute_dyn_tend_work(nCells, nEdges, nVertices, nVertLevels_dumm
real (kind=RKIND), dimension(maxEdges,nCells+1) :: defc_a
real (kind=RKIND), dimension(maxEdges,nCells+1) :: defc_b
+ real (kind=RKIND), dimension(maxEdges,nCells+1) :: deformation_coef_c2, deformation_coef_s2, deformation_coef_cs
+ real (kind=RKIND), dimension(maxEdges,nCells+1) :: deformation_coef_c, deformation_coef_s
real (kind=RKIND), dimension(nVertLevels+1,nCells+1) :: tend_w_pgf
real (kind=RKIND), dimension(nVertLevels+1,nCells+1) :: tend_w_buoy
real (kind=RKIND) :: coef_3rd_order, c_s
- logical :: config_mix_full
+ logical :: config_mix_full, config_mix_scalars
character (len=StrKIND) :: config_horiz_mixing
+ integer, intent(in) :: les_model_opt
+ integer, intent(in) :: les_surface_opt
+ real (kind=RKIND) :: config_surface_heat_flux
+ real (kind=RKIND) :: config_surface_moisture_flux
+ real (kind=RKIND) :: config_surface_drag_coefficient
real (kind=RKIND) :: config_del4u_div_factor
real (kind=RKIND) :: config_h_theta_eddy_visc4
real (kind=RKIND) :: config_h_mom_eddy_visc4
@@ -5112,7 +6173,7 @@ subroutine atm_compute_dyn_tend_work(nCells, nEdges, nVertices, nVertLevels_dumm
real (kind=RKIND) :: config_len_disp
real (kind=RKIND) :: config_h_mom_eddy_visc2, config_v_mom_eddy_visc2, config_h_theta_eddy_visc2, config_v_theta_eddy_visc2
- integer, intent(in) :: rk_step
+ integer, intent(in) :: rk_step, dynamics_substep
real (kind=RKIND), intent(in) :: dt
real (kind=RKIND) :: config_mpas_cam_coef
@@ -5136,6 +6197,10 @@ subroutine atm_compute_dyn_tend_work(nCells, nEdges, nVertices, nVertLevels_dumm
real (kind=RKIND), dimension( nVertLevels+1 ) :: d_diag, d_off_diag, flux_arr
real (kind=RKIND), dimension( nVertLevels + 1 ) :: wduz, wdwz, wdtz, dpzx
real (kind=RKIND), dimension( nVertLevels ) :: ru_edge_w, q, u_mix
+
+! real (kind=RKIND), dimension( nVertLevels+1 ) :: d_11, d_22, d_12
+! real (kind=RKIND), dimension( nVertLevels+1 ) :: dudx, dudy, dvdx, dvdy
+
real (kind=RKIND) :: theta_turb_flux, w_turb_flux, r
real (kind=RKIND) :: scalar_weight
real (kind=RKIND) :: inv_r_earth
@@ -5153,6 +6218,9 @@ subroutine atm_compute_dyn_tend_work(nCells, nEdges, nVertices, nVertLevels_dumm
real (kind=RKIND) :: flux3, flux4
real (kind=RKIND) :: q_im2, q_im1, q_i, q_ip1, ua, coef3
+ logical, parameter :: perturbation_coriolis = .true.
+ real (kind=RKIND) :: reference_u
+
flux4(q_im2, q_im1, q_i, q_ip1, ua) = &
ua*( 7.*(q_i + q_im1) - (q_ip1 + q_im2) )/12.0
@@ -5162,43 +6230,14 @@ subroutine atm_compute_dyn_tend_work(nCells, nEdges, nVertices, nVertLevels_dumm
MPAS_ACC_TIMER_START('atm_compute_dyn_tend_work [ACC_data_xfer]')
- if (rk_step == 1) then
- !$acc enter data create(tend_w_euler)
- !$acc enter data create(tend_u_euler)
- !$acc enter data create(tend_theta_euler)
- !$acc enter data create(tend_rho)
-
- !$acc enter data create(kdiff)
- !$acc enter data copyin(tend_rho_physics)
- !$acc enter data copyin(rb, rr_save)
- !$acc enter data copyin(divergence, vorticity)
- !$acc enter data copyin(v)
- !$acc enter data copyin(u_init, v_init)
- else
- !$acc enter data copyin(tend_w_euler)
- !$acc enter data copyin(tend_u_euler)
- !$acc enter data copyin(tend_theta_euler)
- !$acc enter data copyin(tend_rho)
+ if (les_model_opt /= LES_MODEL_NONE) then
+ !$acc enter data copyin(bn2)
end if
- !$acc enter data create(tend_u)
- !$acc enter data copyin(cqu, pp, u, w, pv_edge, rho_edge, ke)
- !$acc enter data create(h_divergence)
- !$acc enter data copyin(ru, rw)
+ !$acc enter data copyin(ustm, hfx, qfx)
!$acc enter data create(rayleigh_damp_coef)
- !$acc enter data copyin(tend_ru_physics)
- !$acc enter data create(tend_w)
- !$acc enter data copyin(rho_zz)
- !$acc enter data create(tend_theta)
- !$acc enter data copyin(theta_m)
- !$acc enter data copyin(ru_save, theta_m_save)
- !$acc enter data copyin(cqw)
- !$acc enter data copyin(tend_rtheta_physics)
- !$acc enter data copyin(rw_save, rt_diabatic_tend)
- !$acc enter data create(rthdynten)
- !$acc enter data copyin(t_init)
-#ifdef CURVATURE
- !$acc enter data copyin(ur_cell, vr_cell)
-#endif
+ !$acc enter data create(eddy_visc_horz)
+ !$acc enter data create(eddy_visc_vert)
+ !$acc enter data create(prandtl_3d_inv)
MPAS_ACC_TIMER_STOP('atm_compute_dyn_tend_work [ACC_data_xfer]')
prandtl_inv = 1.0_RKIND / prandtl
@@ -5221,57 +6260,51 @@ subroutine atm_compute_dyn_tend_work(nCells, nEdges, nVertices, nVertLevels_dumm
!$acc end parallel
! Smagorinsky eddy viscosity, based on horizontal deformation (in this case on model coordinate surfaces).
- ! The integration coefficients were precomputed and stored in defc_a and defc_b
+ ! The integration coefficients were precomputed and stored in deformation_coef_*
- if(config_horiz_mixing == "2d_smagorinsky") then
+ if(les_model_opt == LES_MODEL_NONE) then
- !$acc parallel default(present)
- !$acc loop gang worker private(d_diag, d_off_diag)
- do iCell = cellStart,cellEnd
+ if(config_horiz_mixing == "2d_smagorinsky") then
- !$acc loop vector
- do k = 1, nVertLevels
- d_diag(k) = 0.0_RKIND
- d_off_diag(k) = 0.0_RKIND
- end do
+ call smagorinsky_2d( eddy_visc_horz, u, v, c_s, config_len_disp, &
+ deformation_coef_c2, deformation_coef_s2, deformation_coef_cs, &
+ invDt, h_mom_eddy_visc4, config_visc4_2dsmag, h_theta_eddy_visc4, &
+ cellStart, cellEnd, nEdgesOnCell, edgesOnCell, &
+ nCells, nEdges )
- !$acc loop seq
- do iEdge=1,nEdgesOnCell(iCell)
+ else if(config_horiz_mixing == "2d_fixed") then
+
+ !$acc parallel default(present)
+ !$acc loop gang worker
+ do iCell = cellStart, cellEnd
!$acc loop vector
- do k=1,nVertLevels
- d_diag(k) = d_diag(k) + defc_a(iEdge,iCell)*u(k,EdgesOnCell(iEdge,iCell)) &
- - defc_b(iEdge,iCell)*v(k,EdgesOnCell(iEdge,iCell))
- d_off_diag(k) = d_off_diag(k) + defc_b(iEdge,iCell)*u(k,EdgesOnCell(iEdge,iCell)) &
- + defc_a(iEdge,iCell)*v(k,EdgesOnCell(iEdge,iCell))
+ do k = 1, nVertLevels
+ eddy_visc_horz(k,iCell) = config_h_theta_eddy_visc2
end do
end do
-!DIR$ IVDEP
- !$acc loop vector
- do k=1, nVertLevels
- ! here is the Smagorinsky formulation,
- ! followed by imposition of an upper bound on the eddy viscosity
- kdiff(k,iCell) = min((c_s * config_len_disp)**2 * sqrt(d_diag(k)**2 + d_off_diag(k)**2),(0.01*config_len_disp**2) * invDt)
- end do
- end do
- !$acc end parallel
+ !$acc end parallel
- h_mom_eddy_visc4 = config_visc4_2dsmag * config_len_disp**3
- h_theta_eddy_visc4 = h_mom_eddy_visc4
+ h_mom_eddy_visc4 = config_h_mom_eddy_visc4
+ h_theta_eddy_visc4 = config_h_theta_eddy_visc4
- else if(config_horiz_mixing == "2d_fixed") then
+ end if
- !$acc parallel default(present)
- !$acc loop gang worker
- do iCell = cellStart, cellEnd
- !$acc loop vector
- do k = 1, nVertLevels
- kdiff(k,iCell) = config_h_theta_eddy_visc2
- end do
- end do
- !$acc end parallel
+ else if (les_model_opt /= LES_MODEL_NONE) then
- h_mom_eddy_visc4 = config_h_mom_eddy_visc4
- h_theta_eddy_visc4 = config_h_theta_eddy_visc4
+ ! call mpas_log_write(' BV call, index qv, qc, tke $i $i $i ', intArgs=(/index_qv, index_qc, index_tke/))
+
+ call calculate_n2( bn2, theta_m, exner, pressure_b, pp, zgrid, scalars, index_qv, index_qc, qtot, &
+ cellStart, cellEnd, nCells)
+
+ call les_models( les_model_opt, les_surface_opt, dynamics_substep, eddy_visc_horz, eddy_visc_vert, &
+ u, v, ur_cell, vr_cell, &
+ w, c_s, bn2, zgrid, config_len_disp, &
+ deformation_coef_c2, deformation_coef_s2, deformation_coef_cs, &
+ deformation_coef_c, deformation_coef_s, prandtl_3d_inv, &
+ invDt, h_mom_eddy_visc4, config_visc4_2dsmag, h_theta_eddy_visc4, &
+ scalars, tend_scalars, index_tke, rho_zz, meshScalingDel2, &
+ cellStart, cellEnd, nEdgesOnCell, edgesOnCell, cellsOnEdge, &
+ nCells, nEdges, nVertLevels, maxEdges, num_scalars )
end if
@@ -5288,7 +6321,7 @@ subroutine atm_compute_dyn_tend_work(nCells, nEdges, nVertices, nVertLevels_dumm
do k = nVertLevels-config_number_cam_damping_levels + 1, nVertLevels
visc2cam = 4.0*2.0833*config_len_disp*config_mpas_cam_coef
visc2cam = visc2cam*(1.0-real(nVertLevels-k)/real(config_number_cam_damping_levels))
- kdiff(k ,iCell) = max(kdiff(k ,iCell),visc2cam)
+ eddy_visc_horz(k,iCell) = max(eddy_visc_horz(k,iCell),visc2cam)
end do
end do
!$acc end parallel
@@ -5298,8 +6331,6 @@ subroutine atm_compute_dyn_tend_work(nCells, nEdges, nVertices, nVertLevels_dumm
end if
! tendency for density.
- ! accumulate total water here for later use in w tendency calculation.
-
! accumulate horizontal mass-flux
!$acc parallel default(present)
@@ -5381,281 +6412,112 @@ subroutine atm_compute_dyn_tend_work(nCells, nEdges, nVertices, nVertLevels_dumm
!$acc loop vector
do k=1,nVertLevels
tend_u_euler(k,iEdge) = - cqu(k,iEdge)*( (pp(k,cell2)-pp(k,cell1))*invDcEdge(iEdge)/(.5*(zz(k,cell2)+zz(k,cell1))) &
- -0.5*zxu(k,iEdge)*(dpdz(k,cell1)+dpdz(k,cell2)) )
- end do
-
- end if
-
- ! vertical transport of u
-
- wduz(1) = 0.
-
- k = 2
- wduz(k) = 0.5*( rw(k,cell1)+rw(k,cell2))*(fzm(k)*u(k,iEdge)+fzp(k)*u(k-1,iEdge))
- !$acc loop vector
- do k=3,nVertLevels-1
- wduz(k) = flux3( u(k-2,iEdge),u(k-1,iEdge),u(k,iEdge),u(k+1,iEdge),0.5*(rw(k,cell1)+rw(k,cell2)), 1.0_RKIND )
- end do
- k = nVertLevels
- wduz(k) = 0.5*( rw(k,cell1)+rw(k,cell2))*(fzm(k)*u(k,iEdge)+fzp(k)*u(k-1,iEdge))
-
- wduz(nVertLevels+1) = 0.
-
-!DIR$ IVDEP
- !$acc loop vector
- do k=1,nVertLevels
- tend_u(k,iEdge) = - rdzw(k)*(wduz(k+1)-wduz(k)) ! first use of tend_u
- end do
-
- ! Next, nonlinear Coriolis term (q) following Ringler et al JCP 2009
-
- !$acc loop vector
- do k=1,nVertLevels
- q(k) = 0.0_RKIND
- end do
-
- !$acc loop seq
- do j = 1,nEdgesOnEdge(iEdge)
- eoe = edgesOnEdge(j,iEdge)
-
- !$acc loop vector
- do k=1,nVertLevels
- workpv = 0.5 * (pv_edge(k,iEdge) + pv_edge(k,eoe))
-! the original definition of pv_edge had a factor of 1/density. We have removed that factor
-! given that it was not integral to any conservation property of the system
- q(k) = q(k) + weightsOnEdge(j,iEdge) * u(k,eoe) * workpv
- end do
- end do
-
-!DIR$ IVDEP
- !$acc loop vector
- do k=1,nVertLevels
-
- ! horizontal ke gradient and vorticity terms in the vector invariant formulation
- ! of the horizontal momentum equation
- tend_u(k,iEdge) = tend_u(k,iEdge) + rho_edge(k,iEdge)* (q(k) - (ke(k,cell2) - ke(k,cell1)) &
- * invDcEdge(iEdge)) &
- - u(k,iEdge)*0.5*(h_divergence(k,cell1)+h_divergence(k,cell2))
-#ifdef CURVATURE
- ! curvature terms for the sphere
- tend_u(k,iEdge) = tend_u(k,iEdge) &
- - 2.*omega*cos(angleEdge(iEdge))*cos(latEdge(iEdge)) &
- *rho_edge(k,iEdge)*.25*(w(k,cell1)+w(k+1,cell1)+w(k,cell2)+w(k+1,cell2)) &
- - u(k,iEdge)*.25*(w(k+1,cell1)+w(k,cell1)+w(k,cell2)+w(k+1,cell2)) &
- *rho_edge(k,iEdge) * inv_r_earth
-#endif
- end do
-
- end do
- !$acc end parallel
-
- !
- ! horizontal mixing for u
- ! mixing terms are integrated using forward-Euler, so this tendency is only computed in the
- ! first Runge-Kutta substep and saved for use in later RK substeps 2 and 3.
- !
-
- if (rk_step == 1) then
-
-!$OMP BARRIER
-
- ! del^4 horizontal filter. We compute this as del^2 ( del^2 (u) ).
- ! First, storage to hold the result from the first del^2 computation.
-
- !$acc parallel default(present)
- !$acc loop gang worker
- do iEdge = edgeStart, edgeEnd
- !$acc loop vector
- do k = 1, nVertLevels
- delsq_u(k,iEdge) = 0.0_RKIND
- end do
- end do
- !$acc end parallel
-
- !$acc parallel default(present)
- !$acc loop gang worker
- do iEdge=edgeStart,edgeEnd
- cell1 = cellsOnEdge(1,iEdge)
- cell2 = cellsOnEdge(2,iEdge)
- vertex1 = verticesOnEdge(1,iEdge)
- vertex2 = verticesOnEdge(2,iEdge)
- r_dc = invDcEdge(iEdge)
- r_dv = min(invDvEdge(iEdge), 4*invDcEdge(iEdge))
-
-!DIR$ IVDEP
- !$acc loop vector
- do k=1,nVertLevels
-
- ! Compute diffusion, computed as \nabla divergence - k \times \nabla vorticity
- ! only valid for h_mom_eddy_visc4 == constant
- u_diffusion = ( divergence(k,cell2) - divergence(k,cell1) ) * r_dc &
- -( vorticity(k,vertex2) - vorticity(k,vertex1) ) * r_dv
-
- delsq_u(k,iEdge) = delsq_u(k,iEdge) + u_diffusion
-
- kdiffu = 0.5*(kdiff(k,cell1)+kdiff(k,cell2))
-
- ! include 2nd-orer diffusion here
- tend_u_euler(k,iEdge) = tend_u_euler(k,iEdge) &
- + rho_edge(k,iEdge)* kdiffu * u_diffusion * meshScalingDel2(iEdge)
-
- end do
- end do
- !$acc end parallel
-
- if (h_mom_eddy_visc4 > 0.0) then ! 4th order mixing is active
-
-!$OMP BARRIER
-
- !$acc parallel default(present)
- !$acc loop gang worker
- do iVertex=vertexStart,vertexEnd
-
- !$acc loop vector
- do k=1,nVertLevels
- delsq_vorticity(k,iVertex) = 0.0_RKIND
- end do
-
- !$acc loop seq
- do i=1,vertexDegree
- iEdge = edgesOnVertex(i,iVertex)
- edge_sign = invAreaTriangle(iVertex) * dcEdge(iEdge) * edgesOnVertex_sign(i,iVertex)
-
- !$acc loop vector
- do k=1,nVertLevels
- delsq_vorticity(k,iVertex) = delsq_vorticity(k,iVertex) + edge_sign * delsq_u(k,iEdge)
- end do
- end do
- end do
-
- !$acc loop gang worker
- do iCell=cellStart,cellEnd
-
- !$acc loop vector
- do k=1,nVertLevels
- delsq_divergence(k,iCell) = 0.0_RKIND
- end do
-
- r = invAreaCell(iCell)
-
- !$acc loop seq
- do i=1,nEdgesOnCell(iCell)
- iEdge = edgesOnCell(i,iCell)
- edge_sign = r * dvEdge(iEdge) * edgesOnCell_sign(i,iCell)
-
- !$acc loop vector
- do k=1,nVertLevels
- delsq_divergence(k,iCell) = delsq_divergence(k,iCell) + edge_sign * delsq_u(k,iEdge)
- end do
- end do
- end do
- !$acc end parallel
-
-!$OMP BARRIER
-
- !$acc parallel default(present)
- !$acc loop gang worker
- do iEdge=edgeSolveStart,edgeSolveEnd
- cell1 = cellsOnEdge(1,iEdge)
- cell2 = cellsOnEdge(2,iEdge)
- vertex1 = verticesOnEdge(1,iEdge)
- vertex2 = verticesOnEdge(2,iEdge)
-
- u_mix_scale = meshScalingDel4(iEdge)*h_mom_eddy_visc4
- r_dc = u_mix_scale * config_del4u_div_factor * invDcEdge(iEdge)
- r_dv = u_mix_scale * min(invDvEdge(iEdge), 4*invDcEdge(iEdge))
+ -0.5*zxu(k,iEdge)*(dpdz(k,cell1)+dpdz(k,cell2)) )
+ end do
-!DIR$ IVDEP
- !$acc loop vector
- do k=1,nVertLevels
+ end if
- ! Compute diffusion, computed as \nabla divergence - k \times \nabla vorticity
- ! only valid for h_mom_eddy_visc4 == constant
- !
- ! Here, we scale the diffusion on the divergence part a factor of config_del4u_div_factor
- ! relative to the rotational part. The stability constraint on the divergence component is much less
- ! stringent than the rotational part, and this flexibility may be useful.
- !
- u_diffusion = rho_edge(k,iEdge) * ( ( delsq_divergence(k,cell2) - delsq_divergence(k,cell1) ) * r_dc &
- -( delsq_vorticity(k,vertex2) - delsq_vorticity(k,vertex1) ) * r_dv )
- tend_u_euler(k,iEdge) = tend_u_euler(k,iEdge) - u_diffusion
-
- end do
- end do
- !$acc end parallel
-
- end if ! 4th order mixing is active
+ ! vertical transport of u
- !
- ! vertical mixing for u - 2nd order filter in physical (z) space
- !
- if ( v_mom_eddy_visc2 > 0.0 ) then
+ wduz(1) = 0.
- if (config_mix_full) then ! mix full state
+ k = 2
+ wduz(k) = 0.5*( rw(k,cell1)+rw(k,cell2))*(fzm(k)*u(k,iEdge)+fzp(k)*u(k-1,iEdge))
+ !$acc loop vector
+ do k=3,nVertLevels-1
+ wduz(k) = flux3( u(k-2,iEdge),u(k-1,iEdge),u(k,iEdge),u(k+1,iEdge),0.5*(rw(k,cell1)+rw(k,cell2)), 1.0_RKIND )
+ end do
+ k = nVertLevels
+ wduz(k) = 0.5*( rw(k,cell1)+rw(k,cell2))*(fzm(k)*u(k,iEdge)+fzp(k)*u(k-1,iEdge))
- !$acc parallel default(present)
- !$acc loop gang worker
- do iEdge=edgeSolveStart,edgeSolveEnd
+ wduz(nVertLevels+1) = 0.
- cell1 = cellsOnEdge(1,iEdge)
- cell2 = cellsOnEdge(2,iEdge)
+!DIR$ IVDEP
+ !$acc loop vector
+ do k=1,nVertLevels
+ tend_u(k,iEdge) = - rdzw(k)*(wduz(k+1)-wduz(k)) ! first use of tend_u
+ end do
- !$acc loop vector
- do k=2,nVertLevels-1
+ ! Next, nonlinear Coriolis term (q) following Ringler et al JCP 2009
- z1 = 0.5*(zgrid(k-1,cell1)+zgrid(k-1,cell2))
- z2 = 0.5*(zgrid(k ,cell1)+zgrid(k ,cell2))
- z3 = 0.5*(zgrid(k+1,cell1)+zgrid(k+1,cell2))
- z4 = 0.5*(zgrid(k+2,cell1)+zgrid(k+2,cell2))
+ !$acc loop vector
+ do k=1,nVertLevels
+ q(k) = 0.0_RKIND
+ end do
- zm = 0.5*(z1+z2)
- z0 = 0.5*(z2+z3)
- zp = 0.5*(z3+z4)
+ !$acc loop seq
+ do j = 1,nEdgesOnEdge(iEdge)
+ eoe = edgesOnEdge(j,iEdge)
- tend_u_euler(k,iEdge) = tend_u_euler(k,iEdge) + rho_edge(k,iEdge) * v_mom_eddy_visc2*( &
- (u(k+1,iEdge)-u(k ,iEdge))/(zp-z0) &
- -(u(k ,iEdge)-u(k-1,iEdge))/(z0-zm) )/(0.5*(zp-zm))
- end do
- end do
- !$acc end parallel
+ !$acc loop vector
+ do k=1,nVertLevels
+ workpv = 0.5 * (pv_edge(k,iEdge) + pv_edge(k,eoe))
+! the original definition of pv_edge had a factor of 1/density. We have removed that factor
+! given that it was not integral to any conservation property of the system
+ q(k) = q(k) + weightsOnEdge(j,iEdge) * u(k,eoe) * workpv
+ end do
+ end do
- else ! idealized cases where we mix on the perturbation from the initial 1-D state
+ if (perturbation_coriolis) then ! this is correct only for constant f
+ !$acc loop seq
+ do j = 1,nEdgesOnEdge(iEdge)
+ eoe = edgesOnEdge(j,iEdge)
- !$acc parallel default(present)
- !$acc loop gang worker private(u_mix)
- do iEdge=edgeSolveStart,edgeSolveEnd
+ !$acc loop vector
+ do k=1,nVertLevels
+ reference_u = u_init(k) * cos(angleEdge(eoe)) - v_init(k) * sin(angleEdge(eoe))
+ q(k) = q(k) - weightsOnEdge(j,iEdge) * reference_u * fEdge(iEdge)
+! q(k) = q(k) - weightsOnEdge(j,iEdge) * reference_u * 0.729210E-04
+ end do
+ end do
+ end if
- cell1 = cellsOnEdge(1,iEdge)
- cell2 = cellsOnEdge(2,iEdge)
+!DIR$ IVDEP
+ !$acc loop vector
+ do k=1,nVertLevels
- !$acc loop vector
- do k=1,nVertLevels
- u_mix(k) = u(k,iEdge) - u_init(k) * cos( angleEdge(iEdge) ) &
- - v_init(k) * sin( angleEdge(iEdge) )
- end do
+ ! horizontal ke gradient and vorticity terms in the vector invariant formulation
+ ! of the horizontal momentum equation
+ tend_u(k,iEdge) = tend_u(k,iEdge) + rho_edge(k,iEdge)* (q(k) - (ke(k,cell2) - ke(k,cell1)) &
+ * invDcEdge(iEdge)) &
+ - u(k,iEdge)*0.5*(h_divergence(k,cell1)+h_divergence(k,cell2))
+#ifdef CURVATURE
+ ! curvature terms for the sphere
+ tend_u(k,iEdge) = tend_u(k,iEdge) &
+ - 2.*omega*cos(angleEdge(iEdge))*cos(latEdge(iEdge)) &
+ *rho_edge(k,iEdge)*.25*(w(k,cell1)+w(k+1,cell1)+w(k,cell2)+w(k+1,cell2)) &
+ - u(k,iEdge)*.25*(w(k+1,cell1)+w(k,cell1)+w(k,cell2)+w(k+1,cell2)) &
+ *rho_edge(k,iEdge) * inv_r_earth
+#endif
+ end do
- !$acc loop vector
- do k=2,nVertLevels-1
+ end do
+ !$acc end parallel
- z1 = 0.5*(zgrid(k-1,cell1)+zgrid(k-1,cell2))
- z2 = 0.5*(zgrid(k ,cell1)+zgrid(k ,cell2))
- z3 = 0.5*(zgrid(k+1,cell1)+zgrid(k+1,cell2))
- z4 = 0.5*(zgrid(k+2,cell1)+zgrid(k+2,cell2))
+ !
+ ! horizontal mixing for u
+ ! mixing terms are integrated using forward-Euler, so this tendency is only computed in the
+ ! first Runge-Kutta substep and saved for use in later RK substeps 2 and 3.
+ !
- zm = 0.5*(z1+z2)
- z0 = 0.5*(z2+z3)
- zp = 0.5*(z3+z4)
+ if (rk_step == 1) then
- tend_u_euler(k,iEdge) = tend_u_euler(k,iEdge) + rho_edge(k,iEdge) * v_mom_eddy_visc2*( &
- (u_mix(k+1)-u_mix(k ))/(zp-z0) &
- -(u_mix(k )-u_mix(k-1))/(z0-zm) )/(0.5*(zp-zm))
- end do
- end do
- !$acc end parallel
+!$OMP BARRIER
- end if ! mix perturbation state
+ call u_dissipation_3d( edgeStart, edgeEnd, edgeSolveStart, edgeSolveEnd, vertexStart, vertexEnd, &
+ cellStart, cellEnd, nCells, nEdges, nVertices, vertexDegree, &
+ cellsOnEdge, verticesOnEdge, edgesOnCell, edgesOnVertex, &
+ nEdgesOnCell, edgesOnCell_sign, edgesOnVertex_sign, &
+ invAreaCell, invAreaTriangle, invDvEdge, invDcEdge, &
+ angleEdge, dcEdge, dvEdge, meshScalingDel2, meshScalingDel4, &
+ config_mix_full, h_mom_eddy_visc4, v_mom_eddy_visc2, &
+ config_del4u_div_factor, zgrid, &
+ eddy_visc_horz, eddy_visc_vert, zz, rdzu, rdzw, &
+ fzm, fzp, les_model_opt, les_surface_opt, &
+ config_surface_drag_coefficient, &
+ delsq_u, delsq_vorticity, delsq_divergence, &
+ u, v, divergence, vorticity, rho_edge, rho_zz, u_init, v_init, ustm, tend_u_euler )
- end if ! vertical mixing of horizontal momentum
end if ! (rk_step 1 test for computing mixing terms)
@@ -5783,82 +6645,20 @@ subroutine atm_compute_dyn_tend_work(nCells, nEdges, nVertices, nVertLevels_dumm
if (rk_step == 1) then
-! !OMP BARRIER why is this openmp barrier here???
-
- ! del^4 horizontal filter. We compute this as del^2 ( del^2 (u) ).
- !
- ! First, storage to hold the result from the first del^2 computation.
- ! we copied code from the theta mixing, hence the theta* names.
-
- !$acc parallel default(present)
- !$acc loop gang worker
- do iCell=cellStart,cellEnd
-
- !$acc loop vector
- do k=1,nVertLevels
- delsq_w(k,iCell) = 0.0_RKIND
- end do
-
- !$acc loop vector
- do k=1,nVertLevels+1
- tend_w_euler(k,iCell) = 0.0_RKIND
- end do
-
- r_areaCell = invAreaCell(iCell)
-
- !$acc loop seq
- do i=1,nEdgesOnCell(iCell)
- iEdge = edgesOnCell(i,iCell)
-
- edge_sign = 0.5 * r_areaCell*edgesOnCell_sign(i,iCell) * dvEdge(iEdge) * invDcEdge(iEdge)
-
- cell1 = cellsOnEdge(1,iEdge)
- cell2 = cellsOnEdge(2,iEdge)
-
-!DIR$ IVDEP
- !$acc loop vector
- do k=2,nVertLevels
-
- w_turb_flux = edge_sign*(rho_edge(k,iEdge)+rho_edge(k-1,iEdge))*(w(k,cell2) - w(k,cell1))
- delsq_w(k,iCell) = delsq_w(k,iCell) + w_turb_flux
- w_turb_flux = w_turb_flux * meshScalingDel2(iEdge) * 0.25 * &
- (kdiff(k,cell1)+kdiff(k,cell2)+kdiff(k-1,cell1)+kdiff(k-1,cell2))
- tend_w_euler(k,iCell) = tend_w_euler(k,iCell) + w_turb_flux
- end do
- end do
- end do
- !$acc end parallel
-
-!$OMP BARRIER
-
- if (h_mom_eddy_visc4 > 0.0) then ! 4th order mixing is active
-
- !$acc parallel default(present)
- !$acc loop gang worker
- do iCell=cellSolveStart,cellSolveEnd ! Technically updating fewer cells than before...
-
- r_areaCell = h_mom_eddy_visc4 * invAreaCell(iCell)
-
- !$acc loop seq
- do i=1,nEdgesOnCell(iCell)
- iEdge = edgesOnCell(i,iCell)
- cell1 = cellsOnEdge(1,iEdge)
- cell2 = cellsOnEdge(2,iEdge)
-
- edge_sign = meshScalingDel4(iEdge)*r_areaCell*dvEdge(iEdge)*edgesOnCell_sign(i,iCell) * invDcEdge(iEdge)
-
- !$acc loop vector
- do k=2,nVertLevels
- tend_w_euler(k,iCell) = tend_w_euler(k,iCell) - edge_sign * (delsq_w(k,cell2) - delsq_w(k,cell1))
- end do
-
- end do
- end do
- !$acc end parallel
-
- end if ! 4th order mixing is active
-
- end if ! horizontal mixing for w computed in first rk_step
+ call w_dissipation_3d( cellStart, cellEnd, cellSolveStart, cellSolveEnd, &
+ nCells, nEdges, &
+ nEdgesOnCell, edgesOnCell, cellsOnEdge, edgesOnCell_sign, &
+ invAreaCell, invDcEdge, dvEdge, &
+ meshScalingDel2, meshScalingDel4, &
+ rdzw, rdzu, &
+ v_mom_eddy_visc2, h_mom_eddy_visc4, &
+ delsq_w, &
+ w, rho_edge, rho_zz, divergence, zz, &
+ eddy_visc_horz, eddy_visc_vert, &
+ les_model_opt, les_surface_opt, &
+ tend_w_euler )
+
+ end if ! mixing for w computed in first rk_step
! Note for OpenMP parallelization: We could avoid allocating the delsq_w scratch
! array, and just use the delsq_theta array as was previously done; however,
@@ -5911,27 +6711,6 @@ subroutine atm_compute_dyn_tend_work(nCells, nEdges, nVertices, nVertLevels_dumm
end do
!$acc end parallel
- if (rk_step == 1) then
-
- if ( v_mom_eddy_visc2 > 0.0 ) then
-
- !$acc parallel default(present)
- !$acc loop gang worker
- do iCell=cellSolveStart,cellSolveEnd
-!DIR$ IVDEP
- !$acc loop vector
- do k=2,nVertLevels
- tend_w_euler(k,iCell) = tend_w_euler(k,iCell) + v_mom_eddy_visc2*0.5*(rho_zz(k,iCell)+rho_zz(k-1,iCell))*( &
- (w(k+1,iCell)-w(k ,iCell))*rdzw(k) &
- -(w(k ,iCell)-w(k-1,iCell))*rdzw(k-1) )*rdzu(k)
- end do
- end do
- !$acc end parallel
-
- end if
-
- end if ! mixing term computed first rk_step
-
! add in mixing terms for w
!$acc parallel default(present)
@@ -6022,69 +6801,25 @@ subroutine atm_compute_dyn_tend_work(nCells, nEdges, nVertices, nVertLevels_dumm
if (rk_step == 1) then
- !$acc parallel default(present)
- !$acc loop gang worker
- do iCell=cellStart,cellEnd
-
- !$acc loop vector
- do k=1,nVertLevels
- delsq_theta(k,iCell) = 0.0_RKIND
- tend_theta_euler(k,iCell) = 0.0_RKIND
- end do
-
- r_areaCell = invAreaCell(iCell)
-
- !$acc loop seq
- do i=1,nEdgesOnCell(iCell)
- iEdge = edgesOnCell(i,iCell)
- edge_sign = r_areaCell*edgesOnCell_sign(i,iCell) * dvEdge(iEdge) * invDcEdge(iEdge)
- pr_scale = prandtl_inv * meshScalingDel2(iEdge)
- cell1 = cellsOnEdge(1,iEdge)
- cell2 = cellsOnEdge(2,iEdge)
-!DIR$ IVDEP
- !$acc loop vector
- do k=1,nVertLevels
-
-! we are computing the Smagorinsky filter at more points than needed here so as to pick up the delsq_theta for 4th order filter below
-
- theta_turb_flux = edge_sign*(theta_m(k,cell2) - theta_m(k,cell1))*rho_edge(k,iEdge)
- delsq_theta(k,iCell) = delsq_theta(k,iCell) + theta_turb_flux
- theta_turb_flux = theta_turb_flux*0.5*(kdiff(k,cell1)+kdiff(k,cell2)) * pr_scale
- tend_theta_euler(k,iCell) = tend_theta_euler(k,iCell) + theta_turb_flux
-
- end do
- end do
- end do
- !$acc end parallel
-
-!$OMP BARRIER
-
- if (h_theta_eddy_visc4 > 0.0) then ! 4th order mixing is active
-
- !$acc parallel default(present)
- !$acc loop gang worker
- do iCell=cellSolveStart,cellSolveEnd ! Technically updating fewer cells than before...
-
- r_areaCell = h_theta_eddy_visc4 * prandtl_inv * invAreaCell(iCell)
-
- !$acc loop seq
- do i=1,nEdgesOnCell(iCell)
-
- iEdge = edgesOnCell(i,iCell)
- edge_sign = meshScalingDel4(iEdge)*r_areaCell*dvEdge(iEdge)*edgesOnCell_sign(i,iCell)*invDcEdge(iEdge)
-
- cell1 = cellsOnEdge(1,iEdge)
- cell2 = cellsOnEdge(2,iEdge)
-
- !$acc loop vector
- do k=1,nVertLevels
- tend_theta_euler(k,iCell) = tend_theta_euler(k,iCell) - edge_sign*(delsq_theta(k,cell2) - delsq_theta(k,cell1))
- end do
- end do
- end do
- !$acc end parallel
-
- end if ! 4th order mixing is active
+ call scalar_dissipation_3d_les( cellStart, cellEnd, cellSolveStart, cellSolveEnd, &
+ nCells, nEdges, &
+ nEdgesOnCell, edgesOnCell, cellsOnEdge, edgesOnCell_sign, &
+ invAreaCell, invDcEdge, dvEdge, &
+ meshScalingDel2, meshScalingDel4, &
+ config_mix_full, t_init, zgrid, &
+ rdzw, rdzu, fzm, fzp, &
+ v_theta_eddy_visc2, h_theta_eddy_visc4, prandtl_inv, &
+ prandtl_3d_inv, &
+ delsq_theta, &
+ theta_m, rho_edge, rho_zz, zz, &
+ eddy_visc_horz, eddy_visc_vert, &
+ bn2, config_len_disp, scalars, tend_scalars, &
+ index_tke, index_qv, num_scalars, config_mix_scalars, &
+ les_model_opt, les_surface_opt, clock, dt, &
+ config_surface_heat_flux, config_surface_moisture_flux, &
+ ur_cell, vr_cell, &
+ hfx, qfx, &
+ tend_theta_euler, dynamics_substep )
end if ! theta mixing calculated first rk_step
@@ -6125,116 +6860,26 @@ subroutine atm_compute_dyn_tend_work(nCells, nEdges, nVertices, nVertLevels_dumm
end do
!$acc end parallel
- !
- ! vertical mixing for theta - 2nd order
- !
-
- if (rk_step == 1) then
-
- if ( v_theta_eddy_visc2 > 0.0 ) then ! vertical mixing for theta_m
-
- if (config_mix_full) then
-
- !$acc parallel default(present)
- !$acc loop gang worker
- do iCell = cellSolveStart,cellSolveEnd
-
- !$acc loop vector
- do k=2,nVertLevels-1
- z1 = zgrid(k-1,iCell)
- z2 = zgrid(k ,iCell)
- z3 = zgrid(k+1,iCell)
- z4 = zgrid(k+2,iCell)
-
- zm = 0.5*(z1+z2)
- z0 = 0.5*(z2+z3)
- zp = 0.5*(z3+z4)
-
- tend_theta_euler(k,iCell) = tend_theta_euler(k,iCell) + v_theta_eddy_visc2*prandtl_inv*rho_zz(k,iCell)*(&
- (theta_m(k+1,iCell)-theta_m(k ,iCell))/(zp-z0) &
- -(theta_m(k ,iCell)-theta_m(k-1,iCell))/(z0-zm) )/(0.5*(zp-zm))
- end do
- end do
- !$acc end parallel
-
- else ! idealized cases where we mix on the perturbation from the initial 1-D state
-
- !$acc parallel default(present)
- !$acc loop gang worker
- do iCell = cellSolveStart,cellSolveEnd
- do k=2,nVertLevels-1
- z1 = zgrid(k-1,iCell)
- z2 = zgrid(k ,iCell)
- z3 = zgrid(k+1,iCell)
- z4 = zgrid(k+2,iCell)
-
- zm = 0.5*(z1+z2)
- z0 = 0.5*(z2+z3)
- zp = 0.5*(z3+z4)
-
- tend_theta_euler(k,iCell) = tend_theta_euler(k,iCell) + v_theta_eddy_visc2*prandtl_inv*rho_zz(k,iCell)*(&
- ((theta_m(k+1,iCell)-t_init(k+1,iCell))-(theta_m(k ,iCell)-t_init(k,iCell)))/(zp-z0) &
- -((theta_m(k ,iCell)-t_init(k,iCell))-(theta_m(k-1,iCell)-t_init(k-1,iCell)))/(z0-zm) )/(0.5*(zp-zm))
- end do
- end do
- !$acc end parallel
-
- end if
-
- end if
-
- end if ! compute vertical theta mixing on first rk_step
-
!$acc parallel default(present)
!$acc loop gang worker
do iCell = cellSolveStart,cellSolveEnd
!DIR$ IVDEP
!$acc loop vector
do k=1,nVertLevels
-! tend_theta(k,iCell) = tend_theta(k,iCell) + tend_theta_euler(k,iCell)
tend_theta(k,iCell) = tend_theta(k,iCell) + tend_theta_euler(k,iCell) + tend_rtheta_physics(k,iCell)
end do
end do
!$acc end parallel
MPAS_ACC_TIMER_START('atm_compute_dyn_tend_work [ACC_data_xfer]')
- if (rk_step == 1) then
- !$acc exit data copyout(tend_w_euler)
- !$acc exit data copyout(tend_u_euler)
- !$acc exit data copyout(tend_theta_euler)
- !$acc exit data copyout(tend_rho)
-
- !$acc exit data delete(kdiff)
- !$acc exit data delete(tend_rho_physics)
- !$acc exit data delete(rb, rr_save)
- !$acc exit data delete(divergence, vorticity)
- !$acc exit data delete(v)
- !$acc exit data delete(u_init, v_init)
- else
- !$acc exit data delete(tend_w_euler)
- !$acc exit data delete(tend_u_euler)
- !$acc exit data delete(tend_theta_euler)
- !$acc exit data delete(tend_rho)
+ if (les_model_opt /= LES_MODEL_NONE) then
+ !$acc exit data copyout(bn2)
end if
- !$acc exit data copyout(tend_u)
- !$acc exit data delete(cqu, pp, u, w, pv_edge, rho_edge, ke)
- !$acc exit data copyout(h_divergence)
- !$acc exit data delete(ru, rw)
+ !$acc exit data delete(ustm, hfx, qfx)
!$acc exit data delete(rayleigh_damp_coef)
- !$acc exit data delete(tend_ru_physics)
- !$acc exit data copyout(tend_w)
- !$acc exit data delete(rho_zz)
- !$acc exit data copyout(tend_theta)
- !$acc exit data delete(theta_m)
- !$acc exit data delete(ru_save, theta_m_save)
- !$acc exit data delete(cqw)
- !$acc exit data delete(tend_rtheta_physics)
- !$acc exit data delete(rw_save, rt_diabatic_tend)
- !$acc exit data copyout(rthdynten)
- !$acc exit data delete(t_init)
-#ifdef CURVATURE
- !$acc exit data delete(ur_cell, vr_cell)
-#endif
+ !$acc exit data delete(eddy_visc_horz)
+ !$acc exit data delete(eddy_visc_vert)
+ !$acc exit data delete(prandtl_3d_inv)
MPAS_ACC_TIMER_STOP('atm_compute_dyn_tend_work [ACC_data_xfer]')
end subroutine atm_compute_dyn_tend_work
@@ -6403,26 +7048,10 @@ subroutine atm_compute_solve_diagnostics_work(nCells, nEdges, nVertices, &
logical :: reconstruct_v
- MPAS_ACC_TIMER_START('atm_compute_solve_diagnostics [ACC_data_xfer]')
- !$acc enter data copyin(cellsOnEdge,dcEdge,dvEdge, &
- !$acc edgesOnVertex,edgesOnVertex_sign,invAreaTriangle, &
- !$acc nEdgesOnCell,edgesOnCell, &
- !$acc edgesOnCell_sign,invAreaCell, &
- !$acc invAreaTriangle,edgesOnVertex, &
- !$acc verticesOnCell,kiteForCell,kiteAreasOnVertex, &
- !$acc nEdgesOnEdge,edgesOnEdge,weightsOnEdge, &
- !$acc fVertex, &
- !$acc verticesOnEdge, &
- !$acc invDvEdge,invDcEdge)
- !$acc enter data copyin(u,h)
- MPAS_ACC_TIMER_STOP('atm_compute_solve_diagnostics [ACC_data_xfer]')
!
! Compute height on cell edges at velocity locations
!
- MPAS_ACC_TIMER_START('atm_compute_solve_diagnostics [ACC_data_xfer]')
- !$acc enter data create(h_edge,vorticity,divergence)
- MPAS_ACC_TIMER_STOP('atm_compute_solve_diagnostics [ACC_data_xfer]')
!$acc parallel default(present)
!$acc loop gang
do iEdge=edgeStart,edgeEnd
@@ -6507,9 +7136,6 @@ subroutine atm_compute_solve_diagnostics_work(nCells, nEdges, nVertices, &
!
! Replace 2.0 with 2 in exponentiation to avoid outside chance that
! compiler will actually allow "float raised to float" operation
- MPAS_ACC_TIMER_START('atm_compute_solve_diagnostics [ACC_data_xfer]')
- !$acc enter data create(ke)
- MPAS_ACC_TIMER_STOP('atm_compute_solve_diagnostics [ACC_data_xfer]')
!$acc parallel default(present)
!$acc loop gang
do iCell=cellStart,cellEnd
@@ -6604,14 +7230,6 @@ subroutine atm_compute_solve_diagnostics_work(nCells, nEdges, nVertices, &
if(rk_step /= 3) reconstruct_v = .false.
end if
- MPAS_ACC_TIMER_START('atm_compute_solve_diagnostics [ACC_data_xfer]')
- if (reconstruct_v) then
- !$acc enter data create(v)
- else
- !$acc enter data copyin(v)
- end if
- MPAS_ACC_TIMER_STOP('atm_compute_solve_diagnostics [ACC_data_xfer]')
-
if (reconstruct_v) then
!$acc parallel default(present)
!$acc loop gang
@@ -6639,9 +7257,6 @@ subroutine atm_compute_solve_diagnostics_work(nCells, nEdges, nVertices, &
!
! Avoid dividing h_vertex by areaTriangle and move areaTriangle into
! numerator for the pv_vertex calculation
- MPAS_ACC_TIMER_START('atm_compute_solve_diagnostics [ACC_data_xfer]')
- !$acc enter data create(pv_vertex)
- MPAS_ACC_TIMER_STOP('atm_compute_solve_diagnostics [ACC_data_xfer]')
!$acc parallel default(present)
!$acc loop collapse(2)
do iVertex = vertexStart,vertexEnd
@@ -6665,9 +7280,6 @@ subroutine atm_compute_solve_diagnostics_work(nCells, nEdges, nVertices, &
! Compute pv at the edges
! ( this computes pv_edge at all edges bounding real cells )
!
- MPAS_ACC_TIMER_START('atm_compute_solve_diagnostics [ACC_data_xfer]')
- !$acc enter data create(pv_edge)
- MPAS_ACC_TIMER_STOP('atm_compute_solve_diagnostics [ACC_data_xfer]')
!$acc parallel default(present)
!$acc loop collapse(2)
do iEdge = edgeStart,edgeEnd
@@ -6685,9 +7297,6 @@ subroutine atm_compute_solve_diagnostics_work(nCells, nEdges, nVertices, &
! ( this computes pv_cell for all real cells )
! only needed for APVM upwinding
!
- MPAS_ACC_TIMER_START('atm_compute_solve_diagnostics [ACC_data_xfer]')
- !$acc enter data create(pv_cell)
- MPAS_ACC_TIMER_STOP('atm_compute_solve_diagnostics [ACC_data_xfer]')
!$acc parallel default(present)
!$acc loop gang
do iCell=cellStart,cellEnd
@@ -6726,9 +7335,6 @@ subroutine atm_compute_solve_diagnostics_work(nCells, nEdges, nVertices, &
! Merged loops for calculating gradPVt, gradPVn and pv_edge
! Also precomputed inverses of dvEdge and dcEdge to avoid repeated divisions
!
- MPAS_ACC_TIMER_START('atm_compute_solve_diagnostics [ACC_data_xfer]')
- !$acc enter data create(gradPVt,gradPVn)
- MPAS_ACC_TIMER_STOP('atm_compute_solve_diagnostics [ACC_data_xfer]')
r = config_apvm_upwinding * dt
!$acc parallel default(present)
!$acc loop gang
@@ -6745,31 +7351,10 @@ subroutine atm_compute_solve_diagnostics_work(nCells, nEdges, nVertices, &
end do
!$acc end parallel
- MPAS_ACC_TIMER_START('atm_compute_solve_diagnostics [ACC_data_xfer]')
- !$acc exit data delete(pv_cell,gradPVt,gradPVn)
- MPAS_ACC_TIMER_STOP('atm_compute_solve_diagnostics [ACC_data_xfer]')
end if ! apvm upwinding
- MPAS_ACC_TIMER_START('atm_compute_solve_diagnostics [ACC_data_xfer]')
- !$acc exit data delete(cellsOnEdge,dcEdge,dvEdge, &
- !$acc edgesOnVertex,edgesOnVertex_sign,invAreaTriangle, &
- !$acc nEdgesOnCell,edgesOnCell, &
- !$acc edgesOnCell_sign,invAreaCell, &
- !$acc invAreaTriangle,edgesOnVertex, &
- !$acc verticesOnCell,kiteForCell,kiteAreasOnVertex, &
- !$acc nEdgesOnEdge,edgesOnEdge,weightsOnEdge, &
- !$acc verticesOnEdge, &
- !$acc fVertex,invDvEdge,invDcEdge)
- !$acc exit data delete(u,h)
- !$acc exit data copyout(h_edge,vorticity,divergence, &
- !$acc ke, &
- !$acc v, &
- !$acc pv_vertex, &
- !$acc pv_edge)
- MPAS_ACC_TIMER_STOP('atm_compute_solve_diagnostics [ACC_data_xfer]')
-
end subroutine atm_compute_solve_diagnostics_work
@@ -6858,17 +7443,13 @@ subroutine atm_init_coupled_diagnostics(state, time_lev, diag, mesh, configs, &
call mpas_pool_get_array(mesh, 'zb3_cell', zb3_cell)
MPAS_ACC_TIMER_START('atm_init_coupled_diagnostics [ACC_data_xfer]')
- ! copyin invariant fields
- !$acc enter data copyin(cellsOnEdge,nEdgesOnCell,edgesOnCell, &
- !$acc edgesOnCell_sign,zz,fzm,fzp,zb,zb3, &
- !$acc zb_cell,zb3_cell)
! copyin the data that is only on the right-hand side
- !$acc enter data copyin(scalars(index_qv,:,:),u,w,rho,theta, &
+ !$acc enter data copyin(scalars(index_qv,:,:),w,rho,theta, &
!$acc rho_base,theta_base)
! copyin the data that will be modified in this routine
- !$acc enter data create(theta_m,rho_zz,ru,rw,rho_p,rtheta_base, &
+ !$acc enter data create(theta_m,ru,rw,rho_p,rtheta_base, &
!$acc rtheta_p,exner,exner_base,pressure_p, &
!$acc pressure_base)
MPAS_ACC_TIMER_STOP('atm_init_coupled_diagnostics [ACC_data_xfer]')
@@ -6992,17 +7573,12 @@ subroutine atm_init_coupled_diagnostics(state, time_lev, diag, mesh, configs, &
!$acc end parallel
MPAS_ACC_TIMER_START('atm_init_coupled_diagnostics [ACC_data_xfer]')
- ! delete invariant fields
- !$acc exit data delete(cellsOnEdge,nEdgesOnCell,edgesOnCell, &
- !$acc edgesOnCell_sign,zz,fzm,fzp,zb,zb3, &
- !$acc zb_cell,zb3_cell)
-
! delete the data that is only on the right-hand side
- !$acc exit data delete(scalars(index_qv,:,:),u,w,rho,theta, &
+ !$acc exit data delete(scalars(index_qv,:,:),w,rho,theta, &
!$acc rho_base,theta_base)
! copyout the data that will be modified in this routine
- !$acc exit data copyout(theta_m,rho_zz,ru,rw,rho_p,rtheta_base, &
+ !$acc exit data copyout(theta_m,ru,rw,rho_p,rtheta_base, &
!$acc rtheta_p,exner,exner_base,pressure_p, &
!$acc pressure_base)
MPAS_ACC_TIMER_STOP('atm_init_coupled_diagnostics [ACC_data_xfer]')
@@ -7069,13 +7645,6 @@ subroutine atm_rk_dynamics_substep_finish( state, diag, nVertLevels, dynamics_su
call mpas_pool_get_array(state, 'rho_zz', rho_zz_1, 1)
call mpas_pool_get_array(state, 'rho_zz', rho_zz_2, 2)
- MPAS_ACC_TIMER_START('atm_rk_dynamics_substep_finish [ACC_data_xfer]')
- !$acc enter data create(ru_save, u_1, rtheta_p_save, theta_m_1, rho_p_save, rw_save, &
- !$acc w_1, rho_zz_1) &
- !$acc copyin(ru, u_2, rtheta_p, rho_p, theta_m_2, rho_zz_2, rw, &
- !$acc w_2, ruAvg, wwAvg, ruAvg_split, wwAvg_split, rho_zz_old_split)
- MPAS_ACC_TIMER_STOP('atm_rk_dynamics_substep_finish [ACC_data_xfer]')
-
! Interim fix for the atm_compute_dyn_tend_work subroutine accessing uninitialized values
! in garbage cells of theta_m
!$acc kernels
@@ -7180,13 +7749,6 @@ subroutine atm_rk_dynamics_substep_finish( state, diag, nVertLevels, dynamics_su
!$acc end parallel
end if
- MPAS_ACC_TIMER_START('atm_rk_dynamics_substep_finish [ACC_data_xfer]')
- !$acc exit data copyout(ru_save, u_1, rtheta_p_save, rho_p_save, rw_save, &
- !$acc w_1, theta_m_1, rho_zz_1, ruAvg, wwAvg, ruAvg_split, &
- !$acc wwAvg_split) &
- !$acc delete(ru, u_2, rtheta_p, rho_p, theta_m_2, rho_zz_2, rw, &
- !$acc w_2, rho_zz_old_split)
- MPAS_ACC_TIMER_STOP('atm_rk_dynamics_substep_finish [ACC_data_xfer]')
end subroutine atm_rk_dynamics_substep_finish
@@ -7241,9 +7803,6 @@ subroutine atm_zero_gradient_w_bdy_work( w, bdyMaskCell, nearestRelaxationCell,
integer :: iCell, k
- MPAS_ACC_TIMER_START('atm_zero_gradient_w_bdy_work [ACC_data_xfer]')
- !$acc enter data copyin(w)
- MPAS_ACC_TIMER_STOP('atm_zero_gradient_w_bdy_work [ACC_data_xfer]')
!$acc parallel default(present)
!$acc loop gang worker
@@ -7259,9 +7818,6 @@ subroutine atm_zero_gradient_w_bdy_work( w, bdyMaskCell, nearestRelaxationCell,
end do
!$acc end parallel
- MPAS_ACC_TIMER_START('atm_zero_gradient_w_bdy_work [ACC_data_xfer]')
- !$acc exit data copyout(w)
- MPAS_ACC_TIMER_STOP('atm_zero_gradient_w_bdy_work [ACC_data_xfer]')
end subroutine atm_zero_gradient_w_bdy_work
@@ -7302,11 +7858,6 @@ subroutine atm_bdy_adjust_dynamics_speczone_tend( tend, mesh, config, nVertLevel
call mpas_pool_get_array(mesh, 'bdyMaskEdge', bdyMaskEdge)
call mpas_pool_get_array(tend, 'rt_diabatic_tend', rt_diabatic_tend)
- MPAS_ACC_TIMER_START('atm_bdy_adjust_dynamics_speczone_tend [ACC_data_xfer]')
- !$acc enter data copyin(tend_ru,tend_rho,tend_rt,tend_rw, &
- !$acc rt_diabatic_tend)
- MPAS_ACC_TIMER_STOP('atm_bdy_adjust_dynamics_speczone_tend [ACC_data_xfer]')
-
!$acc parallel default(present)
!$acc loop gang worker
do iCell = cellSolveStart, cellSolveEnd
@@ -7333,11 +7884,6 @@ subroutine atm_bdy_adjust_dynamics_speczone_tend( tend, mesh, config, nVertLevel
end if
end do
!$acc end parallel
-
- MPAS_ACC_TIMER_START('atm_bdy_adjust_dynamics_speczone_tend [ACC_data_xfer]')
- !$acc exit data copyout(tend_ru,tend_rho,tend_rt, &
- !$acc tend_rw,rt_diabatic_tend)
- MPAS_ACC_TIMER_STOP('atm_bdy_adjust_dynamics_speczone_tend [ACC_data_xfer]')
end subroutine atm_bdy_adjust_dynamics_speczone_tend
@@ -7423,10 +7969,7 @@ subroutine atm_bdy_adjust_dynamics_relaxzone_tend( config, tend, state, diag, me
divdamp_coef = divdamp_coef_ptr
vertexDegree = vertexDegree_ptr
- MPAS_ACC_TIMER_START('atm_bdy_adjust_dynamics_relaxzone_tend [ACC_data_xfer]')
- !$acc enter data copyin(tend_rho, tend_rt, rho_zz, theta_m, tend_ru, ru)
!$acc enter data create(divergence1, divergence2, vorticity1, vorticity2)
- MPAS_ACC_TIMER_STOP('atm_bdy_adjust_dynamics_relaxzone_tend [ACC_data_xfer]')
! First, Rayleigh damping terms for ru, rtheta_m and rho_zz
!$acc parallel default(present)
@@ -7571,11 +8114,7 @@ subroutine atm_bdy_adjust_dynamics_relaxzone_tend( config, tend, state, diag, me
end do ! end of loop over edges
!$acc end parallel
- MPAS_ACC_TIMER_START('atm_bdy_adjust_dynamics_relaxzone_tend [ACC_data_xfer]')
- !$acc exit data copyout(tend_rho, tend_rt, tend_ru)
- !$acc exit data delete(rho_zz, theta_m, ru, &
- !$acc divergence1, divergence2, vorticity1, vorticity2)
- MPAS_ACC_TIMER_STOP('atm_bdy_adjust_dynamics_relaxzone_tend [ACC_data_xfer]')
+ !$acc exit data delete(divergence1, divergence2, vorticity1, vorticity2)
end subroutine atm_bdy_adjust_dynamics_relaxzone_tend
@@ -7609,10 +8148,6 @@ subroutine atm_bdy_reset_speczone_values( state, diag, mesh, nVertLevels, &
call mpas_pool_get_array(state, 'theta_m', theta_m, 2)
call mpas_pool_get_array(diag, 'rtheta_p', rtheta_p)
call mpas_pool_get_array(diag, 'rtheta_base', rtheta_base)
-
- MPAS_ACC_TIMER_START('atm_bdy_reset_speczone_values [ACC_data_xfer]')
- !$acc enter data copyin(rtheta_base, theta_m, rtheta_p)
- MPAS_ACC_TIMER_STOP('atm_bdy_reset_speczone_values [ACC_data_xfer]')
!$acc parallel default(present)
!$acc loop gang worker
@@ -7627,11 +8162,6 @@ subroutine atm_bdy_reset_speczone_values( state, diag, mesh, nVertLevels, &
end do
!$acc end parallel
- MPAS_ACC_TIMER_START('atm_bdy_reset_speczone_values [ACC_data_xfer]')
- !$acc exit data copyout(theta_m, rtheta_p) &
- !$acc delete(rtheta_base)
- MPAS_ACC_TIMER_STOP('atm_bdy_reset_speczone_values [ACC_data_xfer]')
-
end subroutine atm_bdy_reset_speczone_values
!-------------------------------------------------------------------------
@@ -7720,10 +8250,7 @@ subroutine atm_bdy_adjust_scalars_work( scalars_new, scalars_driving, dt, dt_rk,
integer :: iCell, iEdge, iScalar, i, k, cell1, cell2
!---
- MPAS_ACC_TIMER_START('atm_bdy_adjust_scalars [ACC_data_xfer]')
- !$acc enter data create(scalars_tmp) &
- !$acc copyin(scalars_new)
- MPAS_ACC_TIMER_STOP('atm_bdy_adjust_scalars [ACC_data_xfer]')
+ !$acc enter data create(scalars_tmp)
!$acc parallel default(present)
!$acc loop gang worker
@@ -7805,10 +8332,7 @@ subroutine atm_bdy_adjust_scalars_work( scalars_new, scalars_driving, dt, dt_rk,
end do
!$acc end parallel
- MPAS_ACC_TIMER_START('atm_bdy_adjust_scalars [ACC_data_xfer]')
- !$acc exit data delete(scalars_tmp) &
- !$acc copyout(scalars_new)
- MPAS_ACC_TIMER_STOP('atm_bdy_adjust_scalars [ACC_data_xfer]')
+ !$acc exit data delete(scalars_tmp)
end subroutine atm_bdy_adjust_scalars_work
@@ -7878,10 +8402,6 @@ subroutine atm_bdy_set_scalars_work( scalars_driving, scalars_new, &
!---
- MPAS_ACC_TIMER_START('atm_bdy_set_scalars_work [ACC_data_xfer]')
- !$acc enter data copyin(scalars_new)
- MPAS_ACC_TIMER_STOP('atm_bdy_set_scalars_work [ACC_data_xfer]')
-
!$acc parallel default(present)
!$acc loop gang worker
do iCell = cellSolveStart, cellSolveEnd ! threaded over cells
@@ -7902,10 +8422,6 @@ subroutine atm_bdy_set_scalars_work( scalars_driving, scalars_new, &
end do ! updates now in temp storage
!$acc end parallel
-
- MPAS_ACC_TIMER_START('atm_bdy_set_scalars_work [ACC_data_xfer]')
- !$acc exit data copyout(scalars_new)
- MPAS_ACC_TIMER_STOP('atm_bdy_set_scalars_work [ACC_data_xfer]')
end subroutine atm_bdy_set_scalars_work
@@ -7975,16 +8491,6 @@ subroutine summarize_timestep(domain)
nVertLevels = nVertLevels_ptr
num_scalars = num_scalars_ptr
- MPAS_ACC_TIMER_START('summarize_timestep [ACC_data_xfer]')
- if (config_print_detailed_minmax_vel) then
- !$acc enter data copyin(w,u,v)
- else if (config_print_global_minmax_vel) then
- !$acc enter data copyin(w,u)
- end if
- if (config_print_global_minmax_sca) then
- !$acc enter data copyin(scalars)
- end if
- MPAS_ACC_TIMER_STOP('summarize_timestep [ACC_data_xfer]')
if (config_print_detailed_minmax_vel) then
call mpas_log_write('')
@@ -8343,17 +8849,6 @@ subroutine summarize_timestep(domain)
end if
- MPAS_ACC_TIMER_START('summarize_timestep [ACC_data_xfer]')
- if (config_print_detailed_minmax_vel) then
- !$acc exit data delete(w,u,v)
- else if (config_print_global_minmax_vel) then
- !$acc exit data delete(w,u)
- end if
- if (config_print_global_minmax_sca) then
- !$acc exit data delete(scalars)
- end if
- MPAS_ACC_TIMER_STOP('summarize_timestep [ACC_data_xfer]')
-
end subroutine summarize_timestep
end module atm_time_integration
diff --git a/src/core_atmosphere/mpas_atm_core.F b/src/core_atmosphere/mpas_atm_core.F
index f7d04a1f0c..bae45193d5 100644
--- a/src/core_atmosphere/mpas_atm_core.F
+++ b/src/core_atmosphere/mpas_atm_core.F
@@ -18,12 +18,13 @@ module atm_core
! in a named group
!
abstract interface
- subroutine halo_exchange_routine(domain, halo_group, ierr)
+ subroutine halo_exchange_routine(domain, halo_group, withGPUAwareMPI, ierr)
use mpas_derived_types, only : domain_type
type (domain_type), intent(inout) :: domain
character(len=*), intent(in) :: halo_group
+ logical, intent(in), optional :: withGPUAwareMPI
integer, intent(out), optional :: ierr
end subroutine halo_exchange_routine
@@ -43,7 +44,8 @@ function atm_core_init(domain, startTimeStamp) result(ierr)
use mpas_atm_dimensions, only : mpas_atm_set_dims
use mpas_atm_diagnostics_manager, only : mpas_atm_diag_setup
use mpas_atm_threading, only : mpas_atm_threading_init
- use atm_time_integration, only : mpas_atm_dynamics_init
+ use atm_time_integration, only : mpas_atm_dynamics_init, &
+ mpas_atm_pre_dynamics_h2d, mpas_atm_post_dynamics_d2h
use mpas_timer, only : mpas_timer_start, mpas_timer_stop
use mpas_attlist, only : mpas_modify_att
use mpas_string_utils, only : mpas_string_replace
@@ -509,6 +511,7 @@ subroutine atm_mpas_init_block(dminfo, stream_manager, block, mesh, dt)
call mpas_pool_get_dimension(block % dimensions, 'edgeSolveThreadStart', edgeSolveThreadStart)
call mpas_pool_get_dimension(block % dimensions, 'edgeSolveThreadEnd', edgeSolveThreadEnd)
+ call mpas_atm_pre_computesolvediag_h2d(block)
!$OMP PARALLEL DO
do thread=1,nThreads
if (.not. config_do_restart .or. (config_do_restart .and. config_do_DAcycling)) then
@@ -527,6 +530,7 @@ subroutine atm_mpas_init_block(dminfo, stream_manager, block, mesh, dt)
edgeThreadStart(thread), edgeThreadEnd(thread))
end do
!$OMP END PARALLEL DO
+ call mpas_atm_post_computesolvediag_d2h(block)
deallocate(ke_vertex)
deallocate(ke_edge)
@@ -540,13 +544,18 @@ subroutine atm_mpas_init_block(dminfo, stream_manager, block, mesh, dt)
call mpas_pool_get_array(diag, 'uReconstructZ', uReconstructZ)
call mpas_pool_get_array(diag, 'uReconstructZonal', uReconstructZonal)
call mpas_pool_get_array(diag, 'uReconstructMeridional', uReconstructMeridional)
+ call mpas_reconstruct_2d_h2d(mesh, u, uReconstructX, uReconstructY, uReconstructZ, &
+ uReconstructZonal, uReconstructMeridional)
call mpas_reconstruct(mesh, u, &
uReconstructX, &
uReconstructY, &
uReconstructZ, &
uReconstructZonal, &
- uReconstructMeridional &
+ uReconstructMeridional, &
+ lACC = .true. &
)
+ call mpas_reconstruct_2d_d2h(mesh, u, uReconstructX, uReconstructY, uReconstructZ, &
+ uReconstructZonal, uReconstructMeridional)
#ifdef DO_PHYSICS
!proceed with initialization of physics parameterization if moist_physics is set to true:
diff --git a/src/core_atmosphere/mpas_atm_halos.F b/src/core_atmosphere/mpas_atm_halos.F
index df02ee30a2..19c4a5be46 100644
--- a/src/core_atmosphere/mpas_atm_halos.F
+++ b/src/core_atmosphere/mpas_atm_halos.F
@@ -15,12 +15,13 @@ module mpas_atm_halos
! in a named group
!
abstract interface
- subroutine halo_exchange_routine(domain, halo_group, ierr)
+ subroutine halo_exchange_routine(domain, halo_group, withGPUAwareMPI, ierr)
use mpas_derived_types, only : domain_type
type (domain_type), intent(inout) :: domain
character(len=*), intent(in) :: halo_group
+ logical, intent(in), optional :: withGPUAwareMPI
integer, intent(out), optional :: ierr
end subroutine halo_exchange_routine
@@ -61,18 +62,23 @@ subroutine atm_build_halo_groups(domain, ierr)
! Local variables
character(len=StrKIND), pointer :: config_halo_exch_method
+ logical, pointer :: config_gpu_aware_mpi
!
! Determine from the namelist option config_halo_exch_method which halo exchange method to employ
!
call mpas_pool_get_config(domain % blocklist % configs, 'config_halo_exch_method', config_halo_exch_method)
+ call mpas_pool_get_config(domain % blocklist % configs, 'config_gpu_aware_mpi', config_gpu_aware_mpi)
if (trim(config_halo_exch_method) == 'mpas_dmpar') then
call mpas_log_write('')
call mpas_log_write('*** Using ''mpas_dmpar'' routines for exchanging halos')
call mpas_log_write('')
+ if (config_gpu_aware_mpi) then
+ call mpas_log_write('GPU-aware MPI is not presently supported with config_halo_exch_method = mpas_dmpar',MPAS_LOG_CRIT)
+ end if
!
! Set up halo exchange groups used during atmosphere core initialization
!
diff --git a/src/core_atmosphere/physics/mpas_atmphys_interface.F b/src/core_atmosphere/physics/mpas_atmphys_interface.F
index 71e46dfcd2..76a7e4fb6d 100644
--- a/src/core_atmosphere/physics/mpas_atmphys_interface.F
+++ b/src/core_atmosphere/physics/mpas_atmphys_interface.F
@@ -6,17 +6,29 @@
! distributed with this code, or at http://mpas-dev.github.com/license.html
!
!=================================================================================================================
+
+#ifdef MPAS_OPENACC
+#define MPAS_ACC_TIMER_START(X) call mpas_timer_start(X)
+#define MPAS_ACC_TIMER_STOP(X) call mpas_timer_stop(X)
+#else
+#define MPAS_ACC_TIMER_START(X)
+#define MPAS_ACC_TIMER_STOP(X)
+#endif
+
module mpas_atmphys_interface
use mpas_kind_types
use mpas_pool_routines
use mpas_atmphys_constants
use mpas_atmphys_vars
+ use mpas_timer
implicit none
private
public:: allocate_forall_physics, &
deallocate_forall_physics, &
+ update_d2h_pre_microphysics, &
+ update_h2d_post_microphysics, &
MPAS_to_physics, &
microphysics_from_MPAS, &
microphysics_to_MPAS
@@ -545,6 +557,40 @@ subroutine MPAS_to_physics(configs,mesh,state,time_lev,diag,diag_physics,its,ite
end subroutine MPAS_to_physics
+!=================================================================================================================
+ subroutine update_d2h_pre_microphysics(configs,state,diag,time_lev)
+!=================================================================================================================
+
+!input variables:
+ type(mpas_pool_type),intent(in):: configs
+ type(mpas_pool_type),intent(in):: state
+ type(mpas_pool_type),intent(in):: diag
+
+ integer:: time_lev
+
+!local pointers:
+ real(kind=RKIND),dimension(:,:),pointer :: exner,pressure_b,w
+ real(kind=RKIND),dimension(:,:),pointer :: rho_zz,theta_m,pressure_p
+ real(kind=RKIND),dimension(:,:,:),pointer:: scalars
+
+
+ MPAS_ACC_TIMER_START('update_d2h_pre_microphysics [ACC_data_xfer]')
+ call mpas_pool_get_array(diag,'exner' ,exner )
+ call mpas_pool_get_array(diag,'pressure_base',pressure_b)
+ call mpas_pool_get_array(diag,'pressure_p' ,pressure_p)
+
+ call mpas_pool_get_array(state,'rho_zz' ,rho_zz ,time_lev)
+ call mpas_pool_get_array(state,'theta_m',theta_m,time_lev)
+ call mpas_pool_get_array(state,'w' ,w ,time_lev)
+ !$acc update host(exner, pressure_b, pressure_p, rho_zz, theta_m, w)
+
+ call mpas_pool_get_array(state,'scalars',scalars,time_lev)
+ !$acc update host(scalars)
+
+ MPAS_ACC_TIMER_STOP('update_d2h_pre_microphysics [ACC_data_xfer]')
+
+end subroutine update_d2h_pre_microphysics
+
!=================================================================================================================
subroutine microphysics_from_MPAS(configs,mesh,state,time_lev,diag,diag_physics,tend_physics,its,ite)
!=================================================================================================================
@@ -599,7 +645,7 @@ subroutine microphysics_from_MPAS(configs,mesh,state,time_lev,diag,diag_physics,
call mpas_pool_get_dimension(state,'index_qv',index_qv)
call mpas_pool_get_dimension(state,'index_qc',index_qc)
call mpas_pool_get_dimension(state,'index_qr',index_qr)
- call mpas_pool_get_array(state,'scalars',scalars,time_lev)
+ call mpas_pool_get_array(state,'scalars',scalars,time_lev)
qv => scalars(index_qv,:,:)
qc => scalars(index_qc,:,:)
qr => scalars(index_qr,:,:)
@@ -1042,6 +1088,48 @@ subroutine microphysics_to_MPAS(configs,mesh,state,time_lev,diag,diag_physics,te
end subroutine microphysics_to_MPAS
+ !=================================================================================================================
+ subroutine update_h2d_post_microphysics(configs,state,diag,tend,time_lev)
+!=================================================================================================================
+
+!input variables:
+ type(mpas_pool_type),intent(in):: configs
+ type(mpas_pool_type),intent(in):: state
+ type(mpas_pool_type),intent(in):: diag
+ type(mpas_pool_type),intent(inout):: tend
+
+
+ integer:: time_lev
+
+!local pointers:
+ real(kind=RKIND),dimension(:,:),pointer :: exner,exner_b,pressure_b,rtheta_p,rtheta_b
+ real(kind=RKIND),dimension(:,:),pointer :: rho_zz,theta_m,pressure_p
+ real(kind=RKIND),dimension(:,:,:),pointer:: scalars
+ real(kind=RKIND),dimension(:,:),pointer :: rt_diabatic_tend
+
+ call mpas_pool_get_array(diag,'exner' ,exner )
+ call mpas_pool_get_array(diag,'exner_base' ,exner_b )
+ call mpas_pool_get_array(diag,'pressure_base',pressure_b)
+ call mpas_pool_get_array(diag,'pressure_p' ,pressure_p)
+ call mpas_pool_get_array(diag,'rtheta_base' ,rtheta_b )
+ call mpas_pool_get_array(diag,'rtheta_p' ,rtheta_p )
+
+ call mpas_pool_get_array(state,'rho_zz' ,rho_zz ,time_lev)
+ call mpas_pool_get_array(state,'theta_m',theta_m,time_lev)
+
+ call mpas_pool_get_array(state,'scalars',scalars,time_lev)
+
+ call mpas_pool_get_array(tend,'rt_diabatic_tend',rt_diabatic_tend)
+
+
+ MPAS_ACC_TIMER_START('update_h2d_post_microphysics [ACC_data_xfer]')
+ !$acc update device(exner, exner_b, pressure_b, pressure_p, rtheta_b)
+ !$acc update device(rtheta_p, rho_zz, theta_m, scalars)
+ !$acc update device(rt_diabatic_tend)
+ MPAS_ACC_TIMER_STOP('update_h2d_post_microphysics [ACC_data_xfer]')
+
+end subroutine update_h2d_post_microphysics
+
!=================================================================================================================
end module mpas_atmphys_interface
!=================================================================================================================
diff --git a/src/core_atmosphere/physics/mpas_atmphys_todynamics.F b/src/core_atmosphere/physics/mpas_atmphys_todynamics.F
index 284b072851..71f37eb550 100644
--- a/src/core_atmosphere/physics/mpas_atmphys_todynamics.F
+++ b/src/core_atmosphere/physics/mpas_atmphys_todynamics.F
@@ -6,6 +6,15 @@
! distributed with this code, or at http://mpas-dev.github.com/license.html
!
!=================================================================================================================
+
+#ifdef MPAS_OPENACC
+#define MPAS_ACC_TIMER_START(X) call mpas_timer_start(X)
+#define MPAS_ACC_TIMER_STOP(X) call mpas_timer_stop(X)
+#else
+#define MPAS_ACC_TIMER_START(X)
+#define MPAS_ACC_TIMER_STOP(X)
+#endif
+
module mpas_atmphys_todynamics
use mpas_kind_types
use mpas_pool_routines
@@ -13,10 +22,11 @@ module mpas_atmphys_todynamics
use mpas_atm_dimensions
use mpas_atmphys_constants, only: R_d,R_v,degrad
+ use mpas_timer
implicit none
private
- public:: physics_get_tend
+ public:: physics_get_tend, update_d2h_pre_physics_get_tend, update_h2d_post_physics_get_tend
!Interface between the physics parameterizations and the non-hydrostatic dynamical core.
@@ -46,12 +56,13 @@ module mpas_atmphys_todynamics
! in a named group
!
abstract interface
- subroutine halo_exchange_routine(domain, halo_group, ierr)
+ subroutine halo_exchange_routine(domain, halo_group, withGPUAwareMPI, ierr)
use mpas_derived_types, only : domain_type
type (domain_type), intent(inout) :: domain
character(len=*), intent(in) :: halo_group
+ logical, intent(in), optional :: withGPUAwareMPI
integer, intent(out), optional :: ierr
end subroutine halo_exchange_routine
@@ -60,6 +71,40 @@ end subroutine halo_exchange_routine
contains
+
+!=================================================================================================================
+ subroutine update_d2h_pre_physics_get_tend(configs,state,diag,tend)
+!=================================================================================================================
+
+!input variables:
+ type(mpas_pool_type),intent(in):: configs
+ type(mpas_pool_type),intent(in):: state
+ type(mpas_pool_type),intent(in):: diag
+ type(mpas_pool_type),intent(in):: tend
+
+!local variables:
+ real(kind=RKIND),dimension(:,:),pointer:: mass ! time level 2 rho_zz
+ real(kind=RKIND),dimension(:,:),pointer:: mass_edge ! diag rho_edge
+ real(kind=RKIND),dimension(:,:),pointer:: theta_m ! time level 1
+ real(kind=RKIND),dimension(:,:,:),pointer:: scalars
+
+ real(kind=RKIND),dimension(:,:),pointer:: tend_u_phys
+ real(kind=RKIND),dimension(:,:,:),pointer:: tend_scalars
+
+ MPAS_ACC_TIMER_START('atm_srk3: physics ACC_data_xfer')
+ call mpas_pool_get_array(state,'theta_m' ,theta_m,1)
+ call mpas_pool_get_array(state,'scalars' ,scalars,1)
+ call mpas_pool_get_array(state,'rho_zz' ,mass,2 )
+ call mpas_pool_get_array(diag ,'rho_edge',mass_edge)
+ call mpas_pool_get_array(diag ,'tend_u_phys',tend_u_phys)
+
+ !$acc update self(theta_m, scalars, mass, mass_edge)
+
+ call mpas_pool_get_array(tend,'scalars_tend',tend_scalars)
+ !$acc update self(tend_scalars) ! Probably not needed
+ MPAS_ACC_TIMER_STOP('atm_srk3: physics ACC_data_xfer')
+
+ end subroutine update_d2h_pre_physics_get_tend
!=================================================================================================================
subroutine physics_get_tend(block,mesh,state,diag,tend,tend_physics,configs,rk_step,dynamics_substep, &
@@ -245,6 +290,26 @@ subroutine physics_get_tend(block,mesh,state,diag,tend,tend_physics,configs,rk_s
end subroutine physics_get_tend
+ !=================================================================================================================
+ subroutine update_h2d_post_physics_get_tend(configs,state,diag,tend)
+!=================================================================================================================
+
+!input variables:
+ type(mpas_pool_type),intent(in):: configs
+ type(mpas_pool_type),intent(in):: state
+ type(mpas_pool_type),intent(in):: diag
+ type(mpas_pool_type),intent(in):: tend
+
+!local variables:
+ real(kind=RKIND),dimension(:,:,:),pointer:: tend_scalars
+
+ MPAS_ACC_TIMER_START('atm_srk3: physics ACC_data_xfer')
+ call mpas_pool_get_array(tend,'scalars_tend',tend_scalars)
+ !$acc update device(tend_scalars)
+ MPAS_ACC_TIMER_STOP('atm_srk3: physics ACC_data_xfer')
+
+ end subroutine update_h2d_post_physics_get_tend
+
!=================================================================================================================
subroutine physics_get_tend_work( &
block,mesh,nCells,nEdges,nCellsSolve,nEdgesSolve,rk_step,dynamics_substep, &
diff --git a/src/core_init_atmosphere/Registry.xml b/src/core_init_atmosphere/Registry.xml
index cf4934a81b..b4f4622246 100644
--- a/src/core_init_atmosphere/Registry.xml
+++ b/src/core_init_atmosphere/Registry.xml
@@ -468,6 +468,11 @@
+
+
+
+
+
@@ -573,6 +578,11 @@
+
+
+
+
+
@@ -1114,6 +1124,21 @@
+
+
+
+
+
+
+
+
+
+
@@ -1171,6 +1196,8 @@
+
diff --git a/src/core_init_atmosphere/mpas_atm_advection.F b/src/core_init_atmosphere/mpas_atm_advection.F
index f4d44c984e..4852b17117 100644
--- a/src/core_init_atmosphere/mpas_atm_advection.F
+++ b/src/core_init_atmosphere/mpas_atm_advection.F
@@ -11,6 +11,7 @@ module atm_advection
use mpas_derived_types
use mpas_pool_routines
use mpas_constants
+ use mpas_vector_operations
use mpas_abort, only : mpas_dmpar_global_abort
use mpas_log, only : mpas_log_write
@@ -758,10 +759,14 @@ subroutine atm_initialize_deformation_weights( mesh, nCells, on_a_sphere, sphere
real (kind=RKIND), dimension(:,:), pointer :: defc_a, defc_b
real (kind=RKIND), dimension(:,:), pointer :: cell_gradient_coef_x, cell_gradient_coef_y
+ real (kind=RKIND), dimension(:,:), pointer :: deformation_coef_c2, deformation_coef_s2, deformation_coef_cs
+ real (kind=RKIND), dimension(:,:), pointer :: deformation_coef_c, deformation_coef_s
integer, dimension(:,:), pointer :: cellsOnEdge, edgesOnCell, cellsOnCell, verticesOnCell
integer, dimension(:), pointer :: nEdgesOnCell
real (kind=RKIND), dimension(:), pointer :: xCell, yCell, zCell
real (kind=RKIND), dimension(:), pointer :: xVertex, yVertex, zVertex
+ real (kind=RKIND), dimension(:), pointer :: xEdge, yEdge, zEdge, angleEdge
+ real (kind=RKIND), dimension(:), pointer :: areaCell
real (kind=RKIND), dimension(nCells) :: theta_abs
@@ -772,19 +777,32 @@ subroutine atm_initialize_deformation_weights( mesh, nCells, on_a_sphere, sphere
integer :: iCell
real (kind=RKIND) :: pii
real (kind=RKIND), dimension(25) :: xp, yp
+ real (kind=RKIND) :: xe, ye
real (kind=RKIND) :: length_scale
integer, dimension(25) :: cell_list
- integer :: iv
+ integer :: iv, ie
logical :: do_the_cell
real (kind=RKIND) :: area_cell, sint2, cost2, sint_cost, dx, dy
+ logical, pointer :: is_periodic
+ real(kind=RKIND), pointer :: x_period, y_period
+
+
+ call mpas_pool_get_config(mesh, 'is_periodic', is_periodic)
+ call mpas_pool_get_config(mesh, 'x_period', x_period)
+ call mpas_pool_get_config(mesh, 'y_period', y_period)
call mpas_pool_get_array(mesh, 'defc_a', defc_a)
call mpas_pool_get_array(mesh, 'defc_b', defc_b)
call mpas_pool_get_array(mesh, 'cell_gradient_coef_x', cell_gradient_coef_x)
call mpas_pool_get_array(mesh, 'cell_gradient_coef_y', cell_gradient_coef_y)
+ call mpas_pool_get_array(mesh, 'deformation_coef_c2', deformation_coef_c2)
+ call mpas_pool_get_array(mesh, 'deformation_coef_s2', deformation_coef_s2)
+ call mpas_pool_get_array(mesh, 'deformation_coef_cs', deformation_coef_cs)
+ call mpas_pool_get_array(mesh, 'deformation_coef_c', deformation_coef_c)
+ call mpas_pool_get_array(mesh, 'deformation_coef_s', deformation_coef_s)
call mpas_pool_get_array(mesh, 'nEdgesOnCell', nEdgesOnCell)
call mpas_pool_get_array(mesh, 'cellsOnEdge', cellsOnEdge)
call mpas_pool_get_array(mesh, 'edgesOnCell', edgesOnCell)
@@ -796,9 +814,19 @@ subroutine atm_initialize_deformation_weights( mesh, nCells, on_a_sphere, sphere
call mpas_pool_get_array(mesh, 'xVertex', xVertex)
call mpas_pool_get_array(mesh, 'yVertex', yVertex)
call mpas_pool_get_array(mesh, 'zVertex', zVertex)
+ call mpas_pool_get_array(mesh, 'xEdge', xEdge)
+ call mpas_pool_get_array(mesh, 'yEdge', yEdge)
+ call mpas_pool_get_array(mesh, 'zEdge', zEdge)
+ call mpas_pool_get_array(mesh, 'angleEdge', angleEdge)
+ call mpas_pool_get_array(mesh, 'areaCell', areaCell)
defc_a(:,:) = 0.
defc_b(:,:) = 0.
+ deformation_coef_c2(:,:) = 0.
+ deformation_coef_s2(:,:) = 0.
+ deformation_coef_cs(:,:) = 0.
+ deformation_coef_c(:,:) = 0.
+ deformation_coef_s(:,:) = 0.
cell_gradient_coef_x(:,:) = 0.
cell_gradient_coef_y(:,:) = 0.
@@ -888,21 +916,58 @@ subroutine atm_initialize_deformation_weights( mesh, nCells, on_a_sphere, sphere
else ! On an x-y plane
- theta_abs(iCell) = 0.0
+ do i=1,n-1
+ iv = verticesOnCell(i,iCell)
+ xp(i) = mpas_fix_periodicity(xVertex(iv),xCell(iCell),x_period) - xCell(iCell)
+ yp(i) = mpas_fix_periodicity(yVertex(iv),yCell(iCell),y_period) - yCell(iCell)
+ end do
- xp(1) = xCell(iCell)
- yp(1) = yCell(iCell)
+ ! if(iCell.lt.11) then
+ ! call mpas_log_write(' setting defc coefs, cell $i', intArgs=(/iCell/))
+ ! do i=1,n-1
+ ! iv = verticesOnCell(i,iCell)
+ ! call mpas_log_write(' xp,yp,xvc,yvc, $r $r $r $r', realArgs=(/xp(i),yp(i),xVertex(iv)-xCell(iCell),yVertex(iv)-yCell(iCell)/))
+ ! end do
+ ! end if
- do i=2,n
- iv = verticesOnCell(i-1,iCell)
- xp(i) = xVertex(iv)
- yp(i) = yVertex(iv)
+ do i=1,n-1
+ ie = edgesOnCell(i,iCell)
+ xe = mpas_fix_periodicity(xEdge(ie),xCell(iCell),x_period) - xCell(iCell)
+ ye = mpas_fix_periodicity(yEdge(ie),yCell(iCell),y_period) - yCell(iCell)
+ thetat(i) = atan2(ye,xe)
end do
+ ! if(iCell .lt. 11) then
+ ! call mpas_log_write(' edge angles, plane calc, cell $i', intArgs=(/iCell/))
+ ! do i=1,n-1
+ ! call mpas_log_write(' edge angle $r', realArgs=(/thetat(i)*180./3.1415926/))
+ ! end do
+ ! end if
+
+ theta_abs(iCell) = thetat(1)
+
end if
! (1) compute cell area on the tangent plane used in the integrals
! (2) compute angle of cell edge normal vector. here we are repurposing thetat
+ thetat(1) = theta_abs(iCell)
+
+ do i=2,n-1
+ ip1 = i+1
+ if (ip1 == n) ip1 = 1
+ thetat(i) = plane_angle( 0.0_RKIND, 0.0_RKIND, 0.0_RKIND, &
+ xp(i)-xp(i-1), yp(i)-yp(i-1), 0.0_RKIND, &
+ xp(ip1)-xp(i), yp(ip1)-yp(i), 0.0_RKIND, &
+ 0.0_RKIND, 0.0_RKIND, 1.0_RKIND)
+ thetat(i) = thetat(i) + thetat(i-1)
+ end do
+
+ ! if(iCell .lt. 11) then
+ ! call mpas_log_write(' edge angles, generic calc, cell $i', intArgs=(/iCell/))
+ ! do i=1,n-1
+ ! call mpas_log_write(' edge angle $r', realArgs=(/thetat(i)*180./3.1415926/))
+ ! end do
+ ! end if
area_cell = 0.
do i=1,n-1
@@ -927,15 +992,241 @@ subroutine atm_initialize_deformation_weights( mesh, nCells, on_a_sphere, sphere
defc_b(i,iCell) = dl*2.*sint_cost/area_cell
cell_gradient_coef_x(i,iCell) = dl*cos(thetat(i))/area_cell
cell_gradient_coef_y(i,iCell) = dl*sin(thetat(i))/area_cell
+ deformation_coef_c2(i,iCell) = dl*cost2/area_cell
+ deformation_coef_s2(i,iCell) = dl*sint2/area_cell
+ deformation_coef_cs(i,iCell) = dl*sint_cost/area_cell
+ deformation_coef_c(i,iCell) = dl*cos(thetat(i))/area_cell
+ deformation_coef_s(i,iCell) = dl*sin(thetat(i))/area_cell
if (cellsOnEdge(1,EdgesOnCell(i,iCell)) /= iCell) then
defc_a(i,iCell) = - defc_a(i,iCell)
defc_b(i,iCell) = - defc_b(i,iCell)
+ deformation_coef_c2(i,iCell) = - deformation_coef_c2(i,iCell)
+ deformation_coef_s2(i,iCell) = - deformation_coef_s2(i,iCell)
+ deformation_coef_cs(i,iCell) = - deformation_coef_cs(i,iCell)
+! deformation_coef_c(i,iCell) = - deformation_coef_c(i,iCell)
+! deformation_coef_s(i,iCell) = - deformation_coef_s(i,iCell)
end if
end do
end do
+ call atm_init_test_coefs( deformation_coef_c2, deformation_coef_s2, &
+ deformation_coef_cs, deformation_coef_c, &
+ deformation_coef_s, &
+ is_periodic, on_a_sphere, &
+ x_period, y_period, &
+ xEdge, yEdge, zEdge, &
+ xCell, yCell, zCell, nCells, &
+ angleEdge, nEdgesOnCell, edgesOnCell )
+
+
end subroutine atm_initialize_deformation_weights
- end module atm_advection
+
+ subroutine atm_init_test_coefs( deformation_coef_c2, deformation_coef_s2, &
+ deformation_coef_cs, deformation_coef_c, &
+ deformation_coef_s, &
+ is_periodic, on_a_sphere, &
+ x_period, y_period, &
+ xEdge, yEdge, zEdge, &
+ xCell, yCell, zCell, nCells, &
+ angleEdge, nEdgesOnCell, edgesOnCell )
+
+ implicit none
+
+ logical :: is_periodic, on_a_sphere
+ integer :: nCells
+ integer, dimension(:) :: nEdgesOnCell
+ real (kind=RKIND) :: x_period, y_period
+ real (kind=RKIND), dimension(:,:) :: deformation_coef_c2, deformation_coef_s2
+ real (kind=RKIND), dimension(:,:) :: deformation_coef_cs
+ real (kind=RKIND), dimension(:,:) :: deformation_coef_c, deformation_coef_s
+ integer, dimension(:,:) :: edgesOnCell
+ real (kind=RKIND), dimension(:) :: angleEdge, xEdge, yEdge, zEdge
+ real (kind=RKIND), dimension(:) :: xCell, yCell, zCell
+
+ ! local variables
+
+ integer :: iCell, iEdge, ie
+ real (kind=RKIND) :: cos_edge, sin_edge, ux, uy, vx, vy, wx, wy
+ real (kind=RKIND) :: xc, yc, xe, ye
+ real (kind=RKIND) :: angle_e, ue, ve, we, e_int
+ real (kind=RKIND) :: dudx, dudy, dvdx, dvdy, dwdx, dwdy
+ real (kind=RKIND) :: dudx_c, dudy_c, dvdx_c, dvdy_c, dwdx_c, dwdy_c
+
+ real (kind=RKIND) :: dudx_err_max, dudy_err_max, dvdx_err_max, dvdy_err_max, dwdx_err_max, dwdy_err_max
+ real (kind=RKIND) :: dudx_err_tot, dudy_err_tot, dvdx_err_tot, dvdy_err_tot, dwdx_err_tot, dwdy_err_tot
+ real (kind=RKIND) :: dudx_max, dudy_max, dvdx_max, dvdy_max, dwdx_max, dwdy_max
+
+ real (kind=RKIND) :: ang
+ real (kind=RKIND), parameter :: x_vel= 1.0, y_vel=1.0, w_vel=1.0
+ real (kind=RKIND) :: u_edge, v_edge, w_edge, x, y, angle, xl, yl
+ real (kind=RKIND) :: dudx_cell, dudy_cell, dvdx_cell, dvdy_cell, dwdx_cell, dwdy_cell
+
+ ! Test tunction definitions
+ !
+ ! here are the velocity field functions and their derivatives.
+ ! First a simple test: U = x_vel*(-x+y), V = y_vel * (-x+y), W = w_vel*(-x+y)
+
+ u_edge(x,y,ang,xl,yl) = (x_vel*(x+y)) * cos(ang) + (y_vel * (x+y) * sin(ang))
+ v_edge(x,y,ang,xl,yl) = -(x_vel*(x+y)) * sin(ang) + (y_vel * (x+y) * cos(ang))
+ w_edge(x,y,xl,yl) = w_vel * (x+y)
+
+ dudx_cell(x,y,xl,yl) = x_vel
+ dudy_cell(x,y,xl,yl) = x_vel
+ dvdx_cell(x,y,xl,yl) = y_vel
+ dvdy_cell(x,y,xl,yl) = y_vel
+ dwdx_cell(x,y,xl,yl) = w_vel
+ dwdy_cell(x,y,xl,yl) = w_vel
+
+ ! -----------------
+
+ if ( (.not. on_a_sphere) .and. (is_periodic) ) then ! test is for doubly-periodic Cartesian plane only
+
+ dudx_err_max = 0.
+ dudy_err_max = 0.
+ dvdx_err_max = 0.
+ dvdy_err_max = 0.
+ dwdx_err_max = 0.
+ dwdy_err_max = 0.
+
+ dudx_err_tot = 0.
+ dudy_err_tot = 0.
+ dvdx_err_tot = 0.
+ dvdy_err_tot = 0.
+ dwdx_err_tot = 0.
+ dwdy_err_tot = 0.
+
+ dudx_max = 0.
+ dudy_max = 0.
+ dvdx_max = 0.
+ dvdy_max = 0.
+ dwdx_max = 0.
+ dwdy_max = 0.
+
+ do iCell = 1, nCells
+
+ dudx = 0.
+ dudy = 0.
+ dvdx = 0.
+ dvdy = 0.
+ dwdx = 0.
+ dwdy = 0.
+
+ xc = xCell(iCell)
+ yc = yCell(iCell)
+
+ dudx_c = dudx_cell(xc,yc,x_period,y_period)
+ dudy_c = dudy_cell(xc,yc,x_period,y_period)
+ dvdx_c = dvdx_cell(xc,yc,x_period,y_period)
+ dvdy_c = dvdy_cell(xc,yc,x_period,y_period)
+ dwdx_c = dwdx_cell(xc,yc,x_period,y_period)
+ dwdy_c = dwdy_cell(xc,yc,x_period,y_period)
+
+ do iEdge = 1, nEdgesOnCell(iCell)
+
+ ie = edgesOnCell(iEdge,iCell)
+ angle_e = angleEdge(ie)
+ xe = xEdge(ie)
+ ye = yEdge(ie)
+
+ xe = mpas_fix_periodicity(xe,xc,x_period)
+ ye = mpas_fix_periodicity(ye,yc,y_period)
+
+ ue = u_edge(xe,ye,angle_e,x_period,y_period)
+ ve = v_edge(xe,ye,angle_e,x_period,y_period)
+ we = w_edge(xe,ye,x_period,y_period)
+
+ dudx = dudx + deformation_coef_c2(iEdge,iCell)*ue &
+ - deformation_coef_cs(iEdge,iCell)*ve
+ dudy = dudy + deformation_coef_cs(iEdge,iCell)*ue &
+ - deformation_coef_s2(iEdge,iCell)*ve
+ dvdx = dvdx + deformation_coef_cs(iEdge,iCell)*ue &
+ + deformation_coef_c2(iEdge,iCell)*ve
+ dvdy = dvdy + deformation_coef_s2(iEdge,iCell)*ue &
+ + deformation_coef_cs(iEdge,iCell)*ve
+
+ dwdx = dwdx + deformation_coef_c(iEdge,iCell)*we
+ dwdy = dwdy + deformation_coef_s(iEdge,iCell)*we
+
+ end do
+
+ ! call mpas_log_write(' u_x, u_y, $r, $r ', realArgs=(/dudx, dudy/))
+ ! call mpas_log_write(' v_x, v_y, $r, $r ', realArgs=(/dvdx, dvdy/))
+ ! call mpas_log_write(' w_x, w_y, $r, $r ', realArgs=(/dwdx, dwdy/))
+
+ ! check result for cell
+
+ e_int = abs(dudx_c - dudx)
+ dudx_err_tot = dudx_err_tot + e_int
+ dudx_err_max = max(dudx_err_max, e_int)
+
+ e_int = abs(dudy_c - dudy)
+ dudy_err_tot = dudy_err_tot + e_int
+ dudy_err_max = max(dudy_err_max, e_int)
+
+ e_int = abs(dvdx_c - dvdx)
+ dvdx_err_tot = dvdx_err_tot + e_int
+ dvdx_err_max = max(dvdx_err_max, e_int)
+
+ e_int = abs(dvdy_c - dvdy)
+ dvdy_err_tot = dvdy_err_tot + e_int
+ dvdy_err_max = max(dvdy_err_max, e_int)
+
+ e_int = abs(dwdx_c - dwdx)
+ dwdx_err_tot = dwdx_err_tot + e_int
+ dwdx_err_max = max(dwdx_err_max, e_int)
+
+ e_int = abs(dwdy_c - dwdy)
+ dwdy_err_tot = dwdy_err_tot + e_int
+ dwdy_err_max = max(dwdy_err_max, e_int)
+
+ dudx_max = max(dudx_max, abs(dudx_c))
+ dudy_max = max(dudy_max, abs(dudy_c))
+ dvdx_max = max(dvdx_max, abs(dvdx_c))
+ dvdy_max = max(dvdy_max, abs(dvdy_c))
+ dwdx_max = max(dwdx_max, abs(dwdx_c))
+ dwdy_max = max(dwdy_max, abs(dwdy_c))
+
+ end do
+
+ ! scale errors
+
+ dudx_err_max = dudx_err_max/dudx_max
+ dudy_err_max = dudy_err_max/dudy_max
+ dvdx_err_max = dvdx_err_max/dvdx_max
+ dvdy_err_max = dvdy_err_max/dvdy_max
+ dwdx_err_max = dwdx_err_max/dwdx_max
+ dwdy_err_max = dwdy_err_max/dwdy_max
+
+ dudx_err_tot = dudx_err_tot/dudx_max/real(nCells)
+ dudy_err_tot = dudy_err_tot/dudy_max/real(nCells)
+ dvdx_err_tot = dvdx_err_tot/dvdx_max/real(nCells)
+ dvdy_err_tot = dvdy_err_tot/dvdy_max/real(nCells)
+ dwdx_err_tot = dwdx_err_tot/dwdx_max/real(nCells)
+ dwdy_err_tot = dwdy_err_tot/dwdy_max/real(nCells)
+
+ ! output
+
+ call mpas_log_write(' ')
+ call mpas_log_write(' deformation coefficients check ')
+ call mpas_log_write(' dudx check, max abs(dudx), max and avg error $r, $r, $r', &
+ realArgs=(/dudx_max, dudx_err_max, dudx_err_tot/))
+ call mpas_log_write(' dudy check, max abs(dudy), max and avg error $r, $r, $r', &
+ realArgs=(/dudy_max, dudy_err_max, dudy_err_tot/))
+ call mpas_log_write(' dvdx check, max abs(dvdx), max and avg error $r, $r, $r', &
+ realArgs=(/dvdx_max, dvdx_err_max, dvdx_err_tot/))
+ call mpas_log_write(' dvdy check, max abs(dvdy), max and avg error $r, $r, $r', &
+ realArgs=(/dvdy_max, dvdy_err_max, dvdy_err_tot/))
+ call mpas_log_write(' dwdx check, max abs(dwdx), max and avg error $r, $r, $r', &
+ realArgs=(/dwdx_max, dwdx_err_max, dwdx_err_tot/))
+ call mpas_log_write(' dwdy check, max abs(dwdy), max and avg error $r, $r, $r', &
+ realArgs=(/dwdy_max, dwdy_err_max, dwdy_err_tot/))
+ call mpas_log_write(' ')
+
+ end if
+
+ end subroutine atm_init_test_coefs
+
+end module atm_advection
diff --git a/src/core_init_atmosphere/mpas_init_atm_cases.F b/src/core_init_atmosphere/mpas_init_atm_cases.F
index 673ebfc525..9c372cb163 100644
--- a/src/core_init_atmosphere/mpas_init_atm_cases.F
+++ b/src/core_init_atmosphere/mpas_init_atm_cases.F
@@ -367,6 +367,27 @@ subroutine init_atm_setup_case(domain, stream_manager)
!
call mpas_stream_mgr_reset_alarms(stream_manager, streamID='lbc', direction=MPAS_STREAM_OUTPUT, ierr=ierr)
+ else if (config_init_case == 10) then
+
+ call mpas_log_write(' les test case ')
+ block_ptr => domain % blocklist
+ do while (associated(block_ptr))
+
+ call mpas_pool_get_dimension(block_ptr % dimensions, 'nCells', nCells)
+ call mpas_pool_get_dimension(block_ptr % dimensions, 'nVertLevels', nVertLevels)
+
+ call mpas_pool_get_subpool(block_ptr % structs, 'mesh', mesh)
+ call mpas_pool_get_subpool(block_ptr % structs, 'fg', fg)
+ call mpas_pool_get_subpool(block_ptr % structs, 'state', state)
+ call mpas_pool_get_subpool(block_ptr % structs, 'diag', diag)
+
+ call mpas_log_write(' calling test case setup ')
+ call init_atm_case_les(domain % dminfo, mesh, fg, nCells, nVertLevels, state, diag, config_init_case, block_ptr % configs)
+ call decouple_variables(mesh, nCells, nVertLevels, state, diag)
+ call mpas_log_write(' returned from test case setup ')
+ block_ptr => block_ptr % next
+ end do
+
else if (config_init_case == 13 ) then
call mpas_log_write(' CAM-MPAS grid ')
@@ -399,7 +420,7 @@ subroutine init_atm_setup_case(domain, stream_manager)
else
call mpas_log_write(' ***********************************************************', messageType=MPAS_LOG_ERR)
- call mpas_log_write(' Only test cases 1 through 9 and 13 are currently supported.', messageType=MPAS_LOG_ERR)
+ call mpas_log_write(' Only test cases 1 through 10 and 13 are currently supported.', messageType=MPAS_LOG_ERR)
call mpas_log_write(' ***********************************************************', messageType=MPAS_LOG_CRIT)
end if
@@ -1408,10 +1429,11 @@ subroutine init_atm_case_squall_line(dminfo, mesh, nCells, nVertLevels, state, d
integer, dimension(:), pointer :: nEdgesOnEdge
integer, dimension(:,:), pointer :: edgesOnEdge, cellsOnEdge
real (kind=RKIND), dimension(:,:), pointer :: weightsOnEdge
-
+ real (kind=RKIND), pointer :: x_period, y_period
integer :: iCell, iCell1, iCell2 , iEdge, ivtx, i, k, nz, nz1, itr, cell1, cell2
integer, pointer :: nEdges, nVertices, maxEdges, nCellsSolve
integer, pointer :: index_qv
+ integer, pointer :: index_tke
real (kind=RKIND), dimension(nVertLevels + 1 ) :: zc, zw, ah
real (kind=RKIND), dimension(nVertLevels ) :: zu, dzw, rdzwp, rdzwm
@@ -1438,6 +1460,7 @@ subroutine init_atm_case_squall_line(dminfo, mesh, nCells, nVertLevels, state, d
real (kind=RKIND), pointer :: nominalMinDc
logical, pointer :: on_a_sphere
real (kind=RKIND), pointer :: sphere_radius
+ real (kind=RKIND), pointer :: config_ztop
real (kind=RKIND), dimension(:,:), pointer :: t_init, w, rw, v, rho, theta
real (kind=RKIND), dimension(:), pointer :: u_init, qv_init, angleEdge, fEdge, fVertex
@@ -1459,10 +1482,13 @@ subroutine init_atm_case_squall_line(dminfo, mesh, nCells, nVertLevels, state, d
call mpas_pool_get_array(mesh, 'areaTriangle', areaTriangle)
call mpas_pool_get_array(mesh, 'kiteAreasOnVertex', kiteAreasOnVertex)
call mpas_pool_get_array(mesh, 'nominalMinDc', nominalMinDc)
-
+
+ call mpas_pool_get_config(mesh, 'x_period', x_period)
+ call mpas_pool_get_config(mesh, 'y_period', y_period)
call mpas_pool_get_config(mesh, 'on_a_sphere', on_a_sphere)
call mpas_pool_get_config(mesh, 'sphere_radius', sphere_radius)
call mpas_pool_get_config(configs, 'config_interface_projection', config_interface_projection)
+ call mpas_pool_get_config(configs, 'config_ztop', config_ztop)
!
! Scale all distances
@@ -1485,6 +1511,8 @@ subroutine init_atm_case_squall_line(dminfo, mesh, nCells, nVertLevels, state, d
areaTriangle(:) = areaTriangle(:) * a_scale**2.0
kiteAreasOnVertex(:,:) = kiteAreasOnVertex(:,:) * a_scale**2.0
nominalMinDc = nominalMinDc * a_scale
+ x_period = x_period * a_scale
+ y_period = y_period * a_scale
call mpas_pool_get_array(mesh, 'weightsOnEdge', weightsOnEdge)
call mpas_pool_get_array(mesh, 'nEdgesOnEdge', nEdgesOnEdge)
@@ -1545,6 +1573,7 @@ subroutine init_atm_case_squall_line(dminfo, mesh, nCells, nVertLevels, state, d
call mpas_pool_get_array(state, 'scalars', scalars)
call mpas_pool_get_dimension(state, 'index_qv', index_qv)
+ call mpas_pool_get_dimension(state, 'index_tke', index_tke)
scalars(:,:,:) = 0.
@@ -1572,7 +1601,8 @@ subroutine init_atm_case_squall_line(dminfo, mesh, nCells, nVertLevels, state, d
! metrics for hybrid coordinate and vertical stretching
str = 1.0
- zt = 20000.
+! zt = 20000.
+ zt = config_ztop
dz = zt/float(nz1)
! write(0,*) ' dz = ',dz
@@ -1859,6 +1889,18 @@ subroutine init_atm_case_squall_line(dminfo, mesh, nCells, nVertLevels, state, d
t (k,i) = thi(k,i)*(1.+1.61*scalars(index_qv,k,i))
end do
end do
+!
+! initial seed for tke
+!
+! call mpas_log_write('index_qv $i ', intArgs=(/index_qv/))
+! call mpas_log_write('index_tke $i ', intArgs=(/index_tke/))
+ scalars(index_tke,:,:) = 0.
+
+ do k = 1,nz1
+ do i=1,nCells
+ scalars(index_tke,k,i) = 0.1
+ end do
+ end do
do itr=1,30
@@ -2013,6 +2055,7 @@ subroutine init_atm_case_mtn_wave(dminfo, mesh, nCells, nVertLevels, state, diag
integer, dimension(:), pointer :: nEdgesOnEdge, nEdgesOnCell
integer, dimension(:,:), pointer :: edgesOnEdge, cellsOnEdge, cellsOnCell
real (kind=RKIND), dimension(:,:), pointer :: weightsOnEdge
+ real (kind=RKIND), pointer :: x_period, y_period
integer :: iCell, iCell1, iCell2 , iEdge, ivtx, i, k, nz, itr, cell1, cell2, nz1
integer, pointer :: nEdges, maxEdges, nCellsSolve, nVertices
@@ -2071,6 +2114,8 @@ subroutine init_atm_case_mtn_wave(dminfo, mesh, nCells, nVertLevels, state, diag
call mpas_pool_get_array(mesh, 'kiteAreasOnVertex', kiteAreasOnVertex)
call mpas_pool_get_array(mesh, 'nominalMinDc', nominalMinDc)
+ call mpas_pool_get_config(mesh, 'x_period', x_period)
+ call mpas_pool_get_config(mesh, 'y_period', y_period)
call mpas_pool_get_config(mesh, 'on_a_sphere', on_a_sphere)
call mpas_pool_get_config(mesh, 'sphere_radius', sphere_radius)
@@ -2112,6 +2157,8 @@ subroutine init_atm_case_mtn_wave(dminfo, mesh, nCells, nVertLevels, state, diag
areaTriangle(:) = areaTriangle(:) * a_scale**2.0
kiteAreasOnVertex(:,:) = kiteAreasOnVertex(:,:) * a_scale**2.0
nominalMinDc = nominalMinDc * a_scale
+ x_period = x_period * a_scale
+ y_period = y_period * a_scale
call mpas_pool_get_dimension(mesh, 'nEdges', nEdges)
@@ -6184,6 +6231,692 @@ subroutine init_atm_case_lbc(timestamp, block, mesh, nCells, nEdges, nVertLevels
end subroutine init_atm_case_lbc
+!---------------------
+
+ subroutine init_atm_case_les(dminfo, mesh, fg, nCells, nVertLevels, state, diag, test_case, configs)
+ !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+ ! Large Eddy Simulation (les) test case setup
+ !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+
+ implicit none
+
+ type (dm_info), intent(in) :: dminfo
+ type (mpas_pool_type), intent(inout) :: mesh
+ type (mpas_pool_type), intent(inout) :: fg
+ integer, intent(in) :: nCells
+ integer, intent(in) :: nVertLevels
+ type (mpas_pool_type), intent(inout) :: state
+ type (mpas_pool_type), intent(inout) :: diag
+ type (mpas_pool_type), intent(inout):: configs
+
+ integer, intent(in) :: test_case
+
+ real (kind=RKIND), dimension(:), pointer :: rdzw, dzu, rdzu, fzm, fzp
+ real (kind=RKIND), dimension(:,:), pointer :: zgrid, zxu, zz, hx, cqw
+ real (kind=RKIND), dimension(:,:), pointer :: ppb, pb, rho_zz, rb, rr, tb, rtb, p, pp, dss, t, rt, u, ru
+ real (kind=RKIND), dimension(:,:,:), pointer :: scalars
+ real (kind=RKIND), dimension(:,:,:), pointer :: zb, zb3
+
+ real (kind=RKIND), dimension(:), pointer :: xland
+ integer, dimension(:), pointer :: landmask, lu_index
+
+ !This is temporary variable here. It just need when calculate tangential velocity v.
+ integer :: eoe
+ integer, dimension(:), pointer :: nEdgesOnEdge
+ integer, dimension(:,:), pointer :: edgesOnEdge, cellsOnEdge
+ real (kind=RKIND), dimension(:,:), pointer :: weightsOnEdge
+ real (kind=RKIND), pointer :: x_period, y_period
+
+ integer :: iCell, iCell1, iCell2 , iEdge, ivtx, i, k, nz, nz1, itr, cell1, cell2
+ integer, pointer :: nEdges, nVertices, maxEdges, nCellsSolve
+ integer, pointer :: index_qv
+ integer, pointer :: index_tke
+
+ real (kind=RKIND), dimension(nVertLevels + 1 ) :: zc, zw, ah
+ real (kind=RKIND), dimension(nVertLevels ) :: zu, dzw, rdzwp, rdzwm
+
+ real (kind=RKIND), dimension(nVertLevels, nCells) :: thi, tbi, cqwb
+
+ real (kind=RKIND) :: xnutr
+ real (kind=RKIND) :: ztemp, zd, zt, dz, str
+
+ real (kind=RKIND), dimension(nVertLevels ) :: qvb, qvp, zg
+ real (kind=RKIND), dimension(nVertLevels ) :: t_init_1d
+
+ real (kind=RKIND) :: cof1, cof2
+ real (kind=RKIND), pointer :: cf1, cf2, cf3
+ real (kind=RKIND) :: pitop, pibtop, ptopb, ptop, rcp, rcv, p0
+ real (kind=RKIND) :: a_scale
+
+ real (kind=RKIND), dimension(:), pointer :: xCell, yCell, zCell
+ real (kind=RKIND), dimension(:), pointer :: xEdge, yEdge, zEdge
+ real (kind=RKIND), dimension(:), pointer :: xVertex, yVertex, zVertex
+ real (kind=RKIND), dimension(:), pointer :: dcEdge, dvEdge, areaCell, areaTriangle
+ real (kind=RKIND), dimension(:,:), pointer :: kiteAreasOnVertex
+ logical, pointer :: on_a_sphere
+ real (kind=RKIND), pointer :: sphere_radius
+ real (kind=RKIND), pointer :: config_ztop
+
+ real (kind=RKIND), dimension(:,:), pointer :: t_init, w, rw, v, rho, theta
+ real (kind=RKIND), dimension(:), pointer :: u_init, v_init, qv_init, angleEdge, fEdge, fVertex
+ real (kind=RKIND) :: u_vel, v_vel, randx
+
+ call mpas_pool_get_array(mesh, 'xCell', xCell)
+ call mpas_pool_get_array(mesh, 'yCell', yCell)
+ call mpas_pool_get_array(mesh, 'zCell', zCell)
+ call mpas_pool_get_array(mesh, 'xEdge', xEdge)
+ call mpas_pool_get_array(mesh, 'yEdge', yEdge)
+ call mpas_pool_get_array(mesh, 'zEdge', zEdge)
+ call mpas_pool_get_array(mesh, 'xVertex', xVertex)
+ call mpas_pool_get_array(mesh, 'yVertex', yVertex)
+ call mpas_pool_get_array(mesh, 'zVertex', zVertex)
+ call mpas_pool_get_array(mesh, 'dvEdge', dvEdge)
+ call mpas_pool_get_array(mesh, 'dcEdge', dcEdge)
+ call mpas_pool_get_array(mesh, 'areaCell', areaCell)
+ call mpas_pool_get_array(mesh, 'areaTriangle', areaTriangle)
+ call mpas_pool_get_array(mesh, 'kiteAreasOnVertex', kiteAreasOnVertex)
+
+ call mpas_pool_get_config(mesh, 'x_period', x_period)
+ call mpas_pool_get_config(mesh, 'y_period', y_period)
+ call mpas_pool_get_config(mesh, 'on_a_sphere', on_a_sphere)
+ call mpas_pool_get_config(mesh, 'sphere_radius', sphere_radius)
+ call mpas_pool_get_config(configs, 'config_ztop', config_ztop)
+
+ !
+ ! Scale all distances
+ !
+
+ a_scale = 1.0
+
+ xCell(:) = xCell(:) * a_scale
+ yCell(:) = yCell(:) * a_scale
+ zCell(:) = zCell(:) * a_scale
+ xVertex(:) = xVertex(:) * a_scale
+ yVertex(:) = yVertex(:) * a_scale
+ zVertex(:) = zVertex(:) * a_scale
+ xEdge(:) = xEdge(:) * a_scale
+ yEdge(:) = yEdge(:) * a_scale
+ zEdge(:) = zEdge(:) * a_scale
+ dvEdge(:) = dvEdge(:) * a_scale
+ dcEdge(:) = dcEdge(:) * a_scale
+ areaCell(:) = areaCell(:) * a_scale**2.0
+ areaTriangle(:) = areaTriangle(:) * a_scale**2.0
+ kiteAreasOnVertex(:,:) = kiteAreasOnVertex(:,:) * a_scale**2.0
+ x_period = x_period * a_scale
+ y_period = y_period * a_scale
+
+ call mpas_pool_get_array(mesh, 'weightsOnEdge', weightsOnEdge)
+ call mpas_pool_get_array(mesh, 'nEdgesOnEdge', nEdgesOnEdge)
+ call mpas_pool_get_array(mesh, 'edgesOnEdge', edgesOnEdge)
+ call mpas_pool_get_array(mesh, 'cellsOnEdge', cellsOnEdge)
+
+ call mpas_pool_get_dimension(mesh, 'nEdges', nEdges)
+ call mpas_pool_get_dimension(mesh, 'nVertices', nVertices)
+ call mpas_pool_get_dimension(mesh, 'nCellsSolve', nCellsSolve)
+ call mpas_pool_get_dimension(mesh, 'maxEdges', maxEdges)
+ nz1 = nVertLevels
+ nz = nz1 + 1
+
+ call mpas_pool_get_array(mesh, 'zgrid', zgrid)
+ call mpas_pool_get_array(mesh, 'zb', zb)
+ call mpas_pool_get_array(mesh, 'zb3', zb3)
+ call mpas_pool_get_array(mesh, 'rdzw', rdzw)
+ call mpas_pool_get_array(mesh, 'dzu', dzu)
+ call mpas_pool_get_array(mesh, 'rdzu', rdzu)
+ call mpas_pool_get_array(mesh, 'fzm', fzm)
+ call mpas_pool_get_array(mesh, 'fzp', fzp)
+ call mpas_pool_get_array(mesh, 'zxu', zxu)
+ call mpas_pool_get_array(mesh, 'zz', zz)
+ call mpas_pool_get_array(mesh, 'hx', hx)
+ call mpas_pool_get_array(mesh, 'dss', dss)
+ call mpas_pool_get_array(mesh, 't_init', t_init)
+ call mpas_pool_get_array(mesh, 'u_init', u_init)
+ call mpas_pool_get_array(mesh, 'v_init', v_init)
+ call mpas_pool_get_array(mesh, 'qv_init', qv_init)
+ call mpas_pool_get_array(mesh, 'angleEdge', angleEdge)
+ call mpas_pool_get_array(mesh, 'fEdge', fEdge)
+ call mpas_pool_get_array(mesh, 'fVertex', fVertex)
+
+ call mpas_pool_get_array(mesh, 'cf1', cf1)
+ call mpas_pool_get_array(mesh, 'cf2', cf2)
+ call mpas_pool_get_array(mesh, 'cf3', cf3)
+
+ call mpas_pool_get_array(diag, 'pressure_base', ppb)
+ call mpas_pool_get_array(diag, 'exner_base', pb)
+ call mpas_pool_get_array(diag, 'rho_base', rb)
+ call mpas_pool_get_array(diag, 'theta_base', tb)
+ call mpas_pool_get_array(diag, 'rtheta_base', rtb)
+ call mpas_pool_get_array(diag, 'exner', p)
+ call mpas_pool_get_array(diag, 'cqw', cqw)
+
+ call mpas_pool_get_array(diag, 'pressure_p', pp)
+ call mpas_pool_get_array(diag, 'rho_p', rr)
+ call mpas_pool_get_array(diag, 'rtheta_p', rt)
+ call mpas_pool_get_array(diag, 'ru', ru)
+ call mpas_pool_get_array(diag, 'rw', rw)
+ call mpas_pool_get_array(diag, 'v', v)
+ call mpas_pool_get_array(diag, 'rho', rho)
+ call mpas_pool_get_array(diag, 'theta', theta)
+
+ call mpas_pool_get_array(state, 'rho_zz', rho_zz)
+ call mpas_pool_get_array(state, 'theta_m', t)
+ call mpas_pool_get_array(state, 'u', u)
+ call mpas_pool_get_array(state, 'w', w)
+ call mpas_pool_get_array(state, 'scalars', scalars)
+
+ call mpas_pool_get_dimension(state, 'index_qv', index_qv)
+ call mpas_pool_get_dimension(state, 'index_tke', index_tke)
+
+ call mpas_pool_get_array(fg, 'xland', xland)
+ call mpas_pool_get_array(mesh, 'landmask', landmask)
+ call mpas_pool_get_array(mesh, 'lu_index', lu_index)
+
+ scalars(:,:,:) = 0.
+
+ call atm_initialize_advection_rk(mesh, nCells, nEdges, maxEdges, on_a_sphere, sphere_radius )
+ call atm_initialize_deformation_weights(mesh, nCells, on_a_sphere, sphere_radius)
+
+ xnutr = 0.
+ zd = 12000.
+
+ p0 = 1.e+05
+ rcp = rgas/cp
+ rcv = rgas/(cp-rgas)
+
+ call mpas_log_write(' point 1 in test case setup ')
+
+! We may pass in an hx(:,:) that has been precomputed elsewhere.
+! For now it is independent of k
+
+ do iCell=1,nCells
+ do k=1,nz
+ hx(k,iCell) = 0. ! les on a flat Cartesian plane
+ end do
+ end do
+
+! write(0,*) ' dz = ',dz
+ call mpas_log_write(' hx computation complete ')
+
+ ! metrics for hybrid coordinate and vertical stretching
+
+ str = 1.0 ! no stretching in les case: constant dz
+ ! zt = 20000.
+ zt = config_ztop
+ dz = zt/float(nz1)
+
+
+ do k=1,nz
+
+ ! zw(k) = zt*(real(k-1)*dz/zt)**str
+ zw(k) = float(k-1)*dz
+ zc(k) = zw(k)
+!
+! ah(k) governs the transition between terrain-following
+! and pureheight coordinates
+! ah(k) = 0 is a terrain-following coordinate
+! ah(k) = 1 is a height coordinate
+
+! ah(k) = 1.-cos(.5*pii*(k-1)*dz/zt)**6
+ ah(k) = 1.
+! call mpas_log_write(' k, zc, zw, ah = $i $r $r $r', intArgs=(/k/), realArgs=(/zc(k),zw(k),ah(k)/))
+ end do
+ do k=1,nz1
+ dzw (k) = zw(k+1)-zw(k)
+ rdzw(k) = 1./dzw(k)
+ zu(k ) = .5*(zw(k)+zw(k+1))
+ end do
+ do k=2,nz1
+ dzu (k) = .5*(dzw(k)+dzw(k-1))
+ rdzu(k) = 1./dzu(k)
+ fzp (k) = .5* dzw(k )/dzu(k)
+ fzm (k) = .5* dzw(k-1)/dzu(k)
+ rdzwp(k) = dzw(k-1)/(dzw(k )*(dzw(k)+dzw(k-1)))
+ rdzwm(k) = dzw(k )/(dzw(k-1)*(dzw(k)+dzw(k-1)))
+ end do
+
+!********** how are we storing cf1, cf2 and cf3?
+
+ COF1 = (2.*DZU(2)+DZU(3))/(DZU(2)+DZU(3))*DZW(1)/DZU(2)
+ COF2 = DZU(2) /(DZU(2)+DZU(3))*DZW(1)/DZU(3)
+ CF1 = FZP(2) + COF1
+ CF2 = FZM(2) - COF1 - COF2
+ CF3 = COF2
+
+ do iCell=1,nCells
+ do k=1,nz
+ zgrid(k,iCell) = ah(k)*(zc(k)*(1.-hx(k,iCell)/zt)+hx(k,iCell)) &
+ + (1.-ah(k)) * zc(k)
+ end do
+ do k=1,nz1
+ zz (k,iCell) = (zw(k+1)-zw(k))/(zgrid(k+1,iCell)-zgrid(k,iCell))
+ end do
+ end do
+
+ do i=1, nEdges
+ iCell1 = cellsOnEdge(1,i)
+ iCell2 = cellsOnEdge(2,i)
+ do k=1,nz1
+ zxu (k,i) = 0.5 * (zgrid(k,iCell2)-zgrid(k,iCell1) + zgrid(k+1,iCell2)-zgrid(k+1,iCell1)) / dcEdge(i)
+ end do
+ end do
+ do i=1, nCells
+ do k=1,nz1
+ ztemp = .5*(zgrid(k+1,i)+zgrid(k,i))
+ dss(k,i) = 0.
+ ztemp = zgrid(k,i)
+ if(ztemp.gt.zd+.1) then
+ dss(k,i) = dss(k,i)+xnutr*sin(.5*pii*(ztemp-zd)/(zt-zd))**2
+ end if
+ end do
+ end do
+
+!
+! initialization
+!
+ do i=1,nCells
+ do k=1,nz1
+
+ ztemp = .5*(zgrid(k,i)+zgrid(k+1,i))
+
+ ! if(ztemp .gt. ztr) then
+ ! t (k,i) = thetar*exp(9.8*(ztemp-ztr)/(1003.*ttr))
+ ! relhum(k,i) = 0.25
+ ! else
+ ! t (k,i) = 300.+43.*(ztemp/ztr)**1.25
+ ! relhum(k,i) = (1.-0.75*(ztemp/ztr)**1.25)
+ ! if(t(k,i).lt.thetas) t(k,i) = thetas
+ ! end if
+
+ t(k,i) = atm_get_sounding('theta',ztemp)
+ scalars(index_qv,k,i) = atm_get_sounding('qv',ztemp)
+
+ tb(k,i) = t(k,i)
+ thi(k,i) = t(k,i)
+ tbi(k,i) = t(k,i)
+ cqw(k,i) = 1.
+ cqwb(k,i) = 1.
+ end do
+ end do
+
+! set the velocity field - we are on a plane here.
+
+ do i=1, nEdges
+ cell1 = cellsOnEdge(1,i)
+ cell2 = cellsOnEdge(2,i)
+ if(cell1 <= nCellsSolve .or. cell2 <= nCellsSolve) then
+ do k=1,nz1
+ ztemp = .25*( zgrid(k,cell1 )+zgrid(k+1,cell1 ) &
+ +zgrid(k,cell2)+zgrid(k+1,cell2))
+ u_vel = atm_get_sounding('u',ztemp)
+ v_vel = atm_get_sounding('v',ztemp)
+ u(k,i) = cos(angleEdge(i))*u_vel - sin(angleEdge(i))*v_vel
+ if(i == 1 ) then
+ u_init(k) = u_vel
+ v_init(k) = v_vel
+ end if
+ end do
+ end if
+ end do
+
+ call mpas_dmpar_bcast_reals(dminfo, nz1, u_init)
+
+!
+! for reference sounding
+!
+ do itr=1,30
+
+ pitop = 1.-.5*dzw(1)*gravity*(1.+scalars(index_qv,1,1))/(cp*t(1,1)*zz(1,1))
+ pibtop = 1.-.5*dzw(1)*gravity*(1.+qvb(1))/(cp*tb(1,1)*zz(1,1))
+ do k=2,nz1
+ pitop = pitop-dzu(k)*gravity/(cp*cqw(k,1)*.5*(t(k,1)+t(k-1,1)) &
+ *.5*(zz(k,1)+zz(k-1,1)))
+ pibtop = pibtop-dzu(k)*gravity/(cp*cqwb(k,1)*.5*(tb(k,1)+tb(k-1,1)) &
+ *.5*(zz(k,1)+zz(k-1,1)))
+
+ !call mpas_log_write('$i $r $r $r $r', intArgs=(/k/), realArgs=(/pitop,tb(k,1),dzu(k),tb(k,1)/))
+ end do
+ pitop = pitop-.5*dzw(nz1)*gravity*(1.+scalars(index_qv,nz1,1))/(cp*t(nz1,1)*zz(nz1,1))
+ pibtop = pibtop-.5*dzw(nz1)*gravity*(1.+qvb(nz1))/(cp*tb(nz1,1)*zz(nz1,1))
+
+ call mpas_dmpar_bcast_real(dminfo, pitop)
+ call mpas_dmpar_bcast_real(dminfo, pibtop)
+
+ ptopb = p0*pibtop**(1./rcp)
+! call mpas_log_write('ptopb = $r', realArgs=(/0.01_RKIND*ptopb/))
+
+ do i=1, nCells
+ pb(nz1,i) = pibtop+.5*dzw(nz1)*gravity*(1.+qvb(nz1))/(cp*tb(nz1,i)*zz(nz1,i))
+ p (nz1,i) = pitop+.5*dzw(nz1)*gravity*(1.+scalars(index_qv,nz1,i))/(cp*t (nz1,i)*zz(nz1,i))
+ do k=nz1-1,1,-1
+ pb(k,i) = pb(k+1,i) + dzu(k+1)*gravity/(cp*cqwb(k+1,i)*.5*(tb(k,i)+tb(k+1,i)) &
+ *.5*(zz(k,i)+zz(k+1,i)))
+ p (k,i) = p (k+1,i) + dzu(k+1)*gravity/(cp*cqw(k+1,i)*.5*(t (k,i)+t (k+1,i)) &
+ *.5*(zz(k,i)+zz(k+1,i)))
+ end do
+ do k=1,nz1
+ rb (k,i) = pb(k,i)**(1./rcv)/((rgas/p0)*tb(k,i)*zz(k,i))
+ rtb(k,i) = rb(k,i)*tb(k,i)
+ rr (k,i) = p (k,i)**(1./rcv)/((rgas/p0)*t (k,i)*zz(k,i))-rb(k,i)
+ ppb(k,i) = p0*(zz(k,i)*rgas*rtb(k,i)/p0)**(cp/cv)
+ end do
+ end do
+
+ !
+ ! update water vapor mixing ratio from humidity profile
+ !
+ ! do i= 1,nCells
+ ! do k=1,nz1
+ ! temp = p(k,i)*thi(k,i)
+ ! pres = p0*p(k,i)**(1./rcp)
+ ! qvs = 380.*exp(17.27*(temp-273.)/(temp-36.))/pres
+ ! scalars(index_qv,k,i) = min(0.014_RKIND,relhum(k,i)*qvs)
+ ! end do
+ ! end do
+
+ do k=1,nz1
+!*********************************************************************
+! QVB = QV INCLUDES MOISTURE IN REFERENCE STATE
+! qvb(k) = scalars(index_qv,k,1)
+! QVB = 0 PRODUCES DRY REFERENCE STATE
+ qvb(k) = 0.
+ qvp(k) = scalars(index_qv,k,1)*1000.
+ zg(k) = .5*(zgrid(k,1)+zgrid(k+1,1))
+!*********************************************************************
+ end do
+
+ do i= 1,nCells
+ do k=1,nz1
+ t (k,i) = thi(k,i)*(1.+1.61*scalars(index_qv,k,i))
+ tb(k,i) = tbi(k,i)*(1.+1.61*qvb(k))
+ end do
+ do k=2,nz1
+ cqw (k,i) = 1./(1.+.5*(scalars(index_qv,k,i)+scalars(index_qv,k-1,i)))
+ cqwb(k,i) = 1./(1.+.5*(qvb(k)+qvb(k-1)))
+ end do
+ end do
+
+ end do !end of iteration loop
+
+ call mpas_log_write(' sounding ')
+ call mpas_log_write(' k, zg, rb, tb, rtb, t, rr, p, qvp')
+ do k=1,nVertLevels
+ call mpas_log_write('$i $r $r $r $r $r $r $r $r', intArgs=(/k/), realArgs=(/zg(k),rb(k,1),tb(k,1),rtb(k,1),t(k,1),rr(k,1),p(k,1),qvp(k)/))
+ end do
+
+!
+! potential temperature perturbation
+!
+
+! do i=1,nCells
+! do k = 1,nz1 ! same as in WRF
+! call random_number(randx)
+! thi(k,i) = thi(k,i) + 0.1*(randx-0.5)
+! t (k,i) = thi(k,i)*(1.+1.61*scalars(index_qv,k,i))
+! end do
+! end do
+
+ do k = 1,nz1 ! same as in WRF
+ if(zg(k) .le. 397.0) then ! the SAS initial PBL height
+ do i=1,nCells
+ call random_number(randx)
+ thi(k,i) = thi(k,i) + 1.0*(randx-0.5)
+ t (k,i) = thi(k,i)*(1.+1.61*scalars(index_qv,k,i))
+ end do
+ end if
+ end do
+!
+! initial seed for tke
+!
+ scalars(index_tke,:,:) = 0.
+
+ do k = 1,nz1
+ if( zg(k) .le. 255.) then
+ do i=1,nCells
+ !! scalars(index_tke,k,i) = 0.1
+ scalars(index_tke,k,i) = 0.4*((1.-(zg(k)/255.))**3)
+ end do
+ end if
+ end do
+
+ do itr=1,30
+
+ pitop = 1.-.5*dzw(1)*gravity*(1.+scalars(index_qv,1,1))/(cp*t(1,1)*zz(1,1))
+ do k=2,nz1
+ pitop = pitop-dzu(k)*gravity/(cp*cqw(k,1)*.5*(t (k,1)+t (k-1,1)) &
+ *.5*(zz(k,1)+zz(k-1,1)))
+ end do
+ pitop = pitop - .5*dzw(nz1)*gravity*(1.+scalars(index_qv,nz1,1))/(cp*t(nz1,1)*zz(nz1,1))
+ ptop = p0*pitop**(1./rcp)
+! call mpas_log_write('ptop = $r $r', realArgs=(/0.01_RKIND*ptop, 0.01_RKIND*ptopb/))
+
+ call mpas_dmpar_bcast_real(dminfo, ptop)
+
+ do i = 1, nCells
+
+ pp(nz1,i) = ptop-ptopb+.5*dzw(nz1)*gravity* &
+ (rr(nz1,i)+(rr(nz1,i)+rb(nz1,i))*scalars(index_qv,nz1,i))
+ do k=nz1-1,1,-1
+! pp(k,i) = pp(k+1,i)+.5*dzu(k+1)*gravity* &
+! (rr(k ,i)+(rr(k ,i)+rb(k ,i))*scalars(index_qv,k ,i) &
+! +rr(k+1,i)+(rr(k+1,i)+rb(k+1,i))*scalars(index_qv,k+1,i))
+ pp(k,i) = pp(k+1,i)+dzu(k+1)*gravity*( &
+ fzm(k+1)*(rb(k+1,i)*(scalars(index_qv,k+1,i)-qvb(k+1)) &
+ +rr(k+1,i)*(1.+scalars(index_qv,k+1,i))) &
+ +fzp(k+1)*(rb(k ,i)*(scalars(index_qv,k ,i)-qvb(k)) &
+ +rr(k ,i)*(1.+scalars(index_qv,k ,i))))
+ end do
+! if (itr==1.and.i==1) then
+! do k=1,nz1
+! call mpas_log_write('pp-check $r', realArgs=(/pp(k,i)/))
+! end do
+! end if
+ do k=1,nz1
+ rt(k,i) = (pp(k,i)/(rgas*zz(k,i)) &
+ -rtb(k,i)*(p(k,i)-pb(k,i)))/p(k,i)
+ p (k,i) = (zz(k,i)*(rgas/p0)*(rtb(k,i)+rt(k,i)))**rcv
+ rr(k,i) = (rt(k,i)-rb(k,i)*(t(k,i)-tb(k,i)))/t(k,i)
+ end do
+
+ end do ! loop over cells
+
+ end do ! iteration loop
+!----------------------------------------------------------------------
+!
+ do k=1,nz1
+ qv_init(k) = scalars(index_qv,k,1)
+ end do
+
+ t_init_1d(:) = t(:,1)
+ call mpas_dmpar_bcast_reals(dminfo, nz1, t_init_1d)
+ call mpas_dmpar_bcast_reals(dminfo, nz1, qv_init)
+
+ do i=1,nCells
+ do k=1,nz1
+ t_init(k,i) = t_init_1d(k)
+ rho_zz(k,i) = rb(k,i)+rr(k,i)
+ end do
+ end do
+
+ do i=1,nEdges
+ cell1 = cellsOnEdge(1,i)
+ cell2 = cellsOnEdge(2,i)
+ if(cell1 <= nCellsSolve .or. cell2 <= nCellsSolve) then
+ do k=1,nz1
+ ru (k,i) = 0.5*(rho_zz(k,cell1)+rho_zz(k,cell2))*u(k,i)
+ end do
+ end if
+ end do
+
+
+ !
+ ! we are assuming w and rw are zero for this initialization
+ ! i.e., no terrain
+ !
+ rw = 0.0
+ w = 0.0
+
+ zb = 0.0
+ zb3 = 0.0
+
+ !
+ ! Generate rotated Coriolis field - same settings as in WRF
+ !
+ do iEdge=1,nEdges
+ ! fEdge(iEdge) = 1.e-04
+ fEdge(iEdge) = 7.2921e-05
+ end do
+
+ do iVtx=1,nVertices
+ ! fVertex(iVtx) = 1.e-04
+ fVertex(iVtx) = 7.2921e-05
+ end do
+
+ !
+ ! Compute mass fluxes tangential to each edge (i.e., through the faces of dual grid cells)
+ !
+ v(:,:) = 0.0
+ do iEdge = 1, nEdges
+ do i=1,nEdgesOnEdge(iEdge)
+ eoe = edgesOnEdge(i,iEdge)
+ if (eoe > 0) then
+ do k = 1, nVertLevels
+ v(k,iEdge) = v(k,iEdge) + weightsOnEdge(i,iEdge) * u(k, eoe)
+ end do
+ end if
+ end do
+ end do
+
+ ! call mpas_log_write(' k,u_init, t_init, qv_init ')
+ ! do k=1,nVertLevels
+ ! call mpas_log_write('$i $r $r $r', intArgs=(/k/), realArgs=(/u_init(k),t_init(k,1),qv_init(k)/))
+ ! end do
+
+ ! Compute rho and theta from rho_zz and theta_m
+ do iCell=1,nCells
+ do k=1,nVertLevels
+ rho(k,iCell) = rho_zz(k,iCell) * zz(k,iCell)
+ theta(k,iCell) = t(k,iCell) / (1.0 + 1.61 * scalars(index_qv,k,iCell))
+ end do
+ end do
+
+ do iCell = 1, nCells
+ !land category
+ landmask(iCell) = 1
+ lu_index(iCell) = 1
+ xland(iCell) = 1.0
+ if (iCell == 1) call mpas_log_write(' landmask, lu_index, xland $i $i $r', intArgs=(/landmask(iCell),lu_index(iCell)/), realArgs=(/xland(iCell)/))
+ end do
+
+ end subroutine init_atm_case_les
+
+
+ real (kind=RKIND) function atm_get_sounding( variable, height )
+
+ implicit none
+ real (kind=RKIND), intent(in) :: height
+ character(len=*), intent(in) :: variable
+
+ atm_get_sounding = -999.
+! SAS case sounding
+
+ if(variable == 'u') then
+ atm_get_sounding = 2.0 ! SAS value
+
+ else if (variable == 'v') then
+ atm_get_sounding = 0.
+
+ else if (variable == 'theta') then
+
+ if(height .le. 352.5) then
+ atm_get_sounding = 296.6
+ else if(height .le. 442.5) then
+ atm_get_sounding = 296.6 + (height-352.5)*1.5/90.
+ else
+ atm_get_sounding = 298.1 + (height-442.5)*0.003
+ end if
+
+ else if (variable == 'qv') then
+
+ if(height .le. 352.5) then
+ atm_get_sounding = 11.8/1000.
+ else if(height .le. 442.5) then
+ atm_get_sounding = 11.8/1000. - (height-352.5)*4.0/90./1000.
+ else
+ atm_get_sounding = max(7.8/1000. - (height-442.5)*0.004/1000.,0.0)
+ end if
+
+ end if
+
+ end function atm_get_sounding
+
+
+ real (kind=RKIND) function atm_get_sounding_1( variable, height )
+
+ implicit none
+ real (kind=RKIND), intent(in) :: height
+ character(len=*), intent(in) :: variable
+
+ atm_get_sounding_1 = -999.
+
+ if(variable == 'u') then
+ atm_get_sounding_1 = 15.0
+ else if (variable == 'v') then
+ atm_get_sounding_1 = 0.
+ else if (variable == 'qv') then
+
+ atm_get_sounding_1 = 0. ! dry sounding
+ ! atm_get_sounding_1 = 0.010
+ ! if(height .gt. 500.) atm_get_sounding_1 = 0.004
+
+ else if (variable == 'theta') then
+
+ if(height .le. 500.) then
+ atm_get_sounding_1 = 300.
+ else if(height .le. 600.) then
+ atm_get_sounding_1 = 300. + (height-500.)*3./100.
+ else
+ atm_get_sounding_1 = 303. + (height-600.)*3./1000.
+ end if
+ ! atm_get_sounding_1 = atm_get_sounding_1 - 10.0 ! for water case
+
+ end if
+
+ end function atm_get_sounding_1
+
+ real (kind=RKIND) function atm_get_sounding_2( variable, height )
+
+ implicit none
+ real (kind=RKIND), intent(in) :: height
+ character(len=*), intent(in) :: variable
+
+ atm_get_sounding_2 = -999.
+
+ if(variable == 'u') then
+ atm_get_sounding_2 = 10.0
+ else if (variable == 'v') then
+ atm_get_sounding_2 = 0.
+ else if (variable == 'qv') then
+
+ ! atm_get_sounding_2 = 0. ! dry sounding_2
+ atm_get_sounding_2 = 0.010
+ if(height .gt. 1000.) atm_get_sounding_2 = 0.004
+
+ else if (variable == 'theta') then
+
+ if(height .le. 1000.) then
+ atm_get_sounding_2 = 300.
+ else if(height .le. 1150.) then
+ atm_get_sounding_2 = 300. + (height-1000.)*8./150.
+ else
+ atm_get_sounding_2 = 308. + (height-1150.)*3./1000.
+ end if
+ atm_get_sounding_2 = atm_get_sounding_2 - 10.0 ! for water case
+
+ end if
+
+ end function atm_get_sounding_2
+
+!-----------
!-----------------------------------------------------------------------
! routine init_atm_case_cam_mpas
diff --git a/src/framework/Makefile b/src/framework/Makefile
index 2d8e7dc92b..64d3d6faf4 100644
--- a/src/framework/Makefile
+++ b/src/framework/Makefile
@@ -110,7 +110,7 @@ mpas_c_interfacing.o:
xml_stream_parser.o: xml_stream_parser.c
$(CC) $(CFLAGS) $(CPPFLAGS) $(CPPINCLUDES) -I../external/ezxml -c xml_stream_parser.c
-mpas_halo.o: mpas_derived_types.o mpas_pool_routines.o mpas_log.o
+mpas_halo.o: mpas_derived_types.o mpas_pool_routines.o mpas_log.o mpas_timer.o
mpas_stream_inquiry.o : mpas_derived_types.o mpas_log.o mpas_c_interfacing.o
diff --git a/src/framework/mpas_dmpar.F b/src/framework/mpas_dmpar.F
index 6d68c0c656..5d9b48d53b 100644
--- a/src/framework/mpas_dmpar.F
+++ b/src/framework/mpas_dmpar.F
@@ -7448,19 +7448,28 @@ end subroutine mpas_dmpar_exch_group_end_halo_exch!}}}
!> exchange is complete.
!
!-----------------------------------------------------------------------
- subroutine mpas_dmpar_exch_group_full_halo_exch(domain, groupName, iErr)!{{{
+ subroutine mpas_dmpar_exch_group_full_halo_exch(domain, groupName, withGPUAwareMPI, iErr)!{{{
type (domain_type), intent(inout) :: domain
character (len=*), intent(in) :: groupName
+ logical, optional, intent(in) :: withGPUAwareMPI
integer, optional, intent(out) :: iErr
type (mpas_exchange_group), pointer :: exchGroupPtr
integer :: nLen
+ logical :: useGPUAwareMPI
if ( present(iErr) ) then
iErr = MPAS_DMPAR_NOERR
end if
+ useGPUAwareMPI = .false.
+ if (present(withGPUAwareMPI)) then
+ if (withGPUAwareMPI) then
+ call mpas_log_write(' GPU-aware MPI not implemented in this module', MPAS_LOG_CRIT)
+ end if
+ end if
+
nLen = len_trim(groupName)
DMPAR_DEBUG_WRITE(' -- Trying to perform a full exchange for group ' // trim(groupName))
diff --git a/src/framework/mpas_halo.F b/src/framework/mpas_halo.F
index 4ab8817c23..8877d6330d 100644
--- a/src/framework/mpas_halo.F
+++ b/src/framework/mpas_halo.F
@@ -280,6 +280,30 @@ subroutine mpas_halo_exch_group_complete(domain, groupName, iErr)
call refactor_lists(domain, groupName, iErr)
+ ! Logic to return early if there are no neighbors to send to
+ if ( newGroup% nGroupSendNeighbors <=0 ) then
+ return
+ end if
+
+
+ ! Always copy in the main data member first
+ !$acc enter data copyin(newGroup)
+ ! Then the data in the members of the type
+ !$acc enter data copyin(newGroup % recvBuf(:), newGroup % sendBuf(:))
+ ! !$acc enter data copyin(newGroup % sendBuf(:))
+ !$acc enter data copyin(newGroup % fields(:))
+ do i = 1, newGroup % nFields
+ !$acc enter data copyin(newGroup % fields(i))
+ !$acc enter data copyin(newGroup % fields(i) % nSendLists(:,:))
+ !$acc enter data copyin(newGroup % fields(i) % packOffsets(:))
+ !$acc enter data copyin(newGroup % fields(i) % sendListSrc(:,:,:))
+ !$acc enter data copyin(newGroup % fields(i) % sendListDst(:,:,:))
+ !$acc enter data copyin(newGroup % fields(i) % nRecvLists(:,:))
+ !$acc enter data copyin(newGroup % fields(i) % unpackOffsets(:))
+ !$acc enter data copyin(newGroup % fields(i) % recvListSrc(:,:,:))
+ !$acc enter data copyin(newGroup % fields(i) % recvListDst(:,:,:))
+ end do
+
end subroutine mpas_halo_exch_group_complete
@@ -349,15 +373,26 @@ subroutine mpas_halo_exch_group_destroy(domain, groupName, iErr)
deallocate(cursor % fields(i) % compactHaloInfo)
deallocate(cursor % fields(i) % compactSendLists)
deallocate(cursor % fields(i) % compactRecvLists)
+ !$acc exit data delete(cursor % fields(i) % nSendLists(:,:))
deallocate(cursor % fields(i) % nSendLists)
+ !$acc exit data delete(cursor % fields(i) % sendListSrc(:,:,:))
deallocate(cursor % fields(i) % sendListSrc)
+ !$acc exit data delete(cursor % fields(i) % sendListDst(:,:,:))
deallocate(cursor % fields(i) % sendListDst)
+ !$acc exit data delete(cursor % fields(i) % packOffsets(:))
deallocate(cursor % fields(i) % packOffsets)
+ !$acc exit data delete(cursor % fields(i) % nRecvLists(:,:))
deallocate(cursor % fields(i) % nRecvLists)
+ !$acc exit data delete(cursor % fields(i) % recvListSrc(:,:,:))
deallocate(cursor % fields(i) % recvListSrc)
+ !$acc exit data delete(cursor % fields(i) % recvListDst(:,:,:))
deallocate(cursor % fields(i) % recvListDst)
+ !$acc exit data delete(cursor % fields(i) % unpackOffsets(:))
deallocate(cursor % fields(i) % unpackOffsets)
+ !$acc exit data delete(cursor % fields(i))
end do
+ ! Use finalize here in-case the copyins in ..._complete increment the reference counter
+ !$acc exit data finalize delete(cursor % fields(:))
deallocate(cursor % fields)
deallocate(cursor % groupPackOffsets)
deallocate(cursor % groupSendNeighbors)
@@ -368,10 +403,14 @@ subroutine mpas_halo_exch_group_destroy(domain, groupName, iErr)
deallocate(cursor % groupToFieldRecvIdx)
deallocate(cursor % groupRecvOffsets)
deallocate(cursor % groupRecvCounts)
+ !$acc exit data delete(cursor % sendBuf(:))
deallocate(cursor % sendBuf)
+ !$acc exit data delete(cursor % recvBuf(:))
deallocate(cursor % recvBuf)
deallocate(cursor % sendRequests)
deallocate(cursor % recvRequests)
+ ! Finalize here as well, just in-case
+ !$acc exit data finalize delete(cursor)
deallocate(cursor)
end subroutine mpas_halo_exch_group_destroy
@@ -495,7 +534,7 @@ end subroutine mpas_halo_exch_group_add_field
!> exchange group.
!
!-----------------------------------------------------------------------
- subroutine mpas_halo_exch_group_full_halo_exch(domain, groupName, iErr)
+ subroutine mpas_halo_exch_group_full_halo_exch(domain, groupName, withGPUAwareMPI, iErr)
#ifdef MPAS_USE_MPI_F08
use mpi_f08, only : MPI_Datatype, MPI_Comm
@@ -508,6 +547,7 @@ subroutine mpas_halo_exch_group_full_halo_exch(domain, groupName, iErr)
use mpas_derived_types, only : domain_type, mpas_halo_group, MPAS_HALO_REAL, MPAS_LOG_CRIT
use mpas_pool_routines, only : mpas_pool_get_array
use mpas_log, only : mpas_log_write
+ use mpas_timer, only : mpas_timer_start, mpas_timer_stop
! Parameters
#ifdef MPAS_USE_MPI_F08
@@ -527,6 +567,7 @@ subroutine mpas_halo_exch_group_full_halo_exch(domain, groupName, iErr)
! Arguments
type (domain_type), intent(inout) :: domain
character (len=*), intent(in) :: groupName
+ logical, optional, intent(in) :: withGPUAwareMPI
integer, optional, intent(out) :: iErr
! Local variables
@@ -542,6 +583,7 @@ subroutine mpas_halo_exch_group_full_halo_exch(domain, groupName, iErr)
integer :: comm
#endif
integer :: mpi_ierr
+ logical:: useGPUAwareMPI
type (mpas_halo_group), pointer :: group
integer, dimension(:), pointer :: compactHaloInfo
integer, dimension(:), pointer :: compactSendLists
@@ -554,12 +596,17 @@ subroutine mpas_halo_exch_group_full_halo_exch(domain, groupName, iErr)
integer :: maxNRecvList
integer, dimension(:,:,:), CONTIGUOUS pointer :: recvListSrc, recvListDst
integer, dimension(:), CONTIGUOUS pointer :: unpackOffsets
-
+
if (present(iErr)) then
iErr = 0
end if
+ useGPUAwareMPI = .false.
+ if (present(withGPUAwareMPI)) then
+ useGPUAwareMPI = withGPUAwareMPI
+ end if
+
!
! Find this halo exhange group in the list of groups
!
@@ -577,6 +624,12 @@ subroutine mpas_halo_exch_group_full_halo_exch(domain, groupName, iErr)
messageType=MPAS_LOG_CRIT)
end if
+ ! Logic to return early if there are no neighbors to send to
+ if ( group% nGroupSendNeighbors <= 0 ) then
+ return
+ end if
+
+ call mpas_timer_start('full_halo_exch')
!
! Get the rank of this task and the MPI communicator to use from the first field in
! the group; all fields should be using the same communicator, so this should not
@@ -589,6 +642,7 @@ subroutine mpas_halo_exch_group_full_halo_exch(domain, groupName, iErr)
#endif
rank = group % fields(1) % compactHaloInfo(8)
+ !$acc data present(group % recvBuf(:), group % sendBuf(:)) if(useGPUAwareMPI)
!
! Initiate non-blocking MPI receives for all neighbors
@@ -598,9 +652,11 @@ subroutine mpas_halo_exch_group_full_halo_exch(domain, groupName, iErr)
bufstart = group % groupRecvOffsets(i)
bufend = group % groupRecvOffsets(i) + group % groupRecvCounts(i) - 1
!TO DO: how do we determine appropriate type here?
+ !$acc host_data use_device(group % recvBuf) if(useGPUAwareMPI)
call MPI_Irecv(group % recvBuf(bufstart:bufend), group % groupRecvCounts(i), MPI_REALKIND, &
group % groupRecvNeighbors(i), group % groupRecvNeighbors(i), comm, &
group % recvRequests(i), mpi_ierr)
+ !$acc end host_data
else
group % recvRequests(i) = MPI_REQUEST_NULL
end if
@@ -638,12 +694,16 @@ subroutine mpas_halo_exch_group_full_halo_exch(domain, groupName, iErr)
case (1)
call mpas_pool_get_array(domain % blocklist % allFields, trim(group % fields(i) % fieldName), &
group % fields(i) % r1arr, timeLevel=group % fields(i) % timeLevel)
-
+
!
! Pack send buffer for all neighbors for current field
!
+ call mpas_timer_start('packing_halo_exch')
+ !$acc parallel default(present) attach(group % fields(i) % r1arr) if(useGPUAwareMPI)
+ !$acc loop gang collapse(2)
do iEndp = 1, nSendEndpts
do iHalo = 1, nHalos
+ !$acc loop vector
do j = 1, maxNSendList
if (j <= nSendLists(iHalo,iEndp)) then
group % sendBuf(packOffsets(iEndp) + sendListDst(j,iHalo,iEndp)) = &
@@ -652,20 +712,31 @@ subroutine mpas_halo_exch_group_full_halo_exch(domain, groupName, iErr)
end do
end do
end do
-
+ !$acc end parallel
+ call mpas_timer_stop('packing_halo_exch')
+
!
! Packing code for 2-d real-valued fields
!
case (2)
call mpas_pool_get_array(domain % blocklist % allFields, trim(group % fields(i) % fieldName), &
group % fields(i) % r2arr, timeLevel=group % fields(i) % timeLevel)
-
+
!
! Pack send buffer for all neighbors for current field
- !
+ !
+ ! Use data regions for specificity and so the reference or attachment counters are easier to make sense of
+ ! Present should also cause an attach action. OpenACC Spec2.7 Section 2.7.2 describes 'attach action'
+ ! !$acc data present(group) present(group % fields(i)) present(group % sendBuf(:), group % fields(i) % sendListSrc(:,:,:))
+
+
+ call mpas_timer_start('packing_halo_exch')
+ !$acc parallel default(present) attach(group % fields(i) % r2arr) if(useGPUAwareMPI)
+ !$acc loop gang collapse(3)
do iEndp = 1, nSendEndpts
do iHalo = 1, nHalos
do j = 1, maxNSendList
+ !$acc loop vector
do i1 = 1, dim1
if (j <= nSendLists(iHalo,iEndp)) then
group % sendBuf(packOffsets(iEndp) + dim1 * (sendListDst(j,iHalo,iEndp) - 1) + i1) = &
@@ -675,21 +746,27 @@ subroutine mpas_halo_exch_group_full_halo_exch(domain, groupName, iErr)
end do
end do
end do
-
+ !$acc end parallel
+ call mpas_timer_stop('packing_halo_exch')
+
!
! Packing code for 3-d real-valued fields
!
case (3)
call mpas_pool_get_array(domain % blocklist % allFields, trim(group % fields(i) % fieldName), &
- group % fields(i) % r3arr, group % fields(i) % timeLevel)
-
+ group % fields(i) % r3arr, group % fields(i) % timeLevel)
+
!
! Pack send buffer for all neighbors for current field
!
+ call mpas_timer_start('packing_halo_exch')
+ !$acc parallel default(present) attach(group % fields(i) % r3arr) if(useGPUAwareMPI)
+ !$acc loop gang collapse(4)
do iEndp = 1, nSendEndpts
do iHalo = 1, nHalos
do j = 1, maxNSendList
do i2 = 1, dim2
+ !$acc loop vector
do i1 = 1, dim1
if (j <= nSendLists(iHalo,iEndp)) then
group % sendBuf(packOffsets(iEndp) + dim1*dim2*(sendListDst(j,iHalo,iEndp) - 1) &
@@ -701,6 +778,8 @@ subroutine mpas_halo_exch_group_full_halo_exch(domain, groupName, iErr)
end do
end do
end do
+ !$acc end parallel
+ call mpas_timer_stop('packing_halo_exch')
end select
end if
@@ -714,9 +793,11 @@ subroutine mpas_halo_exch_group_full_halo_exch(domain, groupName, iErr)
bufstart = group % groupSendOffsets(i)
bufend = group % groupSendOffsets(i) + group % groupSendCounts(i) - 1
!TO DO: how do we determine appropriate type here?
+ !$acc host_data use_device(group % sendBuf) if(useGPUAwareMPI)
call MPI_Isend(group % sendBuf(bufstart:bufend), group % groupSendCounts(i), MPI_REALKIND, &
group % groupSendNeighbors(i), rank, comm, &
group % sendRequests(i), mpi_ierr)
+ !$acc end host_data
else
group % sendRequests(i) = MPI_REQUEST_NULL
end if
@@ -771,7 +852,11 @@ subroutine mpas_halo_exch_group_full_halo_exch(domain, groupName, iErr)
!
! Unpack recv buffer from all neighbors for current field
!
+ call mpas_timer_start('unpacking_halo_exch')
+ !$acc parallel default(present) attach(group % fields(i) % r1arr) if(useGPUAwareMPI)
+ !$acc loop gang
do iHalo = 1, nHalos
+ !$acc loop vector
do j = 1, maxNRecvList
if (j <= nRecvLists(iHalo,iEndp)) then
group % fields(i) % r1arr(recvListDst(j,iHalo,iEndp)) = &
@@ -779,6 +864,8 @@ subroutine mpas_halo_exch_group_full_halo_exch(domain, groupName, iErr)
end if
end do
end do
+ !$acc end parallel
+ call mpas_timer_stop('unpacking_halo_exch')
!
! Unpacking code for 2-d real-valued fields
@@ -787,8 +874,13 @@ subroutine mpas_halo_exch_group_full_halo_exch(domain, groupName, iErr)
!
! Unpack recv buffer from all neighbors for current field
!
+ call mpas_timer_start('unpacking_halo_exch')
+ !$acc parallel default(present) attach(group % fields(i) % r2arr) if(useGPUAwareMPI)
+ !$acc loop gang
do iHalo = 1, nHalos
+ !$acc loop worker
do j = 1, maxNRecvList
+ !$acc loop vector
do i1 = 1, dim1
if (j <= nRecvLists(iHalo,iEndp)) then
group % fields(i) % r2arr(i1, recvListDst(j,iHalo,iEndp)) = &
@@ -797,6 +889,8 @@ subroutine mpas_halo_exch_group_full_halo_exch(domain, groupName, iErr)
end do
end do
end do
+ !$acc end parallel
+ call mpas_timer_stop('unpacking_halo_exch')
!
! Unpacking code for 3-d real-valued fields
@@ -805,8 +899,12 @@ subroutine mpas_halo_exch_group_full_halo_exch(domain, groupName, iErr)
!
! Unpack recv buffer from all neighbors for current field
!
+ call mpas_timer_start('unpacking_halo_exch')
+ !$acc parallel default(present) attach(group % fields(i) % r3arr) if(useGPUAwareMPI)
+ !$acc loop gang collapse(2)
do iHalo = 1, nHalos
do j = 1, maxNRecvList
+ !$acc loop vector collapse(2)
do i2 = 1, dim2
do i1 = 1, dim1
if (j <= nRecvLists(iHalo,iEndp)) then
@@ -818,12 +916,16 @@ subroutine mpas_halo_exch_group_full_halo_exch(domain, groupName, iErr)
end do
end do
end do
+ !$acc end parallel
+ call mpas_timer_stop('unpacking_halo_exch')
end select
end if
end do
end do
-
+ ! For the present(group % recvBuf(:), group % sendBuf(:))
+ !$acc end data
+
!
! Nullify array pointers - not necessary for correctness, but helpful when debugging
! to not leave pointers to what might later be incorrect targets
@@ -843,6 +945,8 @@ subroutine mpas_halo_exch_group_full_halo_exch(domain, groupName, iErr)
!
call MPI_Waitall(group % nGroupSendNeighbors, group % sendRequests, MPI_STATUSES_IGNORE, mpi_ierr)
+ call mpas_timer_stop('full_halo_exch')
+
end subroutine mpas_halo_exch_group_full_halo_exch
diff --git a/src/operators/mpas_vector_reconstruction.F b/src/operators/mpas_vector_reconstruction.F
index 605da9cd6d..88d87474ab 100644
--- a/src/operators/mpas_vector_reconstruction.F
+++ b/src/operators/mpas_vector_reconstruction.F
@@ -202,7 +202,8 @@ end subroutine mpas_init_reconstruct!}}}
!> Input: grid meta data and vector component data residing at cell edges
!> Output: reconstructed vector field (measured in X,Y,Z) located at cell centers
!-----------------------------------------------------------------------
- subroutine mpas_reconstruct_2d(meshPool, u, uReconstructX, uReconstructY, uReconstructZ, uReconstructZonal, uReconstructMeridional, includeHalos)!{{{
+ subroutine mpas_reconstruct_2d(meshPool, u, uReconstructX, uReconstructY, uReconstructZ, &
+ uReconstructZonal, uReconstructMeridional, includeHalos, lACC)!{{{
implicit none
@@ -214,9 +215,11 @@ subroutine mpas_reconstruct_2d(meshPool, u, uReconstructX, uReconstructY, uRecon
real (kind=RKIND), dimension(:,:), intent(out) :: uReconstructZonal !< Output: Zonal Component of velocity reconstructed to cell centers
real (kind=RKIND), dimension(:,:), intent(out) :: uReconstructMeridional !< Output: Meridional Component of velocity reconstructed to cell centers
logical, optional, intent(in) :: includeHalos !< Input: Optional logical that allows reconstruction over halo regions
+ logical, optional, intent(in) :: lACC !< Input: Optional logical that controls execution on the GPU with OpenACC
! temporary arrays needed in the compute procedure
logical :: includeHalosLocal
+ logical :: lACCLocal
integer, pointer :: nCells_ptr, nVertLevels_ptr
integer :: nCells, nVertLevels
integer, dimension(:,:), pointer :: edgesOnCell
@@ -236,6 +239,12 @@ subroutine mpas_reconstruct_2d(meshPool, u, uReconstructX, uReconstructY, uRecon
includeHalosLocal = .false.
end if
+ if ( present(lACC) ) then
+ lACCLocal = lACC
+ else
+ lACCLocal = .false.
+ end if
+
! stored arrays used during compute procedure
call mpas_pool_get_array(meshPool, 'coeffs_reconstruct', coeffs_reconstruct)
@@ -258,19 +267,9 @@ subroutine mpas_reconstruct_2d(meshPool, u, uReconstructX, uReconstructY, uRecon
call mpas_pool_get_config(meshPool, 'on_a_sphere', on_a_sphere)
- MPAS_ACC_TIMER_START('mpas_reconstruct_2d [ACC_data_xfer]')
- ! Only use sections needed, nCells may be all cells or only non-halo cells
- !$acc enter data copyin(coeffs_reconstruct(:,:,1:nCells),nEdgesOnCell(1:nCells), &
- !$acc edgesOnCell(:,1:nCells),latCell(1:nCells),lonCell(1:nCells))
- !$acc enter data copyin(u(:,:))
- !$acc enter data create(uReconstructX(:,1:nCells),uReconstructY(:,1:nCells), &
- !$acc uReconstructZ(:,1:nCells),uReconstructZonal(:,1:nCells), &
- !$acc uReconstructMeridional(:,1:nCells))
- MPAS_ACC_TIMER_STOP('mpas_reconstruct_2d [ACC_data_xfer]')
-
! loop over cell centers
!$omp do schedule(runtime)
- !$acc parallel default(present)
+ !$acc parallel default(present) if(lACCLocal)
!$acc loop gang
do iCell = 1, nCells
! initialize the reconstructed vectors
@@ -305,7 +304,7 @@ subroutine mpas_reconstruct_2d(meshPool, u, uReconstructX, uReconstructY, uRecon
if (on_a_sphere) then
!$omp do schedule(runtime)
- !$acc parallel default(present)
+ !$acc parallel default(present) if(lACCLocal)
!$acc loop gang
do iCell = 1, nCells
clat = cos(latCell(iCell))
@@ -325,7 +324,7 @@ subroutine mpas_reconstruct_2d(meshPool, u, uReconstructX, uReconstructY, uRecon
!$omp end do
else
!$omp do schedule(runtime)
- !$acc parallel default(present)
+ !$acc parallel default(present) if(lACCLocal)
!$acc loop gang vector collapse(2)
do iCell = 1, nCells
do k = 1, nVertLevels
@@ -337,6 +336,109 @@ subroutine mpas_reconstruct_2d(meshPool, u, uReconstructX, uReconstructY, uRecon
!$omp end do
end if
+ end subroutine mpas_reconstruct_2d!}}}
+
+
+ subroutine mpas_reconstruct_2d_h2d(meshPool, u, uReconstructX, uReconstructY, uReconstructZ, uReconstructZonal, uReconstructMeridional, includeHalos)!{{{
+
+ implicit none
+
+ type (mpas_pool_type), intent(in) :: meshPool !< Input: Mesh information
+ real (kind=RKIND), dimension(:,:), intent(in) :: u !< Input: Velocity field on edges
+ real (kind=RKIND), dimension(:,:), intent(out) :: uReconstructX !< Output: X Component of velocity reconstructed to cell centers
+ real (kind=RKIND), dimension(:,:), intent(out) :: uReconstructY !< Output: Y Component of velocity reconstructed to cell centers
+ real (kind=RKIND), dimension(:,:), intent(out) :: uReconstructZ !< Output: Z Component of velocity reconstructed to cell centers
+ real (kind=RKIND), dimension(:,:), intent(out) :: uReconstructZonal !< Output: Zonal Component of velocity reconstructed to cell centers
+ real (kind=RKIND), dimension(:,:), intent(out) :: uReconstructMeridional !< Output: Meridional Component of velocity reconstructed to cell centers
+ logical, optional, intent(in) :: includeHalos !< Input: Optional logical that allows reconstruction over halo regions
+
+ logical :: includeHalosLocal
+ integer, dimension(:,:), pointer :: edgesOnCell
+ integer, dimension(:), pointer :: nEdgesOnCell
+ integer :: nCells
+ integer, pointer :: nCells_ptr
+ real(kind=RKIND), dimension(:), pointer :: latCell, lonCell
+ real (kind=RKIND), dimension(:,:,:), pointer :: coeffs_reconstruct
+
+ if ( present(includeHalos) ) then
+ includeHalosLocal = includeHalos
+ else
+ includeHalosLocal = .false.
+ end if
+
+ ! stored arrays used during compute procedure
+ call mpas_pool_get_array(meshPool, 'coeffs_reconstruct', coeffs_reconstruct)
+
+ ! temporary variables
+ call mpas_pool_get_array(meshPool, 'nEdgesOnCell', nEdgesOnCell)
+ call mpas_pool_get_array(meshPool, 'edgesOnCell', edgesOnCell)
+ call mpas_pool_get_array(meshPool, 'latCell', latCell)
+ call mpas_pool_get_array(meshPool, 'lonCell', lonCell)
+
+ if ( includeHalosLocal ) then
+ call mpas_pool_get_dimension(meshPool, 'nCells', nCells_ptr)
+ else
+ call mpas_pool_get_dimension(meshPool, 'nCellsSolve', nCells_ptr)
+ end if
+ nCells = nCells_ptr
+
+ MPAS_ACC_TIMER_START('mpas_reconstruct_2d [ACC_data_xfer]')
+ ! Only use sections needed, nCells may be all cells or only non-halo cells
+ !$acc enter data copyin(coeffs_reconstruct(:,:,1:nCells),nEdgesOnCell(1:nCells), &
+ !$acc edgesOnCell(:,1:nCells),latCell(1:nCells),lonCell(1:nCells))
+ !$acc enter data copyin(u(:,:))
+ !$acc enter data create(uReconstructX(:,1:nCells),uReconstructY(:,1:nCells), &
+ !$acc uReconstructZ(:,1:nCells),uReconstructZonal(:,1:nCells), &
+ !$acc uReconstructMeridional(:,1:nCells))
+ MPAS_ACC_TIMER_STOP('mpas_reconstruct_2d [ACC_data_xfer]')
+
+ end subroutine mpas_reconstruct_2d_h2d
+
+
+
+ subroutine mpas_reconstruct_2d_d2h(meshPool, u, uReconstructX, uReconstructY, uReconstructZ, uReconstructZonal, uReconstructMeridional, includeHalos)!{{{
+
+ implicit none
+
+ type (mpas_pool_type), intent(in) :: meshPool !< Input: Mesh information
+ real (kind=RKIND), dimension(:,:), intent(in) :: u !< Input: Velocity field on edges
+ real (kind=RKIND), dimension(:,:), intent(out) :: uReconstructX !< Output: X Component of velocity reconstructed to cell centers
+ real (kind=RKIND), dimension(:,:), intent(out) :: uReconstructY !< Output: Y Component of velocity reconstructed to cell centers
+ real (kind=RKIND), dimension(:,:), intent(out) :: uReconstructZ !< Output: Z Component of velocity reconstructed to cell centers
+ real (kind=RKIND), dimension(:,:), intent(out) :: uReconstructZonal !< Output: Zonal Component of velocity reconstructed to cell centers
+ real (kind=RKIND), dimension(:,:), intent(out) :: uReconstructMeridional !< Output: Meridional Component of velocity reconstructed to cell centers
+ logical, optional, intent(in) :: includeHalos !< Input: Optional logical that allows reconstruction over halo regions
+
+ logical :: includeHalosLocal
+ integer, dimension(:,:), pointer :: edgesOnCell
+ integer, dimension(:), pointer :: nEdgesOnCell
+ integer :: nCells
+ integer, pointer :: nCells_ptr
+ real(kind=RKIND), dimension(:), pointer :: latCell, lonCell
+ real (kind=RKIND), dimension(:,:,:), pointer :: coeffs_reconstruct
+
+ if ( present(includeHalos) ) then
+ includeHalosLocal = includeHalos
+ else
+ includeHalosLocal = .false.
+ end if
+
+ ! stored arrays used during compute procedure
+ call mpas_pool_get_array(meshPool, 'coeffs_reconstruct', coeffs_reconstruct)
+
+ ! temporary variables
+ call mpas_pool_get_array(meshPool, 'nEdgesOnCell', nEdgesOnCell)
+ call mpas_pool_get_array(meshPool, 'edgesOnCell', edgesOnCell)
+ call mpas_pool_get_array(meshPool, 'latCell', latCell)
+ call mpas_pool_get_array(meshPool, 'lonCell', lonCell)
+
+ if ( includeHalosLocal ) then
+ call mpas_pool_get_dimension(meshPool, 'nCells', nCells_ptr)
+ else
+ call mpas_pool_get_dimension(meshPool, 'nCellsSolve', nCells_ptr)
+ end if
+ nCells = nCells_ptr
+
MPAS_ACC_TIMER_START('mpas_reconstruct_2d [ACC_data_xfer]')
!$acc exit data delete(coeffs_reconstruct(:,:,1:nCells),nEdgesOnCell(1:nCells), &
!$acc edgesOnCell(:,1:nCells),latCell(1:nCells),lonCell(1:nCells))
@@ -346,7 +448,7 @@ subroutine mpas_reconstruct_2d(meshPool, u, uReconstructX, uReconstructY, uRecon
!$acc uReconstructMeridional(:,1:nCells))
MPAS_ACC_TIMER_STOP('mpas_reconstruct_2d [ACC_data_xfer]')
- end subroutine mpas_reconstruct_2d!}}}
+ end subroutine mpas_reconstruct_2d_d2h
!***********************************************************************