diff --git a/src/core_atmosphere/Registry.xml b/src/core_atmosphere/Registry.xml index 4281c40bba..9bfc99f5c8 100644 --- a/src/core_atmosphere/Registry.xml +++ b/src/core_atmosphere/Registry.xml @@ -146,6 +146,31 @@ description="Formulation of horizontal mixing" possible_values="`2d_fixed' or `2d_smagorinsky'"/> + + + + + + + + + + + + + @@ -494,6 +528,11 @@ + + + + + #ifdef MPAS_CAM_DYCORE @@ -1561,6 +1600,20 @@ #endif + + + + + + + + + + + #endif @@ -1921,8 +1977,20 @@ - + + + + + + + + + @@ -2024,6 +2092,9 @@ + + #endif @@ -2102,6 +2173,10 @@ + + diff --git a/src/core_atmosphere/dynamics/Makefile b/src/core_atmosphere/dynamics/Makefile index 6892633c68..1bf21fff9d 100644 --- a/src/core_atmosphere/dynamics/Makefile +++ b/src/core_atmosphere/dynamics/Makefile @@ -1,11 +1,12 @@ .SUFFIXES: .F .o OBJS = mpas_atm_time_integration.o \ - mpas_atm_boundaries.o + mpas_atm_boundaries.o \ + mpas_atm_dissipation_models.o all: $(OBJS) -mpas_atm_time_integration.o: mpas_atm_boundaries.o mpas_atm_iau.o +mpas_atm_time_integration.o: mpas_atm_boundaries.o mpas_atm_iau.o mpas_atm_dissipation_models.o mpas_atm_boundaries.o: diff --git a/src/core_atmosphere/dynamics/mpas_atm_boundaries.F b/src/core_atmosphere/dynamics/mpas_atm_boundaries.F index 787e7719a1..6c19ed7931 100644 --- a/src/core_atmosphere/dynamics/mpas_atm_boundaries.F +++ b/src/core_atmosphere/dynamics/mpas_atm_boundaries.F @@ -395,18 +395,14 @@ subroutine mpas_atm_get_bdy_tend(clock, block, vertDim, horizDim, field, delta_t nullify(tend) call mpas_pool_get_array(lbc, 'lbc_'//trim(field), tend, 1) - MPAS_ACC_TIMER_START('mpas_atm_get_bdy_tend [ACC_data_xfer]') if (associated(tend)) then - !$acc enter data copyin(tend) else call mpas_pool_get_array(lbc, 'lbc_scalars', tend_scalars, 1) - !$acc enter data copyin(tend_scalars) ! Ensure the integer pointed to by idx_ptr is copied to the gpu device call mpas_pool_get_dimension(lbc, 'index_'//trim(field), idx_ptr) idx = idx_ptr end if - MPAS_ACC_TIMER_STOP('mpas_atm_get_bdy_tend [ACC_data_xfer]') !$acc parallel default(present) if (associated(tend)) then @@ -426,13 +422,6 @@ subroutine mpas_atm_get_bdy_tend(clock, block, vertDim, horizDim, field, delta_t end if !$acc end parallel - MPAS_ACC_TIMER_START('mpas_atm_get_bdy_tend [ACC_data_xfer]') - if (associated(tend)) then - !$acc exit data delete(tend) - else - !$acc exit data delete(tend_scalars) - end if - MPAS_ACC_TIMER_STOP('mpas_atm_get_bdy_tend [ACC_data_xfer]') end subroutine mpas_atm_get_bdy_tend @@ -533,9 +522,6 @@ subroutine mpas_atm_get_bdy_state_2d(clock, block, vertDim, horizDim, field, del ! query the field as a scalar constituent ! if (associated(tend) .and. associated(state)) then - MPAS_ACC_TIMER_START('mpas_atm_get_bdy_state_2d [ACC_data_xfer]') - !$acc enter data copyin(tend, state) - MPAS_ACC_TIMER_STOP('mpas_atm_get_bdy_state_2d [ACC_data_xfer]') !$acc parallel default(present) !$acc loop gang vector collapse(2) @@ -546,9 +532,6 @@ subroutine mpas_atm_get_bdy_state_2d(clock, block, vertDim, horizDim, field, del end do !$acc end parallel - MPAS_ACC_TIMER_START('mpas_atm_get_bdy_state_2d [ACC_data_xfer]') - !$acc exit data delete(tend, state) - MPAS_ACC_TIMER_STOP('mpas_atm_get_bdy_state_2d [ACC_data_xfer]') else call mpas_pool_get_array(lbc, 'lbc_scalars', tend_scalars, 1) call mpas_pool_get_array(lbc, 'lbc_scalars', state_scalars, 2) @@ -556,10 +539,6 @@ subroutine mpas_atm_get_bdy_state_2d(clock, block, vertDim, horizDim, field, del idx=idx_ptr ! Avoid non-array pointer for OpenACC - MPAS_ACC_TIMER_START('mpas_atm_get_bdy_state_2d [ACC_data_xfer]') - !$acc enter data copyin(tend_scalars, state_scalars) - MPAS_ACC_TIMER_STOP('mpas_atm_get_bdy_state_2d [ACC_data_xfer]') - !$acc parallel default(present) !$acc loop gang vector collapse(2) do i=1, horizDim+1 @@ -569,9 +548,6 @@ subroutine mpas_atm_get_bdy_state_2d(clock, block, vertDim, horizDim, field, del end do !$acc end parallel - MPAS_ACC_TIMER_START('mpas_atm_get_bdy_state_2d [ACC_data_xfer]') - !$acc exit data delete(tend_scalars, state_scalars) - MPAS_ACC_TIMER_STOP('mpas_atm_get_bdy_state_2d [ACC_data_xfer]') end if end subroutine mpas_atm_get_bdy_state_2d @@ -652,10 +628,6 @@ subroutine mpas_atm_get_bdy_state_3d(clock, block, innerDim, vertDim, horizDim, call mpas_pool_get_array(lbc, 'lbc_'//trim(field), tend, 1) call mpas_pool_get_array(lbc, 'lbc_'//trim(field), state, 2) - MPAS_ACC_TIMER_START('mpas_atm_get_bdy_state_3d [ACC_data_xfer]') - !$acc enter data copyin(tend, state) - MPAS_ACC_TIMER_STOP('mpas_atm_get_bdy_state_3d [ACC_data_xfer]') - !$acc parallel default(present) !$acc loop gang vector collapse(3) do i=1, horizDim+1 @@ -667,10 +639,6 @@ subroutine mpas_atm_get_bdy_state_3d(clock, block, innerDim, vertDim, horizDim, end do !$acc end parallel - MPAS_ACC_TIMER_START('mpas_atm_get_bdy_state_3d [ACC_data_xfer]') - !$acc exit data delete(tend, state) - MPAS_ACC_TIMER_STOP('mpas_atm_get_bdy_state_3d [ACC_data_xfer]') - end subroutine mpas_atm_get_bdy_state_3d diff --git a/src/core_atmosphere/dynamics/mpas_atm_dissipation_models.F b/src/core_atmosphere/dynamics/mpas_atm_dissipation_models.F new file mode 100644 index 0000000000..450ed045c1 --- /dev/null +++ b/src/core_atmosphere/dynamics/mpas_atm_dissipation_models.F @@ -0,0 +1,1622 @@ +! Copyright (c) 2026, The University Corporation for Atmospheric Research (UCAR). +! +! Unless noted otherwise source code is licensed under the BSD license. +! Additional copyright and license information can be found in the LICENSE file +! distributed with this code, or at http://mpas-dev.github.com/license.html +! + +#define COMMA , +#define DEBUG_WRITE(M) ! call mpas_log_write(M) + +module mpas_atm_dissipation_models + + use mpas_kind_types, only : RKIND + use mpas_atmphys_constants + use mpas_constants + use mpas_log + use mpas_timekeeping, only : mpas_get_clock_time, mpas_get_time + use mpas_derived_types, only : MPAS_Clock_type, MPAS_Time_type, MPAS_NOW, MPAS_LOG_CRIT + + logical, parameter :: les_test = .true., les_sas_test = .false. + !! real (kind=RKIND), parameter :: tke_heat_flux = 0.03 ! shear case from Moeng et al., first hour + ! real (kind=RKIND), parameter :: tke_heat_flux = 0.03 + ! real (kind=RKIND), parameter :: tke_heat_flux = 0.0 + !! real (kind=RKIND), parameter :: tke_drag_coefficient = 0.0013 ! ocean roughness length + ! real (kind=RKIND), parameter :: tke_drag_coefficient = 0.006 + ! real (kind=RKIND), parameter :: tke_drag_coefficient = 0.0 + real (kind=RKIND), parameter :: epsilon_bv = 1.e-06 + ! real (kind=RKIND), parameter :: c_k = 0.1 + real (kind=RKIND), parameter :: c_k = 0.25 + + integer, parameter :: LES_INVALID_OPT = -1 + + integer, parameter :: LES_MODEL_NONE = 0, & + LES_MODEL_3D_SMAGORINSKY = 1, & + LES_MODEL_PROGNOSTIC_15_ORDER = 2 + + integer, parameter :: LES_SURFACE_NONE = 0, & + LES_SURFACE_SPECIFIED = 1, & + LES_SURFACE_VARYING = 2 + +contains + + + !----------------------------------------------------------------------- + ! routine les_model_from_string + ! + !> \brief Converts an LES model option from a string to an integer parameter + !> \author Michael Duda + !> \date 13 February 2026 + !> \details + !> Given a string that contains the name of a valid LES model option, this + !> routine returns an integer parameter corresponding to that option. + !> + !> If the given string is not recognized as a valid LES model option, the + !> integer parameter LES_INVALID_OPT is returned. + ! + !----------------------------------------------------------------------- + pure function les_model_from_string(les_model_str) result(les_model_opt) + + implicit none + + ! Arguments + character(len=*), intent(in) :: les_model_str + + ! Return value + integer :: les_model_opt + + + if (trim(les_model_str) == 'none') then + les_model_opt = LES_MODEL_NONE + else if (trim(les_model_str) == '3d_smagorinsky') then + les_model_opt = LES_MODEL_3D_SMAGORINSKY + else if (trim(les_model_str) == 'prognostic_1.5_order') then + les_model_opt = LES_MODEL_PROGNOSTIC_15_ORDER + else + les_model_opt = LES_INVALID_OPT + end if + + end function les_model_from_string + + + !----------------------------------------------------------------------- + ! routine les_surface_from_string + ! + !> \brief Converts an LES surface option from a string to an integer parameter + !> \author Michael Duda + !> \date 13 February 2026 + !> \details + !> Given a string that contains the name of a valid LES surface option, this + !> routine returns an integer parameter corresponding to that option. + !> + !> If the given string is not recognized as a valid LES surface option, the + !> integer parameter LES_INVALID_OPT is returned. + ! + !----------------------------------------------------------------------- + pure function les_surface_from_string(les_surface_str) result(les_surface_opt) + + implicit none + + ! Arguments + character(len=*), intent(in) :: les_surface_str + + ! Return value + integer :: les_surface_opt + + + if (trim(les_surface_str) == 'none') then + les_surface_opt = LES_SURFACE_NONE + else if (trim(les_surface_str) == 'specified') then + les_surface_opt = LES_SURFACE_SPECIFIED + else if (trim(les_surface_str) == 'varying') then + les_surface_opt = LES_SURFACE_VARYING + else + les_surface_opt = LES_INVALID_OPT + end if + + end function les_surface_from_string + + + subroutine smagorinsky_2d( kdiff, u, v, c_s, config_len_disp, & + deformation_coef_c2, deformation_coef_s2, deformation_coef_cs, & + invDt, h_mom_eddy_visc4, config_visc4_2dsmag, h_theta_eddy_visc4, & + cellStart, cellEnd, nEdgesOnCell, edgesOnCell, & + nCells, nEdges ) + + use mpas_atm_dimensions ! pull nVertLevels and maxEdges from here + + implicit none + + integer, intent(in) :: cellStart, cellEnd, nCells, nEdges + real (kind=RKIND), dimension(nVertLevels,nEdges+1), intent(in) :: u + real (kind=RKIND), dimension(nVertLevels,nEdges+1), intent(in) :: v + real (kind=RKIND), dimension(maxEdges,nCells+1), intent(in) :: deformation_coef_c2 + real (kind=RKIND), dimension(maxEdges,nCells+1), intent(in) :: deformation_coef_s2 + real (kind=RKIND), dimension(maxEdges,nCells+1), intent(in) :: deformation_coef_cs + real (kind=RKIND), intent(in) :: c_s, config_len_disp, invDt, config_visc4_2dsmag + integer, dimension(nCells+1), intent(in) :: nEdgesOnCell + integer, dimension(maxEdges,nCells+1), intent(in) :: edgesOnCell + + real (kind=RKIND), dimension(nVertLevels,nCells+1), intent(out) :: kdiff + real (kind=RKIND), intent(out) :: h_mom_eddy_visc4, h_theta_eddy_visc4 + + ! local variables + + integer :: iCell, iEdge, k + real (kind=RKIND), dimension(nVertLevels) :: d_11, d_22, d_12, dudx, dudy, dvdx, dvdy + + + DEBUG_WRITE(' begin smagorinsky_2d ') + + !$acc enter data create(dudx, dudy, dvdx, dvdy) + !$acc enter data create(d_11, d_22, d_12) + + !$acc parallel default(present) + + !$acc loop gang worker private(dudx, dudy, dvdx, dvdy, d_11, d_22, d_12) + do iCell = cellStart,cellEnd + + !$acc loop vector + do k = 1, nVertLevels + dudx(k) = 0.0_RKIND + dudy(k) = 0.0_RKIND + dvdx(k) = 0.0_RKIND + dvdy(k) = 0.0_RKIND + end do + + !$acc loop seq + do iEdge=1,nEdgesOnCell(iCell) + !$acc loop vector + do k=1,nVertLevels + dudx(k) = dudx(k) + deformation_coef_c2(iEdge,iCell)*u(k,EdgesOnCell(iEdge,iCell)) & + - deformation_coef_cs(iEdge,iCell)*v(k,EdgesOnCell(iEdge,iCell)) + dudy(k) = dudy(k) + deformation_coef_cs(iEdge,iCell)*u(k,EdgesOnCell(iEdge,iCell)) & + - deformation_coef_s2(iEdge,iCell)*v(k,EdgesOnCell(iEdge,iCell)) + dvdx(k) = dvdx(k) + deformation_coef_cs(iEdge,iCell)*u(k,EdgesOnCell(iEdge,iCell)) & + + deformation_coef_c2(iEdge,iCell)*v(k,EdgesOnCell(iEdge,iCell)) + dvdy(k) = dvdy(k) + deformation_coef_s2(iEdge,iCell)*u(k,EdgesOnCell(iEdge,iCell)) & + + deformation_coef_cs(iEdge,iCell)*v(k,EdgesOnCell(iEdge,iCell)) + end do + end do + +!DIR$ IVDEP + !$acc loop vector + do k=1, nVertLevels + ! here is the Smagorinsky formulation, + ! followed by imposition of an upper bound on the eddy viscosity + d_11(k) = 2*dudx(k) + d_22(k) = 2*dvdy(k) + d_12(k) = dudy(k) + dvdx(k) + kdiff(k,iCell) = (c_s * config_len_disp)**2 * sqrt(0.25*(d_11(k)-d_22(k))**2 + d_12(k)**2) + kdiff(k,iCell) = min(kdiff(k,iCell),(0.01*config_len_disp**2) * invDt) + end do + end do + + !$acc end parallel + + !$acc exit data delete(dudx, dudy, dvdx, dvdy) + !$acc exit data delete(d_11, d_22, d_12) + + h_mom_eddy_visc4 = config_visc4_2dsmag * config_len_disp**3 + h_theta_eddy_visc4 = h_mom_eddy_visc4 + + DEBUG_WRITE(' exiting smagorinsky_2d ') + + end subroutine smagorinsky_2d + +!--------------------------------------- + + subroutine les_models( les_model_opt, les_surface_opt, dynamics_substep, eddy_visc_horz, eddy_visc_vert, & + u, v, uCell, vCell, & + w, c_s, bv_freq2, zgrid, config_len_disp, & + deformation_coef_c2, deformation_coef_s2, deformation_coef_cs, & + deformation_coef_c, deformation_coef_s, prandtl_3d_inv, & + invDt, h_mom_eddy_visc4, config_visc4_2dsmag, h_theta_eddy_visc4, & + scalars, tend_scalars, index_tke, rho_zz, meshScalingDel2, & + cellStart, cellEnd, nEdgesOnCell, edgesOnCell, cellsOnEdge, & + nCells, nEdges, nVertLevels, maxEdges, num_scalars ) + + implicit none + + integer, intent(in) :: les_model_opt + integer, intent(in) :: les_surface_opt + + integer, intent(in) :: cellStart, cellEnd, nCells, nEdges, nVertLevels, maxEdges, index_tke, num_scalars + integer, intent(in) :: dynamics_substep + real (kind=RKIND), dimension(nVertLevels,nEdges+1), intent(in) :: u + real (kind=RKIND), dimension(nVertLevels,nEdges+1), intent(in) :: v + real (kind=RKIND), dimension(nVertLevels,nCells+1), intent(in) :: uCell + real (kind=RKIND), dimension(nVertLevels,nCells+1), intent(in) :: vCell + real (kind=RKIND), dimension(nVertLevels,nCells+1), intent(in) :: bv_freq2 + real (kind=RKIND), dimension(nVertLevels,nCells+1), intent(out) :: prandtl_3d_inv + real (kind=RKIND), dimension(nVertLevels,nCells+1), intent(in) :: rho_zz + real (kind=RKIND), dimension(num_scalars,nVertLevels,nCells+1), intent(inout) :: scalars + real (kind=RKIND), dimension(num_scalars,nVertLevels,nCells+1) :: tend_scalars + real (kind=RKIND), dimension(nVertLevels+1,nCells+1), intent(in) :: w + real (kind=RKIND), dimension(nVertLevels+1,nCells+1), intent(in) :: zgrid + real (kind=RKIND), dimension(maxEdges,nCells+1), intent(in) :: deformation_coef_c2 + real (kind=RKIND), dimension(maxEdges,nCells+1), intent(in) :: deformation_coef_s2 + real (kind=RKIND), dimension(maxEdges,nCells+1), intent(in) :: deformation_coef_cs + real (kind=RKIND), dimension(maxEdges,nCells+1), intent(in) :: deformation_coef_c + real (kind=RKIND), dimension(maxEdges,nCells+1), intent(in) :: deformation_coef_s + real (kind=RKIND), dimension(nEdges+1), intent(in) :: meshScalingDel2 + real (kind=RKIND), intent(in) :: c_s, config_len_disp, invDt, config_visc4_2dsmag + integer, dimension(nCells+1), intent(in) :: nEdgesOnCell + integer, dimension(maxEdges,nCells+1), intent(in) :: edgesOnCell + integer, dimension(2,nEdges+1), intent(in) :: cellsOnEdge + + real (kind=RKIND), dimension(nVertLevels,nCells+1), intent(out) :: eddy_visc_horz, eddy_visc_vert + real (kind=RKIND), intent(out) :: h_mom_eddy_visc4, h_theta_eddy_visc4 + + ! local variables + + integer :: iCell, iEdge, k, ie, cell1, cell2 + real (kind=RKIND), dimension(nVertLevels) :: d_11, d_22, d_33, d_12, d_13, d_23 + real (kind=RKIND), dimension(nVertLevels) :: dudx, dudy, dvdx, dvdy + real (kind=RKIND), dimension(nVertLevels+1) :: dwdx, dwdy + real (kind=RKIND), dimension(nVertLevels) :: dudz, dvdz, dwdz + real (kind=RKIND) :: rdz, def2, pr_inv, wk + real (kind=RKIND) :: shear_production, buoyancy, dissipation, delta_z, delta_s, bv, tke_length, diss_length + real (kind=RKIND) :: l_horizontal, l_vertical, c_dissipation + real (kind=RKIND) :: prandtl_horizontal_inv + real (kind=RKIND) :: eddy_visc_h, eddy_visc_v + + logical, parameter :: test_tke=.true. + ! real (kind=RKIND), parameter :: epsilon_bv = 1.e-06 + + + !$acc enter data create(dudx, dudy, dvdx, dvdy, dwdx, dwdy, dudz, dvdz, dwdz) + !$acc enter data create(d_11, d_22, d_33, d_12, d_13, d_23) + + pr_inv = 1./prandtl + + ! set up coefficients for 4th-order horizontal background filter + + h_mom_eddy_visc4 = config_visc4_2dsmag * config_len_disp**3 + h_theta_eddy_visc4 = h_mom_eddy_visc4 + + !$acc parallel default(present) + + !$acc loop gang worker private(dudx, dudy, dvdx, dvdy, dwdx, dwdy, dudz, dvdz, dwdz, d_11, d_22, d_33, d_12, d_13, d_23) + do iCell = cellStart,cellEnd + + !$acc loop vector + do k = 1, nVertLevels + dudx(k) = 0.0_RKIND + dudy(k) = 0.0_RKIND + dvdx(k) = 0.0_RKIND + dvdy(k) = 0.0_RKIND + + dudz(k) = 0.0_RKIND + dvdz(k) = 0.0_RKIND + dwdz(k) = 0.0_RKIND + end do + + !$acc loop vector + do k = 1, nVertLevels+1 + dwdx(k) = 0.0_RKIND + dwdy(k) = 0.0_RKIND + end do + + !$acc loop seq + do iEdge=1,nEdgesOnCell(iCell) + + ie = EdgesOnCell(iEdge,iCell) + cell1 = cellsOnEdge(1,ie) + cell2 = cellsOnEdge(2,ie) + + !$acc loop vector + do k=1,nVertLevels + dudx(k) = dudx(k) + deformation_coef_c2(iEdge,iCell)*u(k,ie) & + - deformation_coef_cs(iEdge,iCell)*v(k,ie) + dudy(k) = dudy(k) + deformation_coef_cs(iEdge,iCell)*u(k,ie) & + - deformation_coef_s2(iEdge,iCell)*v(k,ie) + dvdx(k) = dvdx(k) + deformation_coef_cs(iEdge,iCell)*u(k,ie) & + + deformation_coef_c2(iEdge,iCell)*v(k,ie) + dvdy(k) = dvdy(k) + deformation_coef_s2(iEdge,iCell)*u(k,ie) & + + deformation_coef_cs(iEdge,iCell)*v(k,ie) + end do + + !$acc loop vector + do k=1,nVertLevels+1 + wk = 0.5*(w(k,cell1)+w(k,cell2)) + dwdx(k) = dwdx(k) + deformation_coef_c(iEdge,iCell)*wk + dwdy(k) = dwdy(k) + deformation_coef_s(iEdge,iCell)*wk + end do + + end do + + !$acc loop vector + do k=1,nVertLevels + rdz = 1./(zgrid(k+1,iCell)-zgrid(k,iCell)) + dwdz(k) = (w(k+1,iCell)-w(k,iCell))*rdz + end do + + !$acc loop vector + do k=2,nVertLevels-1 + rdz = 1./(zgrid(k+2,iCell)+zgrid(k+1,iCell)-zgrid(k,iCell)-zgrid(k-1,iCell)) + dudz(k) = (uCell(k+1,iCell)-uCell(k-1,iCell))*rdz + dvdz(k) = (vCell(k+1,iCell)-vCell(k-1,iCell))*rdz + end do + + k = 1 + rdz = 1./(zgrid(k+1,iCell)-zgrid(k,iCell)) + dudz(k) = (uCell(k+1,iCell)-uCell(k,iCell))*rdz + dvdz(k) = (vCell(k+1,iCell)-vCell(k,iCell))*rdz + + k = nVertLevels-1 + rdz = 1./(zgrid(k+1,iCell)-zgrid(k,iCell)) + dudz(k+1) = (uCell(k+1,iCell)-uCell(k,iCell))*rdz + dvdz(k+1) = (vCell(k+1,iCell)-vCell(k,iCell))*rdz + + !$acc loop vector + do k=1, nVertLevels + d_11(k) = 2.*dudx(k) + d_22(k) = 2.*dvdy(k) + d_33(k) = 2.*dwdz(k) + d_12(k) = dudy(k) + dvdx(k) + d_13(k) = dwdx(k) + dudz(k) + d_23(k) = dwdy(k) + dvdz(k) + end do + + if (les_model_opt == LES_MODEL_3D_SMAGORINSKY) then + + !$acc loop vector + do k=1, nVertLevels + def2 = 0.5*(d_11(k)**2 + d_22(k)**2 + d_33(k)**2) + d_12(k)**2 + d_13(k)**2 + d_23(k)**2 + eddy_visc_horz(k,iCell) = (c_s * config_len_disp)**2 * sqrt(max(0.,def2 - pr_inv*bv_freq2(k,iCell))) + eddy_visc_horz(k,iCell) = min(eddy_visc_horz(k,iCell),(0.01*config_len_disp**2) * invDt) + delta_z = zgrid(k+1,iCell)-zgrid(k,iCell) + eddy_visc_vert(k,iCell) = (c_s * delta_z)**2 * sqrt(max(0.,def2 - pr_inv*bv_freq2(k,iCell))) + ! eddy_visc_vert(k,iCell) = eddy_visc_horz(k,iCell) + end do + + else if (les_model_opt == LES_MODEL_PROGNOSTIC_15_ORDER) then + + !$acc loop vector + do k=1,nVertLevels ! bound the tke here, currently hardwired + ! scalars(index_tke,k,iCell) = max(0.,min(100.,scalars(index_tke,k,iCell))) + scalars(index_tke,k,iCell) = max(0.,scalars(index_tke,k,iCell)) + end do + + !$acc loop vector + do k=1,nVertLevels + + delta_z = zgrid(k+1,iCell)-zgrid(k,iCell) + delta_s = ((config_len_disp**2)*delta_z)**(1./3.) + bv = max( sqrt(abs(bv_freq2(k,iCell))), epsilon_bv ) + tke_length = delta_s + ! isentropic mixing formulation + if(bv_freq2(k,iCell) .gt. 1.e-06) & + tke_length = 0.76*sqrt(scalars(index_tke,k,iCell))/bv + tke_length = min(tke_length, delta_z) + diss_length = min(delta_s,max(tke_length,0.01*delta_s)) + if(bv_freq2(k,iCell) <= 0) diss_length = delta_s + + ! non-isotropic mixing + + l_horizontal = config_len_disp + l_vertical = min(delta_z,tke_length) + if(bv_freq2(k,iCell) <= 0) diss_length = delta_z + + ! isotropic mixing + + ! l_horizontal = min(delta_s,tke_length) + ! if(bv_freq2(k,iCell) <= 0) diss_length = delta_s + ! l_vertical = l_horizontal + + ! eddy viscocities set here if we are running the 1.5 order prognostic tke scheme + eddy_visc_h = c_k*l_horizontal*sqrt(scalars(index_tke,k,iCell)) + eddy_visc_h = min(eddy_visc_h,(0.01*config_len_disp**2) * invDt) + eddy_visc_v = c_k*l_vertical*sqrt(scalars(index_tke,k,iCell)) + eddy_visc_v = min(eddy_visc_v,(0.01*delta_z**2) * invDt) + + eddy_visc_horz(k,iCell) = eddy_visc_h + eddy_visc_vert(k,iCell) = eddy_visc_v + + ! terms for the prognostic tke integration + + shear_production = eddy_visc_h*(d_11(k)**2 + d_22(k)**2 + d_12(k)**2) & + +eddy_visc_v*(d_33(k)**2 + d_13(k)**2 + d_23(k)**2) + + buoyancy = -eddy_visc_v*bv_freq2(k,iCell) + + ! dissipation + + c_dissipation = 1.9*c_k + max( 0.0, 0.93 - 1.9*c_k )*diss_length/delta_s + ! if( (k.eq. 1) .or. (k.eq.nVertLevels) ) c_dissipation = 3.9 + + dissipation = -c_dissipation*(scalars(index_tke,k,iCell)**(1.5))/diss_length + + ! computing eddy viscosities ********* + + prandtl_horizontal_inv = 3. + prandtl_3d_inv(k,iCell) = 1.0+(2.0*l_vertical/delta_z) + + + ! RHS term for the subgrid ke. + + if(dynamics_substep == 1) & + tend_scalars(index_tke,k,iCell) = rho_zz(k,iCell)*( shear_production + buoyancy + dissipation ) + + end do + + else + +!MGD call mpas_log_write(' in les_models, no les scheme for '//trim(config_les_model), messageType=MPAS_LOG_CRIT) + + end if ! end of les_model_opt test + + end do ! loop over all owned cells (columns) + + !$acc end parallel + + !$acc exit data delete(dudx, dudy, dvdx, dvdy, dwdx, dwdy, dudz, dvdz, dwdz) + !$acc exit data delete(d_11, d_22, d_33, d_12, d_13, d_23) + + DEBUG_WRITE(' les_models ') + + end subroutine les_models + +!--------------------------------------- + + subroutine calculate_n2( bn2, theta_m, exner, pressure_b, pp, zgrid, scalars, index_qv, index_qc, qtot, & + cellStart, cellEnd, nCells) + + use mpas_atm_dimensions ! pull nVertLevels and num_scalars from here + + integer, intent(in) :: cellStart, cellEnd, nCells + integer, intent(in) :: index_qv, index_qc + real (kind=RKIND), dimension(nVertLevels,nCells+1), intent(out) :: bn2 + real (kind=RKIND), dimension(nVertLevels,nCells+1), intent(in) :: theta_m, exner, pressure_b, pp, qtot + real (kind=RKIND), dimension(nVertLevels+1,nCells+1), intent(in) :: zgrid + real (kind=RKIND), dimension(num_scalars,nVertLevels,nCells+1), intent(in) :: scalars +! local + integer :: iCell, k + real (kind=RKIND) :: dz, rdz, esw, p + real (kind=RKIND), parameter :: qc_cr = 0.00001 ! in kg/kg + real (kind=RKIND), dimension(nVertLevels) :: theta, qvsw, temp, coefa + logical :: dry_bv_frequency + + + DEBUG_WRITE(' begin BV frequency calculations ') + + !$acc enter data create(theta, temp, qvsw, coefa) + + !$acc parallel default(present) + + !$acc loop gang worker private(theta, temp, qvsw, coefa) + do iCell = cellStart,cellEnd + +!DIR$ IVDEP + !$acc loop vector + do k=1, nVertLevels + + theta(k) = theta_m(k,iCell) / (1._RKIND + rvord * scalars(index_qv,k,iCell)) + + temp(k) = exner(k,iCell) * theta(k) + + p = pressure_b(k,iCell) + pp(k,iCell) + esw = 1000. * svp1 * exp(svp2 * (temp(k) - svpt0) / (temp(k) - svp3)) + if (p < esw) esw = p * 0.99 ! fix for pressure < esw + qvsw(k) = ep_2 * esw / (p - esw) + + coefa(k) = ( 1.0 + xlv * qvsw(k)/ R_d / temp(k) ) / & + ( 1.0 + xlv * xlv *qvsw(k) / Cp / R_v / temp(k) / temp(k) ) + + end do + + !$acc loop vector + do k=2, nVertLevels-1 + dz = 0.5 * (zgrid(k+2,iCell)+zgrid(k+1,iCell)) - 0.5 * (zgrid(k,iCell)+zgrid(k-1,iCell)) + rdz = 1.0/dz + + ! if ( scalars(index_qc,k,iCell) < qc_cr ) then + ! ! Dry Brunt-Vaisala frequency + ! bn2(k,iCell) = gravity * ((theta(k+1) - theta(k-1) ) / theta(k) / dz & + ! + rvord * (scalars(index_qv,k+1,iCell) - scalars(index_qv,k-1,iCell)) / dz & + ! - ( qtot(k+1, iCell) - qtot(k-1, iCell) ) / dz ) + ! else + ! ! Moist Brunt-Vaisala frequency according to Durran and Klemp (1982) Eq. 36 + ! bn2(k,iCell) = gravity * ( coefa(k) * ((theta(k+1) - theta(k-1) ) / theta(k) / dz & + ! + xlv / cp / temp(k) * ( qvsw(k+1) - qvsw(k-1)) / dz ) & + ! - ( qtot(k+1, iCell) - qtot(k-1, iCell) ) / dz ) + ! endif + + dry_bv_frequency = .true. + if(index_qc .gt. 0) then ! if moist simulation, qc exists + if ( scalars(index_qc,k,iCell) .ge. qc_cr ) dry_bv_frequency = .false. + end if + + if (dry_bv_frequency) then + ! Dry Brunt-Vaisala frequency + bn2(k,iCell) = gravity * ((theta(k+1) - theta(k-1) ) / theta(k) * rdz & + + rvord * (scalars(index_qv,k+1,iCell) - scalars(index_qv,k-1,iCell)) * rdz & + - ( qtot(k+1, iCell) - qtot(k-1, iCell) ) * rdz ) + else + ! Moist Brunt-Vaisala frequency according to Durran and Klemp (1982) Eq. 36 + bn2(k,iCell) = gravity * ( coefa(k) * ((theta(k+1) - theta(k-1) ) / theta(k) * rdz & + + xlv / cp / temp(k) * ( qvsw(k+1) - qvsw(k-1)) * rdz ) & + - ( qtot(k+1, iCell) - qtot(k-1, iCell) ) * rdz ) + endif + + end do + + bn2(1,iCell) = bn2(2,iCell) + bn2(nVertLevels,iCell) = bn2(nVertLevels-1,iCell) + + end do + + !$acc end parallel + + !$acc exit data delete(theta, temp, qvsw, coefa) + + DEBUG_WRITE(' exiting BV frequency calculations ') + + end subroutine calculate_n2 + +!--------------------------------------- + + subroutine u_dissipation_3d( edgeStart, edgeEnd, edgeSolveStart, edgeSolveEnd, vertexStart, vertexEnd, & + cellStart, cellEnd, nCells, nEdges, nVertices, vertexDegree, & + cellsOnEdge, verticesOnEdge, edgesOnCell, edgesOnVertex, & + nEdgesOnCell, edgesOnCell_sign, edgesOnVertex_sign, & + invAreaCell, invAreaTriangle, invDvEdge, invDcEdge, & + angleEdge, dcEdge, dvEdge, meshScalingDel2, meshScalingDel4, & + config_mix_full, h_mom_eddy_visc4, v_mom_eddy_visc2, & + config_del4u_div_factor, zgrid, & + eddy_visc_horz, eddy_visc_vert, zz, rdzu, rdzw, & + fzm, fzp, les_model_opt, les_surface_opt, & + config_surface_drag_coefficient, & + delsq_u, delsq_vorticity, delsq_divergence, & + u, v, divergence, vorticity, rho_edge, rho_zz, u_init, v_init, ustm, & + tend_u_euler ) + + use mpas_atm_dimensions ! pull nVertLevels and maxEdges from here + + implicit none + + integer, intent(in) :: edgeStart, edgeEnd, edgeSolveStart, edgeSolveEnd + integer, intent(in) :: vertexStart, vertexEnd, vertexDegree + integer, intent(in) :: cellStart, cellEnd + integer, intent(in) :: nCells, nEdges, nVertices + logical, intent(in) :: config_mix_full + + integer, intent(in) :: les_model_opt + integer, intent(in) :: les_surface_opt + + integer, dimension(2,nEdges+1), intent(in) :: cellsOnEdge + integer, dimension(2,nEdges+1), intent(in) :: verticesOnEdge + integer, dimension(maxEdges,nCells+1), intent(in) :: edgesOnCell + integer, dimension(nCells+1), intent(in) :: nEdgesOnCell + integer, dimension(vertexDegree,nVertices+1), intent(in) :: edgesOnVertex + + real (kind=RKIND), intent(in) :: h_mom_eddy_visc4 + real (kind=RKIND), intent(in) :: v_mom_eddy_visc2 + real (kind=RKIND), intent(in) :: config_del4u_div_factor + real (kind=RKIND), intent(in) :: config_surface_drag_coefficient + + real (kind=RKIND), dimension(maxEdges,nCells+1), intent(in) :: edgesOnCell_sign + real (kind=RKIND), dimension(vertexDegree,nVertices+1), intent(in) :: edgesOnVertex_sign + real (kind=RKIND), dimension(nVertices+1), intent(in) :: invAreaTriangle + real (kind=RKIND), dimension(nCells+1), intent(in) :: invAreaCell + real (kind=RKIND), dimension(nEdges+1), intent(in) :: invDcEdge + real (kind=RKIND), dimension(nEdges+1), intent(in) :: invDvEdge + real (kind=RKIND), dimension(nEdges+1), intent(in) :: angleEdge + real (kind=RKIND), dimension(nEdges+1), intent(in) :: dcEdge + real (kind=RKIND), dimension(nEdges+1), intent(in) :: dvEdge + real (kind=RKIND), dimension(nEdges+1), intent(in) :: meshScalingDel2 + real (kind=RKIND), dimension(nEdges+1), intent(in) :: meshScalingDel4 + real (kind=RKIND), dimension(nVertLevels+1,nCells+1), intent(in) :: zgrid + + real (kind=RKIND), dimension(nVertLevels,nEdges+1), intent(in) :: u + real (kind=RKIND), dimension(nVertLevels,nEdges+1), intent(in) :: v + real (kind=RKIND), dimension(nVertLevels,nCells+1), intent(in) :: divergence + real (kind=RKIND), dimension(nVertLevels,nVertices+1), intent(in) :: vorticity + real (kind=RKIND), dimension(nVertLevels,nEdges+1), intent(in) :: rho_edge + real (kind=RKIND), dimension(nVertLevels,nCells+1), intent(in) :: eddy_visc_horz + real (kind=RKIND), dimension(nVertLevels,nCells+1), intent(in) :: eddy_visc_vert + real (kind=RKIND), dimension(nVertLevels,nCells+1), intent(in) :: zz + real (kind=RKIND), dimension(nVertLevels,nCells+1), intent(in) :: rho_zz + real (kind=RKIND), dimension(nVertLevels), intent(in) :: rdzu + real (kind=RKIND), dimension(nVertLevels), intent(in) :: rdzw + real (kind=RKIND), dimension(nVertLevels), intent(in) :: fzm + real (kind=RKIND), dimension(nVertLevels), intent(in) :: fzp + + + ! scratch space from calling routine + real (kind=RKIND), dimension(nVertLevels,nEdges+1) :: delsq_u + real (kind=RKIND), dimension(nVertLevels,nVertices+1) :: delsq_vorticity + real (kind=RKIND), dimension(nVertLevels,nCells+1) :: delsq_divergence + + real (kind=RKIND), dimension(nVertLevels), intent(in) :: u_init, v_init + real (kind=RKIND), dimension(:), intent(in) :: ustm + + real (kind=RKIND), dimension(nVertLevels,nEdges+1), intent(out) :: tend_u_euler + + ! local variables + + integer :: iEdge, cell1, cell2, vertex1, vertex2, iVertex, iCell, i, k + real (kind=RKIND) :: r_dc, r_dv, u_diffusion, u_diffusion_les, kdiffu, r, edge_sign, u_mix_scale + real (kind=RKIND) :: z1, z2, z3, z4, zm, z0, zp + real (kind=RKIND), dimension(nVertLevels) :: u_mix + + real (kind=RKIND), dimension(nVertLevels+1) :: turb_vflux + real (kind=RKIND) :: rho_k_cell1, rho_k_cell2, rho_k_at_w + real (kind=RKIND) :: zz_cell1, zz_cell2, zz_at_w + real (kind=RKIND) :: ust_edge + + real (kind=RKIND) :: velocity_magnitude + real (kind=RKIND) :: tau_12_factor + + + DEBUG_WRITE(' begin u_dissipation_3d ') + DEBUG_WRITE(' 4th order hyperviscosity is $r ' COMMA realArgs=(/h_mom_eddy_visc4/)) + DEBUG_WRITE(' 4th order divergence factor is $r ' COMMA realArgs=(/config_del4u_div_factor/)) + +!$OMP BARRIER + + ! del^4 horizontal filter. We compute this as del^2 ( del^2 (u) ). + ! First, storage to hold the result from the first del^2 computation. + + !$acc enter data create(u_mix) + !$acc enter data create(turb_vflux) + + !$acc parallel default(present) + + tau_12_factor = 0.0 + if(les_model_opt /= LES_MODEL_NONE) tau_12_factor = 1.0 + + !$acc loop gang worker + do iEdge=edgeStart,edgeEnd + cell1 = cellsOnEdge(1,iEdge) + cell2 = cellsOnEdge(2,iEdge) + vertex1 = verticesOnEdge(1,iEdge) + vertex2 = verticesOnEdge(2,iEdge) + r_dc = invDcEdge(iEdge) + r_dv = min(invDvEdge(iEdge), 4*invDcEdge(iEdge)) + + !$acc loop vector + do k = 1, nVertLevels + delsq_u(k,iEdge) = 0.0_RKIND + end do + +!DIR$ IVDEP + !$acc loop vector + do k=1,nVertLevels + + ! Compute diffusion, computed as \nabla divergence - k \times \nabla vorticity + ! only valid for h_mom_eddy_visc4 == constant + u_diffusion = ( divergence(k,cell2) - divergence(k,cell1) ) * r_dc & + -( vorticity(k,vertex2) - vorticity(k,vertex1) ) * r_dv + ! for LES models we need 2 times the gradient of divergence, in contrast to what is + ! saved and used to calculate the 4th-order horizontal filter + u_diffusion_les = u_diffusion + tau_12_factor * ( divergence(k,cell2) - divergence(k,cell1) ) * r_dc + + delsq_u(k,iEdge) = delsq_u(k,iEdge) + u_diffusion + + kdiffu = 0.5*(eddy_visc_horz(k,cell1)+eddy_visc_horz(k,cell2)) + + ! include 2nd-order diffusion here + tend_u_euler(k,iEdge) = tend_u_euler(k,iEdge) & + + rho_edge(k,iEdge)* kdiffu * u_diffusion_les * meshScalingDel2(iEdge) + + end do + end do + + !$acc end parallel + + if (h_mom_eddy_visc4 > 0.0) then ! 4th order mixing is active + +!$OMP BARRIER + + !$acc parallel default(present) + + !$acc loop gang worker + do iVertex=vertexStart,vertexEnd + !$acc loop vector + do k = 1, nVertLevels + delsq_vorticity(k,iVertex) = 0.0_RKIND + end do + + !$acc loop seq + do i=1,vertexDegree + iEdge = edgesOnVertex(i,iVertex) + edge_sign = invAreaTriangle(iVertex) * dcEdge(iEdge) * edgesOnVertex_sign(i,iVertex) + + !$acc loop vector + do k=1,nVertLevels + delsq_vorticity(k,iVertex) = delsq_vorticity(k,iVertex) + edge_sign * delsq_u(k,iEdge) + end do + end do + end do + + !$acc loop gang worker + do iCell=cellStart,cellEnd + !$acc loop vector + do k = 1, nVertLevels + delsq_divergence(k,iCell) = 0.0_RKIND + end do + + r = invAreaCell(iCell) + + !$acc loop seq + do i=1,nEdgesOnCell(iCell) + iEdge = edgesOnCell(i,iCell) + edge_sign = r * dvEdge(iEdge) * edgesOnCell_sign(i,iCell) + + !$acc loop vector + do k=1,nVertLevels + delsq_divergence(k,iCell) = delsq_divergence(k,iCell) + edge_sign * delsq_u(k,iEdge) + end do + end do + end do + + !$acc end parallel + +!$OMP BARRIER + + !$acc parallel default(present) + + !$acc loop gang worker + do iEdge=edgeSolveStart,edgeSolveEnd + cell1 = cellsOnEdge(1,iEdge) + cell2 = cellsOnEdge(2,iEdge) + vertex1 = verticesOnEdge(1,iEdge) + vertex2 = verticesOnEdge(2,iEdge) + + u_mix_scale = meshScalingDel4(iEdge)*h_mom_eddy_visc4 + r_dc = u_mix_scale * config_del4u_div_factor * invDcEdge(iEdge) + r_dv = u_mix_scale * min(invDvEdge(iEdge), 4*invDcEdge(iEdge)) + +!DIR$ IVDEP + !$acc loop vector + do k=1,nVertLevels + + ! Compute diffusion, computed as \nabla divergence - k \times \nabla vorticity + ! only valid for h_mom_eddy_visc4 == constant + ! + ! Here, we scale the diffusion on the divergence part a factor of config_del4u_div_factor + ! relative to the rotational part. The stability constraint on the divergence component is much less + ! stringent than the rotational part, and this flexibility may be useful. + ! + u_diffusion = rho_edge(k,iEdge) * ( ( delsq_divergence(k,cell2) - delsq_divergence(k,cell1) ) * r_dc & + -( delsq_vorticity(k,vertex2) - delsq_vorticity(k,vertex1) ) * r_dv ) + tend_u_euler(k,iEdge) = tend_u_euler(k,iEdge) - u_diffusion + + end do + end do + + !$acc end parallel + + end if ! 4th order mixing is active + + ! + ! vertical mixing for u - 2nd order filter in physical (z) space + ! + if ( v_mom_eddy_visc2 > 0.0 ) then + + if (config_mix_full) then ! mix full state + + !$acc parallel default(present) + + !$acc loop gang worker + do iEdge=edgeSolveStart,edgeSolveEnd + + cell1 = cellsOnEdge(1,iEdge) + cell2 = cellsOnEdge(2,iEdge) + + !$acc loop vector + do k=2,nVertLevels-1 + + z1 = 0.5*(zgrid(k-1,cell1)+zgrid(k-1,cell2)) + z2 = 0.5*(zgrid(k ,cell1)+zgrid(k ,cell2)) + z3 = 0.5*(zgrid(k+1,cell1)+zgrid(k+1,cell2)) + z4 = 0.5*(zgrid(k+2,cell1)+zgrid(k+2,cell2)) + + zm = 0.5*(z1+z2) + z0 = 0.5*(z2+z3) + zp = 0.5*(z3+z4) + + tend_u_euler(k,iEdge) = tend_u_euler(k,iEdge) + rho_edge(k,iEdge) * v_mom_eddy_visc2*( & + (u(k+1,iEdge)-u(k ,iEdge))/(zp-z0) & + -(u(k ,iEdge)-u(k-1,iEdge))/(z0-zm) )/(0.5*(zp-zm)) + end do + end do + + !$acc end parallel + + else ! idealized cases where we mix on the perturbation from the initial 1-D state + + !$acc parallel default(present) + + !$acc loop gang worker private(u_mix) + do iEdge=edgeSolveStart,edgeSolveEnd + + cell1 = cellsOnEdge(1,iEdge) + cell2 = cellsOnEdge(2,iEdge) + + !$acc loop vector + do k=1,nVertLevels + u_mix(k) = u(k,iEdge) - u_init(k) * cos( angleEdge(iEdge) ) & + + v_init(k) * sin( angleEdge(iEdge) ) + end do + + !$acc loop vector + do k=2,nVertLevels-1 + + z1 = 0.5*(zgrid(k-1,cell1)+zgrid(k-1,cell2)) + z2 = 0.5*(zgrid(k ,cell1)+zgrid(k ,cell2)) + z3 = 0.5*(zgrid(k+1,cell1)+zgrid(k+1,cell2)) + z4 = 0.5*(zgrid(k+2,cell1)+zgrid(k+2,cell2)) + + zm = 0.5*(z1+z2) + z0 = 0.5*(z2+z3) + zp = 0.5*(z3+z4) + + tend_u_euler(k,iEdge) = tend_u_euler(k,iEdge) + rho_edge(k,iEdge) * v_mom_eddy_visc2*( & + (u_mix(k+1)-u_mix(k ))/(zp-z0) & + -(u_mix(k )-u_mix(k-1))/(z0-zm) )/(0.5*(zp-zm)) + end do + end do + + !$acc end parallel + + end if ! mix perturbation state + + end if ! vertical mixing of horizontal momentum for les formulation + + if ( les_model_opt /= LES_MODEL_NONE ) then + + !$acc parallel default(present) + + !$acc loop gang worker private(turb_vflux) + do iEdge=edgeSolveStart,edgeSolveEnd + + cell1 = cellsOnEdge(1,iEdge) + cell2 = cellsOnEdge(2,iEdge) + turb_vflux(nVertlevels+1) = 0.0_RKIND ! no turbulent flux out of the domain + turb_vflux(1) = 0.0_RKIND ! lower bc flux handled where ??? + + !$acc loop vector + do k=2,nVertLevels + rho_k_cell1 = fzm(k)*rho_zz(k ,cell1)*zz(k ,cell1)*eddy_visc_vert(k ,cell1) & + +fzp(k)*rho_zz(k-1,cell1)*zz(k-1,cell1)*eddy_visc_vert(k-1,cell1) + rho_k_cell2 = fzm(k)*rho_zz(k ,cell2)*zz(k ,cell2)*eddy_visc_vert(k ,cell2) & + +fzp(k)*rho_zz(k-1,cell2)*zz(k-1,cell2)*eddy_visc_vert(k-1,cell2) + rho_k_at_w = 0.5*(rho_k_cell1+rho_k_cell2) + + zz_cell1 = fzm(k)*zz(k,cell1)+fzp(k)*zz(k-1,cell1) + zz_cell2 = fzm(k)*zz(k,cell2)+fzp(k)*zz(k-1,cell2) + zz_at_w = 0.5*(zz_cell1+zz_cell2) + turb_vflux(k) = - rho_k_at_w*zz_at_w*rdzu(k)*(u(k,iEdge)-u(k-1,iEdge)) + end do + + if( les_surface_opt == LES_SURFACE_SPECIFIED ) then + velocity_magnitude = sqrt(u(1,iEdge)**2 + v(1,iEdge)**2) + turb_vflux(1) = -rho_edge(1,iEdge)*config_surface_drag_coefficient*u(1,iEdge)*velocity_magnitude + turb_vflux(nVertLevels+1) = turb_vflux(nVertLevels) + else if ( les_surface_opt == LES_SURFACE_VARYING ) then + ust_edge = 0.5*(ustm(cell1) + ustm(cell2)) + velocity_magnitude = max(sqrt(u(1,iEdge)**2 + v(1,iEdge)**2),0.1) + turb_vflux(1) = -rho_edge(1,iEdge)*ust_edge*ust_edge*(u(1,iEdge)/velocity_magnitude) + turb_vflux(nVertLevels+1) = turb_vflux(nVertLevels) + ! end test conditions + else + ! test conditions for supercell case + turb_vflux(1) = turb_vflux(2) + turb_vflux(nVertLevels+1) = turb_vflux(nVertLevels) + ! end test conditions + end if + + !$acc loop vector + do k=1,nVertLevels + tend_u_euler(k,iEdge) = tend_u_euler(k,iEdge) - rdzw(k)*(turb_vflux(k+1)-turb_vflux(k)) + end do + + end do + + !$acc end parallel + + end if + + !$acc exit data delete(turb_vflux) + !$acc exit data delete(u_mix) + + DEBUG_WRITE(' exiting u_dissipation_3d ') + + end subroutine u_dissipation_3d + +!------------------------ + + subroutine w_dissipation_3d( cellStart, cellEnd, cellSolveStart, cellSolveEnd, & + nCells, nEdges, & + nEdgesOnCell, edgesOnCell, cellsOnEdge, edgesOnCell_sign, & + invAreaCell, invDcEdge, dvEdge, & + meshScalingDel2, meshScalingDel4, & + rdzw, rdzu, & + v_mom_eddy_visc2, h_mom_eddy_visc4, & + delsq_w, & + w, rho_edge, rho_zz, divergence, zz, & + eddy_visc_horz, eddy_visc_vert, & + les_model_opt, les_surface_opt, & + tend_w_euler ) + + + ! 3D w dissipation using the 3D smagorinsky eddy viscosities. + ! This routine also includes the simpler mixing models, and the 4th-order horizontal filter + + use mpas_atm_dimensions ! pull nVertLevels and maxEdges from here + + implicit none + + integer, intent(in) :: cellStart, cellEnd + integer, intent(in) :: cellSolveStart, cellSolveEnd + integer, intent(in) :: nCells, nEdges + + integer, dimension(nCells+1), intent(in) :: nEdgesOnCell + integer, dimension(maxEdges,nCells+1), intent(in) :: EdgesOnCell + + integer, dimension(2,nEdges+1), intent(in) :: cellsOnEdge + + integer, intent(in) :: les_model_opt + integer, intent(in) :: les_surface_opt + + real (kind=RKIND), intent(in) :: h_mom_eddy_visc4 + real (kind=RKIND), intent(in) :: v_mom_eddy_visc2 + + real (kind=RKIND), dimension(maxEdges,nCells+1), intent(in) :: edgesOnCell_sign + real (kind=RKIND), dimension(nCells+1), intent(in) :: invAreaCell + real (kind=RKIND), dimension(nEdges+1), intent(in) :: dvEdge + real (kind=RKIND), dimension(nEdges+1), intent(in) :: invDcEdge + real (kind=RKIND), dimension(nEdges+1), intent(in) :: meshScalingDel2 + real (kind=RKIND), dimension(nEdges+1), intent(in) :: meshScalingDel4 + real (kind=RKIND), dimension(nVertLevels), intent(in) :: rdzw + real (kind=RKIND), dimension(nVertLevels), intent(in) :: rdzu + + real (kind=RKIND), dimension(nVertLevels+1,nCells+1), intent(in) :: w + real (kind=RKIND), dimension(nVertLevels,nCells+1), intent(in) :: eddy_visc_horz + real (kind=RKIND), dimension(nVertLevels,nCells+1), intent(in) :: eddy_visc_vert + real (kind=RKIND), dimension(nVertLevels,nCells+1), intent(in) :: rho_zz + real (kind=RKIND), dimension(nVertLevels,nCells+1), intent(in) :: divergence + real (kind=RKIND), dimension(nVertLevels,nCells+1), intent(in) :: zz + real (kind=RKIND), dimension(nVertLevels,nEdges+1), intent(in) :: rho_edge + + real (kind=RKIND), dimension(nVertLevels+1,nCells+1), intent(inout) :: tend_w_euler + + ! storage passed in from calling routine + real (kind=RKIND), dimension(nVertLevels,nCells+1) :: delsq_w + real (kind=RKIND), dimension(nVertLevels+1) :: turb_vflux + + ! local variables + + integer :: cell1, cell2, iEdge, iCell, i, k + real (kind=RKIND) :: r_areaCell, edge_sign, w_turb_flux + + +! !OMP BARRIER why is this openmp barrier here??? + + ! del^4 horizontal filter. We compute this as del^2 ( del^2 (w) ). + ! + ! First, storage to hold the result from the first del^2 computation. + ! we copied code from the theta mixing, hence the theta* names. + + + DEBUG_WRITE(' begin w_dissipation_3d ') + DEBUG_WRITE(' 4th order hyperviscosity is $r ' COMMA realArgs=(/h_mom_eddy_visc4/)) + + !$acc enter data create(turb_vflux) + + !$acc parallel default(present) + + !$acc loop gang worker + do iCell=cellStart,cellEnd + + !$acc loop vector + do k = 1, nVertLevels + delsq_w(k,iCell) = 0.0_RKIND + end do + + !$acc loop vector + do k = 1, nVertLevels+1 + tend_w_euler(k,iCell) = 0.0_RKIND + end do + + r_areaCell = invAreaCell(iCell) + + !$acc loop seq + do i=1,nEdgesOnCell(iCell) + iEdge = edgesOnCell(i,iCell) + + edge_sign = 0.5 * r_areaCell*edgesOnCell_sign(i,iCell) * dvEdge(iEdge) * invDcEdge(iEdge) + + cell1 = cellsOnEdge(1,iEdge) + cell2 = cellsOnEdge(2,iEdge) + +!DIR$ IVDEP + !$acc loop vector + do k=2,nVertLevels + + w_turb_flux = edge_sign*(rho_edge(k,iEdge)+rho_edge(k-1,iEdge))*(w(k,cell2) - w(k,cell1)) + delsq_w(k,iCell) = delsq_w(k,iCell) + w_turb_flux + w_turb_flux = w_turb_flux * meshScalingDel2(iEdge) * 0.25 * & + ( eddy_visc_horz(k ,cell1)+eddy_visc_horz(k ,cell2) & + +eddy_visc_horz(k-1,cell1)+eddy_visc_horz(k-1,cell2) ) + tend_w_euler(k,iCell) = tend_w_euler(k,iCell) + w_turb_flux + end do + end do + end do + + !$acc end parallel + +!$OMP BARRIER + + if (h_mom_eddy_visc4 > 0.0) then ! 4th order mixing is active + + !$acc parallel default(present) + + !$acc loop gang worker + do iCell=cellSolveStart,cellSolveEnd ! Technically updating fewer cells than before... + + r_areaCell = h_mom_eddy_visc4 * invAreaCell(iCell) + + !$acc loop seq + do i=1,nEdgesOnCell(iCell) + iEdge = edgesOnCell(i,iCell) + cell1 = cellsOnEdge(1,iEdge) + cell2 = cellsOnEdge(2,iEdge) + + edge_sign = meshScalingDel4(iEdge)*r_areaCell*dvEdge(iEdge)*edgesOnCell_sign(i,iCell) * invDcEdge(iEdge) + + !$acc loop vector + do k=2,nVertLevels + tend_w_euler(k,iCell) = tend_w_euler(k,iCell) - edge_sign * (delsq_w(k,cell2) - delsq_w(k,cell1)) + end do + + end do + end do + + !$acc end parallel + + end if ! 4th order mixing is active + + if ( v_mom_eddy_visc2 > 0.0 ) then ! vertical mixing + + !$acc parallel default(present) + + !$acc loop gang worker + do iCell=cellSolveStart,cellSolveEnd +!DIR$ IVDEP + !$acc loop vector + do k=2,nVertLevels + tend_w_euler(k,iCell) = tend_w_euler(k,iCell) + v_mom_eddy_visc2*0.5*(rho_zz(k,iCell)+rho_zz(k-1,iCell))*( & + (w(k+1,iCell)-w(k ,iCell))*rdzw(k) & + -(w(k ,iCell)-w(k-1,iCell))*rdzw(k-1) )*rdzu(k) + end do + end do + + !$acc end parallel + + end if + + if ( les_model_opt /= LES_MODEL_NONE ) then + + !$acc parallel default(present) + + !$acc loop gang worker private(turb_vflux) + do iCell = cellSolveStart,cellSolveEnd ! vertical mixing for each column + ! compute turbulent fluxes + + !$acc loop vector + do k=1,nVertLevels + turb_vflux(k) = - rho_zz(k,iCell)*eddy_visc_vert(k,iCell)*zz(k,iCell)*( & + 2.0*zz(k,iCell)*rdzw(k)*(w(k+1,iCell)-w(k,iCell)) & + + divergence(k,iCell) ) + end do + + turb_vflux(nVertLevels+1) = 0.0 + + !$acc loop vector + do k=2,nVertLevels + tend_w_euler(k,iCell) = tend_w_euler(k,iCell) & + - rdzu(k)*(turb_vflux(k)-turb_vflux(k-1)) + end do + end do + + !$acc end parallel + + end if + + !$acc exit data delete(turb_vflux) + + DEBUG_WRITE(' exiting w_dissipation_3d ') + + end subroutine w_dissipation_3d + +!----------------------------------------------------- + + subroutine scalar_dissipation_3d_les( cellStart, cellEnd, cellSolveStart, cellSolveEnd, & + nCells, nEdges, & + nEdgesOnCell, edgesOnCell, cellsOnEdge, edgesOnCell_sign, & + invAreaCell, invDcEdge, dvEdge, & + meshScalingDel2, meshScalingDel4, & + config_mix_full, t_init, zgrid, & + rdzw, rdzu, fzm, fzp, & + v_theta_eddy_visc2, h_theta_eddy_visc4, prandtl_inv, & + prandtl_3d_inv, & + delsq_theta, & + theta_m, rho_edge, rho_zz, zz, & + eddy_visc_horz, eddy_visc_vert, & + bv_freq2, config_len_disp, scalars, tend_scalars, & + index_tke, index_qv, num_scalars_dummy, mix_scalars, & + les_model_opt, les_surface_opt, clock, dt, & + config_surface_heat_flux, config_surface_moisture_flux, & + uReconstructZonal, uReconstructMeridional, & + hfx, qfx, & + tend_theta_euler, dynamics_substep ) + + + ! 3D theta_m dissipation using the 3D smagorinsky eddy viscosities. + ! This routine also includes the simpler mixing models, and the 4th-order horizontal filter + + use mpas_atm_dimensions ! pull nVertLevels and maxEdges from here + + implicit none + + integer, intent(in) :: cellStart, cellEnd + integer, intent(in) :: cellSolveStart, cellSolveEnd + integer, intent(in) :: nCells, nEdges + integer, intent(in) :: num_scalars_dummy + integer, intent(in) :: index_tke, index_qv + integer, intent(in) :: dynamics_substep + + real (kind=RKIND), intent(in) :: config_surface_heat_flux + real (kind=RKIND), intent(in) :: config_surface_moisture_flux + + logical, intent(in) :: config_mix_full, mix_scalars + + integer, intent(in) :: les_model_opt + integer, intent(in) :: les_surface_opt + + type (MPAS_Clock_type), intent(in) :: clock + real (kind=RKIND), intent(in) :: dt + + integer, dimension(nCells+1), intent(in) :: nEdgesOnCell + integer, dimension(maxEdges,nCells+1), intent(in) :: EdgesOnCell + + integer, dimension(2,nEdges+1), intent(in) :: cellsOnEdge + + real (kind=RKIND), intent(in) :: h_theta_eddy_visc4 + real (kind=RKIND), intent(in) :: v_theta_eddy_visc2 + real (kind=RKIND), intent(in) :: prandtl_inv + real (kind=RKIND), intent(in) :: config_len_disp + + real (kind=RKIND), dimension(maxEdges,nCells+1), intent(in) :: edgesOnCell_sign + real (kind=RKIND), dimension(nCells+1), intent(in) :: invAreaCell + real (kind=RKIND), dimension(nEdges+1), intent(in) :: dvEdge + real (kind=RKIND), dimension(nEdges+1), intent(in) :: invDcEdge + real (kind=RKIND), dimension(nEdges+1), intent(in) :: meshScalingDel2 + real (kind=RKIND), dimension(nEdges+1), intent(in) :: meshScalingDel4 + real (kind=RKIND), dimension(nVertLevels), intent(in) :: rdzw + real (kind=RKIND), dimension(nVertLevels), intent(in) :: rdzu + real (kind=RKIND), dimension(nVertLevels), intent(in) :: fzm + real (kind=RKIND), dimension(nVertLevels), intent(in) :: fzp + real (kind=RKIND), dimension(nVertLevels+1, nCells+1), intent(in) :: zgrid + real (kind=RKIND), dimension(nVertLevels,nCells+1), intent(in) :: zz + real (kind=RKIND), dimension(nVertLevels,nCells+1), intent(in) :: uReconstructZonal, uReconstructMeridional + + real (kind=RKIND), dimension(nVertLevels,nCells+1), intent(in) :: t_init + + real (kind=RKIND), dimension(nVertLevels,nCells+1), intent(in) :: bv_freq2 + real (kind=RKIND), dimension(nVertLevels,nCells+1), intent(in) :: prandtl_3d_inv + real (kind=RKIND), dimension(num_scalars,nVertLevels,nCells+1), intent(in) :: scalars + real (kind=RKIND), dimension(num_scalars,nVertLevels,nCells+1), intent(inout) :: tend_scalars + real (kind=RKIND), dimension(nVertLevels,nCells+1), intent(in) :: theta_m + real (kind=RKIND), dimension(nVertLevels,nCells+1), intent(in) :: eddy_visc_horz + real (kind=RKIND), dimension(nVertLevels,nCells+1), intent(in) :: eddy_visc_vert + real (kind=RKIND), dimension(nVertLevels,nCells+1), intent(in) :: rho_zz + real (kind=RKIND), dimension(nVertLevels,nEdges+1), intent(in) :: rho_edge + real (kind=RKIND), dimension(:), intent(in) :: hfx, qfx + + real (kind=RKIND), dimension(nVertLevels,nCells+1), intent(inout) :: tend_theta_euler + + ! storage passed in from calling routine + real (kind=RKIND), dimension(nVertLevels,nCells+1) :: delsq_theta + + ! local variables + integer :: cell1, cell2, iEdge, iCell, i, k, iScalar + real (kind=RKIND) :: r_areaCell, edge_sign, theta_turb_flux, pr_scale + real (kind=RKIND) :: z1, z2, z3, z4, zm, z0, zp + real (kind=RKIND), dimension(nVertLevels+1) :: turb_vflux, prandtl_1d_inverse + real (kind=RKIND), dimension(num_scalars,nVertLevels+1) :: turb_vflux_scalars + real (kind=RKIND), dimension(nVertLevels) :: rho_k_at_w, zz_at_w + + real (kind=RKIND) :: moisture_flux, heat_flux, theta_m_flux + real (kind=RKIND) :: qv_cell, theta_m_cell, theta_cell + + + DEBUG_WRITE(' begin scalar_dissipation_3d ') + DEBUG_WRITE(' 4th order hyperviscosity is $r ' COMMA realArgs=(/h_theta_eddy_visc4/)) + + if( mix_scalars .and. (dynamics_substep == 1)) call mpas_log_write(' scalar mixing on ') + + !$acc enter data create(turb_vflux_scalars) + !$acc enter data create(turb_vflux, prandtl_1d_inverse) + + !$acc parallel default(present) + + !$acc loop gang worker + do iCell=cellStart,cellEnd + + !$acc loop vector + do k = 1, nVertLevels + delsq_theta(k,iCell) = 0.0_RKIND + tend_theta_euler(k,iCell) = 0.0_RKIND + end do + + r_areaCell = invAreaCell(iCell) + + !$acc loop seq + do i=1,nEdgesOnCell(iCell) + iEdge = edgesOnCell(i,iCell) + edge_sign = r_areaCell*edgesOnCell_sign(i,iCell) * dvEdge(iEdge) * invDcEdge(iEdge) + pr_scale = prandtl_inv * meshScalingDel2(iEdge) + cell1 = cellsOnEdge(1,iEdge) + cell2 = cellsOnEdge(2,iEdge) + +!DIR$ IVDEP + !$acc loop vector + do k=1,nVertLevels + +! we are computing the Smagorinsky filter at more points than needed here so as to pick up the delsq_theta for 4th order filter below. +! This is in conservative form. + + theta_turb_flux = edge_sign*(theta_m(k,cell2) - theta_m(k,cell1))*rho_edge(k,iEdge) + delsq_theta(k,iCell) = delsq_theta(k,iCell) + theta_turb_flux + theta_turb_flux = theta_turb_flux*0.5*(eddy_visc_horz(k,cell1)+eddy_visc_horz(k,cell2)) * pr_scale + tend_theta_euler(k,iCell) = tend_theta_euler(k,iCell) + theta_turb_flux + + end do + end do + end do + + !$acc end parallel + +!$OMP BARRIER + + if (h_theta_eddy_visc4 > 0.0) then ! 4th order mixing is active + + !$acc parallel default(present) + + !$acc loop gang worker + do iCell=cellSolveStart,cellSolveEnd ! Technically updating fewer cells than before... + r_areaCell = h_theta_eddy_visc4 * prandtl_inv * invAreaCell(iCell) + + !$acc loop seq + do i=1,nEdgesOnCell(iCell) + + iEdge = edgesOnCell(i,iCell) + edge_sign = meshScalingDel4(iEdge)*r_areaCell*dvEdge(iEdge)*edgesOnCell_sign(i,iCell)*invDcEdge(iEdge) + + cell1 = cellsOnEdge(1,iEdge) + cell2 = cellsOnEdge(2,iEdge) + + !$acc loop vector + do k=1,nVertLevels + tend_theta_euler(k,iCell) = tend_theta_euler(k,iCell) - edge_sign*(delsq_theta(k,cell2) - delsq_theta(k,cell1)) + end do + end do + end do + + !$acc end parallel + + end if ! 4th order mixing is active + + if(mix_scalars .and. (dynamics_substep == 1)) then ! dissipation for scalars, including 4th-order filter. Likely needs optimization + + do iScalar=1,num_scalars + + !$acc parallel default(present) + + !$acc loop gang worker + do iCell=cellStart,cellEnd + !$acc loop vector + do k = 1, nVertLevels + delsq_theta(k,iCell) = 0.0_RKIND + end do + + ! tend_theta_euler(1:nVertLevels,iCell) = 0.0 + r_areaCell = invAreaCell(iCell) + + !$acc loop seq + do i=1,nEdgesOnCell(iCell) + iEdge = edgesOnCell(i,iCell) + edge_sign = r_areaCell*edgesOnCell_sign(i,iCell) * dvEdge(iEdge) * invDcEdge(iEdge) + pr_scale = prandtl_inv * meshScalingDel2(iEdge) + cell1 = cellsOnEdge(1,iEdge) + cell2 = cellsOnEdge(2,iEdge) + +!DIR$ IVDEP + !$acc loop vector + do k=1,nVertLevels + +! we are computing the Smagorinsky filter at more points than needed here so as to pick up the delsq_theta for 4th order filter below. +! This is in conservative form. + + theta_turb_flux = edge_sign*(scalars(iScalar,k,cell2) - scalars(iScalar,k,cell1))*rho_edge(k,iEdge) + delsq_theta(k,iCell) = delsq_theta(k,iCell) + theta_turb_flux + theta_turb_flux = theta_turb_flux*0.5*(eddy_visc_horz(k,cell1)+eddy_visc_horz(k,cell2)) * pr_scale + tend_scalars(iScalar,k,iCell) = tend_scalars(iScalar,k,iCell) + theta_turb_flux + + end do + end do + end do + + !$acc end parallel + +!$OMP BARRIER + + if (h_theta_eddy_visc4 > 0.0) then ! 4th order mixing is active + + !$acc parallel default(present) + + !$acc loop gang worker + do iCell=cellSolveStart,cellSolveEnd ! Technically updating fewer cells than before... + + r_areaCell = h_theta_eddy_visc4 * prandtl_inv * invAreaCell(iCell) + + !$acc loop seq + do i=1,nEdgesOnCell(iCell) + + iEdge = edgesOnCell(i,iCell) + edge_sign = meshScalingDel4(iEdge)*r_areaCell*dvEdge(iEdge)*edgesOnCell_sign(i,iCell)*invDcEdge(iEdge) + + cell1 = cellsOnEdge(1,iEdge) + cell2 = cellsOnEdge(2,iEdge) + + !$acc loop vector + do k=1,nVertLevels + tend_scalars(iScalar,k,iCell) = tend_scalars(iScalar,k,iCell) - edge_sign*(delsq_theta(k,cell2) - delsq_theta(k,cell1)) + end do + end do + end do + + !$acc end parallel + + end if ! 4th order mixing is active + + end do ! loop over scalars for horizontal mixing + + end if ! horizontal scalar mixing + + + ! idealized case vertical mixing. No scalar mixing here. + + if ( v_theta_eddy_visc2 > 0.0 ) then ! vertical mixing for theta_m + + if (config_mix_full) then + + !$acc parallel default(present) + + !$acc loop gang worker + do iCell = cellSolveStart,cellSolveEnd + + !$acc loop vector + do k=2,nVertLevels-1 + z1 = zgrid(k-1,iCell) + z2 = zgrid(k ,iCell) + z3 = zgrid(k+1,iCell) + z4 = zgrid(k+2,iCell) + + zm = 0.5*(z1+z2) + z0 = 0.5*(z2+z3) + zp = 0.5*(z3+z4) + + tend_theta_euler(k,iCell) = tend_theta_euler(k,iCell) + v_theta_eddy_visc2*prandtl_inv*rho_zz(k,iCell)*(& + (theta_m(k+1,iCell)-theta_m(k ,iCell))/(zp-z0) & + -(theta_m(k ,iCell)-theta_m(k-1,iCell))/(z0-zm) )/(0.5*(zp-zm)) + end do + end do + + !$acc end parallel + + else ! idealized cases where we mix on the perturbation from the initial 1-D state + + !$acc parallel default(present) + + !$acc loop gang worker + do iCell = cellSolveStart,cellSolveEnd + + !$acc loop vector + do k=2,nVertLevels-1 + z1 = zgrid(k-1,iCell) + z2 = zgrid(k ,iCell) + z3 = zgrid(k+1,iCell) + z4 = zgrid(k+2,iCell) + + zm = 0.5*(z1+z2) + z0 = 0.5*(z2+z3) + zp = 0.5*(z3+z4) + + tend_theta_euler(k,iCell) = tend_theta_euler(k,iCell) + v_theta_eddy_visc2*prandtl_inv*rho_zz(k,iCell)*(& + ((theta_m(k+1,iCell)-t_init(k+1,iCell))-(theta_m(k ,iCell)-t_init(k,iCell)))/(zp-z0) & + -((theta_m(k ,iCell)-t_init(k,iCell))-(theta_m(k-1,iCell)-t_init(k-1,iCell)))/(z0-zm) )/(0.5*(zp-zm)) + end do + end do + + !$acc end parallel + + end if + + end if + + if ( les_model_opt /= LES_MODEL_NONE ) then + + !$acc parallel default(present) + + !$acc loop gang worker private(turb_vflux, turb_vflux_scalars, prandtl_1d_inverse, rho_k_at_w, zz_at_w) + do iCell = cellSolveStart,cellSolveEnd ! vertical mixing for each column + ! compute turbulent fluxes + + turb_vflux(nVertlevels+1) = 0. ! no turbulent flux out of the domain + turb_vflux(1) = 0. ! lower bc flux handled where ??? + + if ( les_model_opt == LES_MODEL_3D_SMAGORINSKY ) then + !$acc loop vector + do k=2,nVertLevels + prandtl_1d_inverse(k) = prandtl_inv + end do + else ! prognostic_1.5_order, isentropic mixing length + ! do k=2,nVertLevels + ! delta_z = 0.5*(zgrid(k+1,iCell)-zgrid(k-1,iCell)) + ! delta_s = ((config_len_disp**2)*delta_z)**(1./3.) + ! bv_frequency2 = 0.5*(bv_freq2(k,iCell)+bv_freq2(k-1,iCell)) + ! tke_length = delta_s + ! if(bv_frequency2 .gt. 1.e-06) & + ! tke_length = 0.76*sqrt(scalars(index_tke,k,iCell))/sqrt(bv_frequency2) + ! tke_length = min(delta_z,tke_length) + ! prandtl_inverse(k) = 1. + 2.*tke_length/delta_z + ! end do + + !$acc loop vector + do k=2,nVertLevels + ! prandtl_1d_inverse(k) = 0.5*(prandtl_3d_inv(k,iCell)+prandtl_3d_inv(k-1,iCell)) + prandtl_1d_inverse(k) = fzm(k)*prandtl_3d_inv(k,iCell)+fzp(k)*prandtl_3d_inv(k-1,iCell) + end do + + end if + + !$acc loop vector + do k=2,nVertLevels + + ! delta_z = 0.5*(zgrid(k+1,iCell)-zgrid(k-1,iCell)) + ! delta_s = ((config_len_disp**2)*delta_z)**(1./3.) + ! bv_frequency2 = 0.5*(bv_freq2(k)+bv_freq(k-1)) + ! bv = max( sqrt(abs(bv_frequency2)), epsilon_bv ) + rho_k_at_w(k) = fzm(k)*rho_zz(k ,iCell)*zz(k ,iCell)*zz(k ,iCell)*eddy_visc_vert(k ,iCell) & + +fzp(k)*rho_zz(k-1,iCell)*zz(k-1,iCell)*zz(k-1,iCell)*eddy_visc_vert(k-1,iCell) + zz_at_w(k) = fzm(k)*zz(k,iCell)+fzp(k)*zz(k-1,iCell) + turb_vflux(k) = - prandtl_1d_inverse(k)*rho_k_at_w(k)*zz_at_w(k)*rdzu(k)*(theta_m(k,iCell)-theta_m(k-1,iCell)) + end do + + ! test boundary conditions for supercell and les test cases + + if( les_surface_opt == LES_SURFACE_SPECIFIED .or. les_surface_opt == LES_SURFACE_VARYING ) then + + if( les_surface_opt == LES_SURFACE_SPECIFIED ) then + moisture_flux = config_surface_moisture_flux + heat_flux = config_surface_heat_flux + +! place holder routine for time-varying specified +! call flux_les_sas( heat_flux, moisture_flux, clock, dt ) + + else if ( les_surface_opt == LES_SURFACE_VARYING ) then + heat_flux = hfx(iCell)/rho_zz(1,iCell)/cp + moisture_flux = qfx(iCell)/rho_zz(1,iCell) + endif + + qv_cell = scalars(index_qv,1,iCell) + theta_m_cell = theta_m(1,iCell) + theta_cell = theta_m_cell/(1.0+(rv/rgas)*qv_cell) + + theta_m_flux = heat_flux*(1.0+(rv/rgas)*qv_cell)+(rv/rgas)*theta_cell*moisture_flux + turb_vflux(1) = theta_m_flux*rho_zz(1,iCell) + moisture_flux = moisture_flux*rho_zz(1,iCell) + turb_vflux(nVertLevels+1) = turb_vflux(nVertLevels) + + else + + turb_vflux(1) = turb_vflux(2) + turb_vflux(nVertLevels+1) = turb_vflux(nVertLevels) + + end if + + !$acc loop vector + do k=1,nVertLevels + tend_theta_euler(k,iCell) = tend_theta_euler(k,iCell) & + - rdzw(k)*(turb_vflux(k+1)-turb_vflux(k)) + end do + + if (mix_scalars ) then + + ! compute turbulent fluxes + !$acc loop vector + do iScalar=1,num_scalars + turb_vflux_scalars(iScalar,nVertlevels+1) = 0.0_RKIND ! no turbulent flux out of the domain + turb_vflux_scalars(iScalar,1) = 0.0_RKIND ! lower bc flux handled where ??? + end do + + !$acc loop vector + do k=2,nVertLevels + rho_k_at_w(k) = fzm(k)*rho_zz(k ,iCell)*zz(k ,iCell)*zz(k ,iCell)*eddy_visc_vert(k ,iCell) & + + fzp(k)*rho_zz(k-1,iCell)*zz(k-1,iCell)*zz(k-1,iCell)*eddy_visc_vert(k-1,iCell) + zz_at_w(k) = fzm(k)*zz(k,iCell)+fzp(k)*zz(k-1,iCell) + end do + + !$acc loop vector collapse(2) + do k=2,nVertLevels + do iScalar=1,num_scalars + turb_vflux_scalars(iScalar,k) = - prandtl_1d_inverse(k)*rho_k_at_w(k)*zz_at_w(k)*rdzu(k)* & + (scalars(iScalar,k,iCell)-scalars(iScalar,k-1,iCell)) + end do + end do + + if( les_surface_opt == LES_SURFACE_SPECIFIED .or. les_surface_opt == LES_SURFACE_VARYING ) turb_vflux_scalars(index_qv,1) = moisture_flux ! lower b.c. for qv + + !$acc loop vector collapse(2) + do k=1,nVertLevels + do iScalar=1,num_scalars + tend_scalars(iScalar,k,iCell) = tend_scalars(iScalar,k,iCell) & + - rdzw(k)*(turb_vflux_scalars(iScalar,k+1)-turb_vflux_scalars(iScalar,k)) + end do + end do + + end if ! mix scalars + + end do ! loop over cells (columns) + + !$acc end parallel + + end if + + !$acc exit data delete(turb_vflux_scalars) + !$acc exit data delete(turb_vflux, prandtl_1d_inverse) + + DEBUG_WRITE(' exiting scalar_dissipation_3d ') + + end subroutine scalar_dissipation_3d_les + +!----------- + +! subroutine flux_les_sas(heat_flux, moisture_flux, clock, dt) + +! implicit none + +! real (kind=RKIND), intent(out) :: heat_flux, moisture_flux +! type (MPAS_Clock_type), intent(in) :: clock +! real (kind=RKIND), intent(in) :: dt + +! real (kind=RKIND), parameter:: t_start_t_flux = 3600.*6.0 +! real (kind=RKIND), parameter:: t_end_t_flux = 3600.*19.50 +! real (kind=RKIND), parameter:: t_start_q_flux = 3600.*7.0 +! real (kind=RKIND), parameter:: t_end_q_flux = 3600.*19.50 +! real (kind=RKIND) :: rel_time_t_flux, rel_time_q_flux +! real (kind=RKIND) :: time_of_day_seconds +! type (MPAS_Time_type) :: currTime +! integer :: H, M, S, S_n, S_d +! integer :: ierr + +! currTime = mpas_get_clock_time(clock, MPAS_NOW, ierr) +! call mpas_get_time(curr_time=currTime, H=H, M=M, S=S, S_n=S_n, S_d=S_d) +! time_of_day_seconds = real(H)*3600. + real(M)*60. + real(S) + real(S_n)/real(S_d) + 0.5*dt +! call mpas_log_write(' les integration, timestep midpoint time of day in seconds, $r ', realArgs=(/time_of_day_seconds/)) + +! rel_time_t_flux = max(0.,(time_of_day_seconds - t_start_t_flux)/(t_end_t_flux - t_start_t_flux)) +! rel_time_q_flux = max(0.,(time_of_day_seconds - t_start_q_flux)/(t_end_q_flux - t_start_q_flux)) + +! heat_flux = max(0., 0.1*sin(pii*rel_time_t_flux)) +! moisture_flux = max(0., 0.15*sin(pii*rel_time_q_flux))/1000. + +! end subroutine flux_les_sas + +end module mpas_atm_dissipation_models diff --git a/src/core_atmosphere/dynamics/mpas_atm_iau.F b/src/core_atmosphere/dynamics/mpas_atm_iau.F index 654fd3ae82..7459de89b4 100644 --- a/src/core_atmosphere/dynamics/mpas_atm_iau.F +++ b/src/core_atmosphere/dynamics/mpas_atm_iau.F @@ -5,6 +5,15 @@ ! Additional copyright and license information can be found in the LICENSE file ! distributed with this code, or at http://mpas-dev.github.com/license.html ! + +#ifdef MPAS_OPENACC +#define MPAS_ACC_TIMER_START(X) call mpas_timer_start(X) +#define MPAS_ACC_TIMER_STOP(X) call mpas_timer_stop(X) +#else +#define MPAS_ACC_TIMER_START(X) +#define MPAS_ACC_TIMER_STOP(X) +#endif + module mpas_atm_iau use mpas_derived_types @@ -13,9 +22,10 @@ module mpas_atm_iau use mpas_dmpar use mpas_constants use mpas_log, only : mpas_log_write + use mpas_timer + + !public :: atm_compute_iau_coef, atm_add_tend_anal_incr - !public :: atm_compute_iau_coef, atm_add_tend_anal_incr - contains !================================================================================================== @@ -76,6 +86,39 @@ real (kind=RKIND) function atm_iau_coef(configs, itimestep, dt) result(wgt_iau) end if end function atm_iau_coef + +!================================================================================================== + subroutine update_d2h_pre_add_tend_anal_incr(configs,structs) +!================================================================================================== + + implicit none + + type (mpas_pool_type), intent(in) :: configs + type (mpas_pool_type), intent(inout) :: structs + + type (mpas_pool_type), pointer :: tend + type (mpas_pool_type), pointer :: state + type (mpas_pool_type), pointer :: diag + + real (kind=RKIND), dimension(:,:), pointer :: rho_edge, rho_zz, theta_m + real(kind=RKIND),dimension(:,:,:), pointer :: scalars, tend_scalars + + call mpas_pool_get_subpool(structs, 'tend', tend) + call mpas_pool_get_subpool(structs, 'state', state) + call mpas_pool_get_subpool(structs, 'diag', diag) + + MPAS_ACC_TIMER_START('atm_srk3: physics ACC_data_xfer') + call mpas_pool_get_array(state, 'theta_m', theta_m, 1) + call mpas_pool_get_array(state, 'scalars', scalars, 1) + call mpas_pool_get_array(state, 'rho_zz', rho_zz, 2) + call mpas_pool_get_array(diag , 'rho_edge', rho_edge) + !$acc update self(theta_m, scalars, rho_zz, rho_edge) + + call mpas_pool_get_array(tend, 'scalars_tend', tend_scalars) + !$acc update self(tend_scalars) + MPAS_ACC_TIMER_STOP('atm_srk3: physics ACC_data_xfer') + + end subroutine update_d2h_pre_add_tend_anal_incr !================================================================================================== subroutine atm_add_tend_anal_incr (configs, structs, itimestep, dt, tend_ru, tend_rtheta, tend_rho) diff --git a/src/core_atmosphere/dynamics/mpas_atm_time_integration.F b/src/core_atmosphere/dynamics/mpas_atm_time_integration.F index 4fe2faefc4..786f3a3416 100644 --- a/src/core_atmosphere/dynamics/mpas_atm_time_integration.F +++ b/src/core_atmosphere/dynamics/mpas_atm_time_integration.F @@ -16,7 +16,6 @@ module atm_time_integration - use mpas_derived_types use mpas_pool_routines use mpas_kind_types use mpas_constants @@ -29,6 +28,7 @@ module atm_time_integration #ifdef DO_PHYSICS use mpas_atmphys_driver_microphysics + use mpas_atmphys_interface, only: update_d2h_pre_microphysics, update_h2d_post_microphysics use mpas_atmphys_todynamics use mpas_atmphys_utilities #endif @@ -36,18 +36,20 @@ module atm_time_integration use mpas_atm_boundaries, only : nSpecZone, nRelaxZone, nBdyZone, mpas_atm_get_bdy_state, mpas_atm_get_bdy_tend ! regional_MPAS addition use mpas_atm_iau + use mpas_atm_dissipation_models ! ! Abstract interface for routine used to communicate halos of fields ! in a named group ! abstract interface - subroutine halo_exchange_routine(domain, halo_group, ierr) + subroutine halo_exchange_routine(domain, halo_group, withGPUAwareMPI, ierr) use mpas_derived_types, only : domain_type type (domain_type), intent(inout) :: domain character(len=*), intent(in) :: halo_group + logical, intent(in), optional :: withGPUAwareMPI integer, intent(out), optional :: ierr end subroutine halo_exchange_routine @@ -85,6 +87,7 @@ end subroutine halo_exchange_routine !$acc declare create(s_max_arr, s_min_arr) !$acc declare create(flux_array, flux_upwind_tmp_arr) !$acc declare create(flux_tmp_arr, wdtn_arr) + !$acc declare create(rho_zz_int) real (kind=RKIND), dimension(:,:), allocatable :: ru_driving_tend ! regional_MPAS addition real (kind=RKIND), dimension(:,:), allocatable :: rt_driving_tend ! regional_MPAS addition @@ -272,6 +275,13 @@ subroutine mpas_atm_dynamics_init(domain) real (kind=RKIND), dimension(:), pointer :: angleEdge real (kind=RKIND), dimension(:), pointer :: meshScalingDel2 real (kind=RKIND), dimension(:), pointer :: meshScalingDel4 + real (kind=RKIND), dimension(:), pointer :: u_init, v_init, qv_init + real (kind=RKIND), dimension(:,:), pointer :: t_init + real (kind=RKIND), dimension(:,:), pointer :: deformation_coef_c2 + real (kind=RKIND), dimension(:,:), pointer :: deformation_coef_s2 + real (kind=RKIND), dimension(:,:), pointer :: deformation_coef_cs + real (kind=RKIND), dimension(:,:), pointer :: deformation_coef_c + real (kind=RKIND), dimension(:,:), pointer :: deformation_coef_s #endif #ifdef MPAS_CAM_DYCORE @@ -292,6 +302,7 @@ subroutine mpas_atm_dynamics_init(domain) nullify(mesh) call mpas_pool_get_subpool(domain % blocklist % structs, 'mesh', mesh) + MPAS_ACC_TIMER_START('mpas_dynamics_init [ACC_data_xfer]') call mpas_pool_get_array(mesh, 'dvEdge', dvEdge) !$acc enter data copyin(dvEdge) @@ -456,9 +467,971 @@ subroutine mpas_atm_dynamics_init(domain) call mpas_pool_get_array(mesh, 'meshScalingDel4', meshScalingDel4) !$acc enter data copyin(meshScalingDel4) + + call mpas_pool_get_array(mesh, 'u_init', u_init) + !$acc enter data copyin(u_init) + call mpas_pool_get_array(mesh, 'v_init', v_init) + !$acc enter data copyin(v_init) + call mpas_pool_get_array(mesh, 't_init', t_init) + !$acc enter data copyin(t_init) + call mpas_pool_get_array(mesh, 'qv_init', qv_init) + !$acc enter data copyin(qv_init) + + call mpas_pool_get_array(mesh, 'deformation_coef_c2', deformation_coef_c2) + !$acc enter data copyin(deformation_coef_c2) + + call mpas_pool_get_array(mesh, 'deformation_coef_s2', deformation_coef_s2) + !$acc enter data copyin(deformation_coef_s2) + + call mpas_pool_get_array(mesh, 'deformation_coef_cs', deformation_coef_cs) + !$acc enter data copyin(deformation_coef_cs) + + call mpas_pool_get_array(mesh, 'deformation_coef_c', deformation_coef_c) + !$acc enter data copyin(deformation_coef_c) + + call mpas_pool_get_array(mesh, 'deformation_coef_s', deformation_coef_s) + !$acc enter data copyin(deformation_coef_s) + + MPAS_ACC_TIMER_STOP('mpas_dynamics_init [ACC_data_xfer]') +#endif + + end subroutine mpas_atm_dynamics_init + + subroutine mpas_atm_pre_computesolvediag_h2d(block) + + implicit none + + type (block_type), intent(inout) :: block + + +#ifdef MPAS_OPENACC + type (mpas_pool_type), pointer :: mesh + type (mpas_pool_type), pointer :: diag + type (mpas_pool_type), pointer :: state + type (mpas_pool_type), pointer :: tend_physics + real (kind=RKIND), dimension(:,:), pointer :: rthdynten + + real (kind=RKIND), dimension(:,:), pointer :: h_edge, v, vorticity, ke, pv_edge, & + pv_vertex, pv_cell, gradPVn, gradPVt, divergence + real (kind=RKIND), dimension(:,:), pointer :: u, h + + real (kind=RKIND), dimension(:,:), pointer :: zz + real (kind=RKIND), dimension(:,:,:), pointer :: zb_cell + real (kind=RKIND), dimension(:,:,:), pointer :: zb3_cell + real (kind=RKIND), dimension(:), pointer :: fzm + real (kind=RKIND), dimension(:), pointer :: fzp + real (kind=RKIND), dimension(:,:,:), pointer :: zb + real (kind=RKIND), dimension(:,:,:), pointer :: zb3 + + + real (kind=RKIND), dimension(:), pointer :: dvEdge + integer, dimension(:,:), pointer :: cellsOnCell + integer, dimension(:,:), pointer :: cellsOnEdge + integer, dimension(:,:), pointer :: advCellsForEdge + integer, dimension(:,:), pointer :: edgesOnCell + integer, dimension(:), pointer :: nAdvCellsForEdge + integer, dimension(:), pointer :: nEdgesOnCell + real (kind=RKIND), dimension(:,:), pointer :: adv_coefs + real (kind=RKIND), dimension(:,:), pointer :: adv_coefs_3rd + real (kind=RKIND), dimension(:,:), pointer :: edgesOnCell_sign + real (kind=RKIND), dimension(:), pointer :: invAreaCell + integer, dimension(:), pointer :: bdyMaskCell + integer, dimension(:), pointer :: bdyMaskEdge + real (kind=RKIND), dimension(:), pointer :: specZoneMaskEdge + real (kind=RKIND), dimension(:), pointer :: invDvEdge + real (kind=RKIND), dimension(:), pointer :: dcEdge + real (kind=RKIND), dimension(:), pointer :: invDcEdge + integer, dimension(:,:), pointer :: edgesOnEdge + integer, dimension(:,:), pointer :: edgesOnVertex + real (kind=RKIND), dimension(:,:), pointer :: edgesOnVertex_sign + integer, dimension(:), pointer :: nEdgesOnEdge + real (kind=RKIND), dimension(:,:), pointer :: weightsOnEdge + integer, dimension(:,:), pointer :: cellsOnVertex + integer, dimension(:,:), pointer :: verticesOnCell + integer, dimension(:,:), pointer :: verticesOnEdge + real (kind=RKIND), dimension(:), pointer :: invAreaTriangle + integer, dimension(:,:), pointer :: kiteForCell + real (kind=RKIND), dimension(:,:), pointer :: kiteAreasOnVertex + real (kind=RKIND), dimension(:), pointer :: fEdge + real (kind=RKIND), dimension(:), pointer :: fVertex + + nullify(mesh) + call mpas_pool_get_subpool(block % structs, 'mesh', mesh) + nullify(state) + call mpas_pool_get_subpool(block % structs, 'state', state) + nullify(diag) + call mpas_pool_get_subpool(block % structs, 'diag', diag) + + MPAS_ACC_TIMER_START('first_compute_solve_diagnostics [ACC_data_xfer]') + call mpas_pool_get_array(state, 'rho_zz', h, 1) + !$acc enter data create(h) + call mpas_pool_get_array(state, 'u', u, 1) + !$acc enter data copyin(u) + + call mpas_pool_get_array(diag, 'v', v) + !$acc enter data copyin(v) + call mpas_pool_get_array(diag, 'rho_edge', h_edge) + !$acc enter data copyin(h_edge) + call mpas_pool_get_array(diag, 'vorticity', vorticity) + !$acc enter data copyin(vorticity) + call mpas_pool_get_array(diag, 'divergence', divergence) + !$acc enter data copyin(divergence) + call mpas_pool_get_array(diag, 'ke', ke) + !$acc enter data copyin(ke) + call mpas_pool_get_array(diag, 'pv_edge', pv_edge) + !$acc enter data copyin(pv_edge) + call mpas_pool_get_array(diag, 'pv_vertex', pv_vertex) + !$acc enter data copyin(pv_vertex) + call mpas_pool_get_array(diag, 'pv_cell', pv_cell) + !$acc enter data copyin(pv_cell) + call mpas_pool_get_array(diag, 'gradPVn', gradPVn) + !$acc enter data copyin(gradPVn) + call mpas_pool_get_array(diag, 'gradPVt', gradPVt) + !$acc enter data copyin(gradPVt) + + ! Required by atm_init_coupled_diagnostics + call mpas_pool_get_array(mesh, 'zz', zz) + !$acc enter data copyin(zz) + + call mpas_pool_get_array(mesh, 'zb_cell', zb_cell) + !$acc enter data copyin(zb_cell) + + call mpas_pool_get_array(mesh, 'zb3_cell', zb3_cell) + !$acc enter data copyin(zb3_cell) + + call mpas_pool_get_array(mesh, 'fzm', fzm) + !$acc enter data copyin(fzm) + + call mpas_pool_get_array(mesh, 'fzp', fzp) + !$acc enter data copyin(fzp) + + call mpas_pool_get_array(mesh, 'zb', zb) + !$acc enter data copyin(zb) + + call mpas_pool_get_array(mesh, 'zb3', zb3) + !$acc enter data copyin(zb3) + + ! Required by atm_compute_solve_diagnostics + call mpas_pool_get_array(mesh, 'dvEdge', dvEdge) + !$acc enter data copyin(dvEdge) + + call mpas_pool_get_array(mesh, 'cellsOnEdge', cellsOnEdge) + !$acc enter data copyin(cellsOnEdge) + + call mpas_pool_get_array(mesh, 'edgesOnCell', edgesOnCell) + !$acc enter data copyin(edgesOnCell) + + call mpas_pool_get_array(mesh, 'nEdgesOnCell', nEdgesOnCell) + !$acc enter data copyin(nEdgesOnCell) + + call mpas_pool_get_array(mesh, 'edgesOnCell_sign', edgesOnCell_sign) + !$acc enter data copyin(edgesOnCell_sign) + + call mpas_pool_get_array(mesh, 'invAreaCell', invAreaCell) + !$acc enter data copyin(invAreaCell) + + call mpas_pool_get_array(mesh, 'invDvEdge', invDvEdge) + !$acc enter data copyin(invDvEdge) + + call mpas_pool_get_array(mesh, 'dcEdge', dcEdge) + !$acc enter data copyin(dcEdge) + + call mpas_pool_get_array(mesh, 'invDcEdge', invDcEdge) + !$acc enter data copyin(invDcEdge) + + call mpas_pool_get_array(mesh, 'edgesOnEdge', edgesOnEdge) + !$acc enter data copyin(edgesOnEdge) + + call mpas_pool_get_array(mesh, 'edgesOnVertex', edgesOnVertex) + !$acc enter data copyin(edgesOnVertex) + + call mpas_pool_get_array(mesh, 'edgesOnVertex_sign', edgesOnVertex_sign) + !$acc enter data copyin(edgesOnVertex_sign) + + call mpas_pool_get_array(mesh, 'nEdgesOnEdge', nEdgesOnEdge) + !$acc enter data copyin(nEdgesOnEdge) + + call mpas_pool_get_array(mesh, 'weightsOnEdge', weightsOnEdge) + !$acc enter data copyin(weightsOnEdge) + + call mpas_pool_get_array(mesh, 'verticesOnCell', verticesOnCell) + !$acc enter data copyin(verticesOnCell) + + call mpas_pool_get_array(mesh, 'verticesOnEdge', verticesOnEdge) + !$acc enter data copyin(verticesOnEdge) + + call mpas_pool_get_array(mesh, 'invAreaTriangle', invAreaTriangle) + !$acc enter data copyin(invAreaTriangle) + + call mpas_pool_get_array(mesh, 'kiteForCell', kiteForCell) + !$acc enter data copyin(kiteForCell) + + call mpas_pool_get_array(mesh, 'kiteAreasOnVertex', kiteAreasOnVertex) + !$acc enter data copyin(kiteAreasOnVertex) + + call mpas_pool_get_array(mesh, 'fVertex', fVertex) + !$acc enter data copyin(fVertex) + + MPAS_ACC_TIMER_STOP('first_compute_solve_diagnostics [ACC_data_xfer]') +#endif + + end subroutine mpas_atm_pre_computesolvediag_h2d + + + subroutine mpas_atm_post_computesolvediag_d2h(block) + + implicit none + + type (block_type), intent(inout) :: block + + +#ifdef MPAS_OPENACC + type (mpas_pool_type), pointer :: mesh + type (mpas_pool_type), pointer :: diag + type (mpas_pool_type), pointer :: state + type (mpas_pool_type), pointer :: tend_physics + real (kind=RKIND), dimension(:,:), pointer :: rthdynten + + real (kind=RKIND), dimension(:,:), pointer :: h_edge, v, vorticity, ke, pv_edge, & + pv_vertex, pv_cell, gradPVn, gradPVt, divergence + real (kind=RKIND), dimension(:,:), pointer :: u, h + + real (kind=RKIND), dimension(:,:), pointer :: zz + real (kind=RKIND), dimension(:,:,:), pointer :: zb_cell + real (kind=RKIND), dimension(:,:,:), pointer :: zb3_cell + real (kind=RKIND), dimension(:), pointer :: fzm + real (kind=RKIND), dimension(:), pointer :: fzp + real (kind=RKIND), dimension(:,:,:), pointer :: zb + real (kind=RKIND), dimension(:,:,:), pointer :: zb3 + + + real (kind=RKIND), dimension(:), pointer :: dvEdge + integer, dimension(:,:), pointer :: cellsOnCell + integer, dimension(:,:), pointer :: cellsOnEdge + integer, dimension(:,:), pointer :: advCellsForEdge + integer, dimension(:,:), pointer :: edgesOnCell + integer, dimension(:), pointer :: nAdvCellsForEdge + integer, dimension(:), pointer :: nEdgesOnCell + real (kind=RKIND), dimension(:,:), pointer :: adv_coefs + real (kind=RKIND), dimension(:,:), pointer :: adv_coefs_3rd + real (kind=RKIND), dimension(:,:), pointer :: edgesOnCell_sign + real (kind=RKIND), dimension(:), pointer :: invAreaCell + integer, dimension(:), pointer :: bdyMaskCell + integer, dimension(:), pointer :: bdyMaskEdge + real (kind=RKIND), dimension(:), pointer :: specZoneMaskEdge + real (kind=RKIND), dimension(:), pointer :: invDvEdge + real (kind=RKIND), dimension(:), pointer :: dcEdge + real (kind=RKIND), dimension(:), pointer :: invDcEdge + integer, dimension(:,:), pointer :: edgesOnEdge + integer, dimension(:,:), pointer :: edgesOnVertex + real (kind=RKIND), dimension(:,:), pointer :: edgesOnVertex_sign + integer, dimension(:), pointer :: nEdgesOnEdge + real (kind=RKIND), dimension(:,:), pointer :: weightsOnEdge + integer, dimension(:,:), pointer :: cellsOnVertex + integer, dimension(:,:), pointer :: verticesOnCell + integer, dimension(:,:), pointer :: verticesOnEdge + real (kind=RKIND), dimension(:), pointer :: invAreaTriangle + integer, dimension(:,:), pointer :: kiteForCell + real (kind=RKIND), dimension(:,:), pointer :: kiteAreasOnVertex + real (kind=RKIND), dimension(:), pointer :: fEdge + real (kind=RKIND), dimension(:), pointer :: fVertex + + nullify(mesh) + call mpas_pool_get_subpool(block % structs, 'mesh', mesh) + nullify(state) + call mpas_pool_get_subpool(block % structs, 'state', state) + nullify(diag) + call mpas_pool_get_subpool(block % structs, 'diag', diag) + + MPAS_ACC_TIMER_START('first_compute_solve_diagnostics [ACC_data_xfer]') + + call mpas_pool_get_array(state, 'rho_zz', h, 1) + !$acc exit data copyout(h) + call mpas_pool_get_array(state, 'u', u, 1) + !$acc exit data copyout(u) + + call mpas_pool_get_array(diag, 'v', v) + !$acc exit data copyout(v) + call mpas_pool_get_array(diag, 'rho_edge', h_edge) + !$acc exit data copyout(h_edge) + call mpas_pool_get_array(diag, 'vorticity', vorticity) + !$acc exit data copyout(vorticity) + call mpas_pool_get_array(diag, 'divergence', divergence) + !$acc exit data copyout(divergence) + call mpas_pool_get_array(diag, 'ke', ke) + !$acc exit data copyout(ke) + call mpas_pool_get_array(diag, 'pv_edge', pv_edge) + !$acc exit data copyout(pv_edge) + call mpas_pool_get_array(diag, 'pv_vertex', pv_vertex) + !$acc exit data copyout(pv_vertex) + call mpas_pool_get_array(diag, 'pv_cell', pv_cell) + !$acc exit data copyout(pv_cell) + call mpas_pool_get_array(diag, 'gradPVn', gradPVn) + !$acc exit data copyout(gradPVn) + call mpas_pool_get_array(diag, 'gradPVt', gradPVt) + !$acc exit data copyout(gradPVt) + + ! Required by atm_init_coupled_diagnostics + call mpas_pool_get_array(mesh, 'zz', zz) + !$acc exit data delete(zz) + + call mpas_pool_get_array(mesh, 'zb_cell', zb_cell) + !$acc exit data delete(zb_cell) + + call mpas_pool_get_array(mesh, 'zb3_cell', zb3_cell) + !$acc exit data delete(zb3_cell) + + call mpas_pool_get_array(mesh, 'fzm', fzm) + !$acc exit data delete(fzm) + + call mpas_pool_get_array(mesh, 'fzp', fzp) + !$acc exit data delete(fzp) + + call mpas_pool_get_array(mesh, 'zb', zb) + !$acc exit data delete(zb) + + call mpas_pool_get_array(mesh, 'zb3', zb3) + !$acc exit data delete(zb3) + + + call mpas_pool_get_array(mesh, 'dvEdge', dvEdge) + !$acc exit data delete(dvEdge) + + call mpas_pool_get_array(mesh, 'cellsOnEdge', cellsOnEdge) + !$acc exit data delete(cellsOnEdge) + + call mpas_pool_get_array(mesh, 'edgesOnCell', edgesOnCell) + !$acc exit data delete(edgesOnCell) + + call mpas_pool_get_array(mesh, 'nEdgesOnCell', nEdgesOnCell) + !$acc exit data delete(nEdgesOnCell) + + call mpas_pool_get_array(mesh, 'edgesOnCell_sign', edgesOnCell_sign) + !$acc exit data delete(edgesOnCell_sign) + + call mpas_pool_get_array(mesh, 'invAreaCell', invAreaCell) + !$acc exit data delete(invAreaCell) + + call mpas_pool_get_array(mesh, 'invDvEdge', invDvEdge) + !$acc exit data delete(invDvEdge) + + call mpas_pool_get_array(mesh, 'dcEdge', dcEdge) + !$acc exit data delete(dcEdge) + + call mpas_pool_get_array(mesh, 'invDcEdge', invDcEdge) + !$acc exit data delete(invDcEdge) + + call mpas_pool_get_array(mesh, 'edgesOnEdge', edgesOnEdge) + !$acc exit data delete(edgesOnEdge) + + call mpas_pool_get_array(mesh, 'edgesOnVertex', edgesOnVertex) + !$acc exit data delete(edgesOnVertex) + + call mpas_pool_get_array(mesh, 'edgesOnVertex_sign', edgesOnVertex_sign) + !$acc exit data delete(edgesOnVertex_sign) + + call mpas_pool_get_array(mesh, 'nEdgesOnEdge', nEdgesOnEdge) + !$acc exit data delete(nEdgesOnEdge) + + call mpas_pool_get_array(mesh, 'weightsOnEdge', weightsOnEdge) + !$acc exit data delete(weightsOnEdge) + + call mpas_pool_get_array(mesh, 'verticesOnCell', verticesOnCell) + !$acc exit data delete(verticesOnCell) + + call mpas_pool_get_array(mesh, 'verticesOnEdge', verticesOnEdge) + !$acc exit data delete(verticesOnEdge) + + call mpas_pool_get_array(mesh, 'invAreaTriangle', invAreaTriangle) + !$acc exit data delete(invAreaTriangle) + + call mpas_pool_get_array(mesh, 'kiteForCell', kiteForCell) + !$acc exit data delete(kiteForCell) + + call mpas_pool_get_array(mesh, 'kiteAreasOnVertex', kiteAreasOnVertex) + !$acc exit data delete(kiteAreasOnVertex) + + call mpas_pool_get_array(mesh, 'fVertex', fVertex) + !$acc exit data delete(fVertex) + + MPAS_ACC_TIMER_STOP('first_compute_solve_diagnostics [ACC_data_xfer]') +#endif + + end subroutine mpas_atm_post_computesolvediag_d2h + + subroutine mpas_atm_pre_dynamics_h2d(domain) + + implicit none + + type (domain_type), intent(inout) :: domain + + +#ifdef MPAS_OPENACC + type (mpas_pool_type), pointer :: mesh + type (mpas_pool_type), pointer :: state + type (mpas_pool_type), pointer :: diag + type (mpas_pool_type), pointer :: tend + type (mpas_pool_type), pointer :: tend_physics + type (mpas_pool_type), pointer :: lbc + + logical, pointer :: config_apply_lbcs_ptr + logical :: config_apply_lbcs + + real (kind=RKIND), dimension(:,:), pointer :: ru, ru_p + real (kind=RKIND), dimension(:,:), pointer :: ru_save + real (kind=RKIND), dimension(:,:), pointer :: rw, rw_p + real (kind=RKIND), dimension(:,:), pointer :: rw_save + real (kind=RKIND), dimension(:,:), pointer :: rtheta_p + real (kind=RKIND), dimension(:,:), pointer :: exner, exner_base + real (kind=RKIND), dimension(:,:), pointer :: rtheta_base, rho_base + real (kind=RKIND), dimension(:,:), pointer :: rtheta_p_save + real (kind=RKIND), dimension(:,:), pointer :: rho_p, rho_pp, rho, theta, theta_base + real (kind=RKIND), dimension(:,:), pointer :: rho_p_save + real (kind=RKIND), dimension(:,:), pointer :: rho_zz_old_split + real (kind=RKIND), dimension(:,:), pointer :: cqw, rtheta_pp_old, rtheta_pp + real (kind=RKIND), dimension(:,:), pointer :: cqu, pressure_base, pressure_p, pressure, v + real (kind=RKIND), dimension(:,:), pointer :: kdiff, pv_edge, pv_vertex, pv_cell, rho_edge, h_divergence, ke + real (kind=RKIND), dimension(:,:), pointer :: cofwr, cofwz, coftz, cofwt, a_tri, alpha_tri, gamma_tri + real (kind=RKIND), dimension(:), pointer :: cofrz + real (kind=RKIND), dimension(:,:), pointer :: gradPVn, gradPVt + + + real (kind=RKIND), dimension(:,:), pointer :: u_1, u_2 + real (kind=RKIND), dimension(:,:), pointer :: w_1, w_2 + real (kind=RKIND), dimension(:,:), pointer :: theta_m_1, theta_m_2 + real (kind=RKIND), dimension(:,:), pointer :: rho_zz_1, rho_zz_2 + real (kind=RKIND), dimension(:,:,:), pointer :: scalars_1, scalars_2 + real (kind=RKIND), dimension(:,:), pointer :: ruAvg, wwAvg, ruAvg_split, wwAvg_split + + integer, pointer :: nCells_ptr + integer :: nCells + real (kind=RKIND), dimension(:,:), pointer :: uReconstructZonal, uReconstructMeridional, uReconstructX, uReconstructY, uReconstructZ + + real (kind=RKIND), dimension(:,:), pointer :: tend_ru, tend_rt, tend_rho, tend_rw, rt_diabatic_tend + real (kind=RKIND), dimension(:,:), pointer :: tend_u_euler, tend_w_euler, tend_theta_euler + real(kind=RKIND), dimension(:,:), pointer :: tend_w_pgf, tend_w_buoy + real(kind=RKIND), dimension(:,:,:), pointer :: scalar_tend_save + + real (kind=RKIND), dimension(:,:), pointer :: rthdynten, divergence, vorticity + + real (kind=RKIND), dimension(:,:), pointer :: lbc_u, lbc_w, lbc_ru, lbc_rho_edge, lbc_rho, lbc_rtheta_m, lbc_rho_zz, lbc_theta + real (kind=RKIND), dimension(:,:), pointer :: lbc_tend_u, lbc_tend_w, lbc_tend_ru, lbc_tend_rho_edge, lbc_tend_rho + real (kind=RKIND), dimension(:,:), pointer :: lbc_tend_rtheta_m, lbc_tend_rho_zz, lbc_tend_theta + + real (kind=RKIND), dimension(:,:,:), pointer :: lbc_scalars, lbc_tend_scalars + + nullify(mesh) + nullify(state) + nullify(diag) + nullify(tend) + nullify(tend_physics) + nullify(lbc) + call mpas_pool_get_subpool(domain % blocklist % structs, 'mesh', mesh) + call mpas_pool_get_subpool(domain % blocklist % structs, 'state', state) + call mpas_pool_get_subpool(domain % blocklist % structs, 'diag', diag) + call mpas_pool_get_subpool(domain % blocklist % structs, 'tend', tend) + call mpas_pool_get_subpool(domain % blocklist % structs, 'tend_physics', tend_physics) + call mpas_pool_get_subpool(domain % blocklist % structs, 'lbc', lbc) + + call mpas_pool_get_config(domain % blocklist % configs, 'config_apply_lbcs', config_apply_lbcs_ptr) + config_apply_lbcs = config_apply_lbcs_ptr + + MPAS_ACC_TIMER_START('atm_srk3 [ACC_data_xfer]') + call mpas_pool_get_array(diag, 'ru', ru) + !$acc enter data copyin(ru) ! use values from atm_init_coupled_diagnostics + call mpas_pool_get_array(diag, 'ru_p', ru_p) + !$acc enter data copyin(ru_p) + call mpas_pool_get_array(diag, 'ru_save', ru_save) + !$acc enter data copyin(ru_save) + call mpas_pool_get_array(diag, 'rw', rw) + !$acc enter data copyin(rw) ! use values from atm_init_coupled_diagnostics + call mpas_pool_get_array(diag, 'rw_p', rw_p) + !$acc enter data copyin(rw_p) + call mpas_pool_get_array(diag, 'rw_save', rw_save) + !$acc enter data copyin(rw_save) + call mpas_pool_get_array(diag, 'rtheta_p', rtheta_p) + !$acc enter data copyin(rtheta_p) ! use values from atm_init_coupled_diagnostics + call mpas_pool_get_array(diag, 'rtheta_p_save', rtheta_p_save) + !$acc enter data copyin(rtheta_p_save) + call mpas_pool_get_array(diag, 'exner', exner) + !$acc enter data copyin(exner) ! use values from atm_init_coupled_diagnostics + call mpas_pool_get_array(diag, 'exner_base', exner_base) + !$acc enter data copyin(exner_base) ! use values from atm_init_coupled_diagnostics + call mpas_pool_get_array(diag, 'rtheta_base', rtheta_base) + !$acc enter data copyin(rtheta_base) ! use values from atm_init_coupled_diagnostics + call mpas_pool_get_array(diag, 'rho_base', rho_base) + !$acc enter data copyin(rho_base) + call mpas_pool_get_array(diag, 'rho', rho) + !$acc enter data copyin(rho) + call mpas_pool_get_array(diag, 'theta', theta) + !$acc enter data copyin(theta) + call mpas_pool_get_array(diag, 'theta_base', theta_base) + !$acc enter data copyin(theta_base) + call mpas_pool_get_array(diag, 'rho_p', rho_p) + !$acc enter data copyin(rho_p) ! use values from atm_init_coupled_diagnostics + call mpas_pool_get_array(diag, 'rho_p_save', rho_p_save) + !$acc enter data copyin(rho_p_save) + call mpas_pool_get_array(diag, 'rho_pp', rho_pp) + !$acc enter data copyin(rho_pp) + call mpas_pool_get_array(diag, 'rho_zz_old_split', rho_zz_old_split) + !$acc enter data copyin(rho_zz_old_split) + call mpas_pool_get_array(diag, 'cqw', cqw) + !$acc enter data copyin(cqw) + call mpas_pool_get_array(diag, 'cqu', cqu) + !$acc enter data copyin(cqu) + call mpas_pool_get_array(diag, 'pressure_p', pressure_p) + !$acc enter data copyin(pressure_p) ! use values from atm_init_coupled_diagnostics + call mpas_pool_get_array(diag, 'pressure_base', pressure_base) + !$acc enter data copyin(pressure_base) ! use values from atm_init_coupled_diagnostics + call mpas_pool_get_array(diag, 'pressure', pressure) + !$acc enter data copyin(pressure) + call mpas_pool_get_array(diag, 'v', v) + !$acc enter data copyin(v) ! use values from atm_compute_solve_diagnostics + call mpas_pool_get_array(diag, 'rtheta_pp', rtheta_pp) + !$acc enter data copyin(rtheta_pp) + call mpas_pool_get_array(diag, 'rtheta_pp_old', rtheta_pp_old) + !$acc enter data copyin(rtheta_pp_old) + call mpas_pool_get_array(diag, 'kdiff', kdiff) + !$acc enter data copyin(kdiff) + call mpas_pool_get_array(diag, 'pv_edge', pv_edge) + !$acc enter data copyin(pv_edge) ! use values from atm_compute_solve_diagnostics + call mpas_pool_get_array(diag, 'pv_vertex', pv_vertex) + !$acc enter data copyin(pv_vertex) + call mpas_pool_get_array(diag, 'pv_cell', pv_cell) + !$acc enter data copyin(pv_cell) + call mpas_pool_get_array(diag, 'rho_edge', rho_edge) + !$acc enter data copyin(rho_edge) ! use values from atm_compute_solve_diagnostics + call mpas_pool_get_array(diag, 'h_divergence', h_divergence) + !$acc enter data copyin(h_divergence) + call mpas_pool_get_array(diag, 'ke', ke) + !$acc enter data copyin(ke) ! use values from atm_compute_solve_diagnostics + call mpas_pool_get_array(diag, 'gradPVn', gradPVn) + !$acc enter data copyin(gradPVn) + call mpas_pool_get_array(diag, 'gradPVt', gradPVt) + !$acc enter data copyin(gradPVt) + + call mpas_pool_get_array(diag, 'alpha_tri', alpha_tri) + !$acc enter data copyin(alpha_tri) + call mpas_pool_get_array(diag, 'gamma_tri', gamma_tri) + !$acc enter data copyin(gamma_tri) + call mpas_pool_get_array(diag, 'a_tri', a_tri) + !$acc enter data copyin(a_tri) + call mpas_pool_get_array(diag, 'cofwr', cofwr) + !$acc enter data copyin(cofwr) + call mpas_pool_get_array(diag, 'cofwz', cofwz) + !$acc enter data copyin(cofwz) + call mpas_pool_get_array(diag, 'coftz', coftz) + !$acc enter data copyin(coftz) + call mpas_pool_get_array(diag, 'cofwt', cofwt) + !$acc enter data copyin(cofwt) + call mpas_pool_get_array(diag, 'cofrz', cofrz) + !$acc enter data copyin(cofrz) + call mpas_pool_get_array(diag, 'vorticity', vorticity) + !$acc enter data copyin(vorticity) + call mpas_pool_get_array(diag, 'divergence', divergence) + !$acc enter data copyin(divergence) + call mpas_pool_get_array(diag, 'ruAvg', ruAvg) + !$acc enter data copyin(ruAvg) + call mpas_pool_get_array(diag, 'ruAvg_split', ruAvg_split) + !$acc enter data copyin(ruAvg_split) + call mpas_pool_get_array(diag, 'wwAvg', wwAvg) + !$acc enter data copyin(wwAvg) + call mpas_pool_get_array(diag, 'wwAvg_split', wwAvg_split) + !$acc enter data copyin(wwAvg_split) + + call mpas_pool_get_dimension(mesh, 'nCellsSolve', nCells_ptr) + nCells = nCells_ptr + call mpas_pool_get_array(diag, 'uReconstructX', uReconstructX) + !$acc enter data create(uReconstructX(:,1:nCells)) + call mpas_pool_get_array(diag, 'uReconstructY', uReconstructY) + !$acc enter data create(uReconstructY(:,1:nCells)) + call mpas_pool_get_array(diag, 'uReconstructZ', uReconstructZ) + !$acc enter data create(uReconstructZ(:,1:nCells)) + call mpas_pool_get_array(diag, 'uReconstructZonal', uReconstructZonal) + !$acc enter data copyin(uReconstructZonal(:,1:nCells)) + call mpas_pool_get_array(diag, 'uReconstructMeridional', uReconstructMeridional) + !$acc enter data copyin(uReconstructMeridional(:,1:nCells)) + + call mpas_pool_get_array(state, 'u', u_1, 1) + !$acc enter data copyin(u_1) + call mpas_pool_get_array(state, 'u', u_2, 2) + !$acc enter data copyin(u_2) + call mpas_pool_get_array(state, 'w', w_1, 1) + !$acc enter data copyin(w_1) + call mpas_pool_get_array(state, 'w', w_2, 2) + !$acc enter data copyin(w_2) + call mpas_pool_get_array(state, 'theta_m', theta_m_1, 1) + !$acc enter data copyin(theta_m_1) ! use values from atm_init_coupled_diagnostics + call mpas_pool_get_array(state, 'theta_m', theta_m_2, 2) + !$acc enter data copyin(theta_m_2) + call mpas_pool_get_array(state, 'rho_zz', rho_zz_1, 1) + !$acc enter data copyin(rho_zz_1) + call mpas_pool_get_array(state, 'rho_zz', rho_zz_2, 2) + !$acc enter data copyin(rho_zz_2) + call mpas_pool_get_array(state, 'scalars', scalars_1, 1) + !$acc enter data copyin(scalars_1) + call mpas_pool_get_array(state, 'scalars', scalars_2, 2) + !$acc enter data copyin(scalars_2) + + + call mpas_pool_get_array(tend, 'u', tend_ru) + !$acc enter data copyin(tend_ru) + call mpas_pool_get_array(tend, 'rho_zz', tend_rho) + !$acc enter data copyin(tend_rho) + call mpas_pool_get_array(tend, 'theta_m', tend_rt) + !$acc enter data copyin(tend_rt) + call mpas_pool_get_array(tend, 'w', tend_rw) + !$acc enter data copyin(tend_rw) + call mpas_pool_get_array(tend, 'rt_diabatic_tend', rt_diabatic_tend) + !$acc enter data copyin(rt_diabatic_tend) + call mpas_pool_get_array(tend, 'u_euler', tend_u_euler) + !$acc enter data copyin(tend_u_euler) + call mpas_pool_get_array(tend, 'theta_euler', tend_theta_euler) + !$acc enter data copyin(tend_theta_euler) + call mpas_pool_get_array(tend, 'w_euler', tend_w_euler) + !$acc enter data copyin(tend_w_euler) + call mpas_pool_get_array(tend, 'w_pgf', tend_w_pgf) + !$acc enter data copyin(tend_w_pgf) + call mpas_pool_get_array(tend, 'w_buoy', tend_w_buoy) + !$acc enter data copyin(tend_w_buoy) + call mpas_pool_get_array(tend, 'scalars_tend', scalar_tend_save) + !$acc enter data copyin(scalar_tend_save) + + + if(config_apply_lbcs) then + call mpas_pool_get_array(lbc, 'lbc_u', lbc_u, 2) + !$acc enter data copyin(lbc_u) + call mpas_pool_get_array(lbc, 'lbc_w', lbc_w, 2) + !$acc enter data copyin(lbc_w) + call mpas_pool_get_array(lbc, 'lbc_ru', lbc_ru, 2) + !$acc enter data copyin(lbc_ru) + call mpas_pool_get_array(lbc, 'lbc_rho_edge', lbc_rho_edge, 2) + !$acc enter data copyin(lbc_rho_edge) + call mpas_pool_get_array(lbc, 'lbc_theta', lbc_theta, 2) + !$acc enter data copyin(lbc_theta) + call mpas_pool_get_array(lbc, 'lbc_rtheta_m', lbc_rtheta_m, 2) + !$acc enter data copyin(lbc_rtheta_m) + call mpas_pool_get_array(lbc, 'lbc_rho_zz', lbc_rho_zz, 2) + !$acc enter data copyin(lbc_rho_zz) + call mpas_pool_get_array(lbc, 'lbc_rho', lbc_rho, 2) + !$acc enter data copyin(lbc_rho) + call mpas_pool_get_array(lbc, 'lbc_scalars', lbc_scalars, 2) + !$acc enter data copyin(lbc_scalars) + + + call mpas_pool_get_array(lbc, 'lbc_u', lbc_tend_u, 1) + !$acc enter data copyin(lbc_tend_u) + call mpas_pool_get_array(lbc, 'lbc_ru', lbc_tend_ru, 1) + !$acc enter data copyin(lbc_tend_ru) + call mpas_pool_get_array(lbc, 'lbc_rho_edge', lbc_tend_rho_edge, 1) + !$acc enter data copyin(lbc_tend_rho_edge) + call mpas_pool_get_array(lbc, 'lbc_w', lbc_tend_w, 1) + !$acc enter data copyin(lbc_tend_w) + call mpas_pool_get_array(lbc, 'lbc_theta', lbc_tend_theta, 1) + !$acc enter data copyin(lbc_tend_theta) + call mpas_pool_get_array(lbc, 'lbc_rtheta_m', lbc_tend_rtheta_m, 1) + !$acc enter data copyin(lbc_tend_rtheta_m) + call mpas_pool_get_array(lbc, 'lbc_rho_zz', lbc_tend_rho_zz, 1) + !$acc enter data copyin(lbc_tend_rho_zz) + call mpas_pool_get_array(lbc, 'lbc_rho', lbc_tend_rho, 1) + !$acc enter data copyin(lbc_tend_rho) + call mpas_pool_get_array(lbc, 'lbc_scalars', lbc_tend_scalars, 1) + !$acc enter data copyin(lbc_tend_scalars) + end if + + call mpas_pool_get_array(tend_physics, 'rthdynten', rthdynten) + !$acc enter data copyin(rthdynten) + + MPAS_ACC_TIMER_STOP('atm_srk3 [ACC_data_xfer]') +#endif + + end subroutine mpas_atm_pre_dynamics_h2d + + + subroutine mpas_atm_post_dynamics_d2h(domain) + + implicit none + + type (domain_type), intent(inout) :: domain + + +#ifdef MPAS_OPENACC + type (mpas_pool_type), pointer :: mesh + type (mpas_pool_type), pointer :: state + type (mpas_pool_type), pointer :: diag + type (mpas_pool_type), pointer :: tend + type (mpas_pool_type), pointer :: tend_physics + type (mpas_pool_type), pointer :: lbc + + logical, pointer :: config_apply_lbcs_ptr + logical :: config_apply_lbcs + + real (kind=RKIND), dimension(:,:), pointer :: ru, ru_p + real (kind=RKIND), dimension(:,:), pointer :: ru_save + real (kind=RKIND), dimension(:,:), pointer :: rw, rw_p + real (kind=RKIND), dimension(:,:), pointer :: rw_save + real (kind=RKIND), dimension(:,:), pointer :: rtheta_p + real (kind=RKIND), dimension(:,:), pointer :: exner, exner_base + real (kind=RKIND), dimension(:,:), pointer :: rtheta_base, rho_base + real (kind=RKIND), dimension(:,:), pointer :: rtheta_p_save + real (kind=RKIND), dimension(:,:), pointer :: rho_p, rho_pp, rho, theta, theta_base + real (kind=RKIND), dimension(:,:), pointer :: rho_p_save + real (kind=RKIND), dimension(:,:), pointer :: rho_zz_old_split + real (kind=RKIND), dimension(:,:), pointer :: cqw, rtheta_pp_old, rtheta_pp + real (kind=RKIND), dimension(:,:), pointer :: cqu, pressure_base, pressure_p, pressure, v + real (kind=RKIND), dimension(:,:), pointer :: kdiff, pv_edge, pv_vertex, pv_cell, rho_edge, h_divergence, ke + real (kind=RKIND), dimension(:,:), pointer :: cofwr, cofwz, coftz, cofwt, a_tri, alpha_tri, gamma_tri + real (kind=RKIND), dimension(:), pointer :: cofrz + real (kind=RKIND), dimension(:,:), pointer :: gradPVn, gradPVt + + + real (kind=RKIND), dimension(:,:), pointer :: u_1, u_2 + real (kind=RKIND), dimension(:,:), pointer :: w_1, w_2 + real (kind=RKIND), dimension(:,:), pointer :: theta_m_1, theta_m_2 + real (kind=RKIND), dimension(:,:), pointer :: rho_zz_1, rho_zz_2 + real (kind=RKIND), dimension(:,:,:), pointer :: scalars_1, scalars_2 + real (kind=RKIND), dimension(:,:), pointer :: ruAvg, wwAvg, ruAvg_split, wwAvg_split + + integer, pointer :: nCells_ptr + integer :: nCells + real (kind=RKIND), dimension(:,:), pointer :: uReconstructZonal, uReconstructMeridional, uReconstructX, uReconstructY, uReconstructZ + + real (kind=RKIND), dimension(:,:), pointer :: tend_ru, tend_rt, tend_rho, tend_rw, rt_diabatic_tend + real (kind=RKIND), dimension(:,:), pointer :: tend_u_euler, tend_w_euler, tend_theta_euler + real(kind=RKIND), dimension(:,:), pointer :: tend_w_pgf, tend_w_buoy + real(kind=RKIND), dimension(:,:,:), pointer :: scalar_tend_save + + real (kind=RKIND), dimension(:,:), pointer :: rthdynten, divergence, vorticity + + real (kind=RKIND), dimension(:,:), pointer :: lbc_u, lbc_w, lbc_ru, lbc_rho_edge, lbc_rho, lbc_rtheta_m, lbc_rho_zz, lbc_theta + real (kind=RKIND), dimension(:,:), pointer :: lbc_tend_u, lbc_tend_w, lbc_tend_ru, lbc_tend_rho_edge, lbc_tend_rho + real (kind=RKIND), dimension(:,:), pointer :: lbc_tend_rtheta_m, lbc_tend_rho_zz, lbc_tend_theta + + real (kind=RKIND), dimension(:,:,:), pointer :: lbc_scalars, lbc_tend_scalars + + nullify(mesh) + nullify(state) + nullify(diag) + nullify(tend) + nullify(tend_physics) + nullify(lbc) + call mpas_pool_get_subpool(domain % blocklist % structs, 'mesh', mesh) + call mpas_pool_get_subpool(domain % blocklist % structs, 'state', state) + call mpas_pool_get_subpool(domain % blocklist % structs, 'diag', diag) + call mpas_pool_get_subpool(domain % blocklist % structs, 'tend', tend) + call mpas_pool_get_subpool(domain % blocklist % structs, 'tend_physics', tend_physics) + call mpas_pool_get_subpool(domain % blocklist % structs, 'lbc', lbc) + + call mpas_pool_get_config(domain % blocklist % configs, 'config_apply_lbcs', config_apply_lbcs_ptr) + config_apply_lbcs = config_apply_lbcs_ptr + + MPAS_ACC_TIMER_START('atm_srk3 [ACC_data_xfer]') + call mpas_pool_get_array(diag, 'ru', ru) + !$acc exit data copyout(ru) ! use values from atm_init_coupled_diagnostics + call mpas_pool_get_array(diag, 'ru_p', ru_p) + !$acc exit data copyout(ru_p) + call mpas_pool_get_array(diag, 'ru_save', ru_save) + !$acc exit data delete(ru_save) + call mpas_pool_get_array(diag, 'rw', rw) + !$acc exit data copyout(rw) ! use values from atm_init_coupled_diagnostics + call mpas_pool_get_array(diag, 'rw_p', rw_p) + !$acc exit data copyout(rw_p) + call mpas_pool_get_array(diag, 'rw_save', rw_save) + !$acc exit data delete(rw_save) + call mpas_pool_get_array(diag, 'rtheta_p', rtheta_p) + !$acc exit data copyout(rtheta_p) ! use values from atm_init_coupled_diagnostics + call mpas_pool_get_array(diag, 'rtheta_p_save', rtheta_p_save) + !$acc exit data delete(rtheta_p_save) + call mpas_pool_get_array(diag, 'exner', exner) + !$acc exit data copyout(exner) ! use values from atm_init_coupled_diagnostics + call mpas_pool_get_array(diag, 'exner_base', exner_base) + !$acc exit data copyout(exner_base) ! use values from atm_init_coupled_diagnostics + call mpas_pool_get_array(diag, 'rtheta_base', rtheta_base) + !$acc exit data copyout(rtheta_base) ! use values from atm_init_coupled_diagnostics + call mpas_pool_get_array(diag, 'rho_base', rho_base) + !$acc exit data copyout(rho_base) + call mpas_pool_get_array(diag, 'rho', rho) + !$acc exit data copyout(rho) + call mpas_pool_get_array(diag, 'theta', theta) + !$acc exit data copyout(theta) + call mpas_pool_get_array(diag, 'theta_base', theta_base) + !$acc exit data copyout(theta_base) + call mpas_pool_get_array(diag, 'rho_p', rho_p) + !$acc exit data copyout(rho_p) ! use values from atm_init_coupled_diagnostics + call mpas_pool_get_array(diag, 'rho_p_save', rho_p_save) + !$acc exit data delete(rho_p_save) + call mpas_pool_get_array(diag, 'rho_pp', rho_pp) + !$acc exit data copyout(rho_pp) + call mpas_pool_get_array(diag, 'rho_zz_old_split', rho_zz_old_split) + !$acc exit data delete(rho_zz_old_split) + call mpas_pool_get_array(diag, 'cqw', cqw) + !$acc exit data delete(cqw) + call mpas_pool_get_array(diag, 'cqu', cqu) + !$acc exit data copyout(cqu) + call mpas_pool_get_array(diag, 'pressure_p', pressure_p) + !$acc exit data copyout(pressure_p) ! use values from atm_init_coupled_diagnostics + call mpas_pool_get_array(diag, 'pressure_base', pressure_base) + !$acc exit data copyout(pressure_base) ! use values from atm_init_coupled_diagnostics + call mpas_pool_get_array(diag, 'pressure', pressure) + !$acc exit data copyout(pressure) + call mpas_pool_get_array(diag, 'v', v) + !$acc exit data copyout(v) ! use values from atm_compute_solve_diagnostics + call mpas_pool_get_array(diag, 'rtheta_pp', rtheta_pp) + !$acc exit data copyout(rtheta_pp) + call mpas_pool_get_array(diag, 'rtheta_pp_old', rtheta_pp_old) + !$acc exit data copyout(rtheta_pp_old) + call mpas_pool_get_array(diag, 'kdiff', kdiff) + !$acc exit data copyout(kdiff) + call mpas_pool_get_array(diag, 'pv_edge', pv_edge) + !$acc exit data copyout(pv_edge) ! use values from atm_compute_solve_diagnostics + call mpas_pool_get_array(diag, 'pv_vertex', pv_vertex) + !$acc exit data copyout(pv_vertex) + call mpas_pool_get_array(diag, 'pv_cell', pv_cell) + !$acc exit data delete(pv_cell) + call mpas_pool_get_array(diag, 'rho_edge', rho_edge) + !$acc exit data copyout(rho_edge) ! use values from atm_compute_solve_diagnostics + call mpas_pool_get_array(diag, 'h_divergence', h_divergence) + !$acc exit data copyout(h_divergence) + call mpas_pool_get_array(diag, 'ke', ke) + !$acc exit data copyout(ke) ! use values from atm_compute_solve_diagnostics + call mpas_pool_get_array(diag, 'gradPVn', gradPVn) + !$acc exit data delete(gradPVn) + call mpas_pool_get_array(diag, 'gradPVt', gradPVt) + !$acc exit data delete(gradPVt) + + call mpas_pool_get_array(diag, 'alpha_tri', alpha_tri) + !$acc exit data delete(alpha_tri) + call mpas_pool_get_array(diag, 'gamma_tri', gamma_tri) + !$acc exit data delete(gamma_tri) + call mpas_pool_get_array(diag, 'a_tri', a_tri) + !$acc exit data delete(a_tri) + call mpas_pool_get_array(diag, 'cofwr', cofwr) + !$acc exit data delete(cofwr) + call mpas_pool_get_array(diag, 'cofwz', cofwz) + !$acc exit data delete(cofwz) + call mpas_pool_get_array(diag, 'coftz', coftz) + !$acc exit data delete(coftz) + call mpas_pool_get_array(diag, 'cofwt', cofwt) + !$acc exit data delete(cofwt) + call mpas_pool_get_array(diag, 'cofrz', cofrz) + !$acc exit data delete(cofrz) + call mpas_pool_get_array(diag, 'vorticity', vorticity) + !$acc exit data copyout(vorticity) + call mpas_pool_get_array(diag, 'divergence', divergence) + !$acc exit data copyout(divergence) + call mpas_pool_get_array(diag, 'ruAvg', ruAvg) + !$acc exit data copyout(ruAvg) + call mpas_pool_get_array(diag, 'ruAvg_split', ruAvg_split) + !$acc exit data copyout(ruAvg_split) + call mpas_pool_get_array(diag, 'wwAvg', wwAvg) + !$acc exit data copyout(wwAvg) + call mpas_pool_get_array(diag, 'wwAvg_split', wwAvg_split) + !$acc exit data copyout(wwAvg_split) + + call mpas_pool_get_dimension(mesh, 'nCellsSolve', nCells_ptr) + nCells = nCells_ptr + call mpas_pool_get_array(diag, 'uReconstructX', uReconstructX) + !$acc exit data copyout(uReconstructX(:,1:nCells)) + call mpas_pool_get_array(diag, 'uReconstructY', uReconstructY) + !$acc exit data copyout(uReconstructY(:,1:nCells)) + call mpas_pool_get_array(diag, 'uReconstructZ', uReconstructZ) + !$acc exit data copyout(uReconstructZ(:,1:nCells)) + call mpas_pool_get_array(diag, 'uReconstructZonal', uReconstructZonal) + !$acc exit data copyout(uReconstructZonal(:,1:nCells)) + call mpas_pool_get_array(diag, 'uReconstructMeridional', uReconstructMeridional) + !$acc exit data copyout(uReconstructMeridional(:,1:nCells)) + + call mpas_pool_get_array(state, 'u', u_1, 1) + !$acc exit data copyout(u_1) + call mpas_pool_get_array(state, 'u', u_2, 2) + !$acc exit data copyout(u_2) + call mpas_pool_get_array(state, 'w', w_1, 1) + !$acc exit data copyout(w_1) + call mpas_pool_get_array(state, 'w', w_2, 2) + !$acc exit data copyout(w_2) + call mpas_pool_get_array(state, 'theta_m', theta_m_1, 1) + !$acc exit data copyout(theta_m_1) ! use values from atm_init_coupled_diagnostics + call mpas_pool_get_array(state, 'theta_m', theta_m_2, 2) + !$acc exit data copyout(theta_m_2) ! Delete gives incorrect results + call mpas_pool_get_array(state, 'rho_zz', rho_zz_1, 1) + !$acc exit data copyout(rho_zz_1) + call mpas_pool_get_array(state, 'rho_zz', rho_zz_2, 2) + !$acc exit data copyout(rho_zz_2) + call mpas_pool_get_array(state, 'scalars', scalars_1, 1) + !$acc exit data copyout(scalars_1) + call mpas_pool_get_array(state, 'scalars', scalars_2, 2) + !$acc exit data copyout(scalars_2) ! Delete gives incorrect results + + + call mpas_pool_get_array(tend, 'u', tend_ru) + !$acc exit data copyout(tend_ru) + call mpas_pool_get_array(tend, 'rho_zz', tend_rho) + !$acc exit data copyout(tend_rho) + call mpas_pool_get_array(tend, 'theta_m', tend_rt) + !$acc exit data copyout(tend_rt) + call mpas_pool_get_array(tend, 'w', tend_rw) + !$acc exit data copyout(tend_rw) + call mpas_pool_get_array(tend, 'rt_diabatic_tend', rt_diabatic_tend) + !$acc exit data copyout(rt_diabatic_tend) + call mpas_pool_get_array(tend, 'u_euler', tend_u_euler) + !$acc exit data copyout(tend_u_euler) + call mpas_pool_get_array(tend, 'theta_euler', tend_theta_euler) + !$acc exit data copyout(tend_theta_euler) + call mpas_pool_get_array(tend, 'w_euler', tend_w_euler) + !$acc exit data copyout(tend_w_euler) + call mpas_pool_get_array(tend, 'w_pgf', tend_w_pgf) + !$acc exit data copyout(tend_w_pgf) + call mpas_pool_get_array(tend, 'w_buoy', tend_w_buoy) + !$acc exit data copyout(tend_w_buoy) + call mpas_pool_get_array(tend, 'scalars_tend', scalar_tend_save) + !$acc exit data copyout(scalar_tend_save) + + if(config_apply_lbcs) then + call mpas_pool_get_array(lbc, 'lbc_u', lbc_u, 2) + !$acc exit data delete(lbc_u) + call mpas_pool_get_array(lbc, 'lbc_w', lbc_w, 2) + !$acc exit data delete(lbc_w) + call mpas_pool_get_array(lbc, 'lbc_ru', lbc_ru, 2) + !$acc exit data delete(lbc_ru) + call mpas_pool_get_array(lbc, 'lbc_rho_edge', lbc_rho_edge, 2) + !$acc exit data delete(lbc_rho_edge) + call mpas_pool_get_array(lbc, 'lbc_theta', lbc_theta, 2) + !$acc exit data delete(lbc_theta) + call mpas_pool_get_array(lbc, 'lbc_rtheta_m', lbc_rtheta_m, 2) + !$acc exit data delete(lbc_rtheta_m) + call mpas_pool_get_array(lbc, 'lbc_rho_zz', lbc_rho_zz, 2) + !$acc exit data delete(lbc_rho_zz) + call mpas_pool_get_array(lbc, 'lbc_rho', lbc_rho, 2) + !$acc exit data delete(lbc_rho) + call mpas_pool_get_array(lbc, 'lbc_scalars', lbc_scalars, 2) + !$acc exit data delete(lbc_scalars) + + call mpas_pool_get_array(lbc, 'lbc_u', lbc_tend_u, 1) + !$acc exit data delete(lbc_tend_u) + call mpas_pool_get_array(lbc, 'lbc_ru', lbc_tend_ru, 1) + !$acc exit data delete(lbc_tend_ru) + call mpas_pool_get_array(lbc, 'lbc_rho_edge', lbc_tend_rho_edge, 1) + !$acc exit data delete(lbc_tend_rho_edge) + call mpas_pool_get_array(lbc, 'lbc_w', lbc_tend_w, 1) + !$acc exit data delete(lbc_tend_w) + call mpas_pool_get_array(lbc, 'lbc_theta', lbc_tend_theta, 1) + !$acc exit data delete(lbc_tend_theta) + call mpas_pool_get_array(lbc, 'lbc_rtheta_m', lbc_tend_rtheta_m, 1) + !$acc exit data delete(lbc_tend_rtheta_m) + call mpas_pool_get_array(lbc, 'lbc_rho_zz', lbc_tend_rho_zz, 1) + !$acc exit data delete(lbc_tend_rho_zz) + call mpas_pool_get_array(lbc, 'lbc_rho', lbc_tend_rho, 1) + !$acc exit data delete(lbc_tend_rho) + call mpas_pool_get_array(lbc, 'lbc_scalars', lbc_tend_scalars, 1) + !$acc exit data delete(lbc_tend_scalars) + end if + + call mpas_pool_get_array(tend_physics, 'rthdynten', rthdynten) + !$acc exit data copyout(rthdynten) + MPAS_ACC_TIMER_STOP('atm_srk3 [ACC_data_xfer]') #endif - end subroutine mpas_atm_dynamics_init + end subroutine mpas_atm_post_dynamics_d2h !---------------------------------------------------------------------------- @@ -546,6 +1519,11 @@ subroutine mpas_atm_dynamics_finalize(domain) real (kind=RKIND), dimension(:), pointer :: angleEdge real (kind=RKIND), dimension(:), pointer :: meshScalingDel2 real (kind=RKIND), dimension(:), pointer :: meshScalingDel4 + real (kind=RKIND), dimension(:,:), pointer :: deformation_coef_c2 + real (kind=RKIND), dimension(:,:), pointer :: deformation_coef_s2 + real (kind=RKIND), dimension(:,:), pointer :: deformation_coef_cs + real (kind=RKIND), dimension(:,:), pointer :: deformation_coef_c + real (kind=RKIND), dimension(:,:), pointer :: deformation_coef_s #endif @@ -731,6 +1709,21 @@ subroutine mpas_atm_dynamics_finalize(domain) call mpas_pool_get_array(mesh, 'meshScalingDel4', meshScalingDel4) !$acc exit data delete(meshScalingDel4) + + call mpas_pool_get_array(mesh, 'deformation_coef_c2', deformation_coef_c2) + !$acc exit data delete(deformation_coef_c2) + + call mpas_pool_get_array(mesh, 'deformation_coef_s2', deformation_coef_s2) + !$acc exit data delete(deformation_coef_s2) + + call mpas_pool_get_array(mesh, 'deformation_coef_cs', deformation_coef_cs) + !$acc exit data delete(deformation_coef_cs) + + call mpas_pool_get_array(mesh, 'deformation_coef_c', deformation_coef_c) + !$acc exit data delete(deformation_coef_c) + + call mpas_pool_get_array(mesh, 'deformation_coef_s', deformation_coef_s) + !$acc exit data delete(deformation_coef_s) #endif end subroutine mpas_atm_dynamics_finalize @@ -774,12 +1767,14 @@ subroutine atm_timestep(domain, dt, nowTime, itimestep, exchange_halo_group) config_apply_lbcs = config_apply_lbcs_ptr + call mpas_atm_pre_dynamics_h2d(domain) if (trim(config_time_integration) == 'SRK3') then call atm_srk3(domain, dt, itimestep, exchange_halo_group) else call mpas_log_write('Unknown time integration option '//trim(config_time_integration), messageType=MPAS_LOG_ERR) call mpas_log_write('Currently, only ''SRK3'' is supported.', messageType=MPAS_LOG_CRIT) end if + call mpas_atm_post_dynamics_d2h(domain) call mpas_set_timeInterval(dtInterval, dt=dt) currTime = nowTime + dtInterval @@ -850,6 +1845,7 @@ subroutine atm_srk3(domain, dt, itimestep, exchange_halo_group) logical, pointer :: config_scalar_advection logical, pointer :: config_positive_definite logical, pointer :: config_monotonic + logical, pointer :: config_gpu_aware_mpi character (len=StrKIND), pointer :: config_microp_scheme character (len=StrKIND), pointer :: config_convection_scheme @@ -873,6 +1869,8 @@ subroutine atm_srk3(domain, dt, itimestep, exchange_halo_group) real (kind=RKIND), dimension(:,:,:), pointer :: scalars, scalars_1, scalars_2 real (kind=RKIND), dimension(:,:), pointer :: rqvdynten, rthdynten, theta_m + real (kind=RKIND), dimension(:,:), pointer :: pressure_p, rtheta_p, exner, tend_u + real (kind=RKIND), dimension(:,:), pointer :: rho_pp, rtheta_pp, ru_p, rw_p, pv_edge, rho_edge real (kind=RKIND) :: theta_local, fac_m #ifndef MPAS_CAM_DYCORE @@ -892,6 +1890,7 @@ subroutine atm_srk3(domain, dt, itimestep, exchange_halo_group) call mpas_pool_get_config(block % configs, 'config_scalar_advection', config_scalar_advection) call mpas_pool_get_config(block % configs, 'config_positive_definite', config_positive_definite) call mpas_pool_get_config(block % configs, 'config_monotonic', config_monotonic) + call mpas_pool_get_config(block % configs, 'config_gpu_aware_mpi', config_gpu_aware_mpi) call mpas_pool_get_config(block % configs, 'config_IAU_option', config_IAU_option) ! config variables for dynamics-transport splitting, WCS 18 November 2014 call mpas_pool_get_config(block % configs, 'config_split_dynamics_transport', config_split_dynamics_transport) @@ -1040,7 +2039,15 @@ subroutine atm_srk3(domain, dt, itimestep, exchange_halo_group) ! ! Communicate halos for theta_m, scalars, pressure_p, and rtheta_p ! - call exchange_halo_group(domain, 'dynamics:theta_m,scalars,pressure_p,rtheta_p') + MPAS_ACC_TIMER_START('atm_srk3: halo_exchanges + ACC_data_xfer') + call mpas_pool_get_array(state, 'theta_m', theta_m, 1) + call mpas_pool_get_array(state, 'scalars', scalars_1, 1) + call mpas_pool_get_array(diag, 'pressure_p', pressure_p) + call mpas_pool_get_array(diag, 'rtheta_p', rtheta_p) + !$acc update self(theta_m,scalars_1,pressure_p,rtheta_p) if (.not. config_gpu_aware_mpi) + call exchange_halo_group(domain, 'dynamics:theta_m,scalars,pressure_p,rtheta_p', config_gpu_aware_mpi) + !$acc update device(theta_m,scalars_1,pressure_p,rtheta_p) if (.not. config_gpu_aware_mpi) + MPAS_ACC_TIMER_STOP('atm_srk3: halo_exchanges + ACC_data_xfer') call mpas_timer_start('atm_rk_integration_setup') @@ -1075,6 +2082,7 @@ subroutine atm_srk3(domain, dt, itimestep, exchange_halo_group) call mpas_timer_stop('atm_compute_moist_coefficients') #ifdef DO_PHYSICS + call update_d2h_pre_physics_get_tend(block % configs, state, diag, tend) call mpas_timer_start('physics_get_tend') rk_step = 1 dynamics_substep = 1 @@ -1083,6 +2091,7 @@ subroutine atm_srk3(domain, dt, itimestep, exchange_halo_group) tend_ru_physics, tend_rtheta_physics, tend_rho_physics, & exchange_halo_group ) call mpas_timer_stop('physics_get_tend') + call update_h2d_post_physics_get_tend(block % configs, state, diag, tend) #else #ifndef MPAS_CAM_DYCORE ! @@ -1098,10 +2107,13 @@ subroutine atm_srk3(domain, dt, itimestep, exchange_halo_group) ! IAU - Incremental Analysis Update ! if (trim(config_IAU_option) /= 'off') then + call update_d2h_pre_add_tend_anal_incr(block % configs, block % structs) call atm_add_tend_anal_incr(block % configs, block % structs, itimestep, dt, & tend_ru_physics, tend_rtheta_physics, tend_rho_physics) end if + !$acc enter data copyin(tend_rtheta_physics,tend_rho_physics,tend_ru_physics) + DYNAMICS_SUBSTEPS : do dynamics_substep = 1, dynamics_split @@ -1121,8 +2133,12 @@ subroutine atm_srk3(domain, dt, itimestep, exchange_halo_group) !$OMP END PARALLEL DO call mpas_timer_stop('atm_compute_vert_imp_coefs') - call exchange_halo_group(domain, 'dynamics:exner') - + MPAS_ACC_TIMER_START('atm_srk3: halo_exchanges + ACC_data_xfer') + call mpas_pool_get_array(diag, 'exner', exner) + !$acc update self(exner) if (.not. config_gpu_aware_mpi) + call exchange_halo_group(domain, 'dynamics:exner', config_gpu_aware_mpi) + !$acc update device(exner) if (.not. config_gpu_aware_mpi) + MPAS_ACC_TIMER_STOP('atm_srk3: halo_exchanges + ACC_data_xfer') !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! ! BEGIN Runge-Kutta loop @@ -1171,7 +2187,8 @@ subroutine atm_srk3(domain, dt, itimestep, exchange_halo_group) !$OMP PARALLEL DO do thread=1,nThreads - call atm_compute_dyn_tend( tend, tend_physics, state, diag, mesh, block % configs, nVertLevels, rk_step, dt, & + call atm_compute_dyn_tend( tend, tend_physics, state, diag, mesh, diag_physics, & + block % configs, nVertLevels, rk_step, dynamics_substep, dt, & cellThreadStart(thread), cellThreadEnd(thread), & vertexThreadStart(thread), vertexThreadEnd(thread), & edgeThreadStart(thread), edgeThreadEnd(thread), & @@ -1200,7 +2217,12 @@ subroutine atm_srk3(domain, dt, itimestep, exchange_halo_group) !*********************************** ! tend_u - call exchange_halo_group(domain, 'dynamics:tend_u') + MPAS_ACC_TIMER_START('atm_srk3: halo_exchanges + ACC_data_xfer') + call mpas_pool_get_array(tend, 'u', tend_u) + !$acc update self(tend_u) if (.not. config_gpu_aware_mpi) + call exchange_halo_group(domain, 'dynamics:tend_u', config_gpu_aware_mpi) + !$acc update device(tend_u) if (.not. config_gpu_aware_mpi) + MPAS_ACC_TIMER_STOP('atm_srk3: halo_exchanges + ACC_data_xfer') call mpas_timer_start('small_step_prep') @@ -1276,7 +2298,12 @@ subroutine atm_srk3(domain, dt, itimestep, exchange_halo_group) do small_step = 1, number_sub_steps(rk_step) - call exchange_halo_group(domain, 'dynamics:rho_pp') + MPAS_ACC_TIMER_START('atm_srk3: halo_exchanges + ACC_data_xfer') + call mpas_pool_get_array(diag, 'rho_pp', rho_pp) + !$acc update self(rho_pp) if (.not. config_gpu_aware_mpi) + call exchange_halo_group(domain, 'dynamics:rho_pp', config_gpu_aware_mpi) + !$acc update device(rho_pp) if (.not. config_gpu_aware_mpi) + MPAS_ACC_TIMER_STOP('atm_srk3: halo_exchanges + ACC_data_xfer') call mpas_timer_start('atm_advance_acoustic_step') @@ -1298,8 +2325,12 @@ subroutine atm_srk3(domain, dt, itimestep, exchange_halo_group) ! rtheta_pp ! This is the only communications needed during the acoustic steps because we solve for u on all edges of owned cells - - call exchange_halo_group(domain, 'dynamics:rtheta_pp') + MPAS_ACC_TIMER_START('atm_srk3: halo_exchanges + ACC_data_xfer') + call mpas_pool_get_array(diag, 'rtheta_pp', rtheta_pp) + !$acc update self(rtheta_pp) if (.not. config_gpu_aware_mpi) + call exchange_halo_group(domain, 'dynamics:rtheta_pp', config_gpu_aware_mpi) + !$acc update device(rtheta_pp) if (.not. config_gpu_aware_mpi) + MPAS_ACC_TIMER_STOP('atm_srk3: halo_exchanges + ACC_data_xfer') ! complete update of horizontal momentum by including 3d divergence damping at the end of the acoustic step @@ -1319,7 +2350,15 @@ subroutine atm_srk3(domain, dt, itimestep, exchange_halo_group) ! ! Communicate halos for rw_p[1,2], ru_p[1,2], rho_pp[1,2], rtheta_pp[2] ! - call exchange_halo_group(domain, 'dynamics:rw_p,ru_p,rho_pp,rtheta_pp') + MPAS_ACC_TIMER_START('atm_srk3: halo_exchanges + ACC_data_xfer') + call mpas_pool_get_array(diag, 'ru_p', ru_p) + call mpas_pool_get_array(diag, 'rw_p', rw_p) + call mpas_pool_get_array(diag, 'rho_pp', rho_pp) + call mpas_pool_get_array(diag, 'rtheta_pp', rtheta_pp) + !$acc update self(rw_p,ru_p,rho_pp,rtheta_pp) if (.not. config_gpu_aware_mpi) + call exchange_halo_group(domain, 'dynamics:rw_p,ru_p,rho_pp,rtheta_pp', config_gpu_aware_mpi) + !$acc update device(rw_p,ru_p,rho_pp,rtheta_pp) if (.not. config_gpu_aware_mpi) + MPAS_ACC_TIMER_STOP('atm_srk3: halo_exchanges + ACC_data_xfer') call mpas_timer_start('atm_recover_large_step_variables') @@ -1354,7 +2393,6 @@ subroutine atm_srk3(domain, dt, itimestep, exchange_halo_group) call mpas_atm_get_bdy_state(clock, block, nVertLevels, nEdges, 'u', time_dyn_step, ru_driving_values) ! do this inline at present - it is simple enough - !$acc enter data copyin(u) !$acc parallel default(present) !$acc loop gang worker do iEdge = 1, nEdgesSolve @@ -1366,12 +2404,10 @@ subroutine atm_srk3(domain, dt, itimestep, exchange_halo_group) end if end do !$acc end parallel - !$acc exit data copyout(u) call mpas_atm_get_bdy_state(clock, block, nVertLevels, nEdges, 'ru', time_dyn_step, ru_driving_values) call mpas_pool_get_array(diag, 'ru', u) ! do this inline at present - it is simple enough - !$acc enter data copyin(u) !$acc parallel default(present) !$acc loop gang worker do iEdge = 1, nEdges @@ -1383,7 +2419,6 @@ subroutine atm_srk3(domain, dt, itimestep, exchange_halo_group) end if end do !$acc end parallel - !$acc exit data copyout(u) deallocate(ru_driving_values) @@ -1391,12 +2426,17 @@ subroutine atm_srk3(domain, dt, itimestep, exchange_halo_group) !------------------------------------------------------------------- + MPAS_ACC_TIMER_START('atm_srk3: halo_exchanges + ACC_data_xfer') + call mpas_pool_get_array(state, 'u', u, 2) + !$acc update self(u) if (.not. config_gpu_aware_mpi) ! u if (config_apply_lbcs) then - call exchange_halo_group(domain, 'dynamics:u_123') + call exchange_halo_group(domain, 'dynamics:u_123', config_gpu_aware_mpi) else - call exchange_halo_group(domain, 'dynamics:u_3') + call exchange_halo_group(domain, 'dynamics:u_3', config_gpu_aware_mpi) end if + !$acc update device(u) if (.not. config_gpu_aware_mpi) + MPAS_ACC_TIMER_STOP('atm_srk3: halo_exchanges + ACC_data_xfer') ! scalar advection: RK3 scheme of Skamarock and Gassmann (2011). ! PD or monotonicity constraints applied only on the final Runge-Kutta substep. @@ -1404,11 +2444,16 @@ subroutine atm_srk3(domain, dt, itimestep, exchange_halo_group) if (config_scalar_advection .and. (.not. config_split_dynamics_transport) ) then call advance_scalars('scalars', domain, rk_step, rk_timestep, config_monotonic, config_positive_definite, & - config_time_integration_order, config_split_dynamics_transport, exchange_halo_group) + config_time_integration_order, config_split_dynamics_transport, config_gpu_aware_mpi, exchange_halo_group) if (config_apply_lbcs) then ! adjust boundary tendencies for regional_MPAS scalar transport - call exchange_halo_group(domain, 'dynamics:scalars') + MPAS_ACC_TIMER_START('atm_srk3: halo_exchanges + ACC_data_xfer') + call mpas_pool_get_array(state, 'scalars', scalars_2, 2) + !$acc update self(scalars_2) if (.not. config_gpu_aware_mpi) + call exchange_halo_group(domain, 'dynamics:scalars', config_gpu_aware_mpi) + !$acc update device(scalars_2) if (.not. config_gpu_aware_mpi) + MPAS_ACC_TIMER_STOP('atm_srk3: halo_exchanges + ACC_data_xfer') allocate(scalars_driving(num_scalars,nVertLevels,nCells+1)) @@ -1460,17 +2505,27 @@ subroutine atm_srk3(domain, dt, itimestep, exchange_halo_group) call mpas_timer_stop('atm_compute_solve_diagnostics') + MPAS_ACC_TIMER_START('atm_srk3: halo_exchanges + ACC_data_xfer') + call mpas_pool_get_array(state, 'w', w, 2) + call mpas_pool_get_array(diag, 'pv_edge', pv_edge) + call mpas_pool_get_array(diag, 'rho_edge', rho_edge) + !$acc update self(w,pv_edge,rho_edge) if (.not. config_gpu_aware_mpi) if (config_scalar_advection .and. (.not. config_split_dynamics_transport) ) then ! ! Communicate halos for w[1,2], pv_edge[1,2], rho_edge[1,2], scalars[1,2] ! - call exchange_halo_group(domain, 'dynamics:w,pv_edge,rho_edge,scalars') + call mpas_pool_get_array(state, 'scalars', scalars_2, 2) + !$acc update self(scalars_2) if (.not. config_gpu_aware_mpi) + call exchange_halo_group(domain, 'dynamics:w,pv_edge,rho_edge,scalars', config_gpu_aware_mpi) + !$acc update device(scalars_2) if (.not. config_gpu_aware_mpi) else ! ! Communicate halos for w[1,2], pv_edge[1,2], rho_edge[1,2] ! - call exchange_halo_group(domain, 'dynamics:w,pv_edge,rho_edge') + call exchange_halo_group(domain, 'dynamics:w,pv_edge,rho_edge', config_gpu_aware_mpi) end if + !$acc update device(w,pv_edge,rho_edge) if (.not. config_gpu_aware_mpi) + MPAS_ACC_TIMER_STOP('atm_srk3: halo_exchanges + ACC_data_xfer') ! set the zero-gradient condition on w for regional_MPAS @@ -1483,8 +2538,13 @@ subroutine atm_srk3(domain, dt, itimestep, exchange_halo_group) end do !$OMP END PARALLEL DO + MPAS_ACC_TIMER_START('atm_srk3: halo_exchanges + ACC_data_xfer') ! w halo values needs resetting after regional boundary update - call exchange_halo_group(domain, 'dynamics:w') + call mpas_pool_get_array(state, 'w', w, 2) + !$acc update self(w) if (.not. config_gpu_aware_mpi) + call exchange_halo_group(domain, 'dynamics:w', config_gpu_aware_mpi) + !$acc update device(w) if (.not. config_gpu_aware_mpi) + MPAS_ACC_TIMER_STOP('atm_srk3: halo_exchanges + ACC_data_xfer') end if ! end of regional_MPAS addition @@ -1495,7 +2555,14 @@ subroutine atm_srk3(domain, dt, itimestep, exchange_halo_group) ! ! Communicate halos for theta_m[1,2], pressure_p[1,2], and rtheta_p[1,2] ! - call exchange_halo_group(domain, 'dynamics:theta_m,pressure_p,rtheta_p') + MPAS_ACC_TIMER_START('atm_srk3: halo_exchanges + ACC_data_xfer') + call mpas_pool_get_array(state, 'theta_m', theta_m, 2) + call mpas_pool_get_array(diag, 'pressure_p', pressure_p) + call mpas_pool_get_array(diag, 'rtheta_p', rtheta_p) + !$acc update self(theta_m,pressure_p,rtheta_p) if (.not. config_gpu_aware_mpi) + call exchange_halo_group(domain, 'dynamics:theta_m,pressure_p,rtheta_p', config_gpu_aware_mpi) + !$acc update device(theta_m,pressure_p,rtheta_p) if (.not. config_gpu_aware_mpi) + MPAS_ACC_TIMER_STOP('atm_srk3: halo_exchanges + ACC_data_xfer') ! ! Note: A halo exchange for 'exner' here as well as after the call @@ -1532,6 +2599,7 @@ subroutine atm_srk3(domain, dt, itimestep, exchange_halo_group) deallocate(qtot) ! we are finished with these now + !$acc exit data delete(tend_rtheta_physics,tend_rho_physics,tend_ru_physics) #ifndef MPAS_CAM_DYCORE call mpas_deallocate_scratch_field(tend_rtheta_physicsField) call mpas_deallocate_scratch_field(tend_rho_physicsField) @@ -1555,12 +2623,17 @@ subroutine atm_srk3(domain, dt, itimestep, exchange_halo_group) call advance_scalars('scalars', domain, rk_step, rk_timestep, config_monotonic, config_positive_definite, & - config_time_integration_order, config_split_dynamics_transport, exchange_halo_group) + config_time_integration_order, config_split_dynamics_transport, config_gpu_aware_mpi, exchange_halo_group) if (config_apply_lbcs) then ! adjust boundary tendencies for regional_MPAS scalar transport + MPAS_ACC_TIMER_START('atm_srk3: halo_exchanges + ACC_data_xfer') ! need to fill halo for horizontal filter - call exchange_halo_group(domain, 'dynamics:scalars') + call mpas_pool_get_array(state, 'scalars', scalars_2, 2) + !$acc update self(scalars_2) if (.not. config_gpu_aware_mpi) + call exchange_halo_group(domain, 'dynamics:scalars', config_gpu_aware_mpi) + !$acc update device(scalars_2) if (.not. config_gpu_aware_mpi) + MPAS_ACC_TIMER_STOP('atm_srk3: halo_exchanges + ACC_data_xfer') allocate(scalars_driving(num_scalars,nVertLevels,nCells+1)) @@ -1586,7 +2659,12 @@ subroutine atm_srk3(domain, dt, itimestep, exchange_halo_group) !------------------------------------------------------------------------------------------------------------------------ if (rk_step < 3) then - call exchange_halo_group(domain, 'dynamics:scalars') + MPAS_ACC_TIMER_START('atm_srk3: halo_exchanges + ACC_data_xfer') + call mpas_pool_get_array(state, 'scalars', scalars_2, 2) + !$acc update self(scalars_2) if (.not. config_gpu_aware_mpi) + call exchange_halo_group(domain, 'dynamics:scalars', config_gpu_aware_mpi) + !$acc update device(scalars_2) if (.not. config_gpu_aware_mpi) + MPAS_ACC_TIMER_STOP('atm_srk3: halo_exchanges + ACC_data_xfer') end if end do RK3_SPLIT_TRANSPORT @@ -1608,7 +2686,8 @@ subroutine atm_srk3(domain, dt, itimestep, exchange_halo_group) uReconstructY, & uReconstructZ, & uReconstructZonal, & - uReconstructMeridional & + uReconstructMeridional, & + lACC = .true. & ) @@ -1618,16 +2697,24 @@ subroutine atm_srk3(domain, dt, itimestep, exchange_halo_group) ! #ifdef DO_PHYSICS + MPAS_ACC_TIMER_START('atm_srk3: physics ACC_data_xfer') call mpas_pool_get_array(state, 'scalars', scalars_1, 1) + !$acc update self(scalars_1) call mpas_pool_get_array(state, 'scalars', scalars_2, 2) + !$acc update self(scalars_2) + MPAS_ACC_TIMER_STOP('atm_srk3: physics ACC_data_xfer') if(config_convection_scheme == 'cu_grell_freitas' .or. & config_convection_scheme == 'cu_ntiedtke') then + MPAS_ACC_TIMER_START('atm_srk3: physics ACC_data_xfer') call mpas_pool_get_array(tend_physics, 'rqvdynten', rqvdynten) call mpas_pool_get_array(state, 'theta_m', theta_m, 2) + !$acc update self(theta_m) call mpas_pool_get_array(tend_physics, 'rthdynten', rthdynten) + !$acc update self(rthdynten) + MPAS_ACC_TIMER_STOP('atm_srk3: physics ACC_data_xfer') !NOTE: The calculation of the tendency due to horizontal and vertical advection for the water vapor mixing ratio !requires that the subroutine atm_advance_scalars_mono was called on the third Runge Kutta step, so that a halo @@ -1652,8 +2739,12 @@ subroutine atm_srk3(domain, dt, itimestep, exchange_halo_group) where ( scalars_2(:,:,:) < 0.0) & scalars_2(:,:,:) = 0.0 + MPAS_ACC_TIMER_START('atm_srk3: physics ACC_data_xfer') + !$acc update device(scalars_2, rthdynten) + MPAS_ACC_TIMER_STOP('atm_srk3: physics ACC_data_xfer') !call microphysics schemes: if (trim(config_microp_scheme) /= 'off') then + call update_d2h_pre_microphysics( block % configs, state, diag, 2) call mpas_timer_start('microphysics') !$OMP PARALLEL DO do thread=1,nThreads @@ -1662,6 +2753,7 @@ subroutine atm_srk3(domain, dt, itimestep, exchange_halo_group) end do !$OMP END PARALLEL DO call mpas_timer_stop('microphysics') + call update_h2d_post_microphysics( block % configs, state, diag, tend, 2) end if ! @@ -1699,7 +2791,12 @@ subroutine atm_srk3(domain, dt, itimestep, exchange_halo_group) if (config_apply_lbcs) then ! adjust boundary values for regional_MPAS scalar transport - call exchange_halo_group(domain, 'dynamics:scalars') + MPAS_ACC_TIMER_START('atm_srk3: halo_exchanges + ACC_data_xfer') + call mpas_pool_get_array(state, 'scalars', scalars_2, 2) + !$acc update self(scalars_2) if (.not. config_gpu_aware_mpi) + call exchange_halo_group(domain, 'dynamics:scalars', config_gpu_aware_mpi) + !$acc update device(scalars_2) if (.not. config_gpu_aware_mpi) + MPAS_ACC_TIMER_STOP('atm_srk3: halo_exchanges + ACC_data_xfer') allocate(scalars_driving(num_scalars,nVertLevels,nCells+1)) @@ -1752,7 +2849,7 @@ end subroutine atm_srk3 ! !----------------------------------------------------------------------- subroutine advance_scalars(field_name, domain, rk_step, rk_timestep, config_monotonic, config_positive_definite, & - config_time_integration_order, config_split_dynamics_transport, exchange_halo_group) + config_time_integration_order, config_split_dynamics_transport, config_gpu_aware_mpi, exchange_halo_group) implicit none @@ -1765,6 +2862,7 @@ subroutine advance_scalars(field_name, domain, rk_step, rk_timestep, config_mono logical, intent(in) :: config_positive_definite integer, intent(in) :: config_time_integration_order logical, intent(in) :: config_split_dynamics_transport + logical, intent(in) :: config_gpu_aware_mpi procedure (halo_exchange_routine) :: exchange_halo_group ! Local variables @@ -1896,7 +2994,7 @@ subroutine advance_scalars(field_name, domain, rk_step, rk_timestep, config_mono edgeThreadStart(thread), edgeThreadEnd(thread), & cellSolveThreadStart(thread), cellSolveThreadEnd(thread), & scalar_old_arr, scalar_new_arr, s_max_arr, s_min_arr, wdtn_arr, & - flux_array, flux_upwind_tmp_arr, flux_tmp_arr, & + flux_array, flux_upwind_tmp_arr, flux_tmp_arr, config_gpu_aware_mpi, & exchange_halo_group, & advance_density=config_split_dynamics_transport, rho_zz_int=rho_zz_int) end if @@ -1976,12 +3074,6 @@ subroutine atm_rk_integration_setup( state, diag, nVertLevels, num_scalars, & call mpas_pool_get_array(state, 'scalars', scalars_1, 1) call mpas_pool_get_array(state, 'scalars', scalars_2, 2) - MPAS_ACC_TIMER_START('atm_rk_integration_setup [ACC_data_xfer]') - !$acc enter data create(ru_save, u_2, rw_save, rtheta_p_save, rho_p_save, & - !$acc w_2, theta_m_2, rho_zz_2, rho_zz_old_split, scalars_2) & - !$acc copyin(ru, rw, rtheta_p, rho_p, u_1, w_1, theta_m_1, & - !$acc rho_zz_1, scalars_1) - MPAS_ACC_TIMER_STOP('atm_rk_integration_setup [ACC_data_xfer]') !$acc kernels theta_m_2(:,cellEnd+1) = 0.0_RKIND @@ -2029,12 +3121,6 @@ subroutine atm_rk_integration_setup( state, diag, nVertLevels, num_scalars, & end do !$acc end parallel - MPAS_ACC_TIMER_START('atm_rk_integration_setup [ACC_data_xfer]') - !$acc exit data copyout(ru_save, rw_save, rtheta_p_save, rho_p_save, u_2, & - !$acc w_2, theta_m_2, rho_zz_2, rho_zz_old_split, scalars_2) & - !$acc delete(ru, rw, rtheta_p, rho_p, u_1, w_1, theta_m_1, & - !$acc rho_zz_1, scalars_1) - MPAS_ACC_TIMER_STOP('atm_rk_integration_setup [ACC_data_xfer]') end subroutine atm_rk_integration_setup @@ -2085,11 +3171,6 @@ subroutine atm_compute_moist_coefficients( dims, state, diag, mesh, & moist_start = moist_start_ptr moist_end = moist_end_ptr - MPAS_ACC_TIMER_START('atm_compute_moist_coefficients [ACC_data_xfer]') - !$acc enter data create(cqw, cqu) & - !$acc copyin(scalars) - MPAS_ACC_TIMER_STOP('atm_compute_moist_coefficients [ACC_data_xfer]') - !$acc parallel default(present) !$acc loop gang worker ! do iCell = cellSolveStart,cellSolveEnd @@ -2138,10 +3219,6 @@ subroutine atm_compute_moist_coefficients( dims, state, diag, mesh, & end do !$acc end parallel - MPAS_ACC_TIMER_START('atm_compute_moist_coefficients [ACC_data_xfer]') - !$acc exit data copyout(cqw, cqu) & - !$acc delete(scalars) - MPAS_ACC_TIMER_STOP('atm_compute_moist_coefficients [ACC_data_xfer]') end subroutine atm_compute_moist_coefficients @@ -2273,11 +3350,7 @@ subroutine atm_compute_vert_imp_coefs_work(nCells, moist_start, moist_end, dts, real (kind=RKIND) :: dtseps, c2, qtotal, rcv real (kind=RKIND), dimension( nVertLevels ) :: b_tri, c_tri - MPAS_ACC_TIMER_START('atm_compute_vert_imp_coefs_work [ACC_data_xfer]') - !$acc enter data copyin(cqw, p, t, rb, rtb, rt, pb) - !$acc enter data create(cofrz, cofwr, cofwz, coftz, cofwt, a_tri, b_tri, & - !$acc c_tri, alpha_tri, gamma_tri) - MPAS_ACC_TIMER_STOP('atm_compute_vert_imp_coefs_work [ACC_data_xfer]') + !$acc enter data create(b_tri, c_tri) ! set coefficients dtseps = .5*dts*(1.+epssm) @@ -2357,11 +3430,7 @@ subroutine atm_compute_vert_imp_coefs_work(nCells, moist_start, moist_end, dts, end do ! loop over cells !$acc end parallel - MPAS_ACC_TIMER_START('atm_compute_vert_imp_coefs_work [ACC_data_xfer]') - !$acc exit data copyout(cofrz, cofwr, cofwz, coftz, cofwt, a_tri, b_tri, & - !$acc c_tri, alpha_tri, gamma_tri) - !$acc exit data delete(cqw, p, t, rb, rtb, rt, pb) - MPAS_ACC_TIMER_STOP('atm_compute_vert_imp_coefs_work [ACC_data_xfer]') + !$acc exit data delete(b_tri, c_tri) end subroutine atm_compute_vert_imp_coefs_work @@ -2465,9 +3534,6 @@ subroutine atm_set_smlstep_pert_variables_work(nCells, nEdges, & integer :: iCell, iEdge, i, k real (kind=RKIND) :: flux - MPAS_ACC_TIMER_START('atm_set_smlstep_pert_variables [ACC_data_xfer]') - !$acc enter data copyin(u_tend, w_tend) - MPAS_ACC_TIMER_STOP('atm_set_smlstep_pert_variables [ACC_data_xfer]') ! we solve for omega instead of w (see Klemp et al MWR 2007), ! so here we change the w_p tendency to an omega_p tendency @@ -2500,10 +3566,6 @@ subroutine atm_set_smlstep_pert_variables_work(nCells, nEdges, & end do !$acc end parallel - MPAS_ACC_TIMER_START('atm_set_smlstep_pert_variables [ACC_data_xfer]') - !$acc exit data delete(u_tend) - !$acc exit data copyout(w_tend) - MPAS_ACC_TIMER_STOP('atm_set_smlstep_pert_variables [ACC_data_xfer]') end subroutine atm_set_smlstep_pert_variables_work @@ -2736,17 +3798,6 @@ subroutine atm_advance_acoustic_step_work(nCells, nEdges, nCellsSolve, cellStart resm = (1.0 - epssm) / (1.0 + epssm) rdts = 1./dts - MPAS_ACC_TIMER_START('atm_advance_acoustic_step [ACC_data_xfer]') - !$acc enter data copyin(exner,cqu,cofwt,coftz,cofrz,cofwr,cofwz, & - !$acc a_tri,alpha_tri,gamma_tri,rho_zz,theta_m,w, & - !$acc tend_ru,tend_rho,tend_rt,tend_rw,rw,rw_save) - !$acc enter data create(rtheta_pp_old) - if(small_step == 1) then - !$acc enter data create(ru_p,ruAvg,rho_pp,rtheta_pp,wwAvg,rw_p) - else - !$acc enter data copyin(ru_p,ruAvg,rho_pp,rtheta_pp,wwAvg,rw_p) - end if - MPAS_ACC_TIMER_STOP('atm_advance_acoustic_step [ACC_data_xfer]') if(small_step /= 1) then ! not needed on first small step @@ -2973,13 +4024,6 @@ subroutine atm_advance_acoustic_step_work(nCells, nEdges, nCellsSolve, cellStart end do ! end of loop over cells !$acc end parallel - MPAS_ACC_TIMER_START('atm_advance_acoustic_step [ACC_data_xfer]') - !$acc exit data delete(exner,cqu,cofwt,coftz,cofrz,cofwr,cofwz, & - !$acc a_tri,alpha_tri,gamma_tri,rho_zz,theta_m,w, & - !$acc tend_ru,tend_rho,tend_rt,tend_rw,rw,rw_save) - !$acc exit data copyout(rtheta_pp_old,ru_p,ruAvg,rho_pp, & - !$acc rtheta_pp,wwAvg,rw_p) - MPAS_ACC_TIMER_STOP('atm_advance_acoustic_step [ACC_data_xfer]') end subroutine atm_advance_acoustic_step_work @@ -3031,9 +4075,6 @@ subroutine atm_divergence_damping_3d( state, diag, mesh, configs, dts, edgeStart nCellsSolve = nCellsSolve_ptr nVertLevels = nVertLevels_ptr - MPAS_ACC_TIMER_START('atm_divergence_damping_3d [ACC_data_xfer]') - !$acc enter data copyin(ru_p, rtheta_pp, rtheta_pp_old, theta_m) - MPAS_ACC_TIMER_STOP('atm_divergence_damping_3d [ACC_data_xfer]') !$acc parallel default(present) !$acc loop gang worker @@ -3066,10 +4107,6 @@ subroutine atm_divergence_damping_3d( state, diag, mesh, configs, dts, edgeStart end do ! end loop over edges !$acc end parallel - MPAS_ACC_TIMER_START('atm_divergence_damping_3d [ACC_data_xfer]') - !$acc exit data copyout(ru_p) & - !$acc delete(rtheta_pp, rtheta_pp_old, theta_m) - MPAS_ACC_TIMER_STOP('atm_divergence_damping_3d [ACC_data_xfer]') end subroutine atm_divergence_damping_3d @@ -3260,17 +4297,6 @@ subroutine atm_recover_large_step_variables_work(nCells, nEdges, nCellsSolve, nE integer :: i, iCell, iEdge, k, cell1, cell2 real (kind=RKIND) :: invNs, rcv, p0, flux - MPAS_ACC_TIMER_START('atm_recover_large_step_variables [ACC_data_xfer]') - !$acc enter data copyin(rho_p_save,rho_pp,rho_base,rw_save,rw_p, & - !$acc rtheta_p_save,rtheta_pp,rtheta_base, & - !$acc ru_save,ru_p,wwAvg,ruAvg) & - !$acc create(rho_zz,rho_p,rw,w,rtheta_p,theta_m, & - !$acc ru,u) - if (rk_step == 3) then - !$acc enter data copyin(rt_diabatic_tend,exner_base) & - !$acc create(exner,pressure_p) - end if - MPAS_ACC_TIMER_STOP('atm_recover_large_step_variables [ACC_data_xfer]') rcv = rgas/(cp-rgas) p0 = 1.0e+05 ! this should come from somewhere else... @@ -3416,17 +4442,6 @@ subroutine atm_recover_large_step_variables_work(nCells, nEdges, nCellsSolve, nE end do !$acc end parallel - MPAS_ACC_TIMER_START('atm_recover_large_step_variables [ACC_data_xfer]') - !$acc exit data delete(rho_p_save,rho_pp,rho_base,rw_save,rw_p, & - !$acc rtheta_p_save,rtheta_pp,rtheta_base, & - !$acc ru_save,ru_p) & - !$acc copyout(rho_zz,rho_p,rw,w,rtheta_p,theta_m, & - !$acc ru,u,wwAvg,ruAvg) - if (rk_step == 3) then - !$acc exit data delete(rt_diabatic_tend,exner_base) & - !$acc copyout(exner,pressure_p) - end if - MPAS_ACC_TIMER_STOP('atm_recover_large_step_variables [ACC_data_xfer]') end subroutine atm_recover_large_step_variables_work @@ -3661,10 +4676,6 @@ subroutine atm_advance_scalars_work(nCells, num_scalars, dt, & weight_time_old = 1. - weight_time_new - MPAS_ACC_TIMER_START('atm_advance_scalars [ACC_data_xfer]') - !$acc enter data copyin(uhAvg, scalar_new) - MPAS_ACC_TIMER_STOP('atm_advance_scalars [ACC_data_xfer]') - !$acc parallel async !$acc loop gang worker private(scalar_weight2, ica) do iEdge=edgeStart,edgeEnd @@ -3759,12 +4770,6 @@ subroutine atm_advance_scalars_work(nCells, num_scalars, dt, & ! MPAS_ACC_TIMER_START('atm_advance_scalars [ACC_data_xfer]') -#ifndef DO_PHYSICS - !$acc enter data create(scalar_tend_save) -#else - !$acc enter data copyin(scalar_tend_save) -#endif - !$acc enter data copyin(scalar_old, fnm, fnp, rdnw, wwAvg, rho_zz_old, rho_zz_new) !$acc enter data create(scalar_tend_column) MPAS_ACC_TIMER_STOP('atm_advance_scalars [ACC_data_xfer]') @@ -3847,9 +4852,7 @@ subroutine atm_advance_scalars_work(nCells, num_scalars, dt, & !$acc end parallel MPAS_ACC_TIMER_START('atm_advance_scalars [ACC_data_xfer]') - !$acc exit data copyout(scalar_new) - !$acc exit data delete(scalar_tend_column, uhAvg, wwAvg, scalar_old, fnm, fnp, & - !$acc rdnw, rho_zz_old, rho_zz_new, scalar_tend_save) + !$acc exit data delete(scalar_tend_column) MPAS_ACC_TIMER_STOP('atm_advance_scalars [ACC_data_xfer]') end subroutine atm_advance_scalars_work @@ -3870,7 +4873,7 @@ subroutine atm_advance_scalars_mono(field_name, block, tend, state, diag, mesh, cellStart, cellEnd, edgeStart, edgeEnd, & cellSolveStart, cellSolveEnd, & scalar_old, scalar_new, s_max, s_min, wdtn, flux_arr, & - flux_upwind_tmp, flux_tmp, exchange_halo_group, advance_density, rho_zz_int) + flux_upwind_tmp, flux_tmp, config_gpu_aware_mpi, exchange_halo_group, advance_density, rho_zz_int) implicit none @@ -3891,6 +4894,7 @@ subroutine atm_advance_scalars_mono(field_name, block, tend, state, diag, mesh, real (kind=RKIND), dimension(:,:), intent(inout) :: wdtn real (kind=RKIND), dimension(:,:), intent(inout) :: flux_arr real (kind=RKIND), dimension(:,:), intent(inout) :: flux_upwind_tmp, flux_tmp + logical, intent(in) :: config_gpu_aware_mpi procedure (halo_exchange_routine) :: exchange_halo_group logical, intent(in), optional :: advance_density real (kind=RKIND), dimension(:,:), intent(inout), optional :: rho_zz_int @@ -3969,7 +4973,7 @@ subroutine atm_advance_scalars_mono(field_name, block, tend, state, diag, mesh, edgesOnCell, edgesOnCell_sign, nEdgesOnCell, fnm, fnp, rdnw, nAdvCellsForEdge, & advCellsForEdge, adv_coefs, adv_coefs_3rd, scalar_old, scalar_new, s_max, s_min, & wdtn, scale_arr, flux_arr, flux_upwind_tmp, flux_tmp, & - bdyMaskCell, bdyMaskEdge, & + bdyMaskCell, bdyMaskEdge, config_gpu_aware_mpi, & exchange_halo_group, advance_density, rho_zz_int) call mpas_deallocate_scratch_field(scale) @@ -4017,7 +5021,7 @@ subroutine atm_advance_scalars_mono_work(field_name, block, state, nCells, nEdge edgesOnCell, edgesOnCell_sign, nEdgesOnCell, fnm, fnp, rdnw, nAdvCellsForEdge, & advCellsForEdge, adv_coefs, adv_coefs_3rd, scalar_old, scalar_new, s_max, s_min, & wdtn, scale_arr, flux_arr, flux_upwind_tmp, flux_tmp, & - bdyMaskCell, bdyMaskEdge, & + bdyMaskCell, bdyMaskEdge, config_gpu_aware_mpi, & exchange_halo_group, advance_density, rho_zz_int) use mpas_atm_dimensions, only : nVertLevels @@ -4033,6 +5037,7 @@ subroutine atm_advance_scalars_mono_work(field_name, block, state, nCells, nEdge real (kind=RKIND), intent(in) :: dt integer, intent(in) :: cellStart, cellEnd, edgeStart, edgeEnd integer, intent(in) :: cellSolveStart, cellSolveEnd + logical, intent(in) :: config_gpu_aware_mpi procedure (halo_exchange_routine) :: exchange_halo_group logical, intent(in), optional :: advance_density real (kind=RKIND), dimension(:,:), intent(inout), optional :: rho_zz_int @@ -4107,22 +5112,6 @@ subroutine atm_advance_scalars_mono_work(field_name, block, state, nCells, nEdge ! The transport will maintain this positive definite solution and optionally, shape preservation (monotonicity). - MPAS_ACC_TIMER_START('atm_advance_scalars_mono [ACC_data_xfer]') - !$acc data present(nEdgesOnCell, edgesOnCell, edgesOnCell_sign, & - !$acc invAreaCell, cellsOnCell, cellsOnEdge, nAdvCellsForEdge, & - !$acc advCellsForEdge, adv_coefs, adv_coefs_3rd, dvEdge, bdyMaskCell) - -#ifdef DO_PHYSICS - !$acc enter data copyin(scalar_tend) -#else - !$acc enter data create(scalar_tend) -#endif - if (local_advance_density) then - !$acc enter data copyin(rho_zz_int) - end if - !$acc enter data copyin(scalars_old, rho_zz_old, rdnw, uhAvg, wwAvg) - MPAS_ACC_TIMER_STOP('atm_advance_scalars_mono [ACC_data_xfer]') - !$acc parallel !$acc loop gang worker @@ -4145,19 +5134,17 @@ subroutine atm_advance_scalars_mono_work(field_name, block, state, nCells, nEdge !$acc end parallel MPAS_ACC_TIMER_START('atm_advance_scalars_mono [ACC_data_xfer]') - !$acc exit data copyout(scalar_tend) - - !$acc update self(scalars_old) + !$acc update self(scalars_old) if (.not. config_gpu_aware_mpi) MPAS_ACC_TIMER_STOP('atm_advance_scalars_mono [ACC_data_xfer]') !$OMP BARRIER !$OMP MASTER - call exchange_halo_group(block % domain, 'dynamics:'//trim(field_name)//'_old') + call exchange_halo_group(block % domain, 'dynamics:'//trim(field_name)//'_old', config_gpu_aware_mpi) !$OMP END MASTER !$OMP BARRIER MPAS_ACC_TIMER_START('atm_advance_scalars_mono [ACC_data_xfer]') - !$acc update device(scalars_old) + !$acc update device(scalars_old) if (.not. config_gpu_aware_mpi) MPAS_ACC_TIMER_STOP('atm_advance_scalars_mono [ACC_data_xfer]') ! @@ -4209,13 +5196,7 @@ subroutine atm_advance_scalars_mono_work(field_name, block, state, nCells, nEdge end if - MPAS_ACC_TIMER_START('atm_advance_scalars_mono [ACC_data_xfer]') - if (.not. local_advance_density) then - !$acc enter data copyin(rho_zz_new) - end if - !$acc enter data copyin(scalars_new, fnm, fnp) !$acc enter data create(scale_arr) - MPAS_ACC_TIMER_STOP('atm_advance_scalars_mono [ACC_data_xfer]') do iScalar = 1, num_scalars @@ -4560,17 +5541,17 @@ subroutine atm_advance_scalars_mono_work(field_name, block, state, nCells, nEdge ! MPAS_ACC_TIMER_START('atm_advance_scalars_mono [ACC_data_xfer]') - !$acc update self(scale_arr) + !$acc update self(scale_arr) if (.not. config_gpu_aware_mpi) MPAS_ACC_TIMER_STOP('atm_advance_scalars_mono [ACC_data_xfer]') !$OMP BARRIER !$OMP MASTER - call exchange_halo_group(block % domain, 'dynamics:scale') + call exchange_halo_group(block % domain, 'dynamics:scale', config_gpu_aware_mpi) !$OMP END MASTER !$OMP BARRIER MPAS_ACC_TIMER_START('atm_advance_scalars_mono [ACC_data_xfer]') - !$acc update device(scale_arr) + !$acc update device(scale_arr) if (.not. config_gpu_aware_mpi) MPAS_ACC_TIMER_STOP('atm_advance_scalars_mono [ACC_data_xfer]') !$acc parallel @@ -4718,23 +5699,12 @@ subroutine atm_advance_scalars_mono_work(field_name, block, state, nCells, nEdge end do ! loop over scalars - MPAS_ACC_TIMER_START('atm_advance_scalars_mono [ACC_data_xfer]') - if (local_advance_density) then - !$acc exit data copyout(rho_zz_int) - else - !$acc exit data delete(rho_zz_new) - end if - !$acc exit data copyout(scalars_new) - !$acc exit data delete(scalars_old, scale_arr, rho_zz_old, wwAvg, & - !$acc uhAvg, fnm, fnp, rdnw) - - !$acc end data - MPAS_ACC_TIMER_STOP('atm_advance_scalars_mono [ACC_data_xfer]') + !$acc exit data delete(scale_arr) end subroutine atm_advance_scalars_mono_work - subroutine atm_compute_dyn_tend(tend, tend_physics, state, diag, mesh, configs, nVertLevels, rk_step, dt, & + subroutine atm_compute_dyn_tend(tend, tend_physics, state, diag, mesh, diag_physics, configs, nVertLevels, rk_step, dynamics_substep, dt, & cellStart, cellEnd, vertexStart, vertexEnd, edgeStart, edgeEnd, & cellSolveStart, cellSolveEnd, vertexSolveStart, vertexSolveEnd, edgeSolveStart, edgeSolveEnd) !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! @@ -4750,6 +5720,8 @@ subroutine atm_compute_dyn_tend(tend, tend_physics, state, diag, mesh, configs, ! !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! + use mpas_atm_dissipation_models, only : les_model_from_string, les_surface_from_string + implicit none ! @@ -4760,9 +5732,10 @@ subroutine atm_compute_dyn_tend(tend, tend_physics, state, diag, mesh, configs, type (mpas_pool_type), intent(in) :: state type (mpas_pool_type), intent(in) :: diag type (mpas_pool_type), intent(in) :: mesh + type (mpas_pool_type), intent(in) :: diag_physics type (mpas_pool_type), intent(in) :: configs integer, intent(in) :: nVertLevels ! for allocating stack variables - integer, intent(in) :: rk_step + integer, intent(in) :: rk_step, dynamics_substep real (kind=RKIND), intent(in) :: dt integer, intent(in) :: cellStart, cellEnd, vertexStart, vertexEnd, edgeStart, edgeEnd integer, intent(in) :: cellSolveStart, cellSolveEnd, vertexSolveStart, vertexSolveEnd, edgeSolveStart, edgeSolveEnd @@ -4778,7 +5751,9 @@ subroutine atm_compute_dyn_tend(tend, tend_physics, state, diag, mesh, configs, real (kind=RKIND), dimension(:,:), pointer :: weightsOnEdge, zgrid, rho_edge, rho_zz, ru, u, v, tend_u, & divergence, vorticity, ke, pv_edge, theta_m, rw, tend_rho, & rt_diabatic_tend, tend_theta, tend_w, w, cqw, rb, rr, pp, pressure_b, zz, zxu, cqu, & - h_divergence, kdiff, edgesOnCell_sign, edgesOnVertex_sign, rw_save, ru_save + h_divergence, bn2, edgesOnCell_sign, edgesOnVertex_sign, rw_save, ru_save + + real (kind=RKIND), dimension(:,:), pointer :: eddy_visc_horz, eddy_visc_vert real (kind=RKIND), dimension(:,:), pointer :: theta_m_save @@ -4788,7 +5763,7 @@ subroutine atm_compute_dyn_tend(tend, tend_physics, state, diag, mesh, configs, real (kind=RKIND), dimension(:,:), pointer :: rthdynten - real (kind=RKIND), dimension(:,:,:), pointer :: scalars + real (kind=RKIND), dimension(:,:,:), pointer :: scalars, tend_scalars real (kind=RKIND), dimension(:,:), pointer :: tend_u_euler, tend_w_euler, tend_theta_euler @@ -4803,6 +5778,7 @@ subroutine atm_compute_dyn_tend(tend, tend_physics, state, diag, mesh, configs, real (kind=RKIND), dimension(:), pointer :: rdzu, rdzw, fzm, fzp, qv_init real (kind=RKIND), dimension(:,:), pointer :: t_init + real (kind=RKIND), dimension(:), pointer:: ustm, hfx, qfx real (kind=RKIND), pointer :: cf1, cf2, cf3 @@ -4810,12 +5786,23 @@ subroutine atm_compute_dyn_tend(tend, tend_physics, state, diag, mesh, configs, real (kind=RKIND), dimension(:,:), pointer :: ur_cell, vr_cell real (kind=RKIND), dimension(:,:), pointer :: defc_a, defc_b + real (kind=RKIND), dimension(:,:), pointer :: deformation_coef_c2, deformation_coef_s2, deformation_coef_cs + real (kind=RKIND), dimension(:,:), pointer :: deformation_coef_c, deformation_coef_s + real (kind=RKIND), dimension(:,:), pointer :: prandtl_3d_inv + real(kind=RKIND), dimension(:,:), pointer :: tend_w_pgf, tend_w_buoy real (kind=RKIND), pointer :: coef_3rd_order, c_s logical, pointer :: config_mix_full + logical, pointer :: config_mix_scalars character (len=StrKIND), pointer :: config_horiz_mixing + character (len=StrKIND), pointer :: config_les_model + character (len=StrKIND), pointer :: config_les_surface + integer :: les_model_opt, les_surface_opt + real (kind=RKIND), pointer :: config_surface_heat_flux + real (kind=RKIND), pointer :: config_surface_moisture_flux + real (kind=RKIND), pointer :: config_surface_drag_coefficient real (kind=RKIND), pointer :: config_del4u_div_factor real (kind=RKIND), pointer :: config_h_theta_eddy_visc4 real (kind=RKIND), pointer :: config_h_mom_eddy_visc4 @@ -4828,12 +5815,22 @@ subroutine atm_compute_dyn_tend(tend, tend_physics, state, diag, mesh, configs, logical, pointer :: config_rayleigh_damp_u real (kind=RKIND), pointer :: config_rayleigh_damp_u_timescale_days integer, pointer :: config_number_rayleigh_damp_u_levels, config_number_cam_damping_levels + integer, pointer :: index_qv, index_qc, index_tke + + logical :: inactive_rthdynten + logical :: nopbl call mpas_pool_get_config(mesh, 'sphere_radius', r_earth) call mpas_pool_get_config(configs, 'config_coef_3rd_order', coef_3rd_order) call mpas_pool_get_config(configs, 'config_mix_full', config_mix_full) + call mpas_pool_get_config(configs, 'config_mix_scalars', config_mix_scalars) call mpas_pool_get_config(configs, 'config_horiz_mixing', config_horiz_mixing) + call mpas_pool_get_config(configs, 'config_les_model', config_les_model) + call mpas_pool_get_config(configs, 'config_les_surface', config_les_surface) + call mpas_pool_get_config(configs, 'config_surface_heat_flux', config_surface_heat_flux) + call mpas_pool_get_config(configs, 'config_surface_moisture_flux', config_surface_moisture_flux) + call mpas_pool_get_config(configs, 'config_surface_drag_coefficient', config_surface_drag_coefficient) call mpas_pool_get_config(configs, 'config_del4u_div_factor', config_del4u_div_factor) call mpas_pool_get_config(configs, 'config_h_theta_eddy_visc4', config_h_theta_eddy_visc4) call mpas_pool_get_config(configs, 'config_h_mom_eddy_visc4', config_h_mom_eddy_visc4) @@ -4864,7 +5861,9 @@ subroutine atm_compute_dyn_tend(tend, tend_physics, state, diag, mesh, configs, call mpas_pool_get_array(diag, 'rho_p', rr) call mpas_pool_get_array(diag, 'rho_p_save', rr_save) call mpas_pool_get_array(diag, 'v', v) - call mpas_pool_get_array(diag, 'kdiff', kdiff) + call mpas_pool_get_array(diag, 'eddy_visc_horz', eddy_visc_horz) + call mpas_pool_get_array(diag, 'eddy_visc_vert', eddy_visc_vert) + call mpas_pool_get_array(diag, 'bn2', bn2) call mpas_pool_get_array(diag, 'ru', ru) call mpas_pool_get_array(diag, 'ru_save', ru_save) call mpas_pool_get_array(diag, 'rw', rw) @@ -4877,9 +5876,29 @@ subroutine atm_compute_dyn_tend(tend, tend_physics, state, diag, mesh, configs, call mpas_pool_get_array(diag, 'pressure_base', pressure_b) call mpas_pool_get_array(diag, 'h_divergence', h_divergence) call mpas_pool_get_array(diag, 'exner', exner) + call mpas_pool_get_array(diag, 'prandtl_3d_inv', prandtl_3d_inv) + call mpas_pool_get_array(tend_physics, 'rthdynten', rthdynten) + nullify(ustm) + call mpas_pool_get_array(diag_physics,'ustm',ustm) + nullify(hfx) + call mpas_pool_get_array(diag_physics,'hfx',hfx) + nullify(qfx) + call mpas_pool_get_array(diag_physics,'qfx',qfx) + + nopbl = .false. + if (.not. associated(ustm) & + .or. .not. associated(hfx) & + .or. .not. associated(qfx)) then + + allocate(ustm(1)) + allocate(hfx(1)) + allocate(qfx(1)) + nopbl = .true. + end if + call mpas_pool_get_array(mesh, 'weightsOnEdge', weightsOnEdge) call mpas_pool_get_array(mesh, 'cellsOnEdge', cellsOnEdge) call mpas_pool_get_array(mesh, 'cellsOnCell', cellsOnCell) @@ -4905,6 +5924,11 @@ subroutine atm_compute_dyn_tend(tend, tend_physics, state, diag, mesh, configs, call mpas_pool_get_array(mesh, 'angleEdge', angleEdge) call mpas_pool_get_array(mesh, 'defc_a', defc_a) call mpas_pool_get_array(mesh, 'defc_b', defc_b) + call mpas_pool_get_array(mesh, 'deformation_coef_c2', deformation_coef_c2) + call mpas_pool_get_array(mesh, 'deformation_coef_s2', deformation_coef_s2) + call mpas_pool_get_array(mesh, 'deformation_coef_cs', deformation_coef_cs) + call mpas_pool_get_array(mesh, 'deformation_coef_c', deformation_coef_c) + call mpas_pool_get_array(mesh, 'deformation_coef_s', deformation_coef_s) call mpas_pool_get_array(mesh, 'meshScalingDel2', meshScalingDel2) call mpas_pool_get_array(mesh, 'meshScalingDel4', meshScalingDel4) call mpas_pool_get_array(mesh, 'u_init', u_init) @@ -4928,6 +5952,8 @@ subroutine atm_compute_dyn_tend(tend, tend_physics, state, diag, mesh, configs, call mpas_pool_get_array(tend, 'w_euler', tend_w_euler) call mpas_pool_get_array(tend, 'w_pgf', tend_w_pgf) call mpas_pool_get_array(tend, 'w_buoy', tend_w_buoy) + call mpas_pool_get_array(tend, 'scalars_tend', tend_scalars) + call mpas_pool_get_array(diag, 'cqw', cqw) call mpas_pool_get_array(diag, 'cqu', cqu) @@ -4944,6 +5970,9 @@ subroutine atm_compute_dyn_tend(tend, tend_physics, state, diag, mesh, configs, call mpas_pool_get_dimension(state, 'num_scalars', num_scalars) call mpas_pool_get_dimension(state, 'moist_start', moist_start) call mpas_pool_get_dimension(state, 'moist_end', moist_end) + call mpas_pool_get_dimension(state, 'index_qv', index_qv) + call mpas_pool_get_dimension(state, 'index_qc', index_qc) + call mpas_pool_get_dimension(state, 'index_tke', index_tke) call mpas_pool_get_array(mesh, 'nEdgesOnCell', nEdgesOnCell) call mpas_pool_get_array(mesh, 'nAdvCellsForEdge', nAdvCellsForEdge) @@ -4955,53 +5984,73 @@ subroutine atm_compute_dyn_tend(tend, tend_physics, state, diag, mesh, configs, call mpas_pool_get_array(mesh, 'cf2', cf2) call mpas_pool_get_array(mesh, 'cf3', cf3) + les_model_opt = les_model_from_string(config_les_model) + les_surface_opt = les_surface_from_string(config_les_surface) + call atm_compute_dyn_tend_work(nCells, nEdges, nVertices, nVertLevels, & - nCellsSolve, nEdgesSolve, vertexDegree, maxEdges, maxEdges2, num_scalars, moist_start, moist_end, & + nCellsSolve, nEdgesSolve, vertexDegree, maxEdges, maxEdges2, num_scalars, index_qv, index_qc, moist_start, moist_end, & + tend_scalars, & fEdge, dvEdge, dcEdge, invDcEdge, invDvEdge, invAreaCell, invAreaTriangle, meshScalingDel2, meshScalingDel4, & weightsOnEdge, zgrid, rho_edge, rho_zz, ru, u, v, tend_u, & divergence, vorticity, ke, pv_edge, theta_m, rw, tend_rho, & rt_diabatic_tend, tend_theta, tend_w, w, cqw, rb, rr, pp, pressure_b, zz, zxu, cqu, & - h_divergence, kdiff, edgesOnCell_sign, edgesOnVertex_sign, rw_save, ru_save, & + h_divergence, bn2, eddy_visc_horz, eddy_visc_vert, index_tke, & + edgesOnCell_sign, edgesOnVertex_sign, rw_save, ru_save, & theta_m_save, exner, rr_save, scalars, tend_u_euler, tend_w_euler, tend_theta_euler, deriv_two, & cellsOnEdge, verticesOnEdge, edgesOnCell, edgesOnEdge, cellsOnCell, edgesOnVertex, nEdgesOnCell, nEdgesOnEdge, & latCell, latEdge, angleEdge, u_init, v_init, advCellsForEdge, nAdvCellsForEdge, adv_coefs, adv_coefs_3rd, & rdzu, rdzw, fzm, fzp, qv_init, t_init, cf1, cf2, cf3, r_earth, ur_cell, vr_cell, defc_a, defc_b, & - tend_w_pgf, tend_w_buoy, coef_3rd_order, c_s, config_mix_full, config_horiz_mixing, config_del4u_div_factor, & + deformation_coef_c2,deformation_coef_s2,deformation_coef_cs,deformation_coef_c,deformation_coef_s, & + tend_w_pgf, tend_w_buoy, coef_3rd_order, c_s, config_mix_full, config_mix_scalars, config_horiz_mixing, les_model_opt, & + les_surface_opt, prandtl_3d_inv, config_del4u_div_factor, & config_h_mom_eddy_visc2, config_v_mom_eddy_visc2, config_h_theta_eddy_visc2, config_v_theta_eddy_visc2, & - config_h_theta_eddy_visc4, config_h_mom_eddy_visc4, config_visc4_2dsmag, config_len_disp, rk_step, dt, & + config_h_theta_eddy_visc4, config_h_mom_eddy_visc4, config_visc4_2dsmag, config_len_disp, rk_step, dynamics_substep, dt, & config_mpas_cam_coef, & config_rayleigh_damp_u, config_rayleigh_damp_u_timescale_days, & config_number_rayleigh_damp_u_levels, config_number_cam_damping_levels, & - rthdynten, & + config_surface_heat_flux, config_surface_moisture_flux, config_surface_drag_coefficient, & + rthdynten, ustm, hfx, qfx, & cellStart, cellEnd, vertexStart, vertexEnd, edgeStart, edgeEnd, & cellSolveStart, cellSolveEnd, vertexSolveStart, vertexSolveEnd, edgeSolveStart, edgeSolveEnd) + if (nopbl) then + deallocate(ustm) + deallocate(hfx) + deallocate(qfx) + end if + end subroutine atm_compute_dyn_tend subroutine atm_compute_dyn_tend_work(nCells, nEdges, nVertices, nVertLevels_dummy, & - nCellsSolve, nEdgesSolve, vertexDegree, maxEdges_dummy, maxEdges2_dummy, num_scalars_dummy, moist_start, moist_end, & + nCellsSolve, nEdgesSolve, vertexDegree, maxEdges_dummy, maxEdges2_dummy, num_scalars_dummy, index_qv, index_qc, moist_start, moist_end, & + tend_scalars, & fEdge, dvEdge, dcEdge, invDcEdge, invDvEdge, invAreaCell, invAreaTriangle, meshScalingDel2, meshScalingDel4, & weightsOnEdge, zgrid, rho_edge, rho_zz, ru, u, v, tend_u, & divergence, vorticity, ke, pv_edge, theta_m, rw, tend_rho, & rt_diabatic_tend, tend_theta, tend_w, w, cqw, rb, rr, pp, pressure_b, zz, zxu, cqu, & - h_divergence, kdiff, edgesOnCell_sign, edgesOnVertex_sign, rw_save, ru_save, & + h_divergence, bn2, eddy_visc_horz, eddy_visc_vert, index_tke, & + edgesOnCell_sign, edgesOnVertex_sign, rw_save, ru_save, & theta_m_save, exner, rr_save, scalars, tend_u_euler, tend_w_euler, tend_theta_euler, deriv_two, & cellsOnEdge, verticesOnEdge, edgesOnCell, edgesOnEdge, cellsOnCell, edgesOnVertex, nEdgesOnCell, nEdgesOnEdge, & latCell, latEdge, angleEdge, u_init, v_init, advCellsForEdge, nAdvCellsForEdge, adv_coefs, adv_coefs_3rd, & rdzu, rdzw, fzm, fzp, qv_init, t_init, cf1, cf2, cf3, r_earth, ur_cell, vr_cell, defc_a, defc_b, & - tend_w_pgf, tend_w_buoy, coef_3rd_order, c_s, config_mix_full, config_horiz_mixing, config_del4u_div_factor, & + deformation_coef_c2,deformation_coef_s2,deformation_coef_cs,deformation_coef_c,deformation_coef_s, & + tend_w_pgf, tend_w_buoy, coef_3rd_order, c_s, config_mix_full, config_mix_scalars, config_horiz_mixing, les_model_opt, & + les_surface_opt, prandtl_3d_inv, config_del4u_div_factor, & config_h_mom_eddy_visc2, config_v_mom_eddy_visc2, config_h_theta_eddy_visc2, config_v_theta_eddy_visc2, & - config_h_theta_eddy_visc4, config_h_mom_eddy_visc4, config_visc4_2dsmag, config_len_disp, rk_step, dt, & + config_h_theta_eddy_visc4, config_h_mom_eddy_visc4, config_visc4_2dsmag, config_len_disp, rk_step, dynamics_substep, dt, & config_mpas_cam_coef, & config_rayleigh_damp_u, config_rayleigh_damp_u_timescale_days, & config_number_rayleigh_damp_u_levels, config_number_cam_damping_levels, & - rthdynten, & + config_surface_heat_flux, config_surface_moisture_flux, config_surface_drag_coefficient, & + rthdynten, ustm, hfx, qfx, & cellStart, cellEnd, vertexStart, vertexEnd, edgeStart, edgeEnd, & cellSolveStart, cellSolveEnd, vertexSolveStart, vertexSolveEnd, edgeSolveStart, edgeSolveEnd) use mpas_atm_dimensions + use mpas_atm_dissipation_models, only : LES_MODEL_NONE implicit none @@ -5011,7 +6060,7 @@ subroutine atm_compute_dyn_tend_work(nCells, nEdges, nVertices, nVertLevels_dumm ! Dummy arguments ! integer :: nCells, nEdges, nVertices, nVertLevels_dummy, nCellsSolve, nEdgesSolve, vertexDegree, & - maxEdges_dummy, maxEdges2_dummy, num_scalars_dummy, moist_start, moist_end + maxEdges_dummy, maxEdges2_dummy, num_scalars_dummy, index_qv, index_qc, moist_start, moist_end, index_tke real (kind=RKIND), dimension(nEdges+1) :: fEdge real (kind=RKIND), dimension(nEdges+1) :: dvEdge @@ -5050,7 +6099,11 @@ subroutine atm_compute_dyn_tend_work(nCells, nEdges, nVertices, nVertLevels_dumm real (kind=RKIND), dimension(nVertLevels,nEdges+1) :: zxu real (kind=RKIND), dimension(nVertLevels,nEdges+1) :: cqu real (kind=RKIND), dimension(nVertLevels,nCells+1) :: h_divergence - real (kind=RKIND), dimension(nVertLevels,nCells+1) :: kdiff + real (kind=RKIND), dimension(nVertLevels,nCells+1) :: eddy_visc_horz + real (kind=RKIND), dimension(nVertLevels,nCells+1) :: eddy_visc_vert + real (kind=RKIND), dimension(nVertLevels,nCells+1) :: bn2 + real (kind=RKIND), dimension(:) :: ustm, hfx, qfx + real (kind=RKIND), dimension(nVertLevels,nCells+1) :: prandtl_3d_inv real (kind=RKIND), dimension(maxEdges,nCells+1) :: edgesOnCell_sign real (kind=RKIND), dimension(vertexDegree,nVertices+1) :: edgesOnVertex_sign real (kind=RKIND), dimension(nVertLevels+1,nCells+1) :: rw_save @@ -5060,6 +6113,7 @@ subroutine atm_compute_dyn_tend_work(nCells, nEdges, nVertices, nVertLevels_dumm real (kind=RKIND), dimension(nVertLevels,nCells+1) :: exner real (kind=RKIND), dimension(nVertLevels,nCells+1) :: rr_save real (kind=RKIND), dimension(num_scalars,nVertLevels,nCells+1) :: scalars + real (kind=RKIND), dimension(num_scalars,nVertLevels,nCells+1) :: tend_scalars real (kind=RKIND), dimension(nVertLevels,nEdges+1) :: tend_u_euler real (kind=RKIND), dimension(nVertLevels+1,nCells+1) :: tend_w_euler real (kind=RKIND), dimension(nVertLevels,nCells+1) :: tend_theta_euler @@ -5098,13 +6152,20 @@ subroutine atm_compute_dyn_tend_work(nCells, nEdges, nVertices, nVertLevels_dumm real (kind=RKIND), dimension(maxEdges,nCells+1) :: defc_a real (kind=RKIND), dimension(maxEdges,nCells+1) :: defc_b + real (kind=RKIND), dimension(maxEdges,nCells+1) :: deformation_coef_c2, deformation_coef_s2, deformation_coef_cs + real (kind=RKIND), dimension(maxEdges,nCells+1) :: deformation_coef_c, deformation_coef_s real (kind=RKIND), dimension(nVertLevels+1,nCells+1) :: tend_w_pgf real (kind=RKIND), dimension(nVertLevels+1,nCells+1) :: tend_w_buoy real (kind=RKIND) :: coef_3rd_order, c_s - logical :: config_mix_full + logical :: config_mix_full, config_mix_scalars character (len=StrKIND) :: config_horiz_mixing + integer, intent(in) :: les_model_opt + integer, intent(in) :: les_surface_opt + real (kind=RKIND) :: config_surface_heat_flux + real (kind=RKIND) :: config_surface_moisture_flux + real (kind=RKIND) :: config_surface_drag_coefficient real (kind=RKIND) :: config_del4u_div_factor real (kind=RKIND) :: config_h_theta_eddy_visc4 real (kind=RKIND) :: config_h_mom_eddy_visc4 @@ -5112,7 +6173,7 @@ subroutine atm_compute_dyn_tend_work(nCells, nEdges, nVertices, nVertLevels_dumm real (kind=RKIND) :: config_len_disp real (kind=RKIND) :: config_h_mom_eddy_visc2, config_v_mom_eddy_visc2, config_h_theta_eddy_visc2, config_v_theta_eddy_visc2 - integer, intent(in) :: rk_step + integer, intent(in) :: rk_step, dynamics_substep real (kind=RKIND), intent(in) :: dt real (kind=RKIND) :: config_mpas_cam_coef @@ -5136,6 +6197,10 @@ subroutine atm_compute_dyn_tend_work(nCells, nEdges, nVertices, nVertLevels_dumm real (kind=RKIND), dimension( nVertLevels+1 ) :: d_diag, d_off_diag, flux_arr real (kind=RKIND), dimension( nVertLevels + 1 ) :: wduz, wdwz, wdtz, dpzx real (kind=RKIND), dimension( nVertLevels ) :: ru_edge_w, q, u_mix + +! real (kind=RKIND), dimension( nVertLevels+1 ) :: d_11, d_22, d_12 +! real (kind=RKIND), dimension( nVertLevels+1 ) :: dudx, dudy, dvdx, dvdy + real (kind=RKIND) :: theta_turb_flux, w_turb_flux, r real (kind=RKIND) :: scalar_weight real (kind=RKIND) :: inv_r_earth @@ -5153,6 +6218,9 @@ subroutine atm_compute_dyn_tend_work(nCells, nEdges, nVertices, nVertLevels_dumm real (kind=RKIND) :: flux3, flux4 real (kind=RKIND) :: q_im2, q_im1, q_i, q_ip1, ua, coef3 + logical, parameter :: perturbation_coriolis = .true. + real (kind=RKIND) :: reference_u + flux4(q_im2, q_im1, q_i, q_ip1, ua) = & ua*( 7.*(q_i + q_im1) - (q_ip1 + q_im2) )/12.0 @@ -5162,43 +6230,14 @@ subroutine atm_compute_dyn_tend_work(nCells, nEdges, nVertices, nVertLevels_dumm MPAS_ACC_TIMER_START('atm_compute_dyn_tend_work [ACC_data_xfer]') - if (rk_step == 1) then - !$acc enter data create(tend_w_euler) - !$acc enter data create(tend_u_euler) - !$acc enter data create(tend_theta_euler) - !$acc enter data create(tend_rho) - - !$acc enter data create(kdiff) - !$acc enter data copyin(tend_rho_physics) - !$acc enter data copyin(rb, rr_save) - !$acc enter data copyin(divergence, vorticity) - !$acc enter data copyin(v) - !$acc enter data copyin(u_init, v_init) - else - !$acc enter data copyin(tend_w_euler) - !$acc enter data copyin(tend_u_euler) - !$acc enter data copyin(tend_theta_euler) - !$acc enter data copyin(tend_rho) + if (les_model_opt /= LES_MODEL_NONE) then + !$acc enter data copyin(bn2) end if - !$acc enter data create(tend_u) - !$acc enter data copyin(cqu, pp, u, w, pv_edge, rho_edge, ke) - !$acc enter data create(h_divergence) - !$acc enter data copyin(ru, rw) + !$acc enter data copyin(ustm, hfx, qfx) !$acc enter data create(rayleigh_damp_coef) - !$acc enter data copyin(tend_ru_physics) - !$acc enter data create(tend_w) - !$acc enter data copyin(rho_zz) - !$acc enter data create(tend_theta) - !$acc enter data copyin(theta_m) - !$acc enter data copyin(ru_save, theta_m_save) - !$acc enter data copyin(cqw) - !$acc enter data copyin(tend_rtheta_physics) - !$acc enter data copyin(rw_save, rt_diabatic_tend) - !$acc enter data create(rthdynten) - !$acc enter data copyin(t_init) -#ifdef CURVATURE - !$acc enter data copyin(ur_cell, vr_cell) -#endif + !$acc enter data create(eddy_visc_horz) + !$acc enter data create(eddy_visc_vert) + !$acc enter data create(prandtl_3d_inv) MPAS_ACC_TIMER_STOP('atm_compute_dyn_tend_work [ACC_data_xfer]') prandtl_inv = 1.0_RKIND / prandtl @@ -5221,57 +6260,51 @@ subroutine atm_compute_dyn_tend_work(nCells, nEdges, nVertices, nVertLevels_dumm !$acc end parallel ! Smagorinsky eddy viscosity, based on horizontal deformation (in this case on model coordinate surfaces). - ! The integration coefficients were precomputed and stored in defc_a and defc_b + ! The integration coefficients were precomputed and stored in deformation_coef_* - if(config_horiz_mixing == "2d_smagorinsky") then + if(les_model_opt == LES_MODEL_NONE) then - !$acc parallel default(present) - !$acc loop gang worker private(d_diag, d_off_diag) - do iCell = cellStart,cellEnd + if(config_horiz_mixing == "2d_smagorinsky") then - !$acc loop vector - do k = 1, nVertLevels - d_diag(k) = 0.0_RKIND - d_off_diag(k) = 0.0_RKIND - end do + call smagorinsky_2d( eddy_visc_horz, u, v, c_s, config_len_disp, & + deformation_coef_c2, deformation_coef_s2, deformation_coef_cs, & + invDt, h_mom_eddy_visc4, config_visc4_2dsmag, h_theta_eddy_visc4, & + cellStart, cellEnd, nEdgesOnCell, edgesOnCell, & + nCells, nEdges ) - !$acc loop seq - do iEdge=1,nEdgesOnCell(iCell) + else if(config_horiz_mixing == "2d_fixed") then + + !$acc parallel default(present) + !$acc loop gang worker + do iCell = cellStart, cellEnd !$acc loop vector - do k=1,nVertLevels - d_diag(k) = d_diag(k) + defc_a(iEdge,iCell)*u(k,EdgesOnCell(iEdge,iCell)) & - - defc_b(iEdge,iCell)*v(k,EdgesOnCell(iEdge,iCell)) - d_off_diag(k) = d_off_diag(k) + defc_b(iEdge,iCell)*u(k,EdgesOnCell(iEdge,iCell)) & - + defc_a(iEdge,iCell)*v(k,EdgesOnCell(iEdge,iCell)) + do k = 1, nVertLevels + eddy_visc_horz(k,iCell) = config_h_theta_eddy_visc2 end do end do -!DIR$ IVDEP - !$acc loop vector - do k=1, nVertLevels - ! here is the Smagorinsky formulation, - ! followed by imposition of an upper bound on the eddy viscosity - kdiff(k,iCell) = min((c_s * config_len_disp)**2 * sqrt(d_diag(k)**2 + d_off_diag(k)**2),(0.01*config_len_disp**2) * invDt) - end do - end do - !$acc end parallel + !$acc end parallel - h_mom_eddy_visc4 = config_visc4_2dsmag * config_len_disp**3 - h_theta_eddy_visc4 = h_mom_eddy_visc4 + h_mom_eddy_visc4 = config_h_mom_eddy_visc4 + h_theta_eddy_visc4 = config_h_theta_eddy_visc4 - else if(config_horiz_mixing == "2d_fixed") then + end if - !$acc parallel default(present) - !$acc loop gang worker - do iCell = cellStart, cellEnd - !$acc loop vector - do k = 1, nVertLevels - kdiff(k,iCell) = config_h_theta_eddy_visc2 - end do - end do - !$acc end parallel + else if (les_model_opt /= LES_MODEL_NONE) then - h_mom_eddy_visc4 = config_h_mom_eddy_visc4 - h_theta_eddy_visc4 = config_h_theta_eddy_visc4 + ! call mpas_log_write(' BV call, index qv, qc, tke $i $i $i ', intArgs=(/index_qv, index_qc, index_tke/)) + + call calculate_n2( bn2, theta_m, exner, pressure_b, pp, zgrid, scalars, index_qv, index_qc, qtot, & + cellStart, cellEnd, nCells) + + call les_models( les_model_opt, les_surface_opt, dynamics_substep, eddy_visc_horz, eddy_visc_vert, & + u, v, ur_cell, vr_cell, & + w, c_s, bn2, zgrid, config_len_disp, & + deformation_coef_c2, deformation_coef_s2, deformation_coef_cs, & + deformation_coef_c, deformation_coef_s, prandtl_3d_inv, & + invDt, h_mom_eddy_visc4, config_visc4_2dsmag, h_theta_eddy_visc4, & + scalars, tend_scalars, index_tke, rho_zz, meshScalingDel2, & + cellStart, cellEnd, nEdgesOnCell, edgesOnCell, cellsOnEdge, & + nCells, nEdges, nVertLevels, maxEdges, num_scalars ) end if @@ -5288,7 +6321,7 @@ subroutine atm_compute_dyn_tend_work(nCells, nEdges, nVertices, nVertLevels_dumm do k = nVertLevels-config_number_cam_damping_levels + 1, nVertLevels visc2cam = 4.0*2.0833*config_len_disp*config_mpas_cam_coef visc2cam = visc2cam*(1.0-real(nVertLevels-k)/real(config_number_cam_damping_levels)) - kdiff(k ,iCell) = max(kdiff(k ,iCell),visc2cam) + eddy_visc_horz(k,iCell) = max(eddy_visc_horz(k,iCell),visc2cam) end do end do !$acc end parallel @@ -5298,8 +6331,6 @@ subroutine atm_compute_dyn_tend_work(nCells, nEdges, nVertices, nVertLevels_dumm end if ! tendency for density. - ! accumulate total water here for later use in w tendency calculation. - ! accumulate horizontal mass-flux !$acc parallel default(present) @@ -5381,281 +6412,112 @@ subroutine atm_compute_dyn_tend_work(nCells, nEdges, nVertices, nVertLevels_dumm !$acc loop vector do k=1,nVertLevels tend_u_euler(k,iEdge) = - cqu(k,iEdge)*( (pp(k,cell2)-pp(k,cell1))*invDcEdge(iEdge)/(.5*(zz(k,cell2)+zz(k,cell1))) & - -0.5*zxu(k,iEdge)*(dpdz(k,cell1)+dpdz(k,cell2)) ) - end do - - end if - - ! vertical transport of u - - wduz(1) = 0. - - k = 2 - wduz(k) = 0.5*( rw(k,cell1)+rw(k,cell2))*(fzm(k)*u(k,iEdge)+fzp(k)*u(k-1,iEdge)) - !$acc loop vector - do k=3,nVertLevels-1 - wduz(k) = flux3( u(k-2,iEdge),u(k-1,iEdge),u(k,iEdge),u(k+1,iEdge),0.5*(rw(k,cell1)+rw(k,cell2)), 1.0_RKIND ) - end do - k = nVertLevels - wduz(k) = 0.5*( rw(k,cell1)+rw(k,cell2))*(fzm(k)*u(k,iEdge)+fzp(k)*u(k-1,iEdge)) - - wduz(nVertLevels+1) = 0. - -!DIR$ IVDEP - !$acc loop vector - do k=1,nVertLevels - tend_u(k,iEdge) = - rdzw(k)*(wduz(k+1)-wduz(k)) ! first use of tend_u - end do - - ! Next, nonlinear Coriolis term (q) following Ringler et al JCP 2009 - - !$acc loop vector - do k=1,nVertLevels - q(k) = 0.0_RKIND - end do - - !$acc loop seq - do j = 1,nEdgesOnEdge(iEdge) - eoe = edgesOnEdge(j,iEdge) - - !$acc loop vector - do k=1,nVertLevels - workpv = 0.5 * (pv_edge(k,iEdge) + pv_edge(k,eoe)) -! the original definition of pv_edge had a factor of 1/density. We have removed that factor -! given that it was not integral to any conservation property of the system - q(k) = q(k) + weightsOnEdge(j,iEdge) * u(k,eoe) * workpv - end do - end do - -!DIR$ IVDEP - !$acc loop vector - do k=1,nVertLevels - - ! horizontal ke gradient and vorticity terms in the vector invariant formulation - ! of the horizontal momentum equation - tend_u(k,iEdge) = tend_u(k,iEdge) + rho_edge(k,iEdge)* (q(k) - (ke(k,cell2) - ke(k,cell1)) & - * invDcEdge(iEdge)) & - - u(k,iEdge)*0.5*(h_divergence(k,cell1)+h_divergence(k,cell2)) -#ifdef CURVATURE - ! curvature terms for the sphere - tend_u(k,iEdge) = tend_u(k,iEdge) & - - 2.*omega*cos(angleEdge(iEdge))*cos(latEdge(iEdge)) & - *rho_edge(k,iEdge)*.25*(w(k,cell1)+w(k+1,cell1)+w(k,cell2)+w(k+1,cell2)) & - - u(k,iEdge)*.25*(w(k+1,cell1)+w(k,cell1)+w(k,cell2)+w(k+1,cell2)) & - *rho_edge(k,iEdge) * inv_r_earth -#endif - end do - - end do - !$acc end parallel - - ! - ! horizontal mixing for u - ! mixing terms are integrated using forward-Euler, so this tendency is only computed in the - ! first Runge-Kutta substep and saved for use in later RK substeps 2 and 3. - ! - - if (rk_step == 1) then - -!$OMP BARRIER - - ! del^4 horizontal filter. We compute this as del^2 ( del^2 (u) ). - ! First, storage to hold the result from the first del^2 computation. - - !$acc parallel default(present) - !$acc loop gang worker - do iEdge = edgeStart, edgeEnd - !$acc loop vector - do k = 1, nVertLevels - delsq_u(k,iEdge) = 0.0_RKIND - end do - end do - !$acc end parallel - - !$acc parallel default(present) - !$acc loop gang worker - do iEdge=edgeStart,edgeEnd - cell1 = cellsOnEdge(1,iEdge) - cell2 = cellsOnEdge(2,iEdge) - vertex1 = verticesOnEdge(1,iEdge) - vertex2 = verticesOnEdge(2,iEdge) - r_dc = invDcEdge(iEdge) - r_dv = min(invDvEdge(iEdge), 4*invDcEdge(iEdge)) - -!DIR$ IVDEP - !$acc loop vector - do k=1,nVertLevels - - ! Compute diffusion, computed as \nabla divergence - k \times \nabla vorticity - ! only valid for h_mom_eddy_visc4 == constant - u_diffusion = ( divergence(k,cell2) - divergence(k,cell1) ) * r_dc & - -( vorticity(k,vertex2) - vorticity(k,vertex1) ) * r_dv - - delsq_u(k,iEdge) = delsq_u(k,iEdge) + u_diffusion - - kdiffu = 0.5*(kdiff(k,cell1)+kdiff(k,cell2)) - - ! include 2nd-orer diffusion here - tend_u_euler(k,iEdge) = tend_u_euler(k,iEdge) & - + rho_edge(k,iEdge)* kdiffu * u_diffusion * meshScalingDel2(iEdge) - - end do - end do - !$acc end parallel - - if (h_mom_eddy_visc4 > 0.0) then ! 4th order mixing is active - -!$OMP BARRIER - - !$acc parallel default(present) - !$acc loop gang worker - do iVertex=vertexStart,vertexEnd - - !$acc loop vector - do k=1,nVertLevels - delsq_vorticity(k,iVertex) = 0.0_RKIND - end do - - !$acc loop seq - do i=1,vertexDegree - iEdge = edgesOnVertex(i,iVertex) - edge_sign = invAreaTriangle(iVertex) * dcEdge(iEdge) * edgesOnVertex_sign(i,iVertex) - - !$acc loop vector - do k=1,nVertLevels - delsq_vorticity(k,iVertex) = delsq_vorticity(k,iVertex) + edge_sign * delsq_u(k,iEdge) - end do - end do - end do - - !$acc loop gang worker - do iCell=cellStart,cellEnd - - !$acc loop vector - do k=1,nVertLevels - delsq_divergence(k,iCell) = 0.0_RKIND - end do - - r = invAreaCell(iCell) - - !$acc loop seq - do i=1,nEdgesOnCell(iCell) - iEdge = edgesOnCell(i,iCell) - edge_sign = r * dvEdge(iEdge) * edgesOnCell_sign(i,iCell) - - !$acc loop vector - do k=1,nVertLevels - delsq_divergence(k,iCell) = delsq_divergence(k,iCell) + edge_sign * delsq_u(k,iEdge) - end do - end do - end do - !$acc end parallel - -!$OMP BARRIER - - !$acc parallel default(present) - !$acc loop gang worker - do iEdge=edgeSolveStart,edgeSolveEnd - cell1 = cellsOnEdge(1,iEdge) - cell2 = cellsOnEdge(2,iEdge) - vertex1 = verticesOnEdge(1,iEdge) - vertex2 = verticesOnEdge(2,iEdge) - - u_mix_scale = meshScalingDel4(iEdge)*h_mom_eddy_visc4 - r_dc = u_mix_scale * config_del4u_div_factor * invDcEdge(iEdge) - r_dv = u_mix_scale * min(invDvEdge(iEdge), 4*invDcEdge(iEdge)) + -0.5*zxu(k,iEdge)*(dpdz(k,cell1)+dpdz(k,cell2)) ) + end do -!DIR$ IVDEP - !$acc loop vector - do k=1,nVertLevels + end if - ! Compute diffusion, computed as \nabla divergence - k \times \nabla vorticity - ! only valid for h_mom_eddy_visc4 == constant - ! - ! Here, we scale the diffusion on the divergence part a factor of config_del4u_div_factor - ! relative to the rotational part. The stability constraint on the divergence component is much less - ! stringent than the rotational part, and this flexibility may be useful. - ! - u_diffusion = rho_edge(k,iEdge) * ( ( delsq_divergence(k,cell2) - delsq_divergence(k,cell1) ) * r_dc & - -( delsq_vorticity(k,vertex2) - delsq_vorticity(k,vertex1) ) * r_dv ) - tend_u_euler(k,iEdge) = tend_u_euler(k,iEdge) - u_diffusion - - end do - end do - !$acc end parallel - - end if ! 4th order mixing is active + ! vertical transport of u - ! - ! vertical mixing for u - 2nd order filter in physical (z) space - ! - if ( v_mom_eddy_visc2 > 0.0 ) then + wduz(1) = 0. - if (config_mix_full) then ! mix full state + k = 2 + wduz(k) = 0.5*( rw(k,cell1)+rw(k,cell2))*(fzm(k)*u(k,iEdge)+fzp(k)*u(k-1,iEdge)) + !$acc loop vector + do k=3,nVertLevels-1 + wduz(k) = flux3( u(k-2,iEdge),u(k-1,iEdge),u(k,iEdge),u(k+1,iEdge),0.5*(rw(k,cell1)+rw(k,cell2)), 1.0_RKIND ) + end do + k = nVertLevels + wduz(k) = 0.5*( rw(k,cell1)+rw(k,cell2))*(fzm(k)*u(k,iEdge)+fzp(k)*u(k-1,iEdge)) - !$acc parallel default(present) - !$acc loop gang worker - do iEdge=edgeSolveStart,edgeSolveEnd + wduz(nVertLevels+1) = 0. - cell1 = cellsOnEdge(1,iEdge) - cell2 = cellsOnEdge(2,iEdge) +!DIR$ IVDEP + !$acc loop vector + do k=1,nVertLevels + tend_u(k,iEdge) = - rdzw(k)*(wduz(k+1)-wduz(k)) ! first use of tend_u + end do - !$acc loop vector - do k=2,nVertLevels-1 + ! Next, nonlinear Coriolis term (q) following Ringler et al JCP 2009 - z1 = 0.5*(zgrid(k-1,cell1)+zgrid(k-1,cell2)) - z2 = 0.5*(zgrid(k ,cell1)+zgrid(k ,cell2)) - z3 = 0.5*(zgrid(k+1,cell1)+zgrid(k+1,cell2)) - z4 = 0.5*(zgrid(k+2,cell1)+zgrid(k+2,cell2)) + !$acc loop vector + do k=1,nVertLevels + q(k) = 0.0_RKIND + end do - zm = 0.5*(z1+z2) - z0 = 0.5*(z2+z3) - zp = 0.5*(z3+z4) + !$acc loop seq + do j = 1,nEdgesOnEdge(iEdge) + eoe = edgesOnEdge(j,iEdge) - tend_u_euler(k,iEdge) = tend_u_euler(k,iEdge) + rho_edge(k,iEdge) * v_mom_eddy_visc2*( & - (u(k+1,iEdge)-u(k ,iEdge))/(zp-z0) & - -(u(k ,iEdge)-u(k-1,iEdge))/(z0-zm) )/(0.5*(zp-zm)) - end do - end do - !$acc end parallel + !$acc loop vector + do k=1,nVertLevels + workpv = 0.5 * (pv_edge(k,iEdge) + pv_edge(k,eoe)) +! the original definition of pv_edge had a factor of 1/density. We have removed that factor +! given that it was not integral to any conservation property of the system + q(k) = q(k) + weightsOnEdge(j,iEdge) * u(k,eoe) * workpv + end do + end do - else ! idealized cases where we mix on the perturbation from the initial 1-D state + if (perturbation_coriolis) then ! this is correct only for constant f + !$acc loop seq + do j = 1,nEdgesOnEdge(iEdge) + eoe = edgesOnEdge(j,iEdge) - !$acc parallel default(present) - !$acc loop gang worker private(u_mix) - do iEdge=edgeSolveStart,edgeSolveEnd + !$acc loop vector + do k=1,nVertLevels + reference_u = u_init(k) * cos(angleEdge(eoe)) - v_init(k) * sin(angleEdge(eoe)) + q(k) = q(k) - weightsOnEdge(j,iEdge) * reference_u * fEdge(iEdge) +! q(k) = q(k) - weightsOnEdge(j,iEdge) * reference_u * 0.729210E-04 + end do + end do + end if - cell1 = cellsOnEdge(1,iEdge) - cell2 = cellsOnEdge(2,iEdge) +!DIR$ IVDEP + !$acc loop vector + do k=1,nVertLevels - !$acc loop vector - do k=1,nVertLevels - u_mix(k) = u(k,iEdge) - u_init(k) * cos( angleEdge(iEdge) ) & - - v_init(k) * sin( angleEdge(iEdge) ) - end do + ! horizontal ke gradient and vorticity terms in the vector invariant formulation + ! of the horizontal momentum equation + tend_u(k,iEdge) = tend_u(k,iEdge) + rho_edge(k,iEdge)* (q(k) - (ke(k,cell2) - ke(k,cell1)) & + * invDcEdge(iEdge)) & + - u(k,iEdge)*0.5*(h_divergence(k,cell1)+h_divergence(k,cell2)) +#ifdef CURVATURE + ! curvature terms for the sphere + tend_u(k,iEdge) = tend_u(k,iEdge) & + - 2.*omega*cos(angleEdge(iEdge))*cos(latEdge(iEdge)) & + *rho_edge(k,iEdge)*.25*(w(k,cell1)+w(k+1,cell1)+w(k,cell2)+w(k+1,cell2)) & + - u(k,iEdge)*.25*(w(k+1,cell1)+w(k,cell1)+w(k,cell2)+w(k+1,cell2)) & + *rho_edge(k,iEdge) * inv_r_earth +#endif + end do - !$acc loop vector - do k=2,nVertLevels-1 + end do + !$acc end parallel - z1 = 0.5*(zgrid(k-1,cell1)+zgrid(k-1,cell2)) - z2 = 0.5*(zgrid(k ,cell1)+zgrid(k ,cell2)) - z3 = 0.5*(zgrid(k+1,cell1)+zgrid(k+1,cell2)) - z4 = 0.5*(zgrid(k+2,cell1)+zgrid(k+2,cell2)) + ! + ! horizontal mixing for u + ! mixing terms are integrated using forward-Euler, so this tendency is only computed in the + ! first Runge-Kutta substep and saved for use in later RK substeps 2 and 3. + ! - zm = 0.5*(z1+z2) - z0 = 0.5*(z2+z3) - zp = 0.5*(z3+z4) + if (rk_step == 1) then - tend_u_euler(k,iEdge) = tend_u_euler(k,iEdge) + rho_edge(k,iEdge) * v_mom_eddy_visc2*( & - (u_mix(k+1)-u_mix(k ))/(zp-z0) & - -(u_mix(k )-u_mix(k-1))/(z0-zm) )/(0.5*(zp-zm)) - end do - end do - !$acc end parallel +!$OMP BARRIER - end if ! mix perturbation state + call u_dissipation_3d( edgeStart, edgeEnd, edgeSolveStart, edgeSolveEnd, vertexStart, vertexEnd, & + cellStart, cellEnd, nCells, nEdges, nVertices, vertexDegree, & + cellsOnEdge, verticesOnEdge, edgesOnCell, edgesOnVertex, & + nEdgesOnCell, edgesOnCell_sign, edgesOnVertex_sign, & + invAreaCell, invAreaTriangle, invDvEdge, invDcEdge, & + angleEdge, dcEdge, dvEdge, meshScalingDel2, meshScalingDel4, & + config_mix_full, h_mom_eddy_visc4, v_mom_eddy_visc2, & + config_del4u_div_factor, zgrid, & + eddy_visc_horz, eddy_visc_vert, zz, rdzu, rdzw, & + fzm, fzp, les_model_opt, les_surface_opt, & + config_surface_drag_coefficient, & + delsq_u, delsq_vorticity, delsq_divergence, & + u, v, divergence, vorticity, rho_edge, rho_zz, u_init, v_init, ustm, tend_u_euler ) - end if ! vertical mixing of horizontal momentum end if ! (rk_step 1 test for computing mixing terms) @@ -5783,82 +6645,20 @@ subroutine atm_compute_dyn_tend_work(nCells, nEdges, nVertices, nVertLevels_dumm if (rk_step == 1) then -! !OMP BARRIER why is this openmp barrier here??? - - ! del^4 horizontal filter. We compute this as del^2 ( del^2 (u) ). - ! - ! First, storage to hold the result from the first del^2 computation. - ! we copied code from the theta mixing, hence the theta* names. - - !$acc parallel default(present) - !$acc loop gang worker - do iCell=cellStart,cellEnd - - !$acc loop vector - do k=1,nVertLevels - delsq_w(k,iCell) = 0.0_RKIND - end do - - !$acc loop vector - do k=1,nVertLevels+1 - tend_w_euler(k,iCell) = 0.0_RKIND - end do - - r_areaCell = invAreaCell(iCell) - - !$acc loop seq - do i=1,nEdgesOnCell(iCell) - iEdge = edgesOnCell(i,iCell) - - edge_sign = 0.5 * r_areaCell*edgesOnCell_sign(i,iCell) * dvEdge(iEdge) * invDcEdge(iEdge) - - cell1 = cellsOnEdge(1,iEdge) - cell2 = cellsOnEdge(2,iEdge) - -!DIR$ IVDEP - !$acc loop vector - do k=2,nVertLevels - - w_turb_flux = edge_sign*(rho_edge(k,iEdge)+rho_edge(k-1,iEdge))*(w(k,cell2) - w(k,cell1)) - delsq_w(k,iCell) = delsq_w(k,iCell) + w_turb_flux - w_turb_flux = w_turb_flux * meshScalingDel2(iEdge) * 0.25 * & - (kdiff(k,cell1)+kdiff(k,cell2)+kdiff(k-1,cell1)+kdiff(k-1,cell2)) - tend_w_euler(k,iCell) = tend_w_euler(k,iCell) + w_turb_flux - end do - end do - end do - !$acc end parallel - -!$OMP BARRIER - - if (h_mom_eddy_visc4 > 0.0) then ! 4th order mixing is active - - !$acc parallel default(present) - !$acc loop gang worker - do iCell=cellSolveStart,cellSolveEnd ! Technically updating fewer cells than before... - - r_areaCell = h_mom_eddy_visc4 * invAreaCell(iCell) - - !$acc loop seq - do i=1,nEdgesOnCell(iCell) - iEdge = edgesOnCell(i,iCell) - cell1 = cellsOnEdge(1,iEdge) - cell2 = cellsOnEdge(2,iEdge) - - edge_sign = meshScalingDel4(iEdge)*r_areaCell*dvEdge(iEdge)*edgesOnCell_sign(i,iCell) * invDcEdge(iEdge) - - !$acc loop vector - do k=2,nVertLevels - tend_w_euler(k,iCell) = tend_w_euler(k,iCell) - edge_sign * (delsq_w(k,cell2) - delsq_w(k,cell1)) - end do - - end do - end do - !$acc end parallel - - end if ! 4th order mixing is active - - end if ! horizontal mixing for w computed in first rk_step + call w_dissipation_3d( cellStart, cellEnd, cellSolveStart, cellSolveEnd, & + nCells, nEdges, & + nEdgesOnCell, edgesOnCell, cellsOnEdge, edgesOnCell_sign, & + invAreaCell, invDcEdge, dvEdge, & + meshScalingDel2, meshScalingDel4, & + rdzw, rdzu, & + v_mom_eddy_visc2, h_mom_eddy_visc4, & + delsq_w, & + w, rho_edge, rho_zz, divergence, zz, & + eddy_visc_horz, eddy_visc_vert, & + les_model_opt, les_surface_opt, & + tend_w_euler ) + + end if ! mixing for w computed in first rk_step ! Note for OpenMP parallelization: We could avoid allocating the delsq_w scratch ! array, and just use the delsq_theta array as was previously done; however, @@ -5911,27 +6711,6 @@ subroutine atm_compute_dyn_tend_work(nCells, nEdges, nVertices, nVertLevels_dumm end do !$acc end parallel - if (rk_step == 1) then - - if ( v_mom_eddy_visc2 > 0.0 ) then - - !$acc parallel default(present) - !$acc loop gang worker - do iCell=cellSolveStart,cellSolveEnd -!DIR$ IVDEP - !$acc loop vector - do k=2,nVertLevels - tend_w_euler(k,iCell) = tend_w_euler(k,iCell) + v_mom_eddy_visc2*0.5*(rho_zz(k,iCell)+rho_zz(k-1,iCell))*( & - (w(k+1,iCell)-w(k ,iCell))*rdzw(k) & - -(w(k ,iCell)-w(k-1,iCell))*rdzw(k-1) )*rdzu(k) - end do - end do - !$acc end parallel - - end if - - end if ! mixing term computed first rk_step - ! add in mixing terms for w !$acc parallel default(present) @@ -6022,69 +6801,25 @@ subroutine atm_compute_dyn_tend_work(nCells, nEdges, nVertices, nVertLevels_dumm if (rk_step == 1) then - !$acc parallel default(present) - !$acc loop gang worker - do iCell=cellStart,cellEnd - - !$acc loop vector - do k=1,nVertLevels - delsq_theta(k,iCell) = 0.0_RKIND - tend_theta_euler(k,iCell) = 0.0_RKIND - end do - - r_areaCell = invAreaCell(iCell) - - !$acc loop seq - do i=1,nEdgesOnCell(iCell) - iEdge = edgesOnCell(i,iCell) - edge_sign = r_areaCell*edgesOnCell_sign(i,iCell) * dvEdge(iEdge) * invDcEdge(iEdge) - pr_scale = prandtl_inv * meshScalingDel2(iEdge) - cell1 = cellsOnEdge(1,iEdge) - cell2 = cellsOnEdge(2,iEdge) -!DIR$ IVDEP - !$acc loop vector - do k=1,nVertLevels - -! we are computing the Smagorinsky filter at more points than needed here so as to pick up the delsq_theta for 4th order filter below - - theta_turb_flux = edge_sign*(theta_m(k,cell2) - theta_m(k,cell1))*rho_edge(k,iEdge) - delsq_theta(k,iCell) = delsq_theta(k,iCell) + theta_turb_flux - theta_turb_flux = theta_turb_flux*0.5*(kdiff(k,cell1)+kdiff(k,cell2)) * pr_scale - tend_theta_euler(k,iCell) = tend_theta_euler(k,iCell) + theta_turb_flux - - end do - end do - end do - !$acc end parallel - -!$OMP BARRIER - - if (h_theta_eddy_visc4 > 0.0) then ! 4th order mixing is active - - !$acc parallel default(present) - !$acc loop gang worker - do iCell=cellSolveStart,cellSolveEnd ! Technically updating fewer cells than before... - - r_areaCell = h_theta_eddy_visc4 * prandtl_inv * invAreaCell(iCell) - - !$acc loop seq - do i=1,nEdgesOnCell(iCell) - - iEdge = edgesOnCell(i,iCell) - edge_sign = meshScalingDel4(iEdge)*r_areaCell*dvEdge(iEdge)*edgesOnCell_sign(i,iCell)*invDcEdge(iEdge) - - cell1 = cellsOnEdge(1,iEdge) - cell2 = cellsOnEdge(2,iEdge) - - !$acc loop vector - do k=1,nVertLevels - tend_theta_euler(k,iCell) = tend_theta_euler(k,iCell) - edge_sign*(delsq_theta(k,cell2) - delsq_theta(k,cell1)) - end do - end do - end do - !$acc end parallel - - end if ! 4th order mixing is active + call scalar_dissipation_3d_les( cellStart, cellEnd, cellSolveStart, cellSolveEnd, & + nCells, nEdges, & + nEdgesOnCell, edgesOnCell, cellsOnEdge, edgesOnCell_sign, & + invAreaCell, invDcEdge, dvEdge, & + meshScalingDel2, meshScalingDel4, & + config_mix_full, t_init, zgrid, & + rdzw, rdzu, fzm, fzp, & + v_theta_eddy_visc2, h_theta_eddy_visc4, prandtl_inv, & + prandtl_3d_inv, & + delsq_theta, & + theta_m, rho_edge, rho_zz, zz, & + eddy_visc_horz, eddy_visc_vert, & + bn2, config_len_disp, scalars, tend_scalars, & + index_tke, index_qv, num_scalars, config_mix_scalars, & + les_model_opt, les_surface_opt, clock, dt, & + config_surface_heat_flux, config_surface_moisture_flux, & + ur_cell, vr_cell, & + hfx, qfx, & + tend_theta_euler, dynamics_substep ) end if ! theta mixing calculated first rk_step @@ -6125,116 +6860,26 @@ subroutine atm_compute_dyn_tend_work(nCells, nEdges, nVertices, nVertLevels_dumm end do !$acc end parallel - ! - ! vertical mixing for theta - 2nd order - ! - - if (rk_step == 1) then - - if ( v_theta_eddy_visc2 > 0.0 ) then ! vertical mixing for theta_m - - if (config_mix_full) then - - !$acc parallel default(present) - !$acc loop gang worker - do iCell = cellSolveStart,cellSolveEnd - - !$acc loop vector - do k=2,nVertLevels-1 - z1 = zgrid(k-1,iCell) - z2 = zgrid(k ,iCell) - z3 = zgrid(k+1,iCell) - z4 = zgrid(k+2,iCell) - - zm = 0.5*(z1+z2) - z0 = 0.5*(z2+z3) - zp = 0.5*(z3+z4) - - tend_theta_euler(k,iCell) = tend_theta_euler(k,iCell) + v_theta_eddy_visc2*prandtl_inv*rho_zz(k,iCell)*(& - (theta_m(k+1,iCell)-theta_m(k ,iCell))/(zp-z0) & - -(theta_m(k ,iCell)-theta_m(k-1,iCell))/(z0-zm) )/(0.5*(zp-zm)) - end do - end do - !$acc end parallel - - else ! idealized cases where we mix on the perturbation from the initial 1-D state - - !$acc parallel default(present) - !$acc loop gang worker - do iCell = cellSolveStart,cellSolveEnd - do k=2,nVertLevels-1 - z1 = zgrid(k-1,iCell) - z2 = zgrid(k ,iCell) - z3 = zgrid(k+1,iCell) - z4 = zgrid(k+2,iCell) - - zm = 0.5*(z1+z2) - z0 = 0.5*(z2+z3) - zp = 0.5*(z3+z4) - - tend_theta_euler(k,iCell) = tend_theta_euler(k,iCell) + v_theta_eddy_visc2*prandtl_inv*rho_zz(k,iCell)*(& - ((theta_m(k+1,iCell)-t_init(k+1,iCell))-(theta_m(k ,iCell)-t_init(k,iCell)))/(zp-z0) & - -((theta_m(k ,iCell)-t_init(k,iCell))-(theta_m(k-1,iCell)-t_init(k-1,iCell)))/(z0-zm) )/(0.5*(zp-zm)) - end do - end do - !$acc end parallel - - end if - - end if - - end if ! compute vertical theta mixing on first rk_step - !$acc parallel default(present) !$acc loop gang worker do iCell = cellSolveStart,cellSolveEnd !DIR$ IVDEP !$acc loop vector do k=1,nVertLevels -! tend_theta(k,iCell) = tend_theta(k,iCell) + tend_theta_euler(k,iCell) tend_theta(k,iCell) = tend_theta(k,iCell) + tend_theta_euler(k,iCell) + tend_rtheta_physics(k,iCell) end do end do !$acc end parallel MPAS_ACC_TIMER_START('atm_compute_dyn_tend_work [ACC_data_xfer]') - if (rk_step == 1) then - !$acc exit data copyout(tend_w_euler) - !$acc exit data copyout(tend_u_euler) - !$acc exit data copyout(tend_theta_euler) - !$acc exit data copyout(tend_rho) - - !$acc exit data delete(kdiff) - !$acc exit data delete(tend_rho_physics) - !$acc exit data delete(rb, rr_save) - !$acc exit data delete(divergence, vorticity) - !$acc exit data delete(v) - !$acc exit data delete(u_init, v_init) - else - !$acc exit data delete(tend_w_euler) - !$acc exit data delete(tend_u_euler) - !$acc exit data delete(tend_theta_euler) - !$acc exit data delete(tend_rho) + if (les_model_opt /= LES_MODEL_NONE) then + !$acc exit data copyout(bn2) end if - !$acc exit data copyout(tend_u) - !$acc exit data delete(cqu, pp, u, w, pv_edge, rho_edge, ke) - !$acc exit data copyout(h_divergence) - !$acc exit data delete(ru, rw) + !$acc exit data delete(ustm, hfx, qfx) !$acc exit data delete(rayleigh_damp_coef) - !$acc exit data delete(tend_ru_physics) - !$acc exit data copyout(tend_w) - !$acc exit data delete(rho_zz) - !$acc exit data copyout(tend_theta) - !$acc exit data delete(theta_m) - !$acc exit data delete(ru_save, theta_m_save) - !$acc exit data delete(cqw) - !$acc exit data delete(tend_rtheta_physics) - !$acc exit data delete(rw_save, rt_diabatic_tend) - !$acc exit data copyout(rthdynten) - !$acc exit data delete(t_init) -#ifdef CURVATURE - !$acc exit data delete(ur_cell, vr_cell) -#endif + !$acc exit data delete(eddy_visc_horz) + !$acc exit data delete(eddy_visc_vert) + !$acc exit data delete(prandtl_3d_inv) MPAS_ACC_TIMER_STOP('atm_compute_dyn_tend_work [ACC_data_xfer]') end subroutine atm_compute_dyn_tend_work @@ -6403,26 +7048,10 @@ subroutine atm_compute_solve_diagnostics_work(nCells, nEdges, nVertices, & logical :: reconstruct_v - MPAS_ACC_TIMER_START('atm_compute_solve_diagnostics [ACC_data_xfer]') - !$acc enter data copyin(cellsOnEdge,dcEdge,dvEdge, & - !$acc edgesOnVertex,edgesOnVertex_sign,invAreaTriangle, & - !$acc nEdgesOnCell,edgesOnCell, & - !$acc edgesOnCell_sign,invAreaCell, & - !$acc invAreaTriangle,edgesOnVertex, & - !$acc verticesOnCell,kiteForCell,kiteAreasOnVertex, & - !$acc nEdgesOnEdge,edgesOnEdge,weightsOnEdge, & - !$acc fVertex, & - !$acc verticesOnEdge, & - !$acc invDvEdge,invDcEdge) - !$acc enter data copyin(u,h) - MPAS_ACC_TIMER_STOP('atm_compute_solve_diagnostics [ACC_data_xfer]') ! ! Compute height on cell edges at velocity locations ! - MPAS_ACC_TIMER_START('atm_compute_solve_diagnostics [ACC_data_xfer]') - !$acc enter data create(h_edge,vorticity,divergence) - MPAS_ACC_TIMER_STOP('atm_compute_solve_diagnostics [ACC_data_xfer]') !$acc parallel default(present) !$acc loop gang do iEdge=edgeStart,edgeEnd @@ -6507,9 +7136,6 @@ subroutine atm_compute_solve_diagnostics_work(nCells, nEdges, nVertices, & ! ! Replace 2.0 with 2 in exponentiation to avoid outside chance that ! compiler will actually allow "float raised to float" operation - MPAS_ACC_TIMER_START('atm_compute_solve_diagnostics [ACC_data_xfer]') - !$acc enter data create(ke) - MPAS_ACC_TIMER_STOP('atm_compute_solve_diagnostics [ACC_data_xfer]') !$acc parallel default(present) !$acc loop gang do iCell=cellStart,cellEnd @@ -6604,14 +7230,6 @@ subroutine atm_compute_solve_diagnostics_work(nCells, nEdges, nVertices, & if(rk_step /= 3) reconstruct_v = .false. end if - MPAS_ACC_TIMER_START('atm_compute_solve_diagnostics [ACC_data_xfer]') - if (reconstruct_v) then - !$acc enter data create(v) - else - !$acc enter data copyin(v) - end if - MPAS_ACC_TIMER_STOP('atm_compute_solve_diagnostics [ACC_data_xfer]') - if (reconstruct_v) then !$acc parallel default(present) !$acc loop gang @@ -6639,9 +7257,6 @@ subroutine atm_compute_solve_diagnostics_work(nCells, nEdges, nVertices, & ! ! Avoid dividing h_vertex by areaTriangle and move areaTriangle into ! numerator for the pv_vertex calculation - MPAS_ACC_TIMER_START('atm_compute_solve_diagnostics [ACC_data_xfer]') - !$acc enter data create(pv_vertex) - MPAS_ACC_TIMER_STOP('atm_compute_solve_diagnostics [ACC_data_xfer]') !$acc parallel default(present) !$acc loop collapse(2) do iVertex = vertexStart,vertexEnd @@ -6665,9 +7280,6 @@ subroutine atm_compute_solve_diagnostics_work(nCells, nEdges, nVertices, & ! Compute pv at the edges ! ( this computes pv_edge at all edges bounding real cells ) ! - MPAS_ACC_TIMER_START('atm_compute_solve_diagnostics [ACC_data_xfer]') - !$acc enter data create(pv_edge) - MPAS_ACC_TIMER_STOP('atm_compute_solve_diagnostics [ACC_data_xfer]') !$acc parallel default(present) !$acc loop collapse(2) do iEdge = edgeStart,edgeEnd @@ -6685,9 +7297,6 @@ subroutine atm_compute_solve_diagnostics_work(nCells, nEdges, nVertices, & ! ( this computes pv_cell for all real cells ) ! only needed for APVM upwinding ! - MPAS_ACC_TIMER_START('atm_compute_solve_diagnostics [ACC_data_xfer]') - !$acc enter data create(pv_cell) - MPAS_ACC_TIMER_STOP('atm_compute_solve_diagnostics [ACC_data_xfer]') !$acc parallel default(present) !$acc loop gang do iCell=cellStart,cellEnd @@ -6726,9 +7335,6 @@ subroutine atm_compute_solve_diagnostics_work(nCells, nEdges, nVertices, & ! Merged loops for calculating gradPVt, gradPVn and pv_edge ! Also precomputed inverses of dvEdge and dcEdge to avoid repeated divisions ! - MPAS_ACC_TIMER_START('atm_compute_solve_diagnostics [ACC_data_xfer]') - !$acc enter data create(gradPVt,gradPVn) - MPAS_ACC_TIMER_STOP('atm_compute_solve_diagnostics [ACC_data_xfer]') r = config_apvm_upwinding * dt !$acc parallel default(present) !$acc loop gang @@ -6745,31 +7351,10 @@ subroutine atm_compute_solve_diagnostics_work(nCells, nEdges, nVertices, & end do !$acc end parallel - MPAS_ACC_TIMER_START('atm_compute_solve_diagnostics [ACC_data_xfer]') - !$acc exit data delete(pv_cell,gradPVt,gradPVn) - MPAS_ACC_TIMER_STOP('atm_compute_solve_diagnostics [ACC_data_xfer]') end if ! apvm upwinding - MPAS_ACC_TIMER_START('atm_compute_solve_diagnostics [ACC_data_xfer]') - !$acc exit data delete(cellsOnEdge,dcEdge,dvEdge, & - !$acc edgesOnVertex,edgesOnVertex_sign,invAreaTriangle, & - !$acc nEdgesOnCell,edgesOnCell, & - !$acc edgesOnCell_sign,invAreaCell, & - !$acc invAreaTriangle,edgesOnVertex, & - !$acc verticesOnCell,kiteForCell,kiteAreasOnVertex, & - !$acc nEdgesOnEdge,edgesOnEdge,weightsOnEdge, & - !$acc verticesOnEdge, & - !$acc fVertex,invDvEdge,invDcEdge) - !$acc exit data delete(u,h) - !$acc exit data copyout(h_edge,vorticity,divergence, & - !$acc ke, & - !$acc v, & - !$acc pv_vertex, & - !$acc pv_edge) - MPAS_ACC_TIMER_STOP('atm_compute_solve_diagnostics [ACC_data_xfer]') - end subroutine atm_compute_solve_diagnostics_work @@ -6858,17 +7443,13 @@ subroutine atm_init_coupled_diagnostics(state, time_lev, diag, mesh, configs, & call mpas_pool_get_array(mesh, 'zb3_cell', zb3_cell) MPAS_ACC_TIMER_START('atm_init_coupled_diagnostics [ACC_data_xfer]') - ! copyin invariant fields - !$acc enter data copyin(cellsOnEdge,nEdgesOnCell,edgesOnCell, & - !$acc edgesOnCell_sign,zz,fzm,fzp,zb,zb3, & - !$acc zb_cell,zb3_cell) ! copyin the data that is only on the right-hand side - !$acc enter data copyin(scalars(index_qv,:,:),u,w,rho,theta, & + !$acc enter data copyin(scalars(index_qv,:,:),w,rho,theta, & !$acc rho_base,theta_base) ! copyin the data that will be modified in this routine - !$acc enter data create(theta_m,rho_zz,ru,rw,rho_p,rtheta_base, & + !$acc enter data create(theta_m,ru,rw,rho_p,rtheta_base, & !$acc rtheta_p,exner,exner_base,pressure_p, & !$acc pressure_base) MPAS_ACC_TIMER_STOP('atm_init_coupled_diagnostics [ACC_data_xfer]') @@ -6992,17 +7573,12 @@ subroutine atm_init_coupled_diagnostics(state, time_lev, diag, mesh, configs, & !$acc end parallel MPAS_ACC_TIMER_START('atm_init_coupled_diagnostics [ACC_data_xfer]') - ! delete invariant fields - !$acc exit data delete(cellsOnEdge,nEdgesOnCell,edgesOnCell, & - !$acc edgesOnCell_sign,zz,fzm,fzp,zb,zb3, & - !$acc zb_cell,zb3_cell) - ! delete the data that is only on the right-hand side - !$acc exit data delete(scalars(index_qv,:,:),u,w,rho,theta, & + !$acc exit data delete(scalars(index_qv,:,:),w,rho,theta, & !$acc rho_base,theta_base) ! copyout the data that will be modified in this routine - !$acc exit data copyout(theta_m,rho_zz,ru,rw,rho_p,rtheta_base, & + !$acc exit data copyout(theta_m,ru,rw,rho_p,rtheta_base, & !$acc rtheta_p,exner,exner_base,pressure_p, & !$acc pressure_base) MPAS_ACC_TIMER_STOP('atm_init_coupled_diagnostics [ACC_data_xfer]') @@ -7069,13 +7645,6 @@ subroutine atm_rk_dynamics_substep_finish( state, diag, nVertLevels, dynamics_su call mpas_pool_get_array(state, 'rho_zz', rho_zz_1, 1) call mpas_pool_get_array(state, 'rho_zz', rho_zz_2, 2) - MPAS_ACC_TIMER_START('atm_rk_dynamics_substep_finish [ACC_data_xfer]') - !$acc enter data create(ru_save, u_1, rtheta_p_save, theta_m_1, rho_p_save, rw_save, & - !$acc w_1, rho_zz_1) & - !$acc copyin(ru, u_2, rtheta_p, rho_p, theta_m_2, rho_zz_2, rw, & - !$acc w_2, ruAvg, wwAvg, ruAvg_split, wwAvg_split, rho_zz_old_split) - MPAS_ACC_TIMER_STOP('atm_rk_dynamics_substep_finish [ACC_data_xfer]') - ! Interim fix for the atm_compute_dyn_tend_work subroutine accessing uninitialized values ! in garbage cells of theta_m !$acc kernels @@ -7180,13 +7749,6 @@ subroutine atm_rk_dynamics_substep_finish( state, diag, nVertLevels, dynamics_su !$acc end parallel end if - MPAS_ACC_TIMER_START('atm_rk_dynamics_substep_finish [ACC_data_xfer]') - !$acc exit data copyout(ru_save, u_1, rtheta_p_save, rho_p_save, rw_save, & - !$acc w_1, theta_m_1, rho_zz_1, ruAvg, wwAvg, ruAvg_split, & - !$acc wwAvg_split) & - !$acc delete(ru, u_2, rtheta_p, rho_p, theta_m_2, rho_zz_2, rw, & - !$acc w_2, rho_zz_old_split) - MPAS_ACC_TIMER_STOP('atm_rk_dynamics_substep_finish [ACC_data_xfer]') end subroutine atm_rk_dynamics_substep_finish @@ -7241,9 +7803,6 @@ subroutine atm_zero_gradient_w_bdy_work( w, bdyMaskCell, nearestRelaxationCell, integer :: iCell, k - MPAS_ACC_TIMER_START('atm_zero_gradient_w_bdy_work [ACC_data_xfer]') - !$acc enter data copyin(w) - MPAS_ACC_TIMER_STOP('atm_zero_gradient_w_bdy_work [ACC_data_xfer]') !$acc parallel default(present) !$acc loop gang worker @@ -7259,9 +7818,6 @@ subroutine atm_zero_gradient_w_bdy_work( w, bdyMaskCell, nearestRelaxationCell, end do !$acc end parallel - MPAS_ACC_TIMER_START('atm_zero_gradient_w_bdy_work [ACC_data_xfer]') - !$acc exit data copyout(w) - MPAS_ACC_TIMER_STOP('atm_zero_gradient_w_bdy_work [ACC_data_xfer]') end subroutine atm_zero_gradient_w_bdy_work @@ -7302,11 +7858,6 @@ subroutine atm_bdy_adjust_dynamics_speczone_tend( tend, mesh, config, nVertLevel call mpas_pool_get_array(mesh, 'bdyMaskEdge', bdyMaskEdge) call mpas_pool_get_array(tend, 'rt_diabatic_tend', rt_diabatic_tend) - MPAS_ACC_TIMER_START('atm_bdy_adjust_dynamics_speczone_tend [ACC_data_xfer]') - !$acc enter data copyin(tend_ru,tend_rho,tend_rt,tend_rw, & - !$acc rt_diabatic_tend) - MPAS_ACC_TIMER_STOP('atm_bdy_adjust_dynamics_speczone_tend [ACC_data_xfer]') - !$acc parallel default(present) !$acc loop gang worker do iCell = cellSolveStart, cellSolveEnd @@ -7333,11 +7884,6 @@ subroutine atm_bdy_adjust_dynamics_speczone_tend( tend, mesh, config, nVertLevel end if end do !$acc end parallel - - MPAS_ACC_TIMER_START('atm_bdy_adjust_dynamics_speczone_tend [ACC_data_xfer]') - !$acc exit data copyout(tend_ru,tend_rho,tend_rt, & - !$acc tend_rw,rt_diabatic_tend) - MPAS_ACC_TIMER_STOP('atm_bdy_adjust_dynamics_speczone_tend [ACC_data_xfer]') end subroutine atm_bdy_adjust_dynamics_speczone_tend @@ -7423,10 +7969,7 @@ subroutine atm_bdy_adjust_dynamics_relaxzone_tend( config, tend, state, diag, me divdamp_coef = divdamp_coef_ptr vertexDegree = vertexDegree_ptr - MPAS_ACC_TIMER_START('atm_bdy_adjust_dynamics_relaxzone_tend [ACC_data_xfer]') - !$acc enter data copyin(tend_rho, tend_rt, rho_zz, theta_m, tend_ru, ru) !$acc enter data create(divergence1, divergence2, vorticity1, vorticity2) - MPAS_ACC_TIMER_STOP('atm_bdy_adjust_dynamics_relaxzone_tend [ACC_data_xfer]') ! First, Rayleigh damping terms for ru, rtheta_m and rho_zz !$acc parallel default(present) @@ -7571,11 +8114,7 @@ subroutine atm_bdy_adjust_dynamics_relaxzone_tend( config, tend, state, diag, me end do ! end of loop over edges !$acc end parallel - MPAS_ACC_TIMER_START('atm_bdy_adjust_dynamics_relaxzone_tend [ACC_data_xfer]') - !$acc exit data copyout(tend_rho, tend_rt, tend_ru) - !$acc exit data delete(rho_zz, theta_m, ru, & - !$acc divergence1, divergence2, vorticity1, vorticity2) - MPAS_ACC_TIMER_STOP('atm_bdy_adjust_dynamics_relaxzone_tend [ACC_data_xfer]') + !$acc exit data delete(divergence1, divergence2, vorticity1, vorticity2) end subroutine atm_bdy_adjust_dynamics_relaxzone_tend @@ -7609,10 +8148,6 @@ subroutine atm_bdy_reset_speczone_values( state, diag, mesh, nVertLevels, & call mpas_pool_get_array(state, 'theta_m', theta_m, 2) call mpas_pool_get_array(diag, 'rtheta_p', rtheta_p) call mpas_pool_get_array(diag, 'rtheta_base', rtheta_base) - - MPAS_ACC_TIMER_START('atm_bdy_reset_speczone_values [ACC_data_xfer]') - !$acc enter data copyin(rtheta_base, theta_m, rtheta_p) - MPAS_ACC_TIMER_STOP('atm_bdy_reset_speczone_values [ACC_data_xfer]') !$acc parallel default(present) !$acc loop gang worker @@ -7627,11 +8162,6 @@ subroutine atm_bdy_reset_speczone_values( state, diag, mesh, nVertLevels, & end do !$acc end parallel - MPAS_ACC_TIMER_START('atm_bdy_reset_speczone_values [ACC_data_xfer]') - !$acc exit data copyout(theta_m, rtheta_p) & - !$acc delete(rtheta_base) - MPAS_ACC_TIMER_STOP('atm_bdy_reset_speczone_values [ACC_data_xfer]') - end subroutine atm_bdy_reset_speczone_values !------------------------------------------------------------------------- @@ -7720,10 +8250,7 @@ subroutine atm_bdy_adjust_scalars_work( scalars_new, scalars_driving, dt, dt_rk, integer :: iCell, iEdge, iScalar, i, k, cell1, cell2 !--- - MPAS_ACC_TIMER_START('atm_bdy_adjust_scalars [ACC_data_xfer]') - !$acc enter data create(scalars_tmp) & - !$acc copyin(scalars_new) - MPAS_ACC_TIMER_STOP('atm_bdy_adjust_scalars [ACC_data_xfer]') + !$acc enter data create(scalars_tmp) !$acc parallel default(present) !$acc loop gang worker @@ -7805,10 +8332,7 @@ subroutine atm_bdy_adjust_scalars_work( scalars_new, scalars_driving, dt, dt_rk, end do !$acc end parallel - MPAS_ACC_TIMER_START('atm_bdy_adjust_scalars [ACC_data_xfer]') - !$acc exit data delete(scalars_tmp) & - !$acc copyout(scalars_new) - MPAS_ACC_TIMER_STOP('atm_bdy_adjust_scalars [ACC_data_xfer]') + !$acc exit data delete(scalars_tmp) end subroutine atm_bdy_adjust_scalars_work @@ -7878,10 +8402,6 @@ subroutine atm_bdy_set_scalars_work( scalars_driving, scalars_new, & !--- - MPAS_ACC_TIMER_START('atm_bdy_set_scalars_work [ACC_data_xfer]') - !$acc enter data copyin(scalars_new) - MPAS_ACC_TIMER_STOP('atm_bdy_set_scalars_work [ACC_data_xfer]') - !$acc parallel default(present) !$acc loop gang worker do iCell = cellSolveStart, cellSolveEnd ! threaded over cells @@ -7902,10 +8422,6 @@ subroutine atm_bdy_set_scalars_work( scalars_driving, scalars_new, & end do ! updates now in temp storage !$acc end parallel - - MPAS_ACC_TIMER_START('atm_bdy_set_scalars_work [ACC_data_xfer]') - !$acc exit data copyout(scalars_new) - MPAS_ACC_TIMER_STOP('atm_bdy_set_scalars_work [ACC_data_xfer]') end subroutine atm_bdy_set_scalars_work @@ -7975,16 +8491,6 @@ subroutine summarize_timestep(domain) nVertLevels = nVertLevels_ptr num_scalars = num_scalars_ptr - MPAS_ACC_TIMER_START('summarize_timestep [ACC_data_xfer]') - if (config_print_detailed_minmax_vel) then - !$acc enter data copyin(w,u,v) - else if (config_print_global_minmax_vel) then - !$acc enter data copyin(w,u) - end if - if (config_print_global_minmax_sca) then - !$acc enter data copyin(scalars) - end if - MPAS_ACC_TIMER_STOP('summarize_timestep [ACC_data_xfer]') if (config_print_detailed_minmax_vel) then call mpas_log_write('') @@ -8343,17 +8849,6 @@ subroutine summarize_timestep(domain) end if - MPAS_ACC_TIMER_START('summarize_timestep [ACC_data_xfer]') - if (config_print_detailed_minmax_vel) then - !$acc exit data delete(w,u,v) - else if (config_print_global_minmax_vel) then - !$acc exit data delete(w,u) - end if - if (config_print_global_minmax_sca) then - !$acc exit data delete(scalars) - end if - MPAS_ACC_TIMER_STOP('summarize_timestep [ACC_data_xfer]') - end subroutine summarize_timestep end module atm_time_integration diff --git a/src/core_atmosphere/mpas_atm_core.F b/src/core_atmosphere/mpas_atm_core.F index f7d04a1f0c..bae45193d5 100644 --- a/src/core_atmosphere/mpas_atm_core.F +++ b/src/core_atmosphere/mpas_atm_core.F @@ -18,12 +18,13 @@ module atm_core ! in a named group ! abstract interface - subroutine halo_exchange_routine(domain, halo_group, ierr) + subroutine halo_exchange_routine(domain, halo_group, withGPUAwareMPI, ierr) use mpas_derived_types, only : domain_type type (domain_type), intent(inout) :: domain character(len=*), intent(in) :: halo_group + logical, intent(in), optional :: withGPUAwareMPI integer, intent(out), optional :: ierr end subroutine halo_exchange_routine @@ -43,7 +44,8 @@ function atm_core_init(domain, startTimeStamp) result(ierr) use mpas_atm_dimensions, only : mpas_atm_set_dims use mpas_atm_diagnostics_manager, only : mpas_atm_diag_setup use mpas_atm_threading, only : mpas_atm_threading_init - use atm_time_integration, only : mpas_atm_dynamics_init + use atm_time_integration, only : mpas_atm_dynamics_init, & + mpas_atm_pre_dynamics_h2d, mpas_atm_post_dynamics_d2h use mpas_timer, only : mpas_timer_start, mpas_timer_stop use mpas_attlist, only : mpas_modify_att use mpas_string_utils, only : mpas_string_replace @@ -509,6 +511,7 @@ subroutine atm_mpas_init_block(dminfo, stream_manager, block, mesh, dt) call mpas_pool_get_dimension(block % dimensions, 'edgeSolveThreadStart', edgeSolveThreadStart) call mpas_pool_get_dimension(block % dimensions, 'edgeSolveThreadEnd', edgeSolveThreadEnd) + call mpas_atm_pre_computesolvediag_h2d(block) !$OMP PARALLEL DO do thread=1,nThreads if (.not. config_do_restart .or. (config_do_restart .and. config_do_DAcycling)) then @@ -527,6 +530,7 @@ subroutine atm_mpas_init_block(dminfo, stream_manager, block, mesh, dt) edgeThreadStart(thread), edgeThreadEnd(thread)) end do !$OMP END PARALLEL DO + call mpas_atm_post_computesolvediag_d2h(block) deallocate(ke_vertex) deallocate(ke_edge) @@ -540,13 +544,18 @@ subroutine atm_mpas_init_block(dminfo, stream_manager, block, mesh, dt) call mpas_pool_get_array(diag, 'uReconstructZ', uReconstructZ) call mpas_pool_get_array(diag, 'uReconstructZonal', uReconstructZonal) call mpas_pool_get_array(diag, 'uReconstructMeridional', uReconstructMeridional) + call mpas_reconstruct_2d_h2d(mesh, u, uReconstructX, uReconstructY, uReconstructZ, & + uReconstructZonal, uReconstructMeridional) call mpas_reconstruct(mesh, u, & uReconstructX, & uReconstructY, & uReconstructZ, & uReconstructZonal, & - uReconstructMeridional & + uReconstructMeridional, & + lACC = .true. & ) + call mpas_reconstruct_2d_d2h(mesh, u, uReconstructX, uReconstructY, uReconstructZ, & + uReconstructZonal, uReconstructMeridional) #ifdef DO_PHYSICS !proceed with initialization of physics parameterization if moist_physics is set to true: diff --git a/src/core_atmosphere/mpas_atm_halos.F b/src/core_atmosphere/mpas_atm_halos.F index df02ee30a2..19c4a5be46 100644 --- a/src/core_atmosphere/mpas_atm_halos.F +++ b/src/core_atmosphere/mpas_atm_halos.F @@ -15,12 +15,13 @@ module mpas_atm_halos ! in a named group ! abstract interface - subroutine halo_exchange_routine(domain, halo_group, ierr) + subroutine halo_exchange_routine(domain, halo_group, withGPUAwareMPI, ierr) use mpas_derived_types, only : domain_type type (domain_type), intent(inout) :: domain character(len=*), intent(in) :: halo_group + logical, intent(in), optional :: withGPUAwareMPI integer, intent(out), optional :: ierr end subroutine halo_exchange_routine @@ -61,18 +62,23 @@ subroutine atm_build_halo_groups(domain, ierr) ! Local variables character(len=StrKIND), pointer :: config_halo_exch_method + logical, pointer :: config_gpu_aware_mpi ! ! Determine from the namelist option config_halo_exch_method which halo exchange method to employ ! call mpas_pool_get_config(domain % blocklist % configs, 'config_halo_exch_method', config_halo_exch_method) + call mpas_pool_get_config(domain % blocklist % configs, 'config_gpu_aware_mpi', config_gpu_aware_mpi) if (trim(config_halo_exch_method) == 'mpas_dmpar') then call mpas_log_write('') call mpas_log_write('*** Using ''mpas_dmpar'' routines for exchanging halos') call mpas_log_write('') + if (config_gpu_aware_mpi) then + call mpas_log_write('GPU-aware MPI is not presently supported with config_halo_exch_method = mpas_dmpar',MPAS_LOG_CRIT) + end if ! ! Set up halo exchange groups used during atmosphere core initialization ! diff --git a/src/core_atmosphere/physics/mpas_atmphys_interface.F b/src/core_atmosphere/physics/mpas_atmphys_interface.F index 71e46dfcd2..76a7e4fb6d 100644 --- a/src/core_atmosphere/physics/mpas_atmphys_interface.F +++ b/src/core_atmosphere/physics/mpas_atmphys_interface.F @@ -6,17 +6,29 @@ ! distributed with this code, or at http://mpas-dev.github.com/license.html ! !================================================================================================================= + +#ifdef MPAS_OPENACC +#define MPAS_ACC_TIMER_START(X) call mpas_timer_start(X) +#define MPAS_ACC_TIMER_STOP(X) call mpas_timer_stop(X) +#else +#define MPAS_ACC_TIMER_START(X) +#define MPAS_ACC_TIMER_STOP(X) +#endif + module mpas_atmphys_interface use mpas_kind_types use mpas_pool_routines use mpas_atmphys_constants use mpas_atmphys_vars + use mpas_timer implicit none private public:: allocate_forall_physics, & deallocate_forall_physics, & + update_d2h_pre_microphysics, & + update_h2d_post_microphysics, & MPAS_to_physics, & microphysics_from_MPAS, & microphysics_to_MPAS @@ -545,6 +557,40 @@ subroutine MPAS_to_physics(configs,mesh,state,time_lev,diag,diag_physics,its,ite end subroutine MPAS_to_physics +!================================================================================================================= + subroutine update_d2h_pre_microphysics(configs,state,diag,time_lev) +!================================================================================================================= + +!input variables: + type(mpas_pool_type),intent(in):: configs + type(mpas_pool_type),intent(in):: state + type(mpas_pool_type),intent(in):: diag + + integer:: time_lev + +!local pointers: + real(kind=RKIND),dimension(:,:),pointer :: exner,pressure_b,w + real(kind=RKIND),dimension(:,:),pointer :: rho_zz,theta_m,pressure_p + real(kind=RKIND),dimension(:,:,:),pointer:: scalars + + + MPAS_ACC_TIMER_START('update_d2h_pre_microphysics [ACC_data_xfer]') + call mpas_pool_get_array(diag,'exner' ,exner ) + call mpas_pool_get_array(diag,'pressure_base',pressure_b) + call mpas_pool_get_array(diag,'pressure_p' ,pressure_p) + + call mpas_pool_get_array(state,'rho_zz' ,rho_zz ,time_lev) + call mpas_pool_get_array(state,'theta_m',theta_m,time_lev) + call mpas_pool_get_array(state,'w' ,w ,time_lev) + !$acc update host(exner, pressure_b, pressure_p, rho_zz, theta_m, w) + + call mpas_pool_get_array(state,'scalars',scalars,time_lev) + !$acc update host(scalars) + + MPAS_ACC_TIMER_STOP('update_d2h_pre_microphysics [ACC_data_xfer]') + +end subroutine update_d2h_pre_microphysics + !================================================================================================================= subroutine microphysics_from_MPAS(configs,mesh,state,time_lev,diag,diag_physics,tend_physics,its,ite) !================================================================================================================= @@ -599,7 +645,7 @@ subroutine microphysics_from_MPAS(configs,mesh,state,time_lev,diag,diag_physics, call mpas_pool_get_dimension(state,'index_qv',index_qv) call mpas_pool_get_dimension(state,'index_qc',index_qc) call mpas_pool_get_dimension(state,'index_qr',index_qr) - call mpas_pool_get_array(state,'scalars',scalars,time_lev) + call mpas_pool_get_array(state,'scalars',scalars,time_lev) qv => scalars(index_qv,:,:) qc => scalars(index_qc,:,:) qr => scalars(index_qr,:,:) @@ -1042,6 +1088,48 @@ subroutine microphysics_to_MPAS(configs,mesh,state,time_lev,diag,diag_physics,te end subroutine microphysics_to_MPAS + !================================================================================================================= + subroutine update_h2d_post_microphysics(configs,state,diag,tend,time_lev) +!================================================================================================================= + +!input variables: + type(mpas_pool_type),intent(in):: configs + type(mpas_pool_type),intent(in):: state + type(mpas_pool_type),intent(in):: diag + type(mpas_pool_type),intent(inout):: tend + + + integer:: time_lev + +!local pointers: + real(kind=RKIND),dimension(:,:),pointer :: exner,exner_b,pressure_b,rtheta_p,rtheta_b + real(kind=RKIND),dimension(:,:),pointer :: rho_zz,theta_m,pressure_p + real(kind=RKIND),dimension(:,:,:),pointer:: scalars + real(kind=RKIND),dimension(:,:),pointer :: rt_diabatic_tend + + call mpas_pool_get_array(diag,'exner' ,exner ) + call mpas_pool_get_array(diag,'exner_base' ,exner_b ) + call mpas_pool_get_array(diag,'pressure_base',pressure_b) + call mpas_pool_get_array(diag,'pressure_p' ,pressure_p) + call mpas_pool_get_array(diag,'rtheta_base' ,rtheta_b ) + call mpas_pool_get_array(diag,'rtheta_p' ,rtheta_p ) + + call mpas_pool_get_array(state,'rho_zz' ,rho_zz ,time_lev) + call mpas_pool_get_array(state,'theta_m',theta_m,time_lev) + + call mpas_pool_get_array(state,'scalars',scalars,time_lev) + + call mpas_pool_get_array(tend,'rt_diabatic_tend',rt_diabatic_tend) + + + MPAS_ACC_TIMER_START('update_h2d_post_microphysics [ACC_data_xfer]') + !$acc update device(exner, exner_b, pressure_b, pressure_p, rtheta_b) + !$acc update device(rtheta_p, rho_zz, theta_m, scalars) + !$acc update device(rt_diabatic_tend) + MPAS_ACC_TIMER_STOP('update_h2d_post_microphysics [ACC_data_xfer]') + +end subroutine update_h2d_post_microphysics + !================================================================================================================= end module mpas_atmphys_interface !================================================================================================================= diff --git a/src/core_atmosphere/physics/mpas_atmphys_todynamics.F b/src/core_atmosphere/physics/mpas_atmphys_todynamics.F index 284b072851..71f37eb550 100644 --- a/src/core_atmosphere/physics/mpas_atmphys_todynamics.F +++ b/src/core_atmosphere/physics/mpas_atmphys_todynamics.F @@ -6,6 +6,15 @@ ! distributed with this code, or at http://mpas-dev.github.com/license.html ! !================================================================================================================= + +#ifdef MPAS_OPENACC +#define MPAS_ACC_TIMER_START(X) call mpas_timer_start(X) +#define MPAS_ACC_TIMER_STOP(X) call mpas_timer_stop(X) +#else +#define MPAS_ACC_TIMER_START(X) +#define MPAS_ACC_TIMER_STOP(X) +#endif + module mpas_atmphys_todynamics use mpas_kind_types use mpas_pool_routines @@ -13,10 +22,11 @@ module mpas_atmphys_todynamics use mpas_atm_dimensions use mpas_atmphys_constants, only: R_d,R_v,degrad + use mpas_timer implicit none private - public:: physics_get_tend + public:: physics_get_tend, update_d2h_pre_physics_get_tend, update_h2d_post_physics_get_tend !Interface between the physics parameterizations and the non-hydrostatic dynamical core. @@ -46,12 +56,13 @@ module mpas_atmphys_todynamics ! in a named group ! abstract interface - subroutine halo_exchange_routine(domain, halo_group, ierr) + subroutine halo_exchange_routine(domain, halo_group, withGPUAwareMPI, ierr) use mpas_derived_types, only : domain_type type (domain_type), intent(inout) :: domain character(len=*), intent(in) :: halo_group + logical, intent(in), optional :: withGPUAwareMPI integer, intent(out), optional :: ierr end subroutine halo_exchange_routine @@ -60,6 +71,40 @@ end subroutine halo_exchange_routine contains + +!================================================================================================================= + subroutine update_d2h_pre_physics_get_tend(configs,state,diag,tend) +!================================================================================================================= + +!input variables: + type(mpas_pool_type),intent(in):: configs + type(mpas_pool_type),intent(in):: state + type(mpas_pool_type),intent(in):: diag + type(mpas_pool_type),intent(in):: tend + +!local variables: + real(kind=RKIND),dimension(:,:),pointer:: mass ! time level 2 rho_zz + real(kind=RKIND),dimension(:,:),pointer:: mass_edge ! diag rho_edge + real(kind=RKIND),dimension(:,:),pointer:: theta_m ! time level 1 + real(kind=RKIND),dimension(:,:,:),pointer:: scalars + + real(kind=RKIND),dimension(:,:),pointer:: tend_u_phys + real(kind=RKIND),dimension(:,:,:),pointer:: tend_scalars + + MPAS_ACC_TIMER_START('atm_srk3: physics ACC_data_xfer') + call mpas_pool_get_array(state,'theta_m' ,theta_m,1) + call mpas_pool_get_array(state,'scalars' ,scalars,1) + call mpas_pool_get_array(state,'rho_zz' ,mass,2 ) + call mpas_pool_get_array(diag ,'rho_edge',mass_edge) + call mpas_pool_get_array(diag ,'tend_u_phys',tend_u_phys) + + !$acc update self(theta_m, scalars, mass, mass_edge) + + call mpas_pool_get_array(tend,'scalars_tend',tend_scalars) + !$acc update self(tend_scalars) ! Probably not needed + MPAS_ACC_TIMER_STOP('atm_srk3: physics ACC_data_xfer') + + end subroutine update_d2h_pre_physics_get_tend !================================================================================================================= subroutine physics_get_tend(block,mesh,state,diag,tend,tend_physics,configs,rk_step,dynamics_substep, & @@ -245,6 +290,26 @@ subroutine physics_get_tend(block,mesh,state,diag,tend,tend_physics,configs,rk_s end subroutine physics_get_tend + !================================================================================================================= + subroutine update_h2d_post_physics_get_tend(configs,state,diag,tend) +!================================================================================================================= + +!input variables: + type(mpas_pool_type),intent(in):: configs + type(mpas_pool_type),intent(in):: state + type(mpas_pool_type),intent(in):: diag + type(mpas_pool_type),intent(in):: tend + +!local variables: + real(kind=RKIND),dimension(:,:,:),pointer:: tend_scalars + + MPAS_ACC_TIMER_START('atm_srk3: physics ACC_data_xfer') + call mpas_pool_get_array(tend,'scalars_tend',tend_scalars) + !$acc update device(tend_scalars) + MPAS_ACC_TIMER_STOP('atm_srk3: physics ACC_data_xfer') + + end subroutine update_h2d_post_physics_get_tend + !================================================================================================================= subroutine physics_get_tend_work( & block,mesh,nCells,nEdges,nCellsSolve,nEdgesSolve,rk_step,dynamics_substep, & diff --git a/src/core_init_atmosphere/Registry.xml b/src/core_init_atmosphere/Registry.xml index cf4934a81b..b4f4622246 100644 --- a/src/core_init_atmosphere/Registry.xml +++ b/src/core_init_atmosphere/Registry.xml @@ -468,6 +468,11 @@ + + + + + @@ -573,6 +578,11 @@ + + + + + @@ -1114,6 +1124,21 @@ + + + + + + + + + + @@ -1171,6 +1196,8 @@ + diff --git a/src/core_init_atmosphere/mpas_atm_advection.F b/src/core_init_atmosphere/mpas_atm_advection.F index f4d44c984e..4852b17117 100644 --- a/src/core_init_atmosphere/mpas_atm_advection.F +++ b/src/core_init_atmosphere/mpas_atm_advection.F @@ -11,6 +11,7 @@ module atm_advection use mpas_derived_types use mpas_pool_routines use mpas_constants + use mpas_vector_operations use mpas_abort, only : mpas_dmpar_global_abort use mpas_log, only : mpas_log_write @@ -758,10 +759,14 @@ subroutine atm_initialize_deformation_weights( mesh, nCells, on_a_sphere, sphere real (kind=RKIND), dimension(:,:), pointer :: defc_a, defc_b real (kind=RKIND), dimension(:,:), pointer :: cell_gradient_coef_x, cell_gradient_coef_y + real (kind=RKIND), dimension(:,:), pointer :: deformation_coef_c2, deformation_coef_s2, deformation_coef_cs + real (kind=RKIND), dimension(:,:), pointer :: deformation_coef_c, deformation_coef_s integer, dimension(:,:), pointer :: cellsOnEdge, edgesOnCell, cellsOnCell, verticesOnCell integer, dimension(:), pointer :: nEdgesOnCell real (kind=RKIND), dimension(:), pointer :: xCell, yCell, zCell real (kind=RKIND), dimension(:), pointer :: xVertex, yVertex, zVertex + real (kind=RKIND), dimension(:), pointer :: xEdge, yEdge, zEdge, angleEdge + real (kind=RKIND), dimension(:), pointer :: areaCell real (kind=RKIND), dimension(nCells) :: theta_abs @@ -772,19 +777,32 @@ subroutine atm_initialize_deformation_weights( mesh, nCells, on_a_sphere, sphere integer :: iCell real (kind=RKIND) :: pii real (kind=RKIND), dimension(25) :: xp, yp + real (kind=RKIND) :: xe, ye real (kind=RKIND) :: length_scale integer, dimension(25) :: cell_list - integer :: iv + integer :: iv, ie logical :: do_the_cell real (kind=RKIND) :: area_cell, sint2, cost2, sint_cost, dx, dy + logical, pointer :: is_periodic + real(kind=RKIND), pointer :: x_period, y_period + + + call mpas_pool_get_config(mesh, 'is_periodic', is_periodic) + call mpas_pool_get_config(mesh, 'x_period', x_period) + call mpas_pool_get_config(mesh, 'y_period', y_period) call mpas_pool_get_array(mesh, 'defc_a', defc_a) call mpas_pool_get_array(mesh, 'defc_b', defc_b) call mpas_pool_get_array(mesh, 'cell_gradient_coef_x', cell_gradient_coef_x) call mpas_pool_get_array(mesh, 'cell_gradient_coef_y', cell_gradient_coef_y) + call mpas_pool_get_array(mesh, 'deformation_coef_c2', deformation_coef_c2) + call mpas_pool_get_array(mesh, 'deformation_coef_s2', deformation_coef_s2) + call mpas_pool_get_array(mesh, 'deformation_coef_cs', deformation_coef_cs) + call mpas_pool_get_array(mesh, 'deformation_coef_c', deformation_coef_c) + call mpas_pool_get_array(mesh, 'deformation_coef_s', deformation_coef_s) call mpas_pool_get_array(mesh, 'nEdgesOnCell', nEdgesOnCell) call mpas_pool_get_array(mesh, 'cellsOnEdge', cellsOnEdge) call mpas_pool_get_array(mesh, 'edgesOnCell', edgesOnCell) @@ -796,9 +814,19 @@ subroutine atm_initialize_deformation_weights( mesh, nCells, on_a_sphere, sphere call mpas_pool_get_array(mesh, 'xVertex', xVertex) call mpas_pool_get_array(mesh, 'yVertex', yVertex) call mpas_pool_get_array(mesh, 'zVertex', zVertex) + call mpas_pool_get_array(mesh, 'xEdge', xEdge) + call mpas_pool_get_array(mesh, 'yEdge', yEdge) + call mpas_pool_get_array(mesh, 'zEdge', zEdge) + call mpas_pool_get_array(mesh, 'angleEdge', angleEdge) + call mpas_pool_get_array(mesh, 'areaCell', areaCell) defc_a(:,:) = 0. defc_b(:,:) = 0. + deformation_coef_c2(:,:) = 0. + deformation_coef_s2(:,:) = 0. + deformation_coef_cs(:,:) = 0. + deformation_coef_c(:,:) = 0. + deformation_coef_s(:,:) = 0. cell_gradient_coef_x(:,:) = 0. cell_gradient_coef_y(:,:) = 0. @@ -888,21 +916,58 @@ subroutine atm_initialize_deformation_weights( mesh, nCells, on_a_sphere, sphere else ! On an x-y plane - theta_abs(iCell) = 0.0 + do i=1,n-1 + iv = verticesOnCell(i,iCell) + xp(i) = mpas_fix_periodicity(xVertex(iv),xCell(iCell),x_period) - xCell(iCell) + yp(i) = mpas_fix_periodicity(yVertex(iv),yCell(iCell),y_period) - yCell(iCell) + end do - xp(1) = xCell(iCell) - yp(1) = yCell(iCell) + ! if(iCell.lt.11) then + ! call mpas_log_write(' setting defc coefs, cell $i', intArgs=(/iCell/)) + ! do i=1,n-1 + ! iv = verticesOnCell(i,iCell) + ! call mpas_log_write(' xp,yp,xvc,yvc, $r $r $r $r', realArgs=(/xp(i),yp(i),xVertex(iv)-xCell(iCell),yVertex(iv)-yCell(iCell)/)) + ! end do + ! end if - do i=2,n - iv = verticesOnCell(i-1,iCell) - xp(i) = xVertex(iv) - yp(i) = yVertex(iv) + do i=1,n-1 + ie = edgesOnCell(i,iCell) + xe = mpas_fix_periodicity(xEdge(ie),xCell(iCell),x_period) - xCell(iCell) + ye = mpas_fix_periodicity(yEdge(ie),yCell(iCell),y_period) - yCell(iCell) + thetat(i) = atan2(ye,xe) end do + ! if(iCell .lt. 11) then + ! call mpas_log_write(' edge angles, plane calc, cell $i', intArgs=(/iCell/)) + ! do i=1,n-1 + ! call mpas_log_write(' edge angle $r', realArgs=(/thetat(i)*180./3.1415926/)) + ! end do + ! end if + + theta_abs(iCell) = thetat(1) + end if ! (1) compute cell area on the tangent plane used in the integrals ! (2) compute angle of cell edge normal vector. here we are repurposing thetat + thetat(1) = theta_abs(iCell) + + do i=2,n-1 + ip1 = i+1 + if (ip1 == n) ip1 = 1 + thetat(i) = plane_angle( 0.0_RKIND, 0.0_RKIND, 0.0_RKIND, & + xp(i)-xp(i-1), yp(i)-yp(i-1), 0.0_RKIND, & + xp(ip1)-xp(i), yp(ip1)-yp(i), 0.0_RKIND, & + 0.0_RKIND, 0.0_RKIND, 1.0_RKIND) + thetat(i) = thetat(i) + thetat(i-1) + end do + + ! if(iCell .lt. 11) then + ! call mpas_log_write(' edge angles, generic calc, cell $i', intArgs=(/iCell/)) + ! do i=1,n-1 + ! call mpas_log_write(' edge angle $r', realArgs=(/thetat(i)*180./3.1415926/)) + ! end do + ! end if area_cell = 0. do i=1,n-1 @@ -927,15 +992,241 @@ subroutine atm_initialize_deformation_weights( mesh, nCells, on_a_sphere, sphere defc_b(i,iCell) = dl*2.*sint_cost/area_cell cell_gradient_coef_x(i,iCell) = dl*cos(thetat(i))/area_cell cell_gradient_coef_y(i,iCell) = dl*sin(thetat(i))/area_cell + deformation_coef_c2(i,iCell) = dl*cost2/area_cell + deformation_coef_s2(i,iCell) = dl*sint2/area_cell + deformation_coef_cs(i,iCell) = dl*sint_cost/area_cell + deformation_coef_c(i,iCell) = dl*cos(thetat(i))/area_cell + deformation_coef_s(i,iCell) = dl*sin(thetat(i))/area_cell if (cellsOnEdge(1,EdgesOnCell(i,iCell)) /= iCell) then defc_a(i,iCell) = - defc_a(i,iCell) defc_b(i,iCell) = - defc_b(i,iCell) + deformation_coef_c2(i,iCell) = - deformation_coef_c2(i,iCell) + deformation_coef_s2(i,iCell) = - deformation_coef_s2(i,iCell) + deformation_coef_cs(i,iCell) = - deformation_coef_cs(i,iCell) +! deformation_coef_c(i,iCell) = - deformation_coef_c(i,iCell) +! deformation_coef_s(i,iCell) = - deformation_coef_s(i,iCell) end if end do end do + call atm_init_test_coefs( deformation_coef_c2, deformation_coef_s2, & + deformation_coef_cs, deformation_coef_c, & + deformation_coef_s, & + is_periodic, on_a_sphere, & + x_period, y_period, & + xEdge, yEdge, zEdge, & + xCell, yCell, zCell, nCells, & + angleEdge, nEdgesOnCell, edgesOnCell ) + + end subroutine atm_initialize_deformation_weights - end module atm_advection + + subroutine atm_init_test_coefs( deformation_coef_c2, deformation_coef_s2, & + deformation_coef_cs, deformation_coef_c, & + deformation_coef_s, & + is_periodic, on_a_sphere, & + x_period, y_period, & + xEdge, yEdge, zEdge, & + xCell, yCell, zCell, nCells, & + angleEdge, nEdgesOnCell, edgesOnCell ) + + implicit none + + logical :: is_periodic, on_a_sphere + integer :: nCells + integer, dimension(:) :: nEdgesOnCell + real (kind=RKIND) :: x_period, y_period + real (kind=RKIND), dimension(:,:) :: deformation_coef_c2, deformation_coef_s2 + real (kind=RKIND), dimension(:,:) :: deformation_coef_cs + real (kind=RKIND), dimension(:,:) :: deformation_coef_c, deformation_coef_s + integer, dimension(:,:) :: edgesOnCell + real (kind=RKIND), dimension(:) :: angleEdge, xEdge, yEdge, zEdge + real (kind=RKIND), dimension(:) :: xCell, yCell, zCell + + ! local variables + + integer :: iCell, iEdge, ie + real (kind=RKIND) :: cos_edge, sin_edge, ux, uy, vx, vy, wx, wy + real (kind=RKIND) :: xc, yc, xe, ye + real (kind=RKIND) :: angle_e, ue, ve, we, e_int + real (kind=RKIND) :: dudx, dudy, dvdx, dvdy, dwdx, dwdy + real (kind=RKIND) :: dudx_c, dudy_c, dvdx_c, dvdy_c, dwdx_c, dwdy_c + + real (kind=RKIND) :: dudx_err_max, dudy_err_max, dvdx_err_max, dvdy_err_max, dwdx_err_max, dwdy_err_max + real (kind=RKIND) :: dudx_err_tot, dudy_err_tot, dvdx_err_tot, dvdy_err_tot, dwdx_err_tot, dwdy_err_tot + real (kind=RKIND) :: dudx_max, dudy_max, dvdx_max, dvdy_max, dwdx_max, dwdy_max + + real (kind=RKIND) :: ang + real (kind=RKIND), parameter :: x_vel= 1.0, y_vel=1.0, w_vel=1.0 + real (kind=RKIND) :: u_edge, v_edge, w_edge, x, y, angle, xl, yl + real (kind=RKIND) :: dudx_cell, dudy_cell, dvdx_cell, dvdy_cell, dwdx_cell, dwdy_cell + + ! Test tunction definitions + ! + ! here are the velocity field functions and their derivatives. + ! First a simple test: U = x_vel*(-x+y), V = y_vel * (-x+y), W = w_vel*(-x+y) + + u_edge(x,y,ang,xl,yl) = (x_vel*(x+y)) * cos(ang) + (y_vel * (x+y) * sin(ang)) + v_edge(x,y,ang,xl,yl) = -(x_vel*(x+y)) * sin(ang) + (y_vel * (x+y) * cos(ang)) + w_edge(x,y,xl,yl) = w_vel * (x+y) + + dudx_cell(x,y,xl,yl) = x_vel + dudy_cell(x,y,xl,yl) = x_vel + dvdx_cell(x,y,xl,yl) = y_vel + dvdy_cell(x,y,xl,yl) = y_vel + dwdx_cell(x,y,xl,yl) = w_vel + dwdy_cell(x,y,xl,yl) = w_vel + + ! ----------------- + + if ( (.not. on_a_sphere) .and. (is_periodic) ) then ! test is for doubly-periodic Cartesian plane only + + dudx_err_max = 0. + dudy_err_max = 0. + dvdx_err_max = 0. + dvdy_err_max = 0. + dwdx_err_max = 0. + dwdy_err_max = 0. + + dudx_err_tot = 0. + dudy_err_tot = 0. + dvdx_err_tot = 0. + dvdy_err_tot = 0. + dwdx_err_tot = 0. + dwdy_err_tot = 0. + + dudx_max = 0. + dudy_max = 0. + dvdx_max = 0. + dvdy_max = 0. + dwdx_max = 0. + dwdy_max = 0. + + do iCell = 1, nCells + + dudx = 0. + dudy = 0. + dvdx = 0. + dvdy = 0. + dwdx = 0. + dwdy = 0. + + xc = xCell(iCell) + yc = yCell(iCell) + + dudx_c = dudx_cell(xc,yc,x_period,y_period) + dudy_c = dudy_cell(xc,yc,x_period,y_period) + dvdx_c = dvdx_cell(xc,yc,x_period,y_period) + dvdy_c = dvdy_cell(xc,yc,x_period,y_period) + dwdx_c = dwdx_cell(xc,yc,x_period,y_period) + dwdy_c = dwdy_cell(xc,yc,x_period,y_period) + + do iEdge = 1, nEdgesOnCell(iCell) + + ie = edgesOnCell(iEdge,iCell) + angle_e = angleEdge(ie) + xe = xEdge(ie) + ye = yEdge(ie) + + xe = mpas_fix_periodicity(xe,xc,x_period) + ye = mpas_fix_periodicity(ye,yc,y_period) + + ue = u_edge(xe,ye,angle_e,x_period,y_period) + ve = v_edge(xe,ye,angle_e,x_period,y_period) + we = w_edge(xe,ye,x_period,y_period) + + dudx = dudx + deformation_coef_c2(iEdge,iCell)*ue & + - deformation_coef_cs(iEdge,iCell)*ve + dudy = dudy + deformation_coef_cs(iEdge,iCell)*ue & + - deformation_coef_s2(iEdge,iCell)*ve + dvdx = dvdx + deformation_coef_cs(iEdge,iCell)*ue & + + deformation_coef_c2(iEdge,iCell)*ve + dvdy = dvdy + deformation_coef_s2(iEdge,iCell)*ue & + + deformation_coef_cs(iEdge,iCell)*ve + + dwdx = dwdx + deformation_coef_c(iEdge,iCell)*we + dwdy = dwdy + deformation_coef_s(iEdge,iCell)*we + + end do + + ! call mpas_log_write(' u_x, u_y, $r, $r ', realArgs=(/dudx, dudy/)) + ! call mpas_log_write(' v_x, v_y, $r, $r ', realArgs=(/dvdx, dvdy/)) + ! call mpas_log_write(' w_x, w_y, $r, $r ', realArgs=(/dwdx, dwdy/)) + + ! check result for cell + + e_int = abs(dudx_c - dudx) + dudx_err_tot = dudx_err_tot + e_int + dudx_err_max = max(dudx_err_max, e_int) + + e_int = abs(dudy_c - dudy) + dudy_err_tot = dudy_err_tot + e_int + dudy_err_max = max(dudy_err_max, e_int) + + e_int = abs(dvdx_c - dvdx) + dvdx_err_tot = dvdx_err_tot + e_int + dvdx_err_max = max(dvdx_err_max, e_int) + + e_int = abs(dvdy_c - dvdy) + dvdy_err_tot = dvdy_err_tot + e_int + dvdy_err_max = max(dvdy_err_max, e_int) + + e_int = abs(dwdx_c - dwdx) + dwdx_err_tot = dwdx_err_tot + e_int + dwdx_err_max = max(dwdx_err_max, e_int) + + e_int = abs(dwdy_c - dwdy) + dwdy_err_tot = dwdy_err_tot + e_int + dwdy_err_max = max(dwdy_err_max, e_int) + + dudx_max = max(dudx_max, abs(dudx_c)) + dudy_max = max(dudy_max, abs(dudy_c)) + dvdx_max = max(dvdx_max, abs(dvdx_c)) + dvdy_max = max(dvdy_max, abs(dvdy_c)) + dwdx_max = max(dwdx_max, abs(dwdx_c)) + dwdy_max = max(dwdy_max, abs(dwdy_c)) + + end do + + ! scale errors + + dudx_err_max = dudx_err_max/dudx_max + dudy_err_max = dudy_err_max/dudy_max + dvdx_err_max = dvdx_err_max/dvdx_max + dvdy_err_max = dvdy_err_max/dvdy_max + dwdx_err_max = dwdx_err_max/dwdx_max + dwdy_err_max = dwdy_err_max/dwdy_max + + dudx_err_tot = dudx_err_tot/dudx_max/real(nCells) + dudy_err_tot = dudy_err_tot/dudy_max/real(nCells) + dvdx_err_tot = dvdx_err_tot/dvdx_max/real(nCells) + dvdy_err_tot = dvdy_err_tot/dvdy_max/real(nCells) + dwdx_err_tot = dwdx_err_tot/dwdx_max/real(nCells) + dwdy_err_tot = dwdy_err_tot/dwdy_max/real(nCells) + + ! output + + call mpas_log_write(' ') + call mpas_log_write(' deformation coefficients check ') + call mpas_log_write(' dudx check, max abs(dudx), max and avg error $r, $r, $r', & + realArgs=(/dudx_max, dudx_err_max, dudx_err_tot/)) + call mpas_log_write(' dudy check, max abs(dudy), max and avg error $r, $r, $r', & + realArgs=(/dudy_max, dudy_err_max, dudy_err_tot/)) + call mpas_log_write(' dvdx check, max abs(dvdx), max and avg error $r, $r, $r', & + realArgs=(/dvdx_max, dvdx_err_max, dvdx_err_tot/)) + call mpas_log_write(' dvdy check, max abs(dvdy), max and avg error $r, $r, $r', & + realArgs=(/dvdy_max, dvdy_err_max, dvdy_err_tot/)) + call mpas_log_write(' dwdx check, max abs(dwdx), max and avg error $r, $r, $r', & + realArgs=(/dwdx_max, dwdx_err_max, dwdx_err_tot/)) + call mpas_log_write(' dwdy check, max abs(dwdy), max and avg error $r, $r, $r', & + realArgs=(/dwdy_max, dwdy_err_max, dwdy_err_tot/)) + call mpas_log_write(' ') + + end if + + end subroutine atm_init_test_coefs + +end module atm_advection diff --git a/src/core_init_atmosphere/mpas_init_atm_cases.F b/src/core_init_atmosphere/mpas_init_atm_cases.F index 673ebfc525..9c372cb163 100644 --- a/src/core_init_atmosphere/mpas_init_atm_cases.F +++ b/src/core_init_atmosphere/mpas_init_atm_cases.F @@ -367,6 +367,27 @@ subroutine init_atm_setup_case(domain, stream_manager) ! call mpas_stream_mgr_reset_alarms(stream_manager, streamID='lbc', direction=MPAS_STREAM_OUTPUT, ierr=ierr) + else if (config_init_case == 10) then + + call mpas_log_write(' les test case ') + block_ptr => domain % blocklist + do while (associated(block_ptr)) + + call mpas_pool_get_dimension(block_ptr % dimensions, 'nCells', nCells) + call mpas_pool_get_dimension(block_ptr % dimensions, 'nVertLevels', nVertLevels) + + call mpas_pool_get_subpool(block_ptr % structs, 'mesh', mesh) + call mpas_pool_get_subpool(block_ptr % structs, 'fg', fg) + call mpas_pool_get_subpool(block_ptr % structs, 'state', state) + call mpas_pool_get_subpool(block_ptr % structs, 'diag', diag) + + call mpas_log_write(' calling test case setup ') + call init_atm_case_les(domain % dminfo, mesh, fg, nCells, nVertLevels, state, diag, config_init_case, block_ptr % configs) + call decouple_variables(mesh, nCells, nVertLevels, state, diag) + call mpas_log_write(' returned from test case setup ') + block_ptr => block_ptr % next + end do + else if (config_init_case == 13 ) then call mpas_log_write(' CAM-MPAS grid ') @@ -399,7 +420,7 @@ subroutine init_atm_setup_case(domain, stream_manager) else call mpas_log_write(' ***********************************************************', messageType=MPAS_LOG_ERR) - call mpas_log_write(' Only test cases 1 through 9 and 13 are currently supported.', messageType=MPAS_LOG_ERR) + call mpas_log_write(' Only test cases 1 through 10 and 13 are currently supported.', messageType=MPAS_LOG_ERR) call mpas_log_write(' ***********************************************************', messageType=MPAS_LOG_CRIT) end if @@ -1408,10 +1429,11 @@ subroutine init_atm_case_squall_line(dminfo, mesh, nCells, nVertLevels, state, d integer, dimension(:), pointer :: nEdgesOnEdge integer, dimension(:,:), pointer :: edgesOnEdge, cellsOnEdge real (kind=RKIND), dimension(:,:), pointer :: weightsOnEdge - + real (kind=RKIND), pointer :: x_period, y_period integer :: iCell, iCell1, iCell2 , iEdge, ivtx, i, k, nz, nz1, itr, cell1, cell2 integer, pointer :: nEdges, nVertices, maxEdges, nCellsSolve integer, pointer :: index_qv + integer, pointer :: index_tke real (kind=RKIND), dimension(nVertLevels + 1 ) :: zc, zw, ah real (kind=RKIND), dimension(nVertLevels ) :: zu, dzw, rdzwp, rdzwm @@ -1438,6 +1460,7 @@ subroutine init_atm_case_squall_line(dminfo, mesh, nCells, nVertLevels, state, d real (kind=RKIND), pointer :: nominalMinDc logical, pointer :: on_a_sphere real (kind=RKIND), pointer :: sphere_radius + real (kind=RKIND), pointer :: config_ztop real (kind=RKIND), dimension(:,:), pointer :: t_init, w, rw, v, rho, theta real (kind=RKIND), dimension(:), pointer :: u_init, qv_init, angleEdge, fEdge, fVertex @@ -1459,10 +1482,13 @@ subroutine init_atm_case_squall_line(dminfo, mesh, nCells, nVertLevels, state, d call mpas_pool_get_array(mesh, 'areaTriangle', areaTriangle) call mpas_pool_get_array(mesh, 'kiteAreasOnVertex', kiteAreasOnVertex) call mpas_pool_get_array(mesh, 'nominalMinDc', nominalMinDc) - + + call mpas_pool_get_config(mesh, 'x_period', x_period) + call mpas_pool_get_config(mesh, 'y_period', y_period) call mpas_pool_get_config(mesh, 'on_a_sphere', on_a_sphere) call mpas_pool_get_config(mesh, 'sphere_radius', sphere_radius) call mpas_pool_get_config(configs, 'config_interface_projection', config_interface_projection) + call mpas_pool_get_config(configs, 'config_ztop', config_ztop) ! ! Scale all distances @@ -1485,6 +1511,8 @@ subroutine init_atm_case_squall_line(dminfo, mesh, nCells, nVertLevels, state, d areaTriangle(:) = areaTriangle(:) * a_scale**2.0 kiteAreasOnVertex(:,:) = kiteAreasOnVertex(:,:) * a_scale**2.0 nominalMinDc = nominalMinDc * a_scale + x_period = x_period * a_scale + y_period = y_period * a_scale call mpas_pool_get_array(mesh, 'weightsOnEdge', weightsOnEdge) call mpas_pool_get_array(mesh, 'nEdgesOnEdge', nEdgesOnEdge) @@ -1545,6 +1573,7 @@ subroutine init_atm_case_squall_line(dminfo, mesh, nCells, nVertLevels, state, d call mpas_pool_get_array(state, 'scalars', scalars) call mpas_pool_get_dimension(state, 'index_qv', index_qv) + call mpas_pool_get_dimension(state, 'index_tke', index_tke) scalars(:,:,:) = 0. @@ -1572,7 +1601,8 @@ subroutine init_atm_case_squall_line(dminfo, mesh, nCells, nVertLevels, state, d ! metrics for hybrid coordinate and vertical stretching str = 1.0 - zt = 20000. +! zt = 20000. + zt = config_ztop dz = zt/float(nz1) ! write(0,*) ' dz = ',dz @@ -1859,6 +1889,18 @@ subroutine init_atm_case_squall_line(dminfo, mesh, nCells, nVertLevels, state, d t (k,i) = thi(k,i)*(1.+1.61*scalars(index_qv,k,i)) end do end do +! +! initial seed for tke +! +! call mpas_log_write('index_qv $i ', intArgs=(/index_qv/)) +! call mpas_log_write('index_tke $i ', intArgs=(/index_tke/)) + scalars(index_tke,:,:) = 0. + + do k = 1,nz1 + do i=1,nCells + scalars(index_tke,k,i) = 0.1 + end do + end do do itr=1,30 @@ -2013,6 +2055,7 @@ subroutine init_atm_case_mtn_wave(dminfo, mesh, nCells, nVertLevels, state, diag integer, dimension(:), pointer :: nEdgesOnEdge, nEdgesOnCell integer, dimension(:,:), pointer :: edgesOnEdge, cellsOnEdge, cellsOnCell real (kind=RKIND), dimension(:,:), pointer :: weightsOnEdge + real (kind=RKIND), pointer :: x_period, y_period integer :: iCell, iCell1, iCell2 , iEdge, ivtx, i, k, nz, itr, cell1, cell2, nz1 integer, pointer :: nEdges, maxEdges, nCellsSolve, nVertices @@ -2071,6 +2114,8 @@ subroutine init_atm_case_mtn_wave(dminfo, mesh, nCells, nVertLevels, state, diag call mpas_pool_get_array(mesh, 'kiteAreasOnVertex', kiteAreasOnVertex) call mpas_pool_get_array(mesh, 'nominalMinDc', nominalMinDc) + call mpas_pool_get_config(mesh, 'x_period', x_period) + call mpas_pool_get_config(mesh, 'y_period', y_period) call mpas_pool_get_config(mesh, 'on_a_sphere', on_a_sphere) call mpas_pool_get_config(mesh, 'sphere_radius', sphere_radius) @@ -2112,6 +2157,8 @@ subroutine init_atm_case_mtn_wave(dminfo, mesh, nCells, nVertLevels, state, diag areaTriangle(:) = areaTriangle(:) * a_scale**2.0 kiteAreasOnVertex(:,:) = kiteAreasOnVertex(:,:) * a_scale**2.0 nominalMinDc = nominalMinDc * a_scale + x_period = x_period * a_scale + y_period = y_period * a_scale call mpas_pool_get_dimension(mesh, 'nEdges', nEdges) @@ -6184,6 +6231,692 @@ subroutine init_atm_case_lbc(timestamp, block, mesh, nCells, nEdges, nVertLevels end subroutine init_atm_case_lbc +!--------------------- + + subroutine init_atm_case_les(dminfo, mesh, fg, nCells, nVertLevels, state, diag, test_case, configs) + !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! + ! Large Eddy Simulation (les) test case setup + !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! + + implicit none + + type (dm_info), intent(in) :: dminfo + type (mpas_pool_type), intent(inout) :: mesh + type (mpas_pool_type), intent(inout) :: fg + integer, intent(in) :: nCells + integer, intent(in) :: nVertLevels + type (mpas_pool_type), intent(inout) :: state + type (mpas_pool_type), intent(inout) :: diag + type (mpas_pool_type), intent(inout):: configs + + integer, intent(in) :: test_case + + real (kind=RKIND), dimension(:), pointer :: rdzw, dzu, rdzu, fzm, fzp + real (kind=RKIND), dimension(:,:), pointer :: zgrid, zxu, zz, hx, cqw + real (kind=RKIND), dimension(:,:), pointer :: ppb, pb, rho_zz, rb, rr, tb, rtb, p, pp, dss, t, rt, u, ru + real (kind=RKIND), dimension(:,:,:), pointer :: scalars + real (kind=RKIND), dimension(:,:,:), pointer :: zb, zb3 + + real (kind=RKIND), dimension(:), pointer :: xland + integer, dimension(:), pointer :: landmask, lu_index + + !This is temporary variable here. It just need when calculate tangential velocity v. + integer :: eoe + integer, dimension(:), pointer :: nEdgesOnEdge + integer, dimension(:,:), pointer :: edgesOnEdge, cellsOnEdge + real (kind=RKIND), dimension(:,:), pointer :: weightsOnEdge + real (kind=RKIND), pointer :: x_period, y_period + + integer :: iCell, iCell1, iCell2 , iEdge, ivtx, i, k, nz, nz1, itr, cell1, cell2 + integer, pointer :: nEdges, nVertices, maxEdges, nCellsSolve + integer, pointer :: index_qv + integer, pointer :: index_tke + + real (kind=RKIND), dimension(nVertLevels + 1 ) :: zc, zw, ah + real (kind=RKIND), dimension(nVertLevels ) :: zu, dzw, rdzwp, rdzwm + + real (kind=RKIND), dimension(nVertLevels, nCells) :: thi, tbi, cqwb + + real (kind=RKIND) :: xnutr + real (kind=RKIND) :: ztemp, zd, zt, dz, str + + real (kind=RKIND), dimension(nVertLevels ) :: qvb, qvp, zg + real (kind=RKIND), dimension(nVertLevels ) :: t_init_1d + + real (kind=RKIND) :: cof1, cof2 + real (kind=RKIND), pointer :: cf1, cf2, cf3 + real (kind=RKIND) :: pitop, pibtop, ptopb, ptop, rcp, rcv, p0 + real (kind=RKIND) :: a_scale + + real (kind=RKIND), dimension(:), pointer :: xCell, yCell, zCell + real (kind=RKIND), dimension(:), pointer :: xEdge, yEdge, zEdge + real (kind=RKIND), dimension(:), pointer :: xVertex, yVertex, zVertex + real (kind=RKIND), dimension(:), pointer :: dcEdge, dvEdge, areaCell, areaTriangle + real (kind=RKIND), dimension(:,:), pointer :: kiteAreasOnVertex + logical, pointer :: on_a_sphere + real (kind=RKIND), pointer :: sphere_radius + real (kind=RKIND), pointer :: config_ztop + + real (kind=RKIND), dimension(:,:), pointer :: t_init, w, rw, v, rho, theta + real (kind=RKIND), dimension(:), pointer :: u_init, v_init, qv_init, angleEdge, fEdge, fVertex + real (kind=RKIND) :: u_vel, v_vel, randx + + call mpas_pool_get_array(mesh, 'xCell', xCell) + call mpas_pool_get_array(mesh, 'yCell', yCell) + call mpas_pool_get_array(mesh, 'zCell', zCell) + call mpas_pool_get_array(mesh, 'xEdge', xEdge) + call mpas_pool_get_array(mesh, 'yEdge', yEdge) + call mpas_pool_get_array(mesh, 'zEdge', zEdge) + call mpas_pool_get_array(mesh, 'xVertex', xVertex) + call mpas_pool_get_array(mesh, 'yVertex', yVertex) + call mpas_pool_get_array(mesh, 'zVertex', zVertex) + call mpas_pool_get_array(mesh, 'dvEdge', dvEdge) + call mpas_pool_get_array(mesh, 'dcEdge', dcEdge) + call mpas_pool_get_array(mesh, 'areaCell', areaCell) + call mpas_pool_get_array(mesh, 'areaTriangle', areaTriangle) + call mpas_pool_get_array(mesh, 'kiteAreasOnVertex', kiteAreasOnVertex) + + call mpas_pool_get_config(mesh, 'x_period', x_period) + call mpas_pool_get_config(mesh, 'y_period', y_period) + call mpas_pool_get_config(mesh, 'on_a_sphere', on_a_sphere) + call mpas_pool_get_config(mesh, 'sphere_radius', sphere_radius) + call mpas_pool_get_config(configs, 'config_ztop', config_ztop) + + ! + ! Scale all distances + ! + + a_scale = 1.0 + + xCell(:) = xCell(:) * a_scale + yCell(:) = yCell(:) * a_scale + zCell(:) = zCell(:) * a_scale + xVertex(:) = xVertex(:) * a_scale + yVertex(:) = yVertex(:) * a_scale + zVertex(:) = zVertex(:) * a_scale + xEdge(:) = xEdge(:) * a_scale + yEdge(:) = yEdge(:) * a_scale + zEdge(:) = zEdge(:) * a_scale + dvEdge(:) = dvEdge(:) * a_scale + dcEdge(:) = dcEdge(:) * a_scale + areaCell(:) = areaCell(:) * a_scale**2.0 + areaTriangle(:) = areaTriangle(:) * a_scale**2.0 + kiteAreasOnVertex(:,:) = kiteAreasOnVertex(:,:) * a_scale**2.0 + x_period = x_period * a_scale + y_period = y_period * a_scale + + call mpas_pool_get_array(mesh, 'weightsOnEdge', weightsOnEdge) + call mpas_pool_get_array(mesh, 'nEdgesOnEdge', nEdgesOnEdge) + call mpas_pool_get_array(mesh, 'edgesOnEdge', edgesOnEdge) + call mpas_pool_get_array(mesh, 'cellsOnEdge', cellsOnEdge) + + call mpas_pool_get_dimension(mesh, 'nEdges', nEdges) + call mpas_pool_get_dimension(mesh, 'nVertices', nVertices) + call mpas_pool_get_dimension(mesh, 'nCellsSolve', nCellsSolve) + call mpas_pool_get_dimension(mesh, 'maxEdges', maxEdges) + nz1 = nVertLevels + nz = nz1 + 1 + + call mpas_pool_get_array(mesh, 'zgrid', zgrid) + call mpas_pool_get_array(mesh, 'zb', zb) + call mpas_pool_get_array(mesh, 'zb3', zb3) + call mpas_pool_get_array(mesh, 'rdzw', rdzw) + call mpas_pool_get_array(mesh, 'dzu', dzu) + call mpas_pool_get_array(mesh, 'rdzu', rdzu) + call mpas_pool_get_array(mesh, 'fzm', fzm) + call mpas_pool_get_array(mesh, 'fzp', fzp) + call mpas_pool_get_array(mesh, 'zxu', zxu) + call mpas_pool_get_array(mesh, 'zz', zz) + call mpas_pool_get_array(mesh, 'hx', hx) + call mpas_pool_get_array(mesh, 'dss', dss) + call mpas_pool_get_array(mesh, 't_init', t_init) + call mpas_pool_get_array(mesh, 'u_init', u_init) + call mpas_pool_get_array(mesh, 'v_init', v_init) + call mpas_pool_get_array(mesh, 'qv_init', qv_init) + call mpas_pool_get_array(mesh, 'angleEdge', angleEdge) + call mpas_pool_get_array(mesh, 'fEdge', fEdge) + call mpas_pool_get_array(mesh, 'fVertex', fVertex) + + call mpas_pool_get_array(mesh, 'cf1', cf1) + call mpas_pool_get_array(mesh, 'cf2', cf2) + call mpas_pool_get_array(mesh, 'cf3', cf3) + + call mpas_pool_get_array(diag, 'pressure_base', ppb) + call mpas_pool_get_array(diag, 'exner_base', pb) + call mpas_pool_get_array(diag, 'rho_base', rb) + call mpas_pool_get_array(diag, 'theta_base', tb) + call mpas_pool_get_array(diag, 'rtheta_base', rtb) + call mpas_pool_get_array(diag, 'exner', p) + call mpas_pool_get_array(diag, 'cqw', cqw) + + call mpas_pool_get_array(diag, 'pressure_p', pp) + call mpas_pool_get_array(diag, 'rho_p', rr) + call mpas_pool_get_array(diag, 'rtheta_p', rt) + call mpas_pool_get_array(diag, 'ru', ru) + call mpas_pool_get_array(diag, 'rw', rw) + call mpas_pool_get_array(diag, 'v', v) + call mpas_pool_get_array(diag, 'rho', rho) + call mpas_pool_get_array(diag, 'theta', theta) + + call mpas_pool_get_array(state, 'rho_zz', rho_zz) + call mpas_pool_get_array(state, 'theta_m', t) + call mpas_pool_get_array(state, 'u', u) + call mpas_pool_get_array(state, 'w', w) + call mpas_pool_get_array(state, 'scalars', scalars) + + call mpas_pool_get_dimension(state, 'index_qv', index_qv) + call mpas_pool_get_dimension(state, 'index_tke', index_tke) + + call mpas_pool_get_array(fg, 'xland', xland) + call mpas_pool_get_array(mesh, 'landmask', landmask) + call mpas_pool_get_array(mesh, 'lu_index', lu_index) + + scalars(:,:,:) = 0. + + call atm_initialize_advection_rk(mesh, nCells, nEdges, maxEdges, on_a_sphere, sphere_radius ) + call atm_initialize_deformation_weights(mesh, nCells, on_a_sphere, sphere_radius) + + xnutr = 0. + zd = 12000. + + p0 = 1.e+05 + rcp = rgas/cp + rcv = rgas/(cp-rgas) + + call mpas_log_write(' point 1 in test case setup ') + +! We may pass in an hx(:,:) that has been precomputed elsewhere. +! For now it is independent of k + + do iCell=1,nCells + do k=1,nz + hx(k,iCell) = 0. ! les on a flat Cartesian plane + end do + end do + +! write(0,*) ' dz = ',dz + call mpas_log_write(' hx computation complete ') + + ! metrics for hybrid coordinate and vertical stretching + + str = 1.0 ! no stretching in les case: constant dz + ! zt = 20000. + zt = config_ztop + dz = zt/float(nz1) + + + do k=1,nz + + ! zw(k) = zt*(real(k-1)*dz/zt)**str + zw(k) = float(k-1)*dz + zc(k) = zw(k) +! +! ah(k) governs the transition between terrain-following +! and pureheight coordinates +! ah(k) = 0 is a terrain-following coordinate +! ah(k) = 1 is a height coordinate + +! ah(k) = 1.-cos(.5*pii*(k-1)*dz/zt)**6 + ah(k) = 1. +! call mpas_log_write(' k, zc, zw, ah = $i $r $r $r', intArgs=(/k/), realArgs=(/zc(k),zw(k),ah(k)/)) + end do + do k=1,nz1 + dzw (k) = zw(k+1)-zw(k) + rdzw(k) = 1./dzw(k) + zu(k ) = .5*(zw(k)+zw(k+1)) + end do + do k=2,nz1 + dzu (k) = .5*(dzw(k)+dzw(k-1)) + rdzu(k) = 1./dzu(k) + fzp (k) = .5* dzw(k )/dzu(k) + fzm (k) = .5* dzw(k-1)/dzu(k) + rdzwp(k) = dzw(k-1)/(dzw(k )*(dzw(k)+dzw(k-1))) + rdzwm(k) = dzw(k )/(dzw(k-1)*(dzw(k)+dzw(k-1))) + end do + +!********** how are we storing cf1, cf2 and cf3? + + COF1 = (2.*DZU(2)+DZU(3))/(DZU(2)+DZU(3))*DZW(1)/DZU(2) + COF2 = DZU(2) /(DZU(2)+DZU(3))*DZW(1)/DZU(3) + CF1 = FZP(2) + COF1 + CF2 = FZM(2) - COF1 - COF2 + CF3 = COF2 + + do iCell=1,nCells + do k=1,nz + zgrid(k,iCell) = ah(k)*(zc(k)*(1.-hx(k,iCell)/zt)+hx(k,iCell)) & + + (1.-ah(k)) * zc(k) + end do + do k=1,nz1 + zz (k,iCell) = (zw(k+1)-zw(k))/(zgrid(k+1,iCell)-zgrid(k,iCell)) + end do + end do + + do i=1, nEdges + iCell1 = cellsOnEdge(1,i) + iCell2 = cellsOnEdge(2,i) + do k=1,nz1 + zxu (k,i) = 0.5 * (zgrid(k,iCell2)-zgrid(k,iCell1) + zgrid(k+1,iCell2)-zgrid(k+1,iCell1)) / dcEdge(i) + end do + end do + do i=1, nCells + do k=1,nz1 + ztemp = .5*(zgrid(k+1,i)+zgrid(k,i)) + dss(k,i) = 0. + ztemp = zgrid(k,i) + if(ztemp.gt.zd+.1) then + dss(k,i) = dss(k,i)+xnutr*sin(.5*pii*(ztemp-zd)/(zt-zd))**2 + end if + end do + end do + +! +! initialization +! + do i=1,nCells + do k=1,nz1 + + ztemp = .5*(zgrid(k,i)+zgrid(k+1,i)) + + ! if(ztemp .gt. ztr) then + ! t (k,i) = thetar*exp(9.8*(ztemp-ztr)/(1003.*ttr)) + ! relhum(k,i) = 0.25 + ! else + ! t (k,i) = 300.+43.*(ztemp/ztr)**1.25 + ! relhum(k,i) = (1.-0.75*(ztemp/ztr)**1.25) + ! if(t(k,i).lt.thetas) t(k,i) = thetas + ! end if + + t(k,i) = atm_get_sounding('theta',ztemp) + scalars(index_qv,k,i) = atm_get_sounding('qv',ztemp) + + tb(k,i) = t(k,i) + thi(k,i) = t(k,i) + tbi(k,i) = t(k,i) + cqw(k,i) = 1. + cqwb(k,i) = 1. + end do + end do + +! set the velocity field - we are on a plane here. + + do i=1, nEdges + cell1 = cellsOnEdge(1,i) + cell2 = cellsOnEdge(2,i) + if(cell1 <= nCellsSolve .or. cell2 <= nCellsSolve) then + do k=1,nz1 + ztemp = .25*( zgrid(k,cell1 )+zgrid(k+1,cell1 ) & + +zgrid(k,cell2)+zgrid(k+1,cell2)) + u_vel = atm_get_sounding('u',ztemp) + v_vel = atm_get_sounding('v',ztemp) + u(k,i) = cos(angleEdge(i))*u_vel - sin(angleEdge(i))*v_vel + if(i == 1 ) then + u_init(k) = u_vel + v_init(k) = v_vel + end if + end do + end if + end do + + call mpas_dmpar_bcast_reals(dminfo, nz1, u_init) + +! +! for reference sounding +! + do itr=1,30 + + pitop = 1.-.5*dzw(1)*gravity*(1.+scalars(index_qv,1,1))/(cp*t(1,1)*zz(1,1)) + pibtop = 1.-.5*dzw(1)*gravity*(1.+qvb(1))/(cp*tb(1,1)*zz(1,1)) + do k=2,nz1 + pitop = pitop-dzu(k)*gravity/(cp*cqw(k,1)*.5*(t(k,1)+t(k-1,1)) & + *.5*(zz(k,1)+zz(k-1,1))) + pibtop = pibtop-dzu(k)*gravity/(cp*cqwb(k,1)*.5*(tb(k,1)+tb(k-1,1)) & + *.5*(zz(k,1)+zz(k-1,1))) + + !call mpas_log_write('$i $r $r $r $r', intArgs=(/k/), realArgs=(/pitop,tb(k,1),dzu(k),tb(k,1)/)) + end do + pitop = pitop-.5*dzw(nz1)*gravity*(1.+scalars(index_qv,nz1,1))/(cp*t(nz1,1)*zz(nz1,1)) + pibtop = pibtop-.5*dzw(nz1)*gravity*(1.+qvb(nz1))/(cp*tb(nz1,1)*zz(nz1,1)) + + call mpas_dmpar_bcast_real(dminfo, pitop) + call mpas_dmpar_bcast_real(dminfo, pibtop) + + ptopb = p0*pibtop**(1./rcp) +! call mpas_log_write('ptopb = $r', realArgs=(/0.01_RKIND*ptopb/)) + + do i=1, nCells + pb(nz1,i) = pibtop+.5*dzw(nz1)*gravity*(1.+qvb(nz1))/(cp*tb(nz1,i)*zz(nz1,i)) + p (nz1,i) = pitop+.5*dzw(nz1)*gravity*(1.+scalars(index_qv,nz1,i))/(cp*t (nz1,i)*zz(nz1,i)) + do k=nz1-1,1,-1 + pb(k,i) = pb(k+1,i) + dzu(k+1)*gravity/(cp*cqwb(k+1,i)*.5*(tb(k,i)+tb(k+1,i)) & + *.5*(zz(k,i)+zz(k+1,i))) + p (k,i) = p (k+1,i) + dzu(k+1)*gravity/(cp*cqw(k+1,i)*.5*(t (k,i)+t (k+1,i)) & + *.5*(zz(k,i)+zz(k+1,i))) + end do + do k=1,nz1 + rb (k,i) = pb(k,i)**(1./rcv)/((rgas/p0)*tb(k,i)*zz(k,i)) + rtb(k,i) = rb(k,i)*tb(k,i) + rr (k,i) = p (k,i)**(1./rcv)/((rgas/p0)*t (k,i)*zz(k,i))-rb(k,i) + ppb(k,i) = p0*(zz(k,i)*rgas*rtb(k,i)/p0)**(cp/cv) + end do + end do + + ! + ! update water vapor mixing ratio from humidity profile + ! + ! do i= 1,nCells + ! do k=1,nz1 + ! temp = p(k,i)*thi(k,i) + ! pres = p0*p(k,i)**(1./rcp) + ! qvs = 380.*exp(17.27*(temp-273.)/(temp-36.))/pres + ! scalars(index_qv,k,i) = min(0.014_RKIND,relhum(k,i)*qvs) + ! end do + ! end do + + do k=1,nz1 +!********************************************************************* +! QVB = QV INCLUDES MOISTURE IN REFERENCE STATE +! qvb(k) = scalars(index_qv,k,1) +! QVB = 0 PRODUCES DRY REFERENCE STATE + qvb(k) = 0. + qvp(k) = scalars(index_qv,k,1)*1000. + zg(k) = .5*(zgrid(k,1)+zgrid(k+1,1)) +!********************************************************************* + end do + + do i= 1,nCells + do k=1,nz1 + t (k,i) = thi(k,i)*(1.+1.61*scalars(index_qv,k,i)) + tb(k,i) = tbi(k,i)*(1.+1.61*qvb(k)) + end do + do k=2,nz1 + cqw (k,i) = 1./(1.+.5*(scalars(index_qv,k,i)+scalars(index_qv,k-1,i))) + cqwb(k,i) = 1./(1.+.5*(qvb(k)+qvb(k-1))) + end do + end do + + end do !end of iteration loop + + call mpas_log_write(' sounding ') + call mpas_log_write(' k, zg, rb, tb, rtb, t, rr, p, qvp') + do k=1,nVertLevels + call mpas_log_write('$i $r $r $r $r $r $r $r $r', intArgs=(/k/), realArgs=(/zg(k),rb(k,1),tb(k,1),rtb(k,1),t(k,1),rr(k,1),p(k,1),qvp(k)/)) + end do + +! +! potential temperature perturbation +! + +! do i=1,nCells +! do k = 1,nz1 ! same as in WRF +! call random_number(randx) +! thi(k,i) = thi(k,i) + 0.1*(randx-0.5) +! t (k,i) = thi(k,i)*(1.+1.61*scalars(index_qv,k,i)) +! end do +! end do + + do k = 1,nz1 ! same as in WRF + if(zg(k) .le. 397.0) then ! the SAS initial PBL height + do i=1,nCells + call random_number(randx) + thi(k,i) = thi(k,i) + 1.0*(randx-0.5) + t (k,i) = thi(k,i)*(1.+1.61*scalars(index_qv,k,i)) + end do + end if + end do +! +! initial seed for tke +! + scalars(index_tke,:,:) = 0. + + do k = 1,nz1 + if( zg(k) .le. 255.) then + do i=1,nCells + !! scalars(index_tke,k,i) = 0.1 + scalars(index_tke,k,i) = 0.4*((1.-(zg(k)/255.))**3) + end do + end if + end do + + do itr=1,30 + + pitop = 1.-.5*dzw(1)*gravity*(1.+scalars(index_qv,1,1))/(cp*t(1,1)*zz(1,1)) + do k=2,nz1 + pitop = pitop-dzu(k)*gravity/(cp*cqw(k,1)*.5*(t (k,1)+t (k-1,1)) & + *.5*(zz(k,1)+zz(k-1,1))) + end do + pitop = pitop - .5*dzw(nz1)*gravity*(1.+scalars(index_qv,nz1,1))/(cp*t(nz1,1)*zz(nz1,1)) + ptop = p0*pitop**(1./rcp) +! call mpas_log_write('ptop = $r $r', realArgs=(/0.01_RKIND*ptop, 0.01_RKIND*ptopb/)) + + call mpas_dmpar_bcast_real(dminfo, ptop) + + do i = 1, nCells + + pp(nz1,i) = ptop-ptopb+.5*dzw(nz1)*gravity* & + (rr(nz1,i)+(rr(nz1,i)+rb(nz1,i))*scalars(index_qv,nz1,i)) + do k=nz1-1,1,-1 +! pp(k,i) = pp(k+1,i)+.5*dzu(k+1)*gravity* & +! (rr(k ,i)+(rr(k ,i)+rb(k ,i))*scalars(index_qv,k ,i) & +! +rr(k+1,i)+(rr(k+1,i)+rb(k+1,i))*scalars(index_qv,k+1,i)) + pp(k,i) = pp(k+1,i)+dzu(k+1)*gravity*( & + fzm(k+1)*(rb(k+1,i)*(scalars(index_qv,k+1,i)-qvb(k+1)) & + +rr(k+1,i)*(1.+scalars(index_qv,k+1,i))) & + +fzp(k+1)*(rb(k ,i)*(scalars(index_qv,k ,i)-qvb(k)) & + +rr(k ,i)*(1.+scalars(index_qv,k ,i)))) + end do +! if (itr==1.and.i==1) then +! do k=1,nz1 +! call mpas_log_write('pp-check $r', realArgs=(/pp(k,i)/)) +! end do +! end if + do k=1,nz1 + rt(k,i) = (pp(k,i)/(rgas*zz(k,i)) & + -rtb(k,i)*(p(k,i)-pb(k,i)))/p(k,i) + p (k,i) = (zz(k,i)*(rgas/p0)*(rtb(k,i)+rt(k,i)))**rcv + rr(k,i) = (rt(k,i)-rb(k,i)*(t(k,i)-tb(k,i)))/t(k,i) + end do + + end do ! loop over cells + + end do ! iteration loop +!---------------------------------------------------------------------- +! + do k=1,nz1 + qv_init(k) = scalars(index_qv,k,1) + end do + + t_init_1d(:) = t(:,1) + call mpas_dmpar_bcast_reals(dminfo, nz1, t_init_1d) + call mpas_dmpar_bcast_reals(dminfo, nz1, qv_init) + + do i=1,nCells + do k=1,nz1 + t_init(k,i) = t_init_1d(k) + rho_zz(k,i) = rb(k,i)+rr(k,i) + end do + end do + + do i=1,nEdges + cell1 = cellsOnEdge(1,i) + cell2 = cellsOnEdge(2,i) + if(cell1 <= nCellsSolve .or. cell2 <= nCellsSolve) then + do k=1,nz1 + ru (k,i) = 0.5*(rho_zz(k,cell1)+rho_zz(k,cell2))*u(k,i) + end do + end if + end do + + + ! + ! we are assuming w and rw are zero for this initialization + ! i.e., no terrain + ! + rw = 0.0 + w = 0.0 + + zb = 0.0 + zb3 = 0.0 + + ! + ! Generate rotated Coriolis field - same settings as in WRF + ! + do iEdge=1,nEdges + ! fEdge(iEdge) = 1.e-04 + fEdge(iEdge) = 7.2921e-05 + end do + + do iVtx=1,nVertices + ! fVertex(iVtx) = 1.e-04 + fVertex(iVtx) = 7.2921e-05 + end do + + ! + ! Compute mass fluxes tangential to each edge (i.e., through the faces of dual grid cells) + ! + v(:,:) = 0.0 + do iEdge = 1, nEdges + do i=1,nEdgesOnEdge(iEdge) + eoe = edgesOnEdge(i,iEdge) + if (eoe > 0) then + do k = 1, nVertLevels + v(k,iEdge) = v(k,iEdge) + weightsOnEdge(i,iEdge) * u(k, eoe) + end do + end if + end do + end do + + ! call mpas_log_write(' k,u_init, t_init, qv_init ') + ! do k=1,nVertLevels + ! call mpas_log_write('$i $r $r $r', intArgs=(/k/), realArgs=(/u_init(k),t_init(k,1),qv_init(k)/)) + ! end do + + ! Compute rho and theta from rho_zz and theta_m + do iCell=1,nCells + do k=1,nVertLevels + rho(k,iCell) = rho_zz(k,iCell) * zz(k,iCell) + theta(k,iCell) = t(k,iCell) / (1.0 + 1.61 * scalars(index_qv,k,iCell)) + end do + end do + + do iCell = 1, nCells + !land category + landmask(iCell) = 1 + lu_index(iCell) = 1 + xland(iCell) = 1.0 + if (iCell == 1) call mpas_log_write(' landmask, lu_index, xland $i $i $r', intArgs=(/landmask(iCell),lu_index(iCell)/), realArgs=(/xland(iCell)/)) + end do + + end subroutine init_atm_case_les + + + real (kind=RKIND) function atm_get_sounding( variable, height ) + + implicit none + real (kind=RKIND), intent(in) :: height + character(len=*), intent(in) :: variable + + atm_get_sounding = -999. +! SAS case sounding + + if(variable == 'u') then + atm_get_sounding = 2.0 ! SAS value + + else if (variable == 'v') then + atm_get_sounding = 0. + + else if (variable == 'theta') then + + if(height .le. 352.5) then + atm_get_sounding = 296.6 + else if(height .le. 442.5) then + atm_get_sounding = 296.6 + (height-352.5)*1.5/90. + else + atm_get_sounding = 298.1 + (height-442.5)*0.003 + end if + + else if (variable == 'qv') then + + if(height .le. 352.5) then + atm_get_sounding = 11.8/1000. + else if(height .le. 442.5) then + atm_get_sounding = 11.8/1000. - (height-352.5)*4.0/90./1000. + else + atm_get_sounding = max(7.8/1000. - (height-442.5)*0.004/1000.,0.0) + end if + + end if + + end function atm_get_sounding + + + real (kind=RKIND) function atm_get_sounding_1( variable, height ) + + implicit none + real (kind=RKIND), intent(in) :: height + character(len=*), intent(in) :: variable + + atm_get_sounding_1 = -999. + + if(variable == 'u') then + atm_get_sounding_1 = 15.0 + else if (variable == 'v') then + atm_get_sounding_1 = 0. + else if (variable == 'qv') then + + atm_get_sounding_1 = 0. ! dry sounding + ! atm_get_sounding_1 = 0.010 + ! if(height .gt. 500.) atm_get_sounding_1 = 0.004 + + else if (variable == 'theta') then + + if(height .le. 500.) then + atm_get_sounding_1 = 300. + else if(height .le. 600.) then + atm_get_sounding_1 = 300. + (height-500.)*3./100. + else + atm_get_sounding_1 = 303. + (height-600.)*3./1000. + end if + ! atm_get_sounding_1 = atm_get_sounding_1 - 10.0 ! for water case + + end if + + end function atm_get_sounding_1 + + real (kind=RKIND) function atm_get_sounding_2( variable, height ) + + implicit none + real (kind=RKIND), intent(in) :: height + character(len=*), intent(in) :: variable + + atm_get_sounding_2 = -999. + + if(variable == 'u') then + atm_get_sounding_2 = 10.0 + else if (variable == 'v') then + atm_get_sounding_2 = 0. + else if (variable == 'qv') then + + ! atm_get_sounding_2 = 0. ! dry sounding_2 + atm_get_sounding_2 = 0.010 + if(height .gt. 1000.) atm_get_sounding_2 = 0.004 + + else if (variable == 'theta') then + + if(height .le. 1000.) then + atm_get_sounding_2 = 300. + else if(height .le. 1150.) then + atm_get_sounding_2 = 300. + (height-1000.)*8./150. + else + atm_get_sounding_2 = 308. + (height-1150.)*3./1000. + end if + atm_get_sounding_2 = atm_get_sounding_2 - 10.0 ! for water case + + end if + + end function atm_get_sounding_2 + +!----------- !----------------------------------------------------------------------- ! routine init_atm_case_cam_mpas diff --git a/src/framework/Makefile b/src/framework/Makefile index 2d8e7dc92b..64d3d6faf4 100644 --- a/src/framework/Makefile +++ b/src/framework/Makefile @@ -110,7 +110,7 @@ mpas_c_interfacing.o: xml_stream_parser.o: xml_stream_parser.c $(CC) $(CFLAGS) $(CPPFLAGS) $(CPPINCLUDES) -I../external/ezxml -c xml_stream_parser.c -mpas_halo.o: mpas_derived_types.o mpas_pool_routines.o mpas_log.o +mpas_halo.o: mpas_derived_types.o mpas_pool_routines.o mpas_log.o mpas_timer.o mpas_stream_inquiry.o : mpas_derived_types.o mpas_log.o mpas_c_interfacing.o diff --git a/src/framework/mpas_dmpar.F b/src/framework/mpas_dmpar.F index 6d68c0c656..5d9b48d53b 100644 --- a/src/framework/mpas_dmpar.F +++ b/src/framework/mpas_dmpar.F @@ -7448,19 +7448,28 @@ end subroutine mpas_dmpar_exch_group_end_halo_exch!}}} !> exchange is complete. ! !----------------------------------------------------------------------- - subroutine mpas_dmpar_exch_group_full_halo_exch(domain, groupName, iErr)!{{{ + subroutine mpas_dmpar_exch_group_full_halo_exch(domain, groupName, withGPUAwareMPI, iErr)!{{{ type (domain_type), intent(inout) :: domain character (len=*), intent(in) :: groupName + logical, optional, intent(in) :: withGPUAwareMPI integer, optional, intent(out) :: iErr type (mpas_exchange_group), pointer :: exchGroupPtr integer :: nLen + logical :: useGPUAwareMPI if ( present(iErr) ) then iErr = MPAS_DMPAR_NOERR end if + useGPUAwareMPI = .false. + if (present(withGPUAwareMPI)) then + if (withGPUAwareMPI) then + call mpas_log_write(' GPU-aware MPI not implemented in this module', MPAS_LOG_CRIT) + end if + end if + nLen = len_trim(groupName) DMPAR_DEBUG_WRITE(' -- Trying to perform a full exchange for group ' // trim(groupName)) diff --git a/src/framework/mpas_halo.F b/src/framework/mpas_halo.F index 4ab8817c23..8877d6330d 100644 --- a/src/framework/mpas_halo.F +++ b/src/framework/mpas_halo.F @@ -280,6 +280,30 @@ subroutine mpas_halo_exch_group_complete(domain, groupName, iErr) call refactor_lists(domain, groupName, iErr) + ! Logic to return early if there are no neighbors to send to + if ( newGroup% nGroupSendNeighbors <=0 ) then + return + end if + + + ! Always copy in the main data member first + !$acc enter data copyin(newGroup) + ! Then the data in the members of the type + !$acc enter data copyin(newGroup % recvBuf(:), newGroup % sendBuf(:)) + ! !$acc enter data copyin(newGroup % sendBuf(:)) + !$acc enter data copyin(newGroup % fields(:)) + do i = 1, newGroup % nFields + !$acc enter data copyin(newGroup % fields(i)) + !$acc enter data copyin(newGroup % fields(i) % nSendLists(:,:)) + !$acc enter data copyin(newGroup % fields(i) % packOffsets(:)) + !$acc enter data copyin(newGroup % fields(i) % sendListSrc(:,:,:)) + !$acc enter data copyin(newGroup % fields(i) % sendListDst(:,:,:)) + !$acc enter data copyin(newGroup % fields(i) % nRecvLists(:,:)) + !$acc enter data copyin(newGroup % fields(i) % unpackOffsets(:)) + !$acc enter data copyin(newGroup % fields(i) % recvListSrc(:,:,:)) + !$acc enter data copyin(newGroup % fields(i) % recvListDst(:,:,:)) + end do + end subroutine mpas_halo_exch_group_complete @@ -349,15 +373,26 @@ subroutine mpas_halo_exch_group_destroy(domain, groupName, iErr) deallocate(cursor % fields(i) % compactHaloInfo) deallocate(cursor % fields(i) % compactSendLists) deallocate(cursor % fields(i) % compactRecvLists) + !$acc exit data delete(cursor % fields(i) % nSendLists(:,:)) deallocate(cursor % fields(i) % nSendLists) + !$acc exit data delete(cursor % fields(i) % sendListSrc(:,:,:)) deallocate(cursor % fields(i) % sendListSrc) + !$acc exit data delete(cursor % fields(i) % sendListDst(:,:,:)) deallocate(cursor % fields(i) % sendListDst) + !$acc exit data delete(cursor % fields(i) % packOffsets(:)) deallocate(cursor % fields(i) % packOffsets) + !$acc exit data delete(cursor % fields(i) % nRecvLists(:,:)) deallocate(cursor % fields(i) % nRecvLists) + !$acc exit data delete(cursor % fields(i) % recvListSrc(:,:,:)) deallocate(cursor % fields(i) % recvListSrc) + !$acc exit data delete(cursor % fields(i) % recvListDst(:,:,:)) deallocate(cursor % fields(i) % recvListDst) + !$acc exit data delete(cursor % fields(i) % unpackOffsets(:)) deallocate(cursor % fields(i) % unpackOffsets) + !$acc exit data delete(cursor % fields(i)) end do + ! Use finalize here in-case the copyins in ..._complete increment the reference counter + !$acc exit data finalize delete(cursor % fields(:)) deallocate(cursor % fields) deallocate(cursor % groupPackOffsets) deallocate(cursor % groupSendNeighbors) @@ -368,10 +403,14 @@ subroutine mpas_halo_exch_group_destroy(domain, groupName, iErr) deallocate(cursor % groupToFieldRecvIdx) deallocate(cursor % groupRecvOffsets) deallocate(cursor % groupRecvCounts) + !$acc exit data delete(cursor % sendBuf(:)) deallocate(cursor % sendBuf) + !$acc exit data delete(cursor % recvBuf(:)) deallocate(cursor % recvBuf) deallocate(cursor % sendRequests) deallocate(cursor % recvRequests) + ! Finalize here as well, just in-case + !$acc exit data finalize delete(cursor) deallocate(cursor) end subroutine mpas_halo_exch_group_destroy @@ -495,7 +534,7 @@ end subroutine mpas_halo_exch_group_add_field !> exchange group. ! !----------------------------------------------------------------------- - subroutine mpas_halo_exch_group_full_halo_exch(domain, groupName, iErr) + subroutine mpas_halo_exch_group_full_halo_exch(domain, groupName, withGPUAwareMPI, iErr) #ifdef MPAS_USE_MPI_F08 use mpi_f08, only : MPI_Datatype, MPI_Comm @@ -508,6 +547,7 @@ subroutine mpas_halo_exch_group_full_halo_exch(domain, groupName, iErr) use mpas_derived_types, only : domain_type, mpas_halo_group, MPAS_HALO_REAL, MPAS_LOG_CRIT use mpas_pool_routines, only : mpas_pool_get_array use mpas_log, only : mpas_log_write + use mpas_timer, only : mpas_timer_start, mpas_timer_stop ! Parameters #ifdef MPAS_USE_MPI_F08 @@ -527,6 +567,7 @@ subroutine mpas_halo_exch_group_full_halo_exch(domain, groupName, iErr) ! Arguments type (domain_type), intent(inout) :: domain character (len=*), intent(in) :: groupName + logical, optional, intent(in) :: withGPUAwareMPI integer, optional, intent(out) :: iErr ! Local variables @@ -542,6 +583,7 @@ subroutine mpas_halo_exch_group_full_halo_exch(domain, groupName, iErr) integer :: comm #endif integer :: mpi_ierr + logical:: useGPUAwareMPI type (mpas_halo_group), pointer :: group integer, dimension(:), pointer :: compactHaloInfo integer, dimension(:), pointer :: compactSendLists @@ -554,12 +596,17 @@ subroutine mpas_halo_exch_group_full_halo_exch(domain, groupName, iErr) integer :: maxNRecvList integer, dimension(:,:,:), CONTIGUOUS pointer :: recvListSrc, recvListDst integer, dimension(:), CONTIGUOUS pointer :: unpackOffsets - + if (present(iErr)) then iErr = 0 end if + useGPUAwareMPI = .false. + if (present(withGPUAwareMPI)) then + useGPUAwareMPI = withGPUAwareMPI + end if + ! ! Find this halo exhange group in the list of groups ! @@ -577,6 +624,12 @@ subroutine mpas_halo_exch_group_full_halo_exch(domain, groupName, iErr) messageType=MPAS_LOG_CRIT) end if + ! Logic to return early if there are no neighbors to send to + if ( group% nGroupSendNeighbors <= 0 ) then + return + end if + + call mpas_timer_start('full_halo_exch') ! ! Get the rank of this task and the MPI communicator to use from the first field in ! the group; all fields should be using the same communicator, so this should not @@ -589,6 +642,7 @@ subroutine mpas_halo_exch_group_full_halo_exch(domain, groupName, iErr) #endif rank = group % fields(1) % compactHaloInfo(8) + !$acc data present(group % recvBuf(:), group % sendBuf(:)) if(useGPUAwareMPI) ! ! Initiate non-blocking MPI receives for all neighbors @@ -598,9 +652,11 @@ subroutine mpas_halo_exch_group_full_halo_exch(domain, groupName, iErr) bufstart = group % groupRecvOffsets(i) bufend = group % groupRecvOffsets(i) + group % groupRecvCounts(i) - 1 !TO DO: how do we determine appropriate type here? + !$acc host_data use_device(group % recvBuf) if(useGPUAwareMPI) call MPI_Irecv(group % recvBuf(bufstart:bufend), group % groupRecvCounts(i), MPI_REALKIND, & group % groupRecvNeighbors(i), group % groupRecvNeighbors(i), comm, & group % recvRequests(i), mpi_ierr) + !$acc end host_data else group % recvRequests(i) = MPI_REQUEST_NULL end if @@ -638,12 +694,16 @@ subroutine mpas_halo_exch_group_full_halo_exch(domain, groupName, iErr) case (1) call mpas_pool_get_array(domain % blocklist % allFields, trim(group % fields(i) % fieldName), & group % fields(i) % r1arr, timeLevel=group % fields(i) % timeLevel) - + ! ! Pack send buffer for all neighbors for current field ! + call mpas_timer_start('packing_halo_exch') + !$acc parallel default(present) attach(group % fields(i) % r1arr) if(useGPUAwareMPI) + !$acc loop gang collapse(2) do iEndp = 1, nSendEndpts do iHalo = 1, nHalos + !$acc loop vector do j = 1, maxNSendList if (j <= nSendLists(iHalo,iEndp)) then group % sendBuf(packOffsets(iEndp) + sendListDst(j,iHalo,iEndp)) = & @@ -652,20 +712,31 @@ subroutine mpas_halo_exch_group_full_halo_exch(domain, groupName, iErr) end do end do end do - + !$acc end parallel + call mpas_timer_stop('packing_halo_exch') + ! ! Packing code for 2-d real-valued fields ! case (2) call mpas_pool_get_array(domain % blocklist % allFields, trim(group % fields(i) % fieldName), & group % fields(i) % r2arr, timeLevel=group % fields(i) % timeLevel) - + ! ! Pack send buffer for all neighbors for current field - ! + ! + ! Use data regions for specificity and so the reference or attachment counters are easier to make sense of + ! Present should also cause an attach action. OpenACC Spec2.7 Section 2.7.2 describes 'attach action' + ! !$acc data present(group) present(group % fields(i)) present(group % sendBuf(:), group % fields(i) % sendListSrc(:,:,:)) + + + call mpas_timer_start('packing_halo_exch') + !$acc parallel default(present) attach(group % fields(i) % r2arr) if(useGPUAwareMPI) + !$acc loop gang collapse(3) do iEndp = 1, nSendEndpts do iHalo = 1, nHalos do j = 1, maxNSendList + !$acc loop vector do i1 = 1, dim1 if (j <= nSendLists(iHalo,iEndp)) then group % sendBuf(packOffsets(iEndp) + dim1 * (sendListDst(j,iHalo,iEndp) - 1) + i1) = & @@ -675,21 +746,27 @@ subroutine mpas_halo_exch_group_full_halo_exch(domain, groupName, iErr) end do end do end do - + !$acc end parallel + call mpas_timer_stop('packing_halo_exch') + ! ! Packing code for 3-d real-valued fields ! case (3) call mpas_pool_get_array(domain % blocklist % allFields, trim(group % fields(i) % fieldName), & - group % fields(i) % r3arr, group % fields(i) % timeLevel) - + group % fields(i) % r3arr, group % fields(i) % timeLevel) + ! ! Pack send buffer for all neighbors for current field ! + call mpas_timer_start('packing_halo_exch') + !$acc parallel default(present) attach(group % fields(i) % r3arr) if(useGPUAwareMPI) + !$acc loop gang collapse(4) do iEndp = 1, nSendEndpts do iHalo = 1, nHalos do j = 1, maxNSendList do i2 = 1, dim2 + !$acc loop vector do i1 = 1, dim1 if (j <= nSendLists(iHalo,iEndp)) then group % sendBuf(packOffsets(iEndp) + dim1*dim2*(sendListDst(j,iHalo,iEndp) - 1) & @@ -701,6 +778,8 @@ subroutine mpas_halo_exch_group_full_halo_exch(domain, groupName, iErr) end do end do end do + !$acc end parallel + call mpas_timer_stop('packing_halo_exch') end select end if @@ -714,9 +793,11 @@ subroutine mpas_halo_exch_group_full_halo_exch(domain, groupName, iErr) bufstart = group % groupSendOffsets(i) bufend = group % groupSendOffsets(i) + group % groupSendCounts(i) - 1 !TO DO: how do we determine appropriate type here? + !$acc host_data use_device(group % sendBuf) if(useGPUAwareMPI) call MPI_Isend(group % sendBuf(bufstart:bufend), group % groupSendCounts(i), MPI_REALKIND, & group % groupSendNeighbors(i), rank, comm, & group % sendRequests(i), mpi_ierr) + !$acc end host_data else group % sendRequests(i) = MPI_REQUEST_NULL end if @@ -771,7 +852,11 @@ subroutine mpas_halo_exch_group_full_halo_exch(domain, groupName, iErr) ! ! Unpack recv buffer from all neighbors for current field ! + call mpas_timer_start('unpacking_halo_exch') + !$acc parallel default(present) attach(group % fields(i) % r1arr) if(useGPUAwareMPI) + !$acc loop gang do iHalo = 1, nHalos + !$acc loop vector do j = 1, maxNRecvList if (j <= nRecvLists(iHalo,iEndp)) then group % fields(i) % r1arr(recvListDst(j,iHalo,iEndp)) = & @@ -779,6 +864,8 @@ subroutine mpas_halo_exch_group_full_halo_exch(domain, groupName, iErr) end if end do end do + !$acc end parallel + call mpas_timer_stop('unpacking_halo_exch') ! ! Unpacking code for 2-d real-valued fields @@ -787,8 +874,13 @@ subroutine mpas_halo_exch_group_full_halo_exch(domain, groupName, iErr) ! ! Unpack recv buffer from all neighbors for current field ! + call mpas_timer_start('unpacking_halo_exch') + !$acc parallel default(present) attach(group % fields(i) % r2arr) if(useGPUAwareMPI) + !$acc loop gang do iHalo = 1, nHalos + !$acc loop worker do j = 1, maxNRecvList + !$acc loop vector do i1 = 1, dim1 if (j <= nRecvLists(iHalo,iEndp)) then group % fields(i) % r2arr(i1, recvListDst(j,iHalo,iEndp)) = & @@ -797,6 +889,8 @@ subroutine mpas_halo_exch_group_full_halo_exch(domain, groupName, iErr) end do end do end do + !$acc end parallel + call mpas_timer_stop('unpacking_halo_exch') ! ! Unpacking code for 3-d real-valued fields @@ -805,8 +899,12 @@ subroutine mpas_halo_exch_group_full_halo_exch(domain, groupName, iErr) ! ! Unpack recv buffer from all neighbors for current field ! + call mpas_timer_start('unpacking_halo_exch') + !$acc parallel default(present) attach(group % fields(i) % r3arr) if(useGPUAwareMPI) + !$acc loop gang collapse(2) do iHalo = 1, nHalos do j = 1, maxNRecvList + !$acc loop vector collapse(2) do i2 = 1, dim2 do i1 = 1, dim1 if (j <= nRecvLists(iHalo,iEndp)) then @@ -818,12 +916,16 @@ subroutine mpas_halo_exch_group_full_halo_exch(domain, groupName, iErr) end do end do end do + !$acc end parallel + call mpas_timer_stop('unpacking_halo_exch') end select end if end do end do - + ! For the present(group % recvBuf(:), group % sendBuf(:)) + !$acc end data + ! ! Nullify array pointers - not necessary for correctness, but helpful when debugging ! to not leave pointers to what might later be incorrect targets @@ -843,6 +945,8 @@ subroutine mpas_halo_exch_group_full_halo_exch(domain, groupName, iErr) ! call MPI_Waitall(group % nGroupSendNeighbors, group % sendRequests, MPI_STATUSES_IGNORE, mpi_ierr) + call mpas_timer_stop('full_halo_exch') + end subroutine mpas_halo_exch_group_full_halo_exch diff --git a/src/operators/mpas_vector_reconstruction.F b/src/operators/mpas_vector_reconstruction.F index 605da9cd6d..88d87474ab 100644 --- a/src/operators/mpas_vector_reconstruction.F +++ b/src/operators/mpas_vector_reconstruction.F @@ -202,7 +202,8 @@ end subroutine mpas_init_reconstruct!}}} !> Input: grid meta data and vector component data residing at cell edges !> Output: reconstructed vector field (measured in X,Y,Z) located at cell centers !----------------------------------------------------------------------- - subroutine mpas_reconstruct_2d(meshPool, u, uReconstructX, uReconstructY, uReconstructZ, uReconstructZonal, uReconstructMeridional, includeHalos)!{{{ + subroutine mpas_reconstruct_2d(meshPool, u, uReconstructX, uReconstructY, uReconstructZ, & + uReconstructZonal, uReconstructMeridional, includeHalos, lACC)!{{{ implicit none @@ -214,9 +215,11 @@ subroutine mpas_reconstruct_2d(meshPool, u, uReconstructX, uReconstructY, uRecon real (kind=RKIND), dimension(:,:), intent(out) :: uReconstructZonal !< Output: Zonal Component of velocity reconstructed to cell centers real (kind=RKIND), dimension(:,:), intent(out) :: uReconstructMeridional !< Output: Meridional Component of velocity reconstructed to cell centers logical, optional, intent(in) :: includeHalos !< Input: Optional logical that allows reconstruction over halo regions + logical, optional, intent(in) :: lACC !< Input: Optional logical that controls execution on the GPU with OpenACC ! temporary arrays needed in the compute procedure logical :: includeHalosLocal + logical :: lACCLocal integer, pointer :: nCells_ptr, nVertLevels_ptr integer :: nCells, nVertLevels integer, dimension(:,:), pointer :: edgesOnCell @@ -236,6 +239,12 @@ subroutine mpas_reconstruct_2d(meshPool, u, uReconstructX, uReconstructY, uRecon includeHalosLocal = .false. end if + if ( present(lACC) ) then + lACCLocal = lACC + else + lACCLocal = .false. + end if + ! stored arrays used during compute procedure call mpas_pool_get_array(meshPool, 'coeffs_reconstruct', coeffs_reconstruct) @@ -258,19 +267,9 @@ subroutine mpas_reconstruct_2d(meshPool, u, uReconstructX, uReconstructY, uRecon call mpas_pool_get_config(meshPool, 'on_a_sphere', on_a_sphere) - MPAS_ACC_TIMER_START('mpas_reconstruct_2d [ACC_data_xfer]') - ! Only use sections needed, nCells may be all cells or only non-halo cells - !$acc enter data copyin(coeffs_reconstruct(:,:,1:nCells),nEdgesOnCell(1:nCells), & - !$acc edgesOnCell(:,1:nCells),latCell(1:nCells),lonCell(1:nCells)) - !$acc enter data copyin(u(:,:)) - !$acc enter data create(uReconstructX(:,1:nCells),uReconstructY(:,1:nCells), & - !$acc uReconstructZ(:,1:nCells),uReconstructZonal(:,1:nCells), & - !$acc uReconstructMeridional(:,1:nCells)) - MPAS_ACC_TIMER_STOP('mpas_reconstruct_2d [ACC_data_xfer]') - ! loop over cell centers !$omp do schedule(runtime) - !$acc parallel default(present) + !$acc parallel default(present) if(lACCLocal) !$acc loop gang do iCell = 1, nCells ! initialize the reconstructed vectors @@ -305,7 +304,7 @@ subroutine mpas_reconstruct_2d(meshPool, u, uReconstructX, uReconstructY, uRecon if (on_a_sphere) then !$omp do schedule(runtime) - !$acc parallel default(present) + !$acc parallel default(present) if(lACCLocal) !$acc loop gang do iCell = 1, nCells clat = cos(latCell(iCell)) @@ -325,7 +324,7 @@ subroutine mpas_reconstruct_2d(meshPool, u, uReconstructX, uReconstructY, uRecon !$omp end do else !$omp do schedule(runtime) - !$acc parallel default(present) + !$acc parallel default(present) if(lACCLocal) !$acc loop gang vector collapse(2) do iCell = 1, nCells do k = 1, nVertLevels @@ -337,6 +336,109 @@ subroutine mpas_reconstruct_2d(meshPool, u, uReconstructX, uReconstructY, uRecon !$omp end do end if + end subroutine mpas_reconstruct_2d!}}} + + + subroutine mpas_reconstruct_2d_h2d(meshPool, u, uReconstructX, uReconstructY, uReconstructZ, uReconstructZonal, uReconstructMeridional, includeHalos)!{{{ + + implicit none + + type (mpas_pool_type), intent(in) :: meshPool !< Input: Mesh information + real (kind=RKIND), dimension(:,:), intent(in) :: u !< Input: Velocity field on edges + real (kind=RKIND), dimension(:,:), intent(out) :: uReconstructX !< Output: X Component of velocity reconstructed to cell centers + real (kind=RKIND), dimension(:,:), intent(out) :: uReconstructY !< Output: Y Component of velocity reconstructed to cell centers + real (kind=RKIND), dimension(:,:), intent(out) :: uReconstructZ !< Output: Z Component of velocity reconstructed to cell centers + real (kind=RKIND), dimension(:,:), intent(out) :: uReconstructZonal !< Output: Zonal Component of velocity reconstructed to cell centers + real (kind=RKIND), dimension(:,:), intent(out) :: uReconstructMeridional !< Output: Meridional Component of velocity reconstructed to cell centers + logical, optional, intent(in) :: includeHalos !< Input: Optional logical that allows reconstruction over halo regions + + logical :: includeHalosLocal + integer, dimension(:,:), pointer :: edgesOnCell + integer, dimension(:), pointer :: nEdgesOnCell + integer :: nCells + integer, pointer :: nCells_ptr + real(kind=RKIND), dimension(:), pointer :: latCell, lonCell + real (kind=RKIND), dimension(:,:,:), pointer :: coeffs_reconstruct + + if ( present(includeHalos) ) then + includeHalosLocal = includeHalos + else + includeHalosLocal = .false. + end if + + ! stored arrays used during compute procedure + call mpas_pool_get_array(meshPool, 'coeffs_reconstruct', coeffs_reconstruct) + + ! temporary variables + call mpas_pool_get_array(meshPool, 'nEdgesOnCell', nEdgesOnCell) + call mpas_pool_get_array(meshPool, 'edgesOnCell', edgesOnCell) + call mpas_pool_get_array(meshPool, 'latCell', latCell) + call mpas_pool_get_array(meshPool, 'lonCell', lonCell) + + if ( includeHalosLocal ) then + call mpas_pool_get_dimension(meshPool, 'nCells', nCells_ptr) + else + call mpas_pool_get_dimension(meshPool, 'nCellsSolve', nCells_ptr) + end if + nCells = nCells_ptr + + MPAS_ACC_TIMER_START('mpas_reconstruct_2d [ACC_data_xfer]') + ! Only use sections needed, nCells may be all cells or only non-halo cells + !$acc enter data copyin(coeffs_reconstruct(:,:,1:nCells),nEdgesOnCell(1:nCells), & + !$acc edgesOnCell(:,1:nCells),latCell(1:nCells),lonCell(1:nCells)) + !$acc enter data copyin(u(:,:)) + !$acc enter data create(uReconstructX(:,1:nCells),uReconstructY(:,1:nCells), & + !$acc uReconstructZ(:,1:nCells),uReconstructZonal(:,1:nCells), & + !$acc uReconstructMeridional(:,1:nCells)) + MPAS_ACC_TIMER_STOP('mpas_reconstruct_2d [ACC_data_xfer]') + + end subroutine mpas_reconstruct_2d_h2d + + + + subroutine mpas_reconstruct_2d_d2h(meshPool, u, uReconstructX, uReconstructY, uReconstructZ, uReconstructZonal, uReconstructMeridional, includeHalos)!{{{ + + implicit none + + type (mpas_pool_type), intent(in) :: meshPool !< Input: Mesh information + real (kind=RKIND), dimension(:,:), intent(in) :: u !< Input: Velocity field on edges + real (kind=RKIND), dimension(:,:), intent(out) :: uReconstructX !< Output: X Component of velocity reconstructed to cell centers + real (kind=RKIND), dimension(:,:), intent(out) :: uReconstructY !< Output: Y Component of velocity reconstructed to cell centers + real (kind=RKIND), dimension(:,:), intent(out) :: uReconstructZ !< Output: Z Component of velocity reconstructed to cell centers + real (kind=RKIND), dimension(:,:), intent(out) :: uReconstructZonal !< Output: Zonal Component of velocity reconstructed to cell centers + real (kind=RKIND), dimension(:,:), intent(out) :: uReconstructMeridional !< Output: Meridional Component of velocity reconstructed to cell centers + logical, optional, intent(in) :: includeHalos !< Input: Optional logical that allows reconstruction over halo regions + + logical :: includeHalosLocal + integer, dimension(:,:), pointer :: edgesOnCell + integer, dimension(:), pointer :: nEdgesOnCell + integer :: nCells + integer, pointer :: nCells_ptr + real(kind=RKIND), dimension(:), pointer :: latCell, lonCell + real (kind=RKIND), dimension(:,:,:), pointer :: coeffs_reconstruct + + if ( present(includeHalos) ) then + includeHalosLocal = includeHalos + else + includeHalosLocal = .false. + end if + + ! stored arrays used during compute procedure + call mpas_pool_get_array(meshPool, 'coeffs_reconstruct', coeffs_reconstruct) + + ! temporary variables + call mpas_pool_get_array(meshPool, 'nEdgesOnCell', nEdgesOnCell) + call mpas_pool_get_array(meshPool, 'edgesOnCell', edgesOnCell) + call mpas_pool_get_array(meshPool, 'latCell', latCell) + call mpas_pool_get_array(meshPool, 'lonCell', lonCell) + + if ( includeHalosLocal ) then + call mpas_pool_get_dimension(meshPool, 'nCells', nCells_ptr) + else + call mpas_pool_get_dimension(meshPool, 'nCellsSolve', nCells_ptr) + end if + nCells = nCells_ptr + MPAS_ACC_TIMER_START('mpas_reconstruct_2d [ACC_data_xfer]') !$acc exit data delete(coeffs_reconstruct(:,:,1:nCells),nEdgesOnCell(1:nCells), & !$acc edgesOnCell(:,1:nCells),latCell(1:nCells),lonCell(1:nCells)) @@ -346,7 +448,7 @@ subroutine mpas_reconstruct_2d(meshPool, u, uReconstructX, uReconstructY, uRecon !$acc uReconstructMeridional(:,1:nCells)) MPAS_ACC_TIMER_STOP('mpas_reconstruct_2d [ACC_data_xfer]') - end subroutine mpas_reconstruct_2d!}}} + end subroutine mpas_reconstruct_2d_d2h !***********************************************************************