Skip to content

Commit a85b0ae

Browse files
committed
final_fix_poisson_channel
1 parent f3b66dc commit a85b0ae

File tree

1 file changed

+49
-67
lines changed

1 file changed

+49
-67
lines changed

channel/main.f90

Lines changed: 49 additions & 67 deletions
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@ program main
2626
integer :: batchsize
2727
integer :: status
2828
integer :: i,j,k,il,jl,kl,ig,jg,kg,t,stage
29-
integer :: im,ip,jm,jp,km,kp,last,idx
29+
integer :: im,ip,jm,jp,km,kp,last,idx,kgm
3030
! TDMA variables
3131
double precision, allocatable :: a(:), b(:), c(:)
3232
double complex, allocatable :: d(:), sol(:)
@@ -74,8 +74,8 @@ program main
7474
call readinput
7575

7676
! domain decomposition (pencils in y and z)
77-
pr = 0
78-
pc = 0
77+
pr = 1
78+
pc = 2
7979
halo_ext=1
8080

8181
! CuDECOMP initialization and settings
@@ -148,6 +148,7 @@ program main
148148
! End cuDecomp initialization
149149

150150

151+
151152
! CUFFT initialization -- Create Plans (along x anf y only, z not required)
152153
! Forward 1D FFT in X: D2Z
153154
batchSize = piX_d2z%shape(2)*piX_d2z%shape(3) !<- number of FFT (from x-pencil dimension)
@@ -239,13 +240,14 @@ program main
239240
do j = 1+halo_ext, piX%shape(2)-halo_ext
240241
jg = piX%lo(2) + j - 1 - halo_ext
241242
do i = 1, piX%shape(1)
242-
amp=1.d0
243-
mx=2.d0
244-
my=2.d0
245-
mz=4.d0
243+
amp=3.d0
244+
mx= 3.03d0
245+
my= 2.02d0
246+
mz= 4.00d0
246247
!3D divergence free flow with fluctuations that satisfies the boundary conditions
247-
u(i,j,k) = 20.d0*(1.d0 - ((2*z(kg) - lz)/lz)**2)
248+
u(i,j,k) = 20.d0*(1.d0 - ((2*z(kg) - lz)/lz)**2) !
248249
u(i,j,k) = u(i,j,k) - amp*cos(twopi*mx*x(i)/lx)*sin(twopi*my*y(jg)/ly)*2.d0*twopi/lz*sin(twopi*z(kg)/lz)*cos(twopi*z(kg)/lz)
250+
u(i,j,k) = u(i,j,k) + amp*sin(twopi*mx*x(i)/lx)*(-twopi*my/ly)*sin(2.d0*twopi*my*y(jg)/ly)*sin(twopi*z(kg)/lz)*sin(twopi*z(kg)/lz)
249251
v(i,j,k) = -amp*cos(twopi*my*y(jg)/ly)*(twopi*mx/lx)*cos(twopi*mx*x(i)/lx)*sin(twopi*z(kg)/lz)*sin(twopi*z(kg)/lz)
250252
w(i,j,k) = amp*cos(twopi*mx*x(i)/lx)*(twopi*mx/lx)*sin(twopi*my*y(jg)/ly)*sin(twopi*z(kg)/lz)*sin(twopi*z(kg)/lz)
251253
enddo
@@ -719,7 +721,7 @@ program main
719721
enddo
720722
#endif
721723

722-
! 5.3 update halos (y and z directions), required to then compute the RHS of Poisson equation because of staggered grid
724+
! 8.3 update halos (y direction), required to then compute the RHS of Poisson equation because of staggered grid
723725
!$acc host_data use_device(u,v,w)
724726
CHECK_CUDECOMP_EXIT(cudecompUpdateHalosX(handle, grid_desc, u, work_halo_d, CUDECOMP_DOUBLE, piX%halo_extents, halo_periods, 2))
725727
CHECK_CUDECOMP_EXIT(cudecompUpdateHalosX(handle, grid_desc, u, work_halo_d, CUDECOMP_DOUBLE, piX%halo_extents, halo_periods, 3))
@@ -730,24 +732,27 @@ program main
730732
!$acc end host_data
731733

732734
! impose velocity boundary conditions, w is at the wall, u and v interpolate so that the mean value is zero, no-slip assumted, i.e. u=0, can be extented to any value
733-
! even when stretched the spacing cell face is centered
734-
!$acc parallel loop collapse(3)
735+
!$acc parallel loop collapse(3)
735736
do k=1, piX%shape(3)
736-
do j=1, piX%shape(2)
737+
do j=1+halo_ext, piX%shape(2)-halo_ext
737738
do i=1,nx
738-
kg = piX%lo(3) + k - 1 -halo_ext
739+
kg = piX%lo(3) + k - 1 -halo_ext
739740
! bottom wall
740-
if (kg .eq. 1) u(i,j,k-1)= -u(i,j,k) ! mean value between kg and kg-1 (wall) equal to zero
741-
if (kg .eq. 1) v(i,j,k-1)= -v(i,j,k) ! mean value between kg and kg-1 (wall) equal to zero
742-
if (kg .eq. 1) w(i,j,k)=0.d0 ! w point is at the wall
741+
if (kg .eq. 0) u(i,j,k)= -u(i,j,k+1) ! mean value between kg and kg-1 (wall) equal to zero
742+
if (kg .eq. 0) v(i,j,k)= -v(i,j,k+1) ! mean value between kg and kg-1 (wall) equal to zero
743+
if (kg .eq. 1) w(i,j,k)= 0.d0 ! w point is at the wall
743744
! top wall
744-
if (kg .eq. nz) u(i,j,k+1)= -u(i,j,k) ! mean value between kg and kg+1 (wall) equal to zero
745-
if (kg .eq. nz) v(i,j,k+1)= -v(i,j,k) ! mean value between kg and kg+1 (wall) equal to zero
746-
if (kg .eq. nz+1) w(i,j,k)=0.d0 ! w point (nz+1) is at the wall
745+
if (kg .eq. nz+1) u(i,j,k)= -u(i,j,k-1) ! mean value between kg and kg+1 (wall) equal to zero
746+
if (kg .eq. nz+1) v(i,j,k)= -v(i,j,k-1) ! mean value between kg and kg+1 (wall) equal to zero
747+
if (kg .eq. nz+1) w(i,j,k)= 0.d0 ! w point (nz+1) is at the wall
747748
enddo
748749
enddo
749750
enddo
751+
750752
enddo
753+
call nvtxEndRange
754+
755+
751756
!########################################################################################################################################
752757
! END STEP 6: USTAR COMPUTATION
753758
!########################################################################################################################################
@@ -780,6 +785,7 @@ program main
780785
!$acc end kernels
781786
call nvtxEndRange
782787

788+
783789
call nvtxStartRange("FFT forward w/ transpositions")
784790
!$acc host_data use_device(rhsp)
785791
status = cufftExecD2Z(planXf, rhsp, psi_d)
@@ -810,30 +816,29 @@ program main
810816
!$acc parallel loop collapse(2) gang private(a,b,c,d,factor)
811817
do jl = 1, npy
812818
do il = 1, npx
813-
! compute index global wavenumber ig and jg
819+
! compute global index ig and jg
814820
jg = yoff + jl
815821
ig = xoff + il
816822
! Set up tridiagonal system for each i and j
817823
! Fill diagonals and rhs for each
818-
! 0 and ny+1 are the ghost nodes
824+
! 0 and nz+1 are the ghost nodes
819825
do k = 1, nz
820-
a(k) = 2.0d0*(dzi(k)**2*dzi(k+1))/(dzi(k)+dzi(k+1))
821-
c(k) = 2.0d0*(dzi(k)*dzi(k+1)**2)/(dzi(k)+dzi(k+1))
822-
b(k) = -a(k) - c(k) + (2.d0*(cos(kx_d(ig)*dx)-1.d0)*dxi*dxi + 2.d0*(cos(ky_d(jg)*dy)-1.d0)*dyi*dyi)
826+
a(k) = 2.0d0*(dzi(k)**2.d0*dzi(k+1))/(dzi(k)+dzi(k+1))
827+
c(k) = 2.0d0*(dzi(k)*dzi(k+1)**2.d0)/(dzi(k)+dzi(k+1))
828+
b(k) = -a(k) - c(k) + 2.d0*(cos(kx_d(ig)*dx)-1.d0)*dxi*dxi + 2.d0*(cos(ky_d(jg)*dy)-1.d0)*dyi*dyi !opt: precompute cosines?
823829
d(k) = psi3d(k,il,jl)
824830
enddo
825831
! Neumann BC at bottom
826832
a(0) = 0.d0
827-
b(0) = -1.d0!*dzi(1)*dzi(1)
828-
c(0) = 1.d0!*dzi(1)*dzi(1)
833+
b(0) = -1.d0
834+
c(0) = 1.d0
829835
d(0) = 0.d0
830836
! Neumann BC at top
831-
a(nz+1) = 1.d0!*dzi(nz+1)*dzi(nz+1)
832-
b(nz+1) = -1.d0!*dzi(nz+1)*dzi(nz+1)
837+
a(nz+1) = 1.d0
838+
b(nz+1) = -1.d0
833839
c(nz+1) = 0.d0
834840
d(nz+1) = 0.d0
835841
! Enforce pressure at one point? one interior point, avodig messing up with BC
836-
! need brackets?
837842
if (ig == 1 .and. jg == 1) then
838843
a(nz+1) = 0.d0
839844
b(nz+1) = 1.d0
@@ -847,10 +852,9 @@ program main
847852
d(k) = d(k) - factor*d(k-1)
848853
end do
849854
! Back substitution
850-
psi3d(nz+1,il,jl) = d(nz+1)/b(nz+1)
851-
! check on pivot like flutas?
855+
psi3d(nz,il,jl) = (d(nz) - c(nz)*d(nz+1)/b(nz+1))/b(nz)
852856
!$acc loop seq
853-
do k = nz, 1, -1
857+
do k = nz-1, 1, -1
854858
psi3d(k,il,jl) = (d(k) - c(k)*psi3d(k+1,il,jl))/b(k)
855859
end do
856860
end do
@@ -879,18 +883,14 @@ program main
879883
end do
880884
end do
881885
end do
882-
883-
call nvtxEndRange
884886
! update halo nodes with pressure
885-
! Update X-pencil halos
886887
!$acc host_data use_device(p)
887888
CHECK_CUDECOMP_EXIT(cudecompUpdateHalosX(handle, grid_desc, p, work_halo_d, CUDECOMP_DOUBLE, piX%halo_extents, halo_periods, 2))
888889
CHECK_CUDECOMP_EXIT(cudecompUpdateHalosX(handle, grid_desc, p, work_halo_d, CUDECOMP_DOUBLE, piX%halo_extents, halo_periods, 3))
889890
!$acc end host_data
890891
!########################################################################################################################################
891892
! END STEP 7: POISSON SOLVER FOR PRESSURE
892893
!########################################################################################################################################
893-
call nvtxEndRange
894894

895895

896896
call nvtxStartRange("Correction")
@@ -900,7 +900,10 @@ program main
900900
! 8.1 Correct velocity
901901
! 8.2 Call halo update
902902
! Correct velocity, pressure has also the halo
903-
!$acc kernels
903+
umax=0.d0
904+
vmax=0.d0
905+
wmax=0.d0
906+
!$acc parallel loop collapse(3) reduction(max:umax,vmax,wmax)
904907
do k=1+halo_ext, piX%shape(3)-halo_ext
905908
do j=1+halo_ext, piX%shape(2)-halo_ext
906909
do i = 1, piX%shape(1) ! equal to nx (no halo on x)
@@ -909,13 +912,20 @@ program main
909912
km=k-1
910913
kg=piX%lo(3) + k - 1 - halo_ext
911914
if (im < 1) im=nx
915+
if (kg .eq. 1) then
916+
u(i,j,k)=u(i,j,k) - dt/rho*(p(i,j,k)-p(im,j,k))*dxi
917+
v(i,j,k)=v(i,j,k) - dt/rho*(p(i,j,k)-p(i,jm,k))*dyi
918+
else
912919
u(i,j,k)=u(i,j,k) - dt/rho*(p(i,j,k)-p(im,j,k))*dxi
913920
v(i,j,k)=v(i,j,k) - dt/rho*(p(i,j,k)-p(i,jm,k))*dyi
914921
w(i,j,k)=w(i,j,k) - dt/rho*(p(i,j,k)-p(i,j,km))*dzi(kg)
922+
endif
923+
!umax=max(umax,u(i,j,k))
924+
!vmax=max(vmax,v(i,j,k))
925+
!wmax=max(wmax,w(i,j,k))
915926
enddo
916927
enddo
917928
enddo
918-
!$acc end kernels
919929

920930
! 8.3 update halos (y direction), required to then compute the RHS of Poisson equation because of staggered grid
921931
!$acc host_data use_device(u,v,w)
@@ -927,34 +937,7 @@ program main
927937
CHECK_CUDECOMP_EXIT(cudecompUpdateHalosX(handle, grid_desc, w, work_halo_d, CUDECOMP_DOUBLE, piX%halo_extents, halo_periods, 3))
928938
!$acc end host_data
929939

930-
! impose velocity boundary conditions, can be optimized, no real gain
931-
! w is at the wall, u and v interpolate so that the mean value is zero
932-
! no-slip assumted, i.e. u=0, can be extented to any value
933-
umax=0.d0
934-
vmax=0.d0
935-
wmax=0.d0
936-
!$acc parallel loop collapse(3) reduction(max:umax,vmax,wmax)
937-
do k=1, piX%shape(3)
938-
do j=1, piX%shape(2)
939-
do i=1,nx
940-
kg = piX%lo(3) + k - 1 - halo_ext
941-
! bottom wall
942-
if (kg .eq. 1) u(i,j,k-1) = -u(i,j,k) ! mean value between kg and kg-1 (wall) equal to zero
943-
if (kg .eq. 1) v(i,j,k-1) = -v(i,j,k) ! mean value between kg and kg-1 (wall) equal to zero
944-
if (kg .eq. 1) w(i,j,k) = 0.d0 ! w point is at the wall
945-
! top wall
946-
if (kg .eq. nz) u(i,j,k+1) = -u(i,j,k) ! mean value between kg and kg+1 (wall) equal to zero
947-
if (kg .eq. nz) v(i,j,k+1) = -v(i,j,k) ! mean value between kg and kg+1 (wall) equal to zero
948-
if (kg .eq. nz+1) w(i,j,k) = 0.d0 ! w point (nz+1) is at the wall
949-
umax=max(umax,u(i,j,k))
950-
vmax=max(vmax,v(i,j,k))
951-
wmax=max(wmax,w(i,j,k))
952-
enddo
953-
enddo
954-
enddo
955-
956-
957-
maxdiv=0.0d0
940+
maxdiv=0.d0
958941
!$acc parallel loop collapse(3) reduction(max:maxdiv)
959942
do k=1+halo_ext, piX%shape(3)-halo_ext
960943
do j=1+halo_ext, piX%shape(2)-halo_ext
@@ -971,7 +954,6 @@ program main
971954
enddo
972955
write(*,*) "Max divergence after correction ", maxdiv
973956

974-
975957
call MPI_Allreduce(umax,gumax,1,MPI_DOUBLE_PRECISION,MPI_MAX,MPI_COMM_WORLD, ierr)
976958
call MPI_Allreduce(vmax,gvmax,1,MPI_DOUBLE_PRECISION,MPI_MAX,MPI_COMM_WORLD, ierr)
977959
call MPI_Allreduce(wmax,gwmax,1,MPI_DOUBLE_PRECISION,MPI_MAX,MPI_COMM_WORLD, ierr)

0 commit comments

Comments
 (0)