Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions src/bse/K_driver_init.F
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ subroutine K_driver_init(what,iq,Ken,Xk)
use drivers, ONLY:l_rt_carriers_in_use
use RT_control, ONLY:NEQ_Kernel,EQ_Transitions,EQ_NoOcc,NEQ_Residuals,RT_BSE_Occ_Mode
#endif
#if defined _CUDA
#if defined _GPU
use TDDFT, ONLY:FXC_mode
#endif
!
Expand Down Expand Up @@ -173,7 +173,7 @@ subroutine K_driver_init(what,iq,Ken,Xk)
call parser('FxcLibxc',l_Fxc_Libxc)
l_Fxc_from_Vxc=(n_spin==1).and..not.l_Fxc_Libxc
endif
#if defined _CUDA
#if defined _GPU
if ( BS_K_is_alda.and. index(FXC_mode,"G-")>0 ) then
call warning(" Tddft with G-integrals is not GPU ported. Fallback to R-integrals")
FXC_mode="R-def"
Expand Down
8 changes: 4 additions & 4 deletions src/bse/K_exchange_collisions.F
Original file line number Diff line number Diff line change
Expand Up @@ -82,15 +82,15 @@ subroutine K_exchange_collisions(iq,Xk,i_T_grp,NG,l_bs_exch_wf_in_loop)
!
l_load_WFs= l_bs_exch_wf_in_loop .and. (NK(1)/=min(i_k,i_p).or.NK(2)/=max(i_k,i_p))
if (l_load_WFs) then
#if defined(__NOTNOW) && ! defined(_CUDA)
!$omp critical
#if defined(__NOTNOW)
!DEV_OMP critical
#endif
if (NK(2)/=-1) call WF_free(WF,keep_fft=.true.,keep_states_to_load=.true.)
NK=(/min(i_k,i_p),max(i_k,i_p)/)
call WF_load(WF,NG(1),NG(2),BS_bands,NK,k_extrema_only=.true.,quiet=.true.,&
& space='R',title="Kernel exch",keep_states_to_load=.true.)
#if defined(__NOTNOW) && ! defined(_CUDA)
!$omp end critical
#if defined(__NOTNOW)
!DEV_OMP end critical
#endif
endif
!
Expand Down
10 changes: 6 additions & 4 deletions src/bse/K_kernel.F
Original file line number Diff line number Diff line change
Expand Up @@ -567,7 +567,6 @@ subroutine K_kernel(iq,Ken,Xk,q,X,Xw,W_bss)
!
call OPENMP_update(master_thread)
!
!
! Allocate tddft_wf variables needed within the OMP loop
if (l_tddft_rsum.and.iHxc==2) then
YAMBO_ALLOC(tddft_wf%WF_symm1,(fft_size,n_spinor))
Expand Down Expand Up @@ -709,11 +708,14 @@ subroutine K_kernel(iq,Ken,Xk,q,X,Xw,W_bss)
!
if (iHxc==3) then
!
if (iq_W_bz/=iq_W_bz_mq.or.iq_W/=iq_W_mq.or.iq_W_s/=iq_W_s_mq) call error("Wrong transferred momentum")
if (iq_W_bz/=iq_W_bz_mq.or.iq_W/=iq_W_mq.or.iq_W_s/=iq_W_s_mq) &
& call error("Wrong transferred momentum")
!
!if ( G_m_G(ig_W,ig_W_mq) /= G_m_G(ig_kmq,ig_pmq) ) call error("Wrong gvector shifts")
!if ( G_m_G(ig_W,ig_W_mq) /= G_m_G(ig_kmq,ig_pmq) ) &
!& call error("Wrong gvector shifts")
!
if ( (.not.BS_W_is_diagonal) .and. iq_W_s>nsym/(i_time_rev+1) .and. i_space_inv == 0 ) iq_W=q%nibz+iq_W
if ( (.not.BS_W_is_diagonal) .and. iq_W_s>nsym/(i_time_rev+1) &
& .and. i_space_inv == 0 ) iq_W=q%nibz+iq_W
!
endif
!
Expand Down
52 changes: 42 additions & 10 deletions src/coulomb/rim.F
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,9 @@
!
! Copyright (C) 2006 The Yambo Team
!
! Authors (see AUTHORS file for details): AM
! Authors (see AUTHORS file for details): AM AF NS GS
!
#include<dev_defs.h>
!
subroutine rim(mode,Xw)
!
Expand All @@ -17,10 +19,12 @@ subroutine rim(mode,Xw)
use R_lattice, ONLY:RL_vol,k_grid_uc_vol,k_grid_b,nqbz,&
& nqibz,RIM_epsm1,RIM_is_diagonal,RIM_RL_vol,&
& RIM_n_rand_pts,RIM_ng,RIM_W_ng,RIM_qpg,RIM_W_is_diagonal,&
& RIM_id_epsm1_reference,RIM_anisotropy,RIM_W,&
& cut_is_slab,idir
& RIM_id_epsm1_reference,RIM_anisotropy,RIM_W,RIM_W_d,aux_RIM_W,& ! aux_RIM_W_d,&
& cut_is_slab,idir,f_coeff,f_coeff_d
use timing_m, ONLY:timing
use frequency, ONLY:w_samp
use gpu_m, ONLY:have_gpu
use devxlib, ONLY:devxlib_memcpy_h2d
!
#include<y_memory.h>
!
Expand All @@ -29,19 +33,20 @@ subroutine rim(mode,Xw)
!
! Work Space
!
type(PP_indexes) ::px
integer :: iq
real(SP) :: em1_anis(3),G_radii,G_circ
type(PP_indexes) :: px
integer :: iq
real(SP) :: em1_anis(3),G_radii,G_circ
!
! Random generator
!
real(SP) :: first_neighbour(26,3),fn_dist(26)
integer :: N_out,N_in,N_out_G,rep_factor,inn1,inn2,inn3,ic
real(SP) :: v1(3),v2(3),v1_norm(2)
real(SP), allocatable :: qr(:,:)
real(SP), allocatable DEV_ATTR :: qr_d(:,:)
integer :: iseed(8)
real(DP), external :: dlaran
!
!
if (mode == "x") call timing('RIM',OPR='start')
if (mode == "c") call timing('RIM-W',OPR='start')
!
Expand All @@ -53,6 +58,9 @@ subroutine rim(mode,Xw)
! the bare part is embodied in the exchange.
!
YAMBO_ALLOC(qr,(3,RIM_n_rand_pts))
if (have_gpu) then
YAMBO_ALLOC_GPU(DEV_VAR(qr),(3,RIM_n_rand_pts))
endif
!
em1_anis=RIM_epsm1(:)-1.
!
Expand Down Expand Up @@ -146,6 +154,10 @@ subroutine rim(mode,Xw)
!Only the 2D-BZ is sampled for 2D systems
if (cut_is_slab) qr(idir(1),:) = 0._SP
!
! init on GPU mem
!
if (have_gpu) call devxlib_memcpy_h2d(DEV_VAR(qr),qr)
!
call live_timing()
call msg('r','Points outside the sBZ ',N_out)
!
Expand All @@ -166,7 +178,16 @@ subroutine rim(mode,Xw)
else if (mode == "c") then
YAMBO_ALLOC(RIM_W,(Xw%n_freqs,nqibz,RIM_W_ng,RIM_W_ng))
RIM_W=0._SP
end if
!
! AF: need to cp f_coeff on GPU mem before calling rim_integrate_w
!
YAMBO_ALLOC(aux_RIM_W,(Xw%n_freqs,RIM_W_ng,RIM_W_ng))
#ifdef _GPU
!AMBO_ALLOC_GPU_SOURCE(DEV_VAR(aux_RIM_W),aux_RIM_W)
YAMBO_ALLOC_GPU_SOURCE(DEV_VAR(RIM_W),RIM_W)
YAMBO_ALLOC_GPU_SOURCE(DEV_VAR(f_coeff),f_coeff)
#endif
endif
!
call PARALLEL_index(px,(/nqibz/))
call live_timing('Momenta loop',px%n_of_elements(myid+1))
Expand All @@ -176,8 +197,10 @@ subroutine rim(mode,Xw)
!
if (mode == "x") then
call rim_integrate_v(iq,qr,em1_anis,N_out,N_out_G,G_radii,G_circ)
else
call rim_integrate_w(iq,qr,N_out,em1_anis,Xw)
else if (mode == "c") then
!
call rim_integrate_w(iq,qr,qr_d,N_out,em1_anis,Xw)
!
endif
call live_timing(steps=1)
!
Expand Down Expand Up @@ -210,7 +233,16 @@ subroutine rim(mode,Xw)
! CLEAN
!
call PP_indexes_reset(px)
!
if (mode == "c") then
YAMBO_FREE(aux_RIM_W)
#ifdef _GPU
YAMBO_FREE_GPU(DEV_VAR(f_coeff))
YAMBO_FREE_GPU(DEV_VAR(qr))
!AMBO_FREE_GPU(DEV_VAR(aux_RIM_W))
#endif
YAMBO_FREE(qr)
end if
!
if (mode == "x") call timing('RIM',OPR='stop')
if (mode == "c") call timing('RIM-W',OPR='stop')
Expand Down
Loading