Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
61 changes: 56 additions & 5 deletions source/module_hamilt_pw/hamilt_pwdft/forces.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
#include "module_hamilt_general/module_surchem/surchem.h"
#include "module_hamilt_general/module_vdw/vdw.h"
#include "kernels/force_op.h"

#include <type_traits>
#ifdef _OPENMP
#include <omp.h>
#endif
Expand Down Expand Up @@ -579,7 +579,7 @@ void Forces<FPTYPE, Device>::cal_force_loc(const UnitCell& ucell,
syncmem_var_h2d_op()(this->ctx, this->cpu_ctx, forcelc_d, forcelc.c, this->nat * 3);
syncmem_var_h2d_op()(this->ctx, this->cpu_ctx, vloc_d, vloc.c, vloc.nr * vloc.nc);

hamilt::cal_force_loc_op<FPTYPE, Device>()(
/* hamilt::cal_force_loc_op<FPTYPE, Device>()(
this->nat,
rho_basis->npw,
ucell.tpiba * ucell.omega,
Expand All @@ -590,7 +590,34 @@ void Forces<FPTYPE, Device>::cal_force_loc(const UnitCell& ucell,
aux_d,
vloc_d,
vloc.nc,
forcelc_d);
forcelc_d);*/
if constexpr (std::is_same<Device, base_device::DEVICE_GPU>::value) {
hamilt::cal_force_loc_sincos_op<FPTYPE, Device>()(
this->ctx,
this->nat,
rho_basis->npw,
ucell.ntype,
gcar_d,
tau_d,
vloc_d,
aux_d,
static_cast<FPTYPE>(ucell.tpiba * ucell.omega),
forcelc_d);
} else {
hamilt::cal_force_loc_op<FPTYPE, Device>()(
this->nat,
rho_basis->npw,
ucell.tpiba * ucell.omega,
iat2it_d,
ig2gg_d,
gcar_d,
tau_d,
aux_d,
vloc_d,
vloc.nc,
forcelc_d);
}

syncmem_var_d2h_op()(this->cpu_ctx, this->ctx, forcelc.c, forcelc_d, this->nat * 3);

delmem_int_op()(this->ctx,iat2it_d);
Expand Down Expand Up @@ -788,7 +815,7 @@ void Forces<FPTYPE, Device>::cal_force_ew(const UnitCell& ucell,
syncmem_complex_h2d_op()(this->ctx, this->cpu_ctx, aux_d, aux.data(), rho_basis->npw);
syncmem_var_h2d_op()(this->ctx, this->cpu_ctx, forceion_d, forceion.c, this->nat * 3);

hamilt::cal_force_ew_op<FPTYPE, Device>()(
/* hamilt::cal_force_ew_op<FPTYPE, Device>()(
this->nat,
rho_basis->npw,
rho_basis->ig_gge0,
Expand All @@ -798,7 +825,31 @@ void Forces<FPTYPE, Device>::cal_force_ew(const UnitCell& ucell,
it_fact_d,
aux_d,
forceion_d);

*/
if constexpr (std::is_same<Device, base_device::DEVICE_GPU>::value) {
hamilt::cal_force_ew_sincos_op<FPTYPE, Device>()(
this->ctx,
this->nat,
rho_basis->npw,
rho_basis->ig_gge0,
gcar_d,
tau_d,
it_fact_d,
aux_d,
forceion_d);
} else {
hamilt::cal_force_ew_op<FPTYPE, Device>()(
this->nat,
rho_basis->npw,
rho_basis->ig_gge0,
iat2it_d,
gcar_d,
tau_d,
it_fact_d,
aux_d,
forceion_d);
}

syncmem_var_d2h_op()(this->cpu_ctx, this->ctx, forceion.c, forceion_d, this->nat * 3);
delmem_int_op()(this->ctx,iat2it_d);
delmem_var_op()(this->ctx,gcar_d);
Expand Down
31 changes: 30 additions & 1 deletion source/module_hamilt_pw/hamilt_pwdft/kernels/force_op.h
Original file line number Diff line number Diff line change
Expand Up @@ -179,6 +179,9 @@ struct cal_force_ew_op{
FPTYPE* forceion
) {};
};

template <typename FPTYPE, typename Device> struct cal_force_loc_sincos_op;
template <typename FPTYPE, typename Device> struct cal_force_ew_sincos_op;
#if __CUDA || __UT_USE_CUDA || __ROCM || __UT_USE_ROCM
template <typename FPTYPE>
struct cal_vkb1_nl_op<FPTYPE, base_device::DEVICE_GPU>
Expand Down Expand Up @@ -335,6 +338,32 @@ struct cal_force_ew_op<FPTYPE, base_device::DEVICE_GPU>{
FPTYPE* forceion
);
};
template <typename FPTYPE>
struct cal_force_loc_sincos_op<FPTYPE, base_device::DEVICE_GPU> {
void operator()(const base_device::DEVICE_GPU* ctx,
const int& nat,
const int& npw,
const int& ntype,
const FPTYPE* gcar,
const FPTYPE* tau,
const FPTYPE* vloc_per_type,
const std::complex<FPTYPE>* aux,
const FPTYPE& scale_factor,
FPTYPE* force);
};

template <typename FPTYPE>
struct cal_force_ew_sincos_op<FPTYPE, base_device::DEVICE_GPU> {
void operator()(const base_device::DEVICE_GPU* ctx,
const int& nat,
const int& npw,
const int& ig_gge0,
const FPTYPE* gcar,
const FPTYPE* tau,
const FPTYPE* it_facts,
const std::complex<FPTYPE>* aux,
FPTYPE* force);
};
#endif // __CUDA || __UT_USE_CUDA || __ROCM || __UT_USE_ROCM
} // namespace hamilt
#endif // W_ABACUS_DEVELOP_ABACUS_DEVELOP_SOURCE_source_pw_HAMILT_PWDFT_KERNELS_FORCE_OP_H
#endif // W_ABACUS_DEVELOP_ABACUS_DEVELOP_SOURCE_source_pw_HAMILT_PWDFT_KERNELS_FORCE_OP_H
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,8 @@

namespace hamilt {

__device__ __forceinline__ void sincos_(float x, float* s, float* c) { sincosf(x, s, c); }
__device__ __forceinline__ void sincos_(double x, double* s, double* c) { sincos(x, s, c); }
template <typename FPTYPE>
__global__ void cal_vkb1_nl(
const int npwx,
Expand Down Expand Up @@ -658,7 +660,7 @@ __global__ void cal_force_loc_sincos_kernel(

// Use HIP intrinsic for sincos
FPTYPE sinp, cosp;
sincos(phase, &sinp, &cosp);
sincos_(phase, &sinp, &cosp);

// Calculate force factor
const FPTYPE vloc_factor = vloc_per_type[iat * npw + ig];
Expand Down Expand Up @@ -718,7 +720,7 @@ __global__ void cal_force_ew_sincos_kernel(

// Use HIP intrinsic for sincos
FPTYPE sinp, cosp;
sincos(phase, &sinp, &cosp);
sincos_(phase, &sinp, &cosp);

// Calculate Ewald sum contribution (fixed sign error)
const FPTYPE factor = it_fact * (-cosp * aux[ig].imag() + sinp * aux[ig].real());
Expand Down
12 changes: 6 additions & 6 deletions source/module_hsolver/kernels/rocm/dngvd_op.hip.cu
Original file line number Diff line number Diff line change
Expand Up @@ -128,8 +128,8 @@ void dngvd_op<std::complex<float>, base_device::DEVICE_GPU>::operator()(const ba
hipsolverErrcheck(hipsolverDnChegvd_bufferSize(
hipsolver_H, HIPSOLVER_EIG_TYPE_1, HIPSOLVER_EIG_MODE_VECTOR, uplo,
nstart,
reinterpret_cast<const float2 *>(_vcc), ldh,
reinterpret_cast<const float2 *>(_scc), ldh,
const_cast<float2*>(reinterpret_cast<const float2 *>(_vcc)), ldh,
const_cast<float2*>(reinterpret_cast<const float2 *>(_scc)), ldh,
_eigenvalue,
&lwork));

Expand All @@ -140,7 +140,7 @@ void dngvd_op<std::complex<float>, base_device::DEVICE_GPU>::operator()(const ba
hipsolverErrcheck(hipsolverDnChegvd(
hipsolver_H, HIPSOLVER_EIG_TYPE_1, HIPSOLVER_EIG_MODE_VECTOR, uplo,
nstart,
reinterpret_cast<float2 *>(_vcc), ldh,
const_cast<float2*>(reinterpret_cast<float2 *>(_vcc)), ldh,
const_cast<float2 *>(reinterpret_cast<const float2 *>(_scc)), ldh,
_eigenvalue,
work, lwork, devInfo));
Expand Down Expand Up @@ -206,8 +206,8 @@ void dngvd_op<std::complex<double>, base_device::DEVICE_GPU>::operator()(const b
hipsolverErrcheck(hipsolverDnZhegvd_bufferSize(
hipsolver_H, HIPSOLVER_EIG_TYPE_1, HIPSOLVER_EIG_MODE_VECTOR, uplo,
nstart,
reinterpret_cast<const double2 *>(_vcc), ldh,
reinterpret_cast<const double2 *>(_scc), ldh,
const_cast<double2*>(reinterpret_cast<const double2 *>(_vcc)), ldh,
const_cast<double2*>(reinterpret_cast<const double2 *>(_scc)), ldh,
_eigenvalue,
&lwork));

Expand Down Expand Up @@ -365,4 +365,4 @@ void dngvx_op<double, base_device::DEVICE_GPU>::operator()(const base_device::DE
}
#endif // __LCAO

} // namespace hsolver
} // namespace hsolver
Loading