From 0940c98a3b8d2b3fba335c7d0fbf55d44242776a Mon Sep 17 00:00:00 2001 From: Andrea Valassi Date: Mon, 29 Jan 2024 17:15:03 +0100 Subject: [PATCH 01/47] [rocrand] in gg_tt.mad, first no-brainer addition of Rocrand support: just do EXACTLY as for Curand, but replace Curand by Rocrand (add RocrandRandomNumberKernel.cc, modify check_sa.cc, RandomNumberKernels.h, mgOnGpuConfig.h) --- .../P1_gg_ttx/RocrandRandomNumberKernel.cc | 1 + .../SubProcesses/P1_gg_ttx/check_sa.cc | 4 +- .../SubProcesses/RandomNumberKernels.h | 47 +++++- .../SubProcesses/RocrandRandomNumberKernel.cc | 134 ++++++++++++++++++ epochX/cudacpp/gg_tt.mad/src/mgOnGpuConfig.h | 18 ++- 5 files changed, 199 insertions(+), 5 deletions(-) create mode 120000 epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/RocrandRandomNumberKernel.cc create mode 100644 epochX/cudacpp/gg_tt.mad/SubProcesses/RocrandRandomNumberKernel.cc diff --git a/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/RocrandRandomNumberKernel.cc b/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/RocrandRandomNumberKernel.cc new file mode 120000 index 0000000000..ab1b0c4ce1 --- /dev/null +++ b/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/RocrandRandomNumberKernel.cc @@ -0,0 +1 @@ +../RocrandRandomNumberKernel.cc \ No newline at end of file diff --git a/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/check_sa.cc b/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/check_sa.cc index aab490dc5b..848ba4680d 100644 --- a/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/check_sa.cc +++ b/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/check_sa.cc @@ -1,10 +1,10 @@ // Copyright (C) 2010 The MadGraph5_aMC@NLO development team and contributors. // Created by: J. Alwall (Oct 2010) for the MG5aMC CPP backend. //========================================================================== -// Copyright (C) 2020-2023 CERN and UCLouvain. +// Copyright (C) 2020-2024 CERN and UCLouvain. // Licensed under the GNU Lesser General Public License (version 3 or later). 
// Modified by: O. Mattelaer (Nov 2020) for the MG5aMC CUDACPP plugin. -// Further modified by: S. Hageboeck, O. Mattelaer, S. Roiser, J. Teig, A. Valassi (2020-2023) for the MG5aMC CUDACPP plugin. +// Further modified by: S. Hageboeck, O. Mattelaer, S. Roiser, J. Teig, A. Valassi (2020-2024) for the MG5aMC CUDACPP plugin. //========================================================================== #include "mgOnGpuConfig.h" diff --git a/epochX/cudacpp/gg_tt.mad/SubProcesses/RandomNumberKernels.h b/epochX/cudacpp/gg_tt.mad/SubProcesses/RandomNumberKernels.h index 21d63beeac..e50fe7c494 100644 --- a/epochX/cudacpp/gg_tt.mad/SubProcesses/RandomNumberKernels.h +++ b/epochX/cudacpp/gg_tt.mad/SubProcesses/RandomNumberKernels.h @@ -1,7 +1,7 @@ -// Copyright (C) 2020-2023 CERN and UCLouvain. +// Copyright (C) 2020-2024 CERN and UCLouvain. // Licensed under the GNU Lesser General Public License (version 3 or later). // Created by: A. Valassi (Dec 2021) for the MG5aMC CUDACPP plugin. -// Further modified by: J. Teig, A. Valassi (2021-2023) for the MG5aMC CUDACPP plugin. +// Further modified by: J. Teig, A. Valassi (2021-2024) for the MG5aMC CUDACPP plugin. 
#ifndef RANDOMNUMBERKERNELS_H #define RANDOMNUMBERKERNELS_H 1 @@ -146,6 +146,49 @@ namespace mg5amcCpu curandGenerator_st* m_rnGen; }; +#endif + + //-------------------------------------------------------------------------- + +#ifndef MGONGPU_HAS_NO_ROCRAND + // A class encapsulating ROCRAND random number generation on a CPU host or on a GPU device + class RocrandRandomNumberKernel final : public RandomNumberKernelBase + { + public: + + // Constructor from an existing output buffer + RocrandRandomNumberKernel( BufferRndNumMomenta& rnarray, const bool onDevice ); + + // Destructor + ~RocrandRandomNumberKernel(); + + // Seed the random number generator + void seedGenerator( const unsigned int seed ) override final; + + // Generate the random number array + void generateRnarray() override final; + + // Is this a host or device kernel? + bool isOnDevice() const override final { return m_isOnDevice; } + + private: + + // Create the generator (workaround for #429: do this in every seedGenerator call rather than only in the ctor) + void createGenerator(); + + // Destroy the generator (workaround for #429: do this in every seedGenerator call rather than only in the ctor) + void destroyGenerator(); + + private: + + // Is this a host or device kernel? + const bool m_isOnDevice; + + // The rocrand generator + // (NB: rocrand.h defines typedef generator_t as a pointer to forward-defined 'struct rocrandGenerator_st') + rocrandGenerator_st* m_rnGen; + }; + #endif //-------------------------------------------------------------------------- diff --git a/epochX/cudacpp/gg_tt.mad/SubProcesses/RocrandRandomNumberKernel.cc b/epochX/cudacpp/gg_tt.mad/SubProcesses/RocrandRandomNumberKernel.cc new file mode 100644 index 0000000000..a23c877bff --- /dev/null +++ b/epochX/cudacpp/gg_tt.mad/SubProcesses/RocrandRandomNumberKernel.cc @@ -0,0 +1,134 @@ +// Copyright (C) 2020-2024 CERN and UCLouvain. +// Licensed under the GNU Lesser General Public License (version 3 or later). 
+// Created by: A. Valassi (Jan 2024) for the MG5aMC CUDACPP plugin. +// Further modified by: A. Valassi (2024) for the MG5aMC CUDACPP plugin. + +#include "GpuRuntime.h" +#include "MemoryBuffers.h" +#include "RandomNumberKernels.h" + +#include + +#ifndef MGONGPU_HAS_NO_ROCRAND /* clang-format off */ +#include +#define checkRocrand( code ){ assertRocrand( code, __FILE__, __LINE__ ); } +inline void assertRocrand( rocrandStatus_t code, const char *file, int line, bool abort = true ) +{ + if ( code != ROCRAND_STATUS_SUCCESS ) + { + printf( "RocrandAssert: %s:%d code=%d\n", file, line, code ); + if ( abort ) assert( code == ROCRAND_STATUS_SUCCESS ); + } +} +#endif /* clang-format on */ + +#ifdef MGONGPUCPP_GPUIMPL +namespace mg5amcGpu +#else +namespace mg5amcCpu +#endif +{ + //-------------------------------------------------------------------------- +#ifndef MGONGPU_HAS_NO_ROCRAND + RocrandRandomNumberKernel::RocrandRandomNumberKernel( BufferRndNumMomenta& rnarray, const bool onDevice ) + : RandomNumberKernelBase( rnarray ) + , m_isOnDevice( onDevice ) + { + if( m_isOnDevice ) + { +#ifdef MGONGPUCPP_GPUIMPL + if( !m_rnarray.isOnDevice() ) + throw std::runtime_error( "RocrandRandomNumberKernel on device with a host random number array" ); +#else + throw std::runtime_error( "RocrandRandomNumberKernel does not support RocrandDevice on CPU host" ); +#endif + } + else + { + if( m_rnarray.isOnDevice() ) + throw std::runtime_error( "RocrandRandomNumberKernel on host with a device random number array" ); + } + createGenerator(); + } + + //-------------------------------------------------------------------------- + + RocrandRandomNumberKernel::~RocrandRandomNumberKernel() + { + destroyGenerator(); + } + + //-------------------------------------------------------------------------- + + void RocrandRandomNumberKernel::seedGenerator( const unsigned int seed ) + { + if( m_isOnDevice ) + { + destroyGenerator(); // workaround for #429 + createGenerator(); // workaround for #429 + } + 
//printf( "seedGenerator: seed %d\n", seed ); + checkRocrand( rocrandSetPseudoRandomGeneratorSeed( m_rnGen, seed ) ); + } + + //-------------------------------------------------------------------------- + + void RocrandRandomNumberKernel::createGenerator() + { + // [NB Timings are for GenRnGen host|device (cpp|cuda) generation of 256*32*1 events with nproc=1: rn(0) is host=0.0012s] + const rocrandRngType_t type = ROCRAND_RNG_PSEUDO_MTGP32; // 0.00082s | 0.00064s (FOR FAST TESTS) + //const rocrandRngType_t type = ROCRAND_RNG_PSEUDO_XORWOW; // 0.049s | 0.0016s + //const rocrandRngType_t type = ROCRAND_RNG_PSEUDO_MRG32K3A; // 0.71s | 0.0012s (better but slower, especially in c++) + //const rocrandRngType_t type = ROCRAND_RNG_PSEUDO_MT19937; // 21s | 0.021s + //const rocrandRngType_t type = ROCRAND_RNG_PSEUDO_PHILOX4_32_10; // 0.024s | 0.00026s (used to segfault?) + if( m_isOnDevice ) + { + checkRocrand( rocrandCreateGenerator( &m_rnGen, type ) ); + } + else + { + checkRocrand( rocrandCreateGeneratorHost( &m_rnGen, type ) ); + } + //checkRocrand( rocrandSetGeneratorOrdering( *&m_rnGen, ROCRAND_ORDERING_PSEUDO_LEGACY ) ); // fails with code=104 (see #429) + checkRocrand( rocrandSetGeneratorOrdering( *&m_rnGen, ROCRAND_ORDERING_PSEUDO_BEST ) ); + //checkRocrand( rocrandSetGeneratorOrdering( *&m_rnGen, ROCRAND_ORDERING_PSEUDO_DYNAMIC ) ); // fails with code=104 (see #429) + //checkRocrand( rocrandSetGeneratorOrdering( *&m_rnGen, ROCRAND_ORDERING_PSEUDO_SEEDED ) ); // fails with code=104 (see #429) + } + + //-------------------------------------------------------------------------- + + void RocrandRandomNumberKernel::destroyGenerator() + { + checkRocrand( rocrandDestroyGenerator( m_rnGen ) ); + } + + //-------------------------------------------------------------------------- + + void RocrandRandomNumberKernel::generateRnarray() + { +#if defined MGONGPU_FPTYPE_DOUBLE + checkRocrand( rocrandGenerateUniformDouble( m_rnGen, m_rnarray.data(), m_rnarray.size() ) ); +#elif 
defined MGONGPU_FPTYPE_FLOAT + checkRocrand( rocrandGenerateUniform( m_rnGen, m_rnarray.data(), m_rnarray.size() ) ); +#endif + /* + printf( "\nRocrandRandomNumberKernel::generateRnarray size = %d\n", (int)m_rnarray.size() ); + fptype* data = m_rnarray.data(); +#ifdef MGONGPUCPP_GPUIMPL + if( m_rnarray.isOnDevice() ) + { + data = new fptype[m_rnarray.size()](); + checkCuda( cudaMemcpy( data, m_rnarray.data(), m_rnarray.bytes(), cudaMemcpyDeviceToHost ) ); + } +#endif + for( int i = 0; i < ( (int)m_rnarray.size() / 4 ); i++ ) + printf( "[%4d] %f %f %f %f\n", i * 4, data[i * 4], data[i * 4 + 1], data[i * 4 + 2], data[i * 4 + 3] ); +#ifdef MGONGPUCPP_GPUIMPL + if( m_rnarray.isOnDevice() ) delete[] data; +#endif + */ + } + + //-------------------------------------------------------------------------- +#endif +} diff --git a/epochX/cudacpp/gg_tt.mad/src/mgOnGpuConfig.h b/epochX/cudacpp/gg_tt.mad/src/mgOnGpuConfig.h index 69cee0085b..42ea924047 100644 --- a/epochX/cudacpp/gg_tt.mad/src/mgOnGpuConfig.h +++ b/epochX/cudacpp/gg_tt.mad/src/mgOnGpuConfig.h @@ -24,7 +24,7 @@ // ** NB2 Baseline on b7g47n0004 fluctuates (probably depends on load on other VMs) // Choose if curand is supported for generating random numbers -// For HIP, by default, do not use curand (common random numbers will be used instead) +// For HIP, by default, do not allow curand to be used (rocrand or common random numbers will be used instead) // For both CUDA and C++, by default, do not inline, but allow this macro to be set from outside with e.g. -DMGONGPU_HAS_NO_CURAND // (there exist CUDA installations, e.g. 
using the HPC package, which do not include curand - see PR #784 and #785) #if defined __HIPCC__ @@ -39,6 +39,22 @@ //#endif #endif +// Choose if rocrand is supported for generating random numbers +// For CUDA, by default, do not allow rocrand to be used (curand or common random numbers will be used instead) +// For both HIP and C++, by default, do not inline, but allow this macro to be set from outside with e.g. -DMGONGPU_HAS_NO_ROCRAND +// (there may exist HIP installations which do not include rocrand?) +#if defined __CUDACC__ +#define MGONGPU_HAS_NO_ROCRAND 1 +#else +//#ifdef __HIPCC__ +//#undef MGONGPU_HAS_NO_ROCRAND // default +////#define MGONGPU_HAS_NO_ROCRAND 1 +//#else +//#undef MGONGPU_HAS_NO_ROCRAND // default +////#define MGONGPU_HAS_NO_ROCRAND 1 +//#endif +#endif + // Choose floating point precision (for everything but color algebra #537) // If one of these macros has been set from outside with e.g. -DMGONGPU_FPTYPE_FLOAT, nothing happens (issue #167) #if not defined MGONGPU_FPTYPE_DOUBLE and not defined MGONGPU_FPTYPE_FLOAT From 6ed1a95acfb4cc54e6f1328801ea2c9ebf7b23ec Mon Sep 17 00:00:00 2001 From: Andrea Valassi Date: Mon, 29 Jan 2024 17:57:59 +0100 Subject: [PATCH 02/47] [rocrand] in gg_tt.mad cudacpp.mk, clean up (remove extra spaces) --- epochX/cudacpp/gg_tt.mad/SubProcesses/cudacpp.mk | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/epochX/cudacpp/gg_tt.mad/SubProcesses/cudacpp.mk b/epochX/cudacpp/gg_tt.mad/SubProcesses/cudacpp.mk index 117edc1782..2a9768b591 100644 --- a/epochX/cudacpp/gg_tt.mad/SubProcesses/cudacpp.mk +++ b/epochX/cudacpp/gg_tt.mad/SubProcesses/cudacpp.mk @@ -432,13 +432,13 @@ CXXFLAGS+= $(AVXFLAGS) $(info FPTYPE=$(FPTYPE)) ifeq ($(FPTYPE),d) CXXFLAGS += -DMGONGPU_FPTYPE_DOUBLE -DMGONGPU_FPTYPE2_DOUBLE - GPUFLAGS += -DMGONGPU_FPTYPE_DOUBLE -DMGONGPU_FPTYPE2_DOUBLE + GPUFLAGS += -DMGONGPU_FPTYPE_DOUBLE -DMGONGPU_FPTYPE2_DOUBLE else ifeq ($(FPTYPE),f) CXXFLAGS += -DMGONGPU_FPTYPE_FLOAT 
-DMGONGPU_FPTYPE2_FLOAT - GPUFLAGS += -DMGONGPU_FPTYPE_FLOAT -DMGONGPU_FPTYPE2_FLOAT + GPUFLAGS += -DMGONGPU_FPTYPE_FLOAT -DMGONGPU_FPTYPE2_FLOAT else ifeq ($(FPTYPE),m) CXXFLAGS += -DMGONGPU_FPTYPE_DOUBLE -DMGONGPU_FPTYPE2_FLOAT - GPUFLAGS += -DMGONGPU_FPTYPE_DOUBLE -DMGONGPU_FPTYPE2_FLOAT + GPUFLAGS += -DMGONGPU_FPTYPE_DOUBLE -DMGONGPU_FPTYPE2_FLOAT else $(error Unknown FPTYPE='$(FPTYPE)': only 'd', 'f' and 'm' are supported) endif @@ -447,7 +447,7 @@ endif $(info HELINL=$(HELINL)) ifeq ($(HELINL),1) CXXFLAGS += -DMGONGPU_INLINE_HELAMPS - GPUFLAGS += -DMGONGPU_INLINE_HELAMPS + GPUFLAGS += -DMGONGPU_INLINE_HELAMPS else ifneq ($(HELINL),0) $(error Unknown HELINL='$(HELINL)': only '0' and '1' are supported) endif @@ -456,7 +456,7 @@ endif $(info HRDCOD=$(HRDCOD)) ifeq ($(HRDCOD),1) CXXFLAGS += -DMGONGPU_HARDCODE_PARAM - GPUFLAGS += -DMGONGPU_HARDCODE_PARAM + GPUFLAGS += -DMGONGPU_HARDCODE_PARAM else ifneq ($(HRDCOD),0) $(error Unknown HRDCOD='$(HRDCOD)': only '0' and '1' are supported) endif From 7e7110b89f9b859945748d80edc6057f0cf3555f Mon Sep 17 00:00:00 2001 From: Andrea Valassi Date: Mon, 29 Jan 2024 18:51:08 +0100 Subject: [PATCH 03/47] [rocrand] in gg_tt.mad cudacpp_src.mk, replace RNDGEN by HASCURAND and HASROCRAND (for TAG), and remove CXXFLAGS based on that as there is no #ifdef HASMGONGPU_HAS_NO_CURAND in any source code file in src) --- epochX/cudacpp/gg_tt.mad/src/cudacpp_src.mk | 17 +++++------------ 1 file changed, 5 insertions(+), 12 deletions(-) diff --git a/epochX/cudacpp/gg_tt.mad/src/cudacpp_src.mk b/epochX/cudacpp/gg_tt.mad/src/cudacpp_src.mk index 159e19a46d..99fd00c01b 100644 --- a/epochX/cudacpp/gg_tt.mad/src/cudacpp_src.mk +++ b/epochX/cudacpp/gg_tt.mad/src/cudacpp_src.mk @@ -45,13 +45,13 @@ endif #------------------------------------------------------------------------------- -#=== Configure the CUDA compiler (note: GPUCC is already exported including ccache) +#=== Configure the CUDA compiler (note: GPUCC have been exported from cudacpp.mk 
including ccache) ###$(info GPUCC=$(GPUCC)) #------------------------------------------------------------------------------- -#=== Configure ccache for C++ builds (note: GPUCC is already exported including ccache) +#=== Configure ccache for C++ builds (note: GPUCC has been exported from cudacpp.mk including ccache) # Enable ccache if USECCACHE=1 ifeq ($(USECCACHE)$(shell echo $(CXX) | grep ccache),1) @@ -86,7 +86,8 @@ endif #------------------------------------------------------------------------------- -#=== Set the CUDA/C++ compiler flags appropriate to user-defined choices of AVX, FPTYPE, HELINL, HRDCOD, RNDGEN +#=== Set the CUDA/C++ compiler flags appropriate to user-defined choices of AVX, FPTYPE, HELINL, HRDCOD (exported from cudacpp.mk) +#=== (NB the RNDCXXFLAGS and RNDLIBFLAGS appropriate to user-defined choices of HASCURAND and HASROCRAND have been exported from cudacpp.mk) # Set the build flags appropriate to OMPFLAGS ###$(info OMPFLAGS=$(OMPFLAGS)) @@ -175,14 +176,6 @@ else ifneq ($(HRDCOD),0) $(error Unknown HRDCOD='$(HRDCOD)': only '0' and '1' are supported) endif -# Set the build flags appropriate to each RNDGEN choice (example: "make RNDGEN=hasNoCurand") -###$(info RNDGEN=$(RNDGEN)) -ifeq ($(RNDGEN),hasNoCurand) - CXXFLAGS += -DMGONGPU_HAS_NO_CURAND -else ifneq ($(RNDGEN),hasCurand) - $(error Unknown RNDGEN='$(RNDGEN)': only 'hasCurand' and 'hasNoCurand' are supported) -endif - #------------------------------------------------------------------------------- #=== Configure build directories and build lockfiles === @@ -193,7 +186,7 @@ override DIRTAG = $(AVX)_$(FPTYPE)_inl$(HELINL)_hrd$(HRDCOD) # Build lockfile "full" tag (defines full specification of build options that cannot be intermixed) # (Rationale: avoid mixing of CUDA and no-CUDA environment builds with different random number generators) -override TAG = $(AVX)_$(FPTYPE)_inl$(HELINL)_hrd$(HRDCOD)_$(RNDGEN) +override TAG = $(AVX)_$(FPTYPE)_inl$(HELINL)_hrd$(HRDCOD)_$(HASCURAND)_$(HASROCRAND) # 
Build directory: current directory by default, or build.$(DIRTAG) if USEBUILDDIR==1 ###$(info Current directory is $(shell pwd)) From 8bfd3d186a260bc4e074712841ba2a535808261c Mon Sep 17 00:00:00 2001 From: Andrea Valassi Date: Mon, 29 Jan 2024 18:53:08 +0100 Subject: [PATCH 04/47] [rocrand] in gg_tt.mad cudacpp.mk, replace RNDGEN by two separate HASCURAND and HASROCRAND variables, and try to rationalize the logic, making space for rocrand in parallel to curand --- .../cudacpp/gg_tt.mad/SubProcesses/cudacpp.mk | 122 ++++++++++++------ 1 file changed, 84 insertions(+), 38 deletions(-) diff --git a/epochX/cudacpp/gg_tt.mad/SubProcesses/cudacpp.mk b/epochX/cudacpp/gg_tt.mad/SubProcesses/cudacpp.mk index 2a9768b591..1c2fbc4798 100644 --- a/epochX/cudacpp/gg_tt.mad/SubProcesses/cudacpp.mk +++ b/epochX/cudacpp/gg_tt.mad/SubProcesses/cudacpp.mk @@ -173,11 +173,6 @@ ifneq ($(wildcard $(CUDA_HOME)/bin/nvcc),) comma:=, CUARCHFLAGS = $(foreach arch,$(subst $(comma), ,$(MADGRAPH_CUDA_ARCHITECTURE)),-gencode arch=compute_$(arch),code=compute_$(arch) -gencode arch=compute_$(arch),code=sm_$(arch)) CUINC = -I$(CUDA_HOME)/include/ - ifeq ($(RNDGEN),hasNoCurand) - CURANDLIBFLAGS= - else - CURANDLIBFLAGS = -L$(CUDA_HOME)/lib64/ -lcurand # NB: -lcuda is not needed here! 
- endif CUOPTFLAGS = -lineinfo ###GPUFLAGS = $(OPTFLAGS) $(CUOPTFLAGS) $(INCFLAGS) $(CUINC) $(USE_NVTX) $(CUARCHFLAGS) -use_fast_math GPUFLAGS = $(foreach opt, $(OPTFLAGS), -Xcompiler $(opt)) $(CUOPTFLAGS) $(INCFLAGS) $(CUINC) $(USE_NVTX) $(CUARCHFLAGS) -use_fast_math @@ -241,7 +236,6 @@ else override GPUCC= override USE_NVTX= override CUINC= - override CURANDLIBFLAGS= endif @@ -291,7 +285,7 @@ endif #------------------------------------------------------------------------------- -#=== Configure defaults and check if user-defined choices exist for OMPFLAGS, AVX, FPTYPE, HELINL, HRDCOD, RNDGEN +#=== Configure defaults and check if user-defined choices exist for OMPFLAGS, AVX, FPTYPE, HELINL, HRDCOD # Set the default OMPFLAGS choice ifneq ($(findstring hipcc,$(GPUCC)),) @@ -352,29 +346,62 @@ ifeq ($(HRDCOD),) override HRDCOD = 0 endif -# Set the default RNDGEN (random number generator) choice -ifeq ($(RNDGEN),) - ifeq ($(GPUCC),) - override RNDGEN = hasNoCurand - # Edgecase for HIP compilation - else ifeq ($(findstring hipcc,$(GPUCC)),hipcc) - override RNDGEN = hasNoCurand - else ifeq ($(RNDGEN),) - override RNDGEN = hasCurand - endif -endif - -# Export AVX, FPTYPE, HELINL, HRDCOD, RNDGEN, OMPFLAGS so that it is not necessary to pass them to the src Makefile too +# Export AVX, FPTYPE, HELINL, HRDCOD, OMPFLAGS so that it is not necessary to pass them to the src Makefile too export AVX export FPTYPE export HELINL export HRDCOD -export RNDGEN export OMPFLAGS #------------------------------------------------------------------------------- -#=== Set the CUDA/HIP/C++ compiler flags appropriate to user-defined choices of AVX, FPTYPE, HELINL, HRDCOD, RNDGEN +#=== Configure defaults and check if user-defined choices exist for RNDGEN (legacy!), HASCURAND, HASROCRAND + +# If the legacy RNDGEN exists, this takes precedence over any HASCURAND choice (but a warning is printed out) +###$(info RNDGEN=$(RNDGEN)) +ifneq ($(RNDGEN),) + $(warning Environment variable RNDGEN is no longer 
supported, please use HASCURAND instead!) + ifeq ($(RNDGEN),hasCurand) + override HASCURAND = $(RNDGEN) + else ifeq ($(RNDGEN),hasNoCurand) + override HASCURAND = $(RNDGEN) + else ifneq ($(RNDGEN),hasNoCurand) + $(error Unknown RNDGEN='$(RNDGEN)': only 'hasCurand' and 'hasNoCurand' are supported - but use HASCURAND instead!) + endif +endif + +# Set the default HASCURAND (curand random number generator) choice, if no prior choice exists for HASCURAND +# (NB: allow HASCURAND=hasCurand even if $(GPUCC) does not point to nvcc: assume CUDA_HOME was defined correctly...) +ifeq ($(HASCURAND),) + ifeq ($(GPUCC),) # CPU-only build + override HASCURAND = hasNoCurand + else ifeq ($(findstring nvcc,$(GPUCC)),nvcc) # Nvidia GPU build + override HASCURAND = hasCurand + else # non-Nvidia GPU build + override HASCURAND = hasNoCurand + endif +endif + +# Set the default HASROCRAND (rocrand random number generator) choice, if no prior choice exists for HASROCRAND +# (NB: allow HASROCRAND=hasRocrand even if $(GPUCC) does not point to hipcc: assume HIP_HOME was defined correctly...) 
+ifeq ($(HASROCRAND),) + ifeq ($(GPUCC),) # CPU-only build + override HASROCRAND = hasNoRocrand + else ifeq ($(findstring hipcc,$(GPUCC)),hipcc) # AMD GPU build + override HASROCRAND = hasRocrand + else # non-AMD GPU build + override HASROCRAND = hasNoRocrand + endif +endif + +# Export HASCURAND, HASROCRAND so that it is not necessary to pass them to the src Makefile too +# (NB: these variables in cudacpp_src.mk are only used to define the build tag, they are NOT needed for RNDCXXFLAGS or RNDLIBFLAGS) +export HASCURAND +export HASROCRAND + +#------------------------------------------------------------------------------- + +#=== Set the CUDA/HIP/C++ compiler flags appropriate to user-defined choices of AVX, FPTYPE, HELINL, HRDCOD # Set the build flags appropriate to OMPFLAGS $(info OMPFLAGS=$(OMPFLAGS)) @@ -461,16 +488,33 @@ else ifneq ($(HRDCOD),0) $(error Unknown HRDCOD='$(HRDCOD)': only '0' and '1' are supported) endif -# Set the build flags appropriate to each RNDGEN choice (example: "make RNDGEN=hasNoCurand") -$(info RNDGEN=$(RNDGEN)) -ifeq ($(RNDGEN),hasNoCurand) - override CXXFLAGSCURAND = -DMGONGPU_HAS_NO_CURAND -else ifeq ($(RNDGEN),hasCurand) - override CXXFLAGSCURAND = + +#=== Set the CUDA/HIP/C++ compiler and linker flags appropriate to user-defined choices of HASCURAND, HASROCRAND + +$(info HASCURAND=$(HASCURAND)) +$(info HASROCRAND=$(HASROCRAND)) +override RNDCXXFLAGS= +override RNDLIBFLAGS= + +# Set the RNDCXXFLAGS and RNDLIBFLAGS build flags appropriate to each HASCURAND choice (example: "make HASCURAND=hasNoCurand") +ifeq ($(HASCURAND),hasNoCurand) + override RNDCXXFLAGS += -DMGONGPU_HAS_NO_CURAND +else ifeq ($(HASCURAND),hasCurand) + override RNDLIBFLAGS += -L$(CUDA_HOME)/lib64/ -lcurand # NB: -lcuda is not needed here! 
else - $(error Unknown RNDGEN='$(RNDGEN)': only 'hasCurand' and 'hasNoCurand' are supported) + $(error Unknown HASCURAND='$(HASCURAND)': only 'hasCurand' and 'hasNoCurand' are supported) endif +# Set the RNDCXXFLAGS and RNDLIBFLAGS build flags appropriate to each HASROCRAND choice (example: "make HASROCRAND=hasNoRocrand") +ifeq ($(HASROCRAND),hasNoRocrand) + override RNDCXXFLAGS += -DMGONGPU_HAS_NO_ROCRAND +else ifneq ($(HASROCRAND),hasRocrand) + $(error Unknown HASROCRAND='$(HASROCRAND)': only 'hasRocrand' and 'hasNoRocrand' are supported) +endif + +#$(info RNDCXXFLAGS=$(RNDCXXFLAGS)) +#$(info RNDLIBFLAGS=$(RNDLIBFLAGS)) + #------------------------------------------------------------------------------- #=== Configure build directories and build lockfiles === @@ -481,7 +525,7 @@ override DIRTAG = $(AVX)_$(FPTYPE)_inl$(HELINL)_hrd$(HRDCOD) # Build lockfile "full" tag (defines full specification of build options that cannot be intermixed) # (Rationale: avoid mixing of CUDA and no-CUDA environment builds with different random number generators) -override TAG = $(AVX)_$(FPTYPE)_inl$(HELINL)_hrd$(HRDCOD)_$(RNDGEN) +override TAG = $(AVX)_$(FPTYPE)_inl$(HELINL)_hrd$(HRDCOD)_$(HASCURAND)_$(HASROCRAND) # Build directory: current directory by default, or build.$(DIRTAG) if USEBUILDDIR==1 ifeq ($(USEBUILDDIR),1) @@ -589,12 +633,14 @@ endif $(BUILDDIR)/check_sa.o: CXXFLAGS += $(USE_NVTX) $(CUINC) $(BUILDDIR)/gcheck_sa.o: CXXFLAGS += $(USE_NVTX) $(CUINC) -# Apply special build flags only to check_sa and CurandRandomNumberKernel (curand headers, #679) -$(BUILDDIR)/check_sa.o: CXXFLAGS += $(CXXFLAGSCURAND) -$(BUILDDIR)/gcheck_sa.o: CUFLAGS += $(CXXFLAGSCURAND) -$(BUILDDIR)/CurandRandomNumberKernel.o: CXXFLAGS += $(CXXFLAGSCURAND) -$(BUILDDIR)/gCurandRandomNumberKernel.o: CUFLAGS += $(CXXFLAGSCURAND) -ifeq ($(RNDGEN),hasCurand) +# Apply special build flags only to check_sa and (Cu|Roc)randRandomNumberKernel +$(BUILDDIR)/check_sa.o: CXXFLAGS += $(RNDCXXFLAGS) 
+$(BUILDDIR)/gcheck_sa.o: CUFLAGS += $(RNDCXXFLAGS) +$(BUILDDIR)/CurandRandomNumberKernel.o: CXXFLAGS += $(RNDCXXFLAGS) +$(BUILDDIR)/gCurandRandomNumberKernel.o: CUFLAGS += $(RNDCXXFLAGS) +$(BUILDDIR)/RocrandRandomNumberKernel.o: CXXFLAGS += $(RNDCXXFLAGS) +$(BUILDDIR)/gRocrandRandomNumberKernel.o: CUFLAGS += $(RNDCXXFLAGS) +ifeq ($(HASCURAND),hasCurand) # curand headers, #679 $(BUILDDIR)/CurandRandomNumberKernel.o: CXXFLAGS += $(CUINC) endif @@ -674,7 +720,7 @@ endif # Target (and build rules): C++ and CUDA standalone executables $(cxx_main): LIBFLAGS += $(CXXLIBFLAGSRPATH) # avoid the need for LD_LIBRARY_PATH $(cxx_main): $(BUILDDIR)/check_sa.o $(LIBDIR)/lib$(MG5AMC_CXXLIB).so $(cxx_objects_exe) $(BUILDDIR)/CurandRandomNumberKernel.o - $(CXX) -o $@ $(BUILDDIR)/check_sa.o $(OMPFLAGS) -ldl -pthread $(LIBFLAGS) -L$(LIBDIR) -l$(MG5AMC_CXXLIB) $(cxx_objects_exe) $(BUILDDIR)/CurandRandomNumberKernel.o $(CURANDLIBFLAGS) + $(CXX) -o $@ $(BUILDDIR)/check_sa.o $(OMPFLAGS) -ldl -pthread $(LIBFLAGS) -L$(LIBDIR) -l$(MG5AMC_CXXLIB) $(cxx_objects_exe) $(BUILDDIR)/CurandRandomNumberKernel.o $(RNDLIBFLAGS) ifneq ($(GPUCC),) ifneq ($(shell $(CXX) --version | grep ^Intel),) @@ -685,7 +731,7 @@ $(cu_main): LIBFLAGS += -L$(patsubst %bin/nvc++,%lib,$(subst ccache ,,$(CXX))) - endif $(cu_main): LIBFLAGS += $(CULIBFLAGSRPATH) # avoid the need for LD_LIBRARY_PATH $(cu_main): $(BUILDDIR)/gcheck_sa.o $(LIBDIR)/lib$(MG5AMC_CULIB).so $(cu_objects_exe) $(BUILDDIR)/gCurandRandomNumberKernel.o - $(GPUCC) -o $@ $(BUILDDIR)/gcheck_sa.o $(CUARCHFLAGS) $(LIBFLAGS) -L$(LIBDIR) -l$(MG5AMC_CULIB) $(cu_objects_exe) $(BUILDDIR)/gCurandRandomNumberKernel.o $(CURANDLIBFLAGS) + $(GPUCC) -o $@ $(BUILDDIR)/gcheck_sa.o $(CUARCHFLAGS) $(LIBFLAGS) -L$(LIBDIR) -l$(MG5AMC_CULIB) $(cu_objects_exe) $(BUILDDIR)/gCurandRandomNumberKernel.o $(RNDLIBFLAGS) endif #------------------------------------------------------------------------------- From 90dc350e3a29acab2c43569719eef196724165a1 Mon Sep 17 00:00:00 2001 From: 
Andrea Valassi Date: Mon, 29 Jan 2024 19:32:12 +0100 Subject: [PATCH 05/47] [rocrand] in gg_tt.mad check_sa.cc add rocrand support (and the --rordev and --rorhst options) --- .../SubProcesses/P1_gg_ttx/check_sa.cc | 103 +++++++++++++++--- 1 file changed, 88 insertions(+), 15 deletions(-) diff --git a/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/check_sa.cc b/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/check_sa.cc index 848ba4680d..36cdaa27c1 100644 --- a/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/check_sa.cc +++ b/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/check_sa.cc @@ -58,7 +58,7 @@ int usage( char* argv0, int ret = 1 ) { std::cout << "Usage: " << argv0 - << " [--verbose|-v] [--debug|-d] [--performance|-p] [--json|-j] [--curhst|--curdev|--common] [--rmbhst|--rmbdev] [--bridge]" + << " [--verbose|-v] [--debug|-d] [--performance|-p] [--json|-j] [--curhst|--curdev|--rorhst|--rordev|--common] [--rmbhst|--rmbdev] [--bridge]" << " [#gpuBlocksPerGrid #gpuThreadsPerBlock] #iterations" << std::endl; std::cout << std::endl; std::cout << "The number of events per iteration is #gpuBlocksPerGrid * #gpuThreadsPerBlock" << std::endl; @@ -131,17 +131,31 @@ main( int argc, char** argv ) enum class RandomNumberMode { CommonRandom = 0, - CurandHost = 1, - CurandDevice = 2 + CurandHost = -1, + CurandDevice = 1, + RocrandHost = -2, + RocrandDevice = 2 }; -#ifdef MGONGPU_HAS_NO_CURAND - RandomNumberMode rndgen = RandomNumberMode::CommonRandom; // this is the only supported mode if build has no curand (PR #784 and #785) -#elif defined __HIPCC__ -#error Internal error: MGONGPU_HAS_NO_CURAND should have been set for __HIPCC__ // default on AMD GPUs should be common random -#elif defined __CUDACC__ +#if defined __CUDACC__ +#ifndef MGONGPU_HAS_NO_CURAND RandomNumberMode rndgen = RandomNumberMode::CurandDevice; // default on NVidia GPU if build has curand #else + RandomNumberMode rndgen = RandomNumberMode::CommonRandom; // default on NVidia GPU if build has no curand (PR 
#784 and #785) +#endif +#elif defined __HIPCC__ +#ifndef MGONGPU_HAS_NO_ROCRAND + RandomNumberMode rndgen = RandomNumberMode::RocrandDevice; // default on AMD GPU if build has rocrand +#else + RandomNumberMode rndgen = RandomNumberMode::CommonRandom; // default on AMD GPU if build has no rocrand +#endif +#else +#ifndef MGONGPU_HAS_NO_CURAND RandomNumberMode rndgen = RandomNumberMode::CurandHost; // default on CPU if build has curand +#elif not defined MGONGPU_HAS_NO_ROCRAND + RandomNumberMode rndgen = RandomNumberMode::RocrandDevice; // default on CPU if build has rocrand +#else + RandomNumberMode rndgen = RandomNumberMode::CommonRandom; // default on CPU if build has neither curand nor rocrand +#endif #endif // Rambo sampling mode (NB RamboHost implies CommonRandom or CurandHost!) enum class RamboSamplingMode @@ -193,6 +207,24 @@ main( int argc, char** argv ) throw std::runtime_error( "CurandHost is not supported because this application was built without Curand support" ); #else rndgen = RandomNumberMode::CurandHost; +#endif + } + else if( arg == "--rordev" ) + { +#ifndef __HIPCC__ + throw std::runtime_error( "RocrandDevice is not supported on CPUs or non-AMD GPUs" ); +#elif defined MGONGPU_HAS_NO_ROCRAND + throw std::runtime_error( "RocrandDevice is not supported because this application was built without Rocrand support" ); +#else + rndgen = RandomNumberMode::RocrandDevice; +#endif + } + else if( arg == "--rorhst" ) + { +#ifdef MGONGPU_HAS_NO_ROCRAND + throw std::runtime_error( "RocrandHost is not supported because this application was built without Rocrand support" ); +#else + rndgen = RandomNumberMode::RocrandHost; #endif } else if( arg == "--common" ) @@ -260,7 +292,18 @@ main( int argc, char** argv ) std::cout << "WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost" << std::endl; rndgen = RandomNumberMode::CurandHost; #else - std::cout << "WARNING! 
RamboHost selected: cannot use CurandDevice, will use CommonRandom" << std::endl; + std::cout << "WARNING! RamboHost selected: cannot use CurandDevice , will use CommonRandom" << std::endl; + rndgen = RandomNumberMode::CommonRandom; +#endif + } + + if( rmbsmp == RamboSamplingMode::RamboHost && rndgen == RandomNumberMode::RocrandDevice ) + { +#if not defined MGONGPU_HAS_NO_ROCRAND + std::cout << "WARNING! RamboHost selected: cannot use RocrandDevice, will use RocrandHost" << std::endl; + rndgen = RandomNumberMode::RocrandHost; +#else + std::cout << "WARNING! RamboHost selected: cannot use RocrandDevice , will use CommonRandom" << std::endl; rndgen = RandomNumberMode::CommonRandom; #endif } @@ -415,7 +458,7 @@ main( int argc, char** argv ) std::unique_ptr wavetimes( new double[niter] ); std::unique_ptr wv3atimes( new double[niter] ); - // --- 0c. Create curand or common generator + // --- 0c. Create curand, rocrand or common generator const std::string cgenKey = "0c GenCreat"; timermap.start( cgenKey ); // Allocate the appropriate RandomNumberKernel @@ -433,7 +476,7 @@ main( int argc, char** argv ) prnk.reset( new CurandRandomNumberKernel( hstRndmom, onDevice ) ); #endif } - else + else if( rndgen == RandomNumberMode::CurandDevice ) { #ifdef MGONGPU_HAS_NO_CURAND throw std::runtime_error( "INTERNAL ERROR! CurandDevice is not supported because this application was built without Curand support" ); // INTERNAL ERROR (no path to this statement) @@ -444,7 +487,28 @@ main( int argc, char** argv ) throw std::logic_error( "INTERNAL ERROR! CurandDevice is not supported on CPUs or non-NVidia GPUs" ); // INTERNAL ERROR (no path to this statement) #endif } - + else if( rndgen == RandomNumberMode::RocrandHost ) + { +#ifdef MGONGPU_HAS_NO_ROCRAND + throw std::runtime_error( "INTERNAL ERROR! 
RocrandHost is not supported because this application was built without Rocrand support" ); // INTERNAL ERROR (no path to this statement) +#else + const bool onDevice = false; + prnk.reset( new RocrandRandomNumberKernel( hstRndmom, onDevice ) ); +#endif + } + else if( rndgen == RandomNumberMode::RocrandDevice ) + { +#ifdef MGONGPU_HAS_NO_ROCRAND + throw std::runtime_error( "INTERNAL ERROR! RocrandDevice is not supported because this application was built without Rocrand support" ); // INTERNAL ERROR (no path to this statement) +#elif defined __CUDACC__ + const bool onDevice = true; + prnk.reset( new RocrandRandomNumberKernel( devRndmom, onDevice ) ); +#else + throw std::logic_error( "INTERNAL ERROR! RocrandDevice is not supported on CPUs or non-NVidia GPUs" ); // INTERNAL ERROR (no path to this statement) +#endif + } + else throw std::logic_error( "INTERNAL ERROR! Unknown rndgen value?" ); // INTERNAL ERROR (no path to this statement) + // --- 0c. Create rambo sampling kernel [keep this in 0c for the moment] std::unique_ptr prsk; if( rmbsmp == RamboSamplingMode::RamboHost ) @@ -497,7 +561,7 @@ main( int argc, char** argv ) // *** START THE OLD-STYLE TIMER FOR RANDOM GEN *** double genrtime = 0; - // --- 1a. Seed rnd generator (to get same results on host and device in curand) + // --- 1a. Seed rnd generator (to get same results on host and device in curand/rocrand) // [NB This should not be necessary using the host API: "Generation functions // can be called multiple times on the same generator to generate successive // blocks of results. 
For pseudorandom generators, multiple calls to generation @@ -515,7 +579,8 @@ main( int argc, char** argv ) //std::cout << "Got random numbers" << std::endl; #ifdef MGONGPUCPP_GPUIMPL - if( rndgen != RandomNumberMode::CurandDevice && rmbsmp == RamboSamplingMode::RamboDevice ) + if( ( rndgen != RandomNumberMode::CurandDevice && rmbsmp == RamboSamplingMode::RamboDevice ) || + ( rndgen != RandomNumberMode::RocrandDevice && rmbsmp == RamboSamplingMode::RamboDevice ) ) { // --- 1c. Copy rndmom from host to device const std::string htodKey = "1c CpHTDrnd"; @@ -761,6 +826,10 @@ main( int argc, char** argv ) rndgentxt = "CURAND HOST"; else if( rndgen == RandomNumberMode::CurandDevice ) rndgentxt = "CURAND DEVICE"; + else if( rndgen == RandomNumberMode::RocrandHost ) + rndgentxt = "ROCRND HOST"; + else if( rndgen == RandomNumberMode::RocrandDevice ) + rndgentxt = "ROCRND DEVICE"; #ifdef __CUDACC__ rndgentxt += " (CUDA code)"; #elif defined __HIPCC__ @@ -822,6 +891,10 @@ main( int argc, char** argv ) wrkflwtxt += "CURHST+"; else if( rndgen == RandomNumberMode::CurandDevice ) wrkflwtxt += "CURDEV+"; + else if( rndgen == RandomNumberMode::RocrandHost ) + wrkflwtxt += "RORHST+"; + else if( rndgen == RandomNumberMode::RocrandDevice ) + wrkflwtxt += "RORDEV+"; else wrkflwtxt += "??????+"; // no path to this statement // -- HOST or DEVICE rambo sampling? 
@@ -1095,7 +1168,7 @@ main( int argc, char** argv ) #ifdef MGONGPUCPP_GPUIMPL //<< "\"Wavefunction GPU memory\": " << "\"LOCAL\"," << std::endl #endif - << "\"Curand generation\": " + << "\"Random generation\": " << "\"" << rndgentxt << "\"," << std::endl; double minelem = hstStats.minME; From 3968e2ee86189bd08d98efc1a6bbac79fb87c0ea Mon Sep 17 00:00:00 2001 From: Andrea Valassi Date: Mon, 29 Jan 2024 19:40:41 +0100 Subject: [PATCH 06/47] [rocrand] in gg_tt.mad cudacpp.mk, add RNDCXXFLAGS to runTest, fsampler and CommonRandomNumberKernel objects to avoid an error --- epochX/cudacpp/gg_tt.mad/SubProcesses/cudacpp.mk | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/epochX/cudacpp/gg_tt.mad/SubProcesses/cudacpp.mk b/epochX/cudacpp/gg_tt.mad/SubProcesses/cudacpp.mk index 1c2fbc4798..8b2d433d2d 100644 --- a/epochX/cudacpp/gg_tt.mad/SubProcesses/cudacpp.mk +++ b/epochX/cudacpp/gg_tt.mad/SubProcesses/cudacpp.mk @@ -633,13 +633,19 @@ endif $(BUILDDIR)/check_sa.o: CXXFLAGS += $(USE_NVTX) $(CUINC) $(BUILDDIR)/gcheck_sa.o: CXXFLAGS += $(USE_NVTX) $(CUINC) -# Apply special build flags only to check_sa and (Cu|Roc)randRandomNumberKernel +# Apply special build flags only to check_sa, (Common|Curand|Rocrand)RandomNumberKernel, runTest, fsampler $(BUILDDIR)/check_sa.o: CXXFLAGS += $(RNDCXXFLAGS) $(BUILDDIR)/gcheck_sa.o: CUFLAGS += $(RNDCXXFLAGS) +$(BUILDDIR)/CommonRandomNumberKernel.o: CXXFLAGS += $(RNDCXXFLAGS) +$(BUILDDIR)/gCommonRandomNumberKernel.o: CUFLAGS += $(RNDCXXFLAGS) $(BUILDDIR)/CurandRandomNumberKernel.o: CXXFLAGS += $(RNDCXXFLAGS) $(BUILDDIR)/gCurandRandomNumberKernel.o: CUFLAGS += $(RNDCXXFLAGS) $(BUILDDIR)/RocrandRandomNumberKernel.o: CXXFLAGS += $(RNDCXXFLAGS) $(BUILDDIR)/gRocrandRandomNumberKernel.o: CUFLAGS += $(RNDCXXFLAGS) +$(BUILDDIR)/runTest.o: CXXFLAGS += $(RNDCXXFLAGS) +$(BUILDDIR)/runTest_cu.o: CUFLAGS += $(RNDCXXFLAGS) +$(BUILDDIR)/fsampler.o: CXXFLAGS += $(RNDCXXFLAGS) +$(BUILDDIR)/fsampler_cu.o: CUFLAGS += $(RNDCXXFLAGS) ifeq 
($(HASCURAND),hasCurand) # curand headers, #679 $(BUILDDIR)/CurandRandomNumberKernel.o: CXXFLAGS += $(CUINC) endif From 73789262052c9c31bcf056460b13f0a8e8b8a2da Mon Sep 17 00:00:00 2001 From: Andrea Valassi Date: Mon, 29 Jan 2024 20:05:05 +0100 Subject: [PATCH 07/47] [rocrand] revert the previous commit in cudacpp.mk, as the issue can be fixed in RandomNumberKernels.h instead Revert "[rocrand] in gg_tt.mad cudacpp.mk, add RNDCXXFLAGS to runTest, fsampler and CommonRandomNumberKernel objects to avoid an error" This reverts commit 3968e2ee86189bd08d98efc1a6bbac79fb87c0ea. --- epochX/cudacpp/gg_tt.mad/SubProcesses/cudacpp.mk | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/epochX/cudacpp/gg_tt.mad/SubProcesses/cudacpp.mk b/epochX/cudacpp/gg_tt.mad/SubProcesses/cudacpp.mk index 8b2d433d2d..1c2fbc4798 100644 --- a/epochX/cudacpp/gg_tt.mad/SubProcesses/cudacpp.mk +++ b/epochX/cudacpp/gg_tt.mad/SubProcesses/cudacpp.mk @@ -633,19 +633,13 @@ endif $(BUILDDIR)/check_sa.o: CXXFLAGS += $(USE_NVTX) $(CUINC) $(BUILDDIR)/gcheck_sa.o: CXXFLAGS += $(USE_NVTX) $(CUINC) -# Apply special build flags only to check_sa, (Common|Curand|Rocrand)RandomNumberKernel, runTest, fsampler +# Apply special build flags only to check_sa and (Cu|Roc)randRandomNumberKernel $(BUILDDIR)/check_sa.o: CXXFLAGS += $(RNDCXXFLAGS) $(BUILDDIR)/gcheck_sa.o: CUFLAGS += $(RNDCXXFLAGS) -$(BUILDDIR)/CommonRandomNumberKernel.o: CXXFLAGS += $(RNDCXXFLAGS) -$(BUILDDIR)/gCommonRandomNumberKernel.o: CUFLAGS += $(RNDCXXFLAGS) $(BUILDDIR)/CurandRandomNumberKernel.o: CXXFLAGS += $(RNDCXXFLAGS) $(BUILDDIR)/gCurandRandomNumberKernel.o: CUFLAGS += $(RNDCXXFLAGS) $(BUILDDIR)/RocrandRandomNumberKernel.o: CXXFLAGS += $(RNDCXXFLAGS) $(BUILDDIR)/gRocrandRandomNumberKernel.o: CUFLAGS += $(RNDCXXFLAGS) -$(BUILDDIR)/runTest.o: CXXFLAGS += $(RNDCXXFLAGS) -$(BUILDDIR)/runTest_cu.o: CUFLAGS += $(RNDCXXFLAGS) -$(BUILDDIR)/fsampler.o: CXXFLAGS += $(RNDCXXFLAGS) -$(BUILDDIR)/fsampler_cu.o: CUFLAGS += $(RNDCXXFLAGS) 
ifeq ($(HASCURAND),hasCurand) # curand headers, #679 $(BUILDDIR)/CurandRandomNumberKernel.o: CXXFLAGS += $(CUINC) endif From f3fe636006679d682ebca5d7b8efdd6e2b5dec4f Mon Sep 17 00:00:00 2001 From: Andrea Valassi Date: Mon, 29 Jan 2024 20:01:41 +0100 Subject: [PATCH 08/47] [rocrand] in gg_tt.mad RandomNumberKernels.h, add a forward definition to bypass undefined rocrandGenerator_st error in runTest, fsampler and CommonRandomNumberKernel, and general cleanup to remove all unnecessary ifdefs (the class definition needs no typedef protections) In file included from fsampler.cc:12: RandomNumberKernels.h:189:5: error: 'rocrandGenerator_st' does not name a type; did you mean 'curandGenerator_st'? 189 | rocrandGenerator_st* m_rnGen; | ^~~~~~~~~~~~~~~~~~~ | curandGenerator_st In file included from CommonRandomNumberKernel.cc:9: RandomNumberKernels.h:189:5: error: 'rocrandGenerator_st' does not name a type; did you mean 'curandGenerator_st'? 189 | rocrandGenerator_st* m_rnGen; | ^~~~~~~~~~~~~~~~~~~ | curandGenerator_st In file included from runTest.cc:18: RandomNumberKernels.h:189:5: error: 'rocrandGenerator_st' does not name a type; did you mean 'curandGenerator_st'? 
189 | rocrandGenerator_st* m_rnGen; | ^~~~~~~~~~~~~~~~~~~ | curandGenerator_st --- .../SubProcesses/CurandRandomNumberKernel.cc | 1 + .../SubProcesses/RandomNumberKernels.h | 25 ++++++++----------- 2 files changed, 11 insertions(+), 15 deletions(-) diff --git a/epochX/cudacpp/gg_tt.mad/SubProcesses/CurandRandomNumberKernel.cc b/epochX/cudacpp/gg_tt.mad/SubProcesses/CurandRandomNumberKernel.cc index 08a16f6f2c..c160c5e06b 100644 --- a/epochX/cudacpp/gg_tt.mad/SubProcesses/CurandRandomNumberKernel.cc +++ b/epochX/cudacpp/gg_tt.mad/SubProcesses/CurandRandomNumberKernel.cc @@ -10,6 +10,7 @@ #include #ifndef MGONGPU_HAS_NO_CURAND /* clang-format off */ +// NB This must come AFTER mgOnGpuConfig.h which contains our definition of __global__ when MGONGPUCPP_GPUIMPL is not defined #include "curand.h" #define checkCurand( code ){ assertCurand( code, __FILE__, __LINE__ ); } inline void assertCurand( curandStatus_t code, const char *file, int line, bool abort = true ) diff --git a/epochX/cudacpp/gg_tt.mad/SubProcesses/RandomNumberKernels.h b/epochX/cudacpp/gg_tt.mad/SubProcesses/RandomNumberKernels.h index e50fe7c494..d0680861e4 100644 --- a/epochX/cudacpp/gg_tt.mad/SubProcesses/RandomNumberKernels.h +++ b/epochX/cudacpp/gg_tt.mad/SubProcesses/RandomNumberKernels.h @@ -8,14 +8,15 @@ #include "mgOnGpuConfig.h" -// NB This must come AFTER mgOnGpuConfig.h which contains our definition of __global__ when MGONGPUCPP_GPUIMPL is not defined -#ifndef MGONGPU_HAS_NO_CURAND -//#include "curand.h" -struct curandGenerator_st; // forward definition from curand.h -#endif - #include "MemoryBuffers.h" +// Forward definition from curand.h (the full header is only needed in CurandRandomKernel.cc) +struct curandGenerator_st; + +// Forward definition from hiprand.h (the full header is only needed in RocrandRandomKernel.cc) +struct rocrand_generator_base_type; +typedef rocrand_generator_base_type hiprandGenerator_st; + #ifdef MGONGPUCPP_GPUIMPL namespace mg5amcGpu #else @@ -107,7 +108,6 @@ 
namespace mg5amcCpu //-------------------------------------------------------------------------- -#ifndef MGONGPU_HAS_NO_CURAND // A class encapsulating CURAND random number generation on a CPU host or on a GPU device class CurandRandomNumberKernel final : public RandomNumberKernelBase { @@ -142,15 +142,12 @@ namespace mg5amcCpu const bool m_isOnDevice; // The curand generator - // (NB: curand.h defines typedef generator_t as a pointer to forward-defined 'struct curandGenerator_st') + // (NB: curand.h defines typedef curandGenerator_t as a pointer to forward-defined 'struct curandGenerator_st') curandGenerator_st* m_rnGen; }; -#endif - //-------------------------------------------------------------------------- -#ifndef MGONGPU_HAS_NO_ROCRAND // A class encapsulating ROCRAND random number generation on a CPU host or on a GPU device class RocrandRandomNumberKernel final : public RandomNumberKernelBase { @@ -185,12 +182,10 @@ namespace mg5amcCpu const bool m_isOnDevice; // The rocrand generator - // (NB: rocrand.h defines typedef generator_t as a pointer to forward-defined 'struct rocrandGenerator_st') - rocrandGenerator_st* m_rnGen; + // (NB: hipand.h defines typedef hiprandGenerator_t as a pointer to forward-defined 'struct hiprandGenerator_st') + hiprandGenerator_st* m_rnGen; }; -#endif - //-------------------------------------------------------------------------- } #endif // RANDOMNUMBERKERNELS_H From 576eb655268b1b8dc568331a012cb4ddd1fa2f46 Mon Sep 17 00:00:00 2001 From: Andrea Valassi Date: Tue, 30 Jan 2024 00:06:52 +0200 Subject: [PATCH 09/47] [rocrand] in gg_tt.mad, replace rocrand by hiprand and add other fixes in RocrandRandomNumberKernel.cc --- .../SubProcesses/RocrandRandomNumberKernel.cc | 45 ++++++++++--------- 1 file changed, 23 insertions(+), 22 deletions(-) diff --git a/epochX/cudacpp/gg_tt.mad/SubProcesses/RocrandRandomNumberKernel.cc b/epochX/cudacpp/gg_tt.mad/SubProcesses/RocrandRandomNumberKernel.cc index a23c877bff..6c380963f3 100644 ---
a/epochX/cudacpp/gg_tt.mad/SubProcesses/RocrandRandomNumberKernel.cc +++ b/epochX/cudacpp/gg_tt.mad/SubProcesses/RocrandRandomNumberKernel.cc @@ -10,14 +10,15 @@ #include #ifndef MGONGPU_HAS_NO_ROCRAND /* clang-format off */ -#include -#define checkRocrand( code ){ assertRocrand( code, __FILE__, __LINE__ ); } -inline void assertRocrand( rocrandStatus_t code, const char *file, int line, bool abort = true ) +//#include +#include "hiprand.h" +#define checkHiprand( code ){ assertHiprand( code, __FILE__, __LINE__ ); } +inline void assertHiprand( hiprandStatus_t code, const char *file, int line, bool abort = true ) { - if ( code != ROCRAND_STATUS_SUCCESS ) + if ( code != HIPRAND_STATUS_SUCCESS ) { - printf( "RocrandAssert: %s:%d code=%d\n", file, line, code ); - if ( abort ) assert( code == ROCRAND_STATUS_SUCCESS ); + printf( "HiprandAssert: %s:%d code=%d\n", file, line, code ); + if ( abort ) assert( code == HIPRAND_STATUS_SUCCESS ); } } #endif /* clang-format on */ @@ -40,7 +41,7 @@ namespace mg5amcCpu if( !m_rnarray.isOnDevice() ) throw std::runtime_error( "RocrandRandomNumberKernel on device with a host random number array" ); #else - throw std::runtime_error( "RocrandRandomNumberKernel does not support RocrandDevice on CPU host" ); + throw std::runtime_error( "RocrandRandomNumberKernel does not support HiprandDevice on CPU host" ); #endif } else @@ -68,7 +69,7 @@ namespace mg5amcCpu createGenerator(); // workaround for #429 } //printf( "seedGenerator: seed %d\n", seed ); - checkRocrand( rocrandSetPseudoRandomGeneratorSeed( m_rnGen, seed ) ); + checkHiprand( hiprandSetPseudoRandomGeneratorSeed( m_rnGen, seed ) ); } //-------------------------------------------------------------------------- @@ -76,30 +77,30 @@ namespace mg5amcCpu void RocrandRandomNumberKernel::createGenerator() { // [NB Timings are for GenRnGen host|device (cpp|cuda) generation of 256*32*1 events with nproc=1: rn(0) is host=0.0012s] - const rocrandRngType_t type = ROCRAND_RNG_PSEUDO_MTGP32; // 
0.00082s | 0.00064s (FOR FAST TESTS) - //const rocrandRngType_t type = ROCRAND_RNG_PSEUDO_XORWOW; // 0.049s | 0.0016s - //const rocrandRngType_t type = ROCRAND_RNG_PSEUDO_MRG32K3A; // 0.71s | 0.0012s (better but slower, especially in c++) - //const rocrandRngType_t type = ROCRAND_RNG_PSEUDO_MT19937; // 21s | 0.021s - //const rocrandRngType_t type = ROCRAND_RNG_PSEUDO_PHILOX4_32_10; // 0.024s | 0.00026s (used to segfault?) + const hiprandRngType_t type = HIPRAND_RNG_PSEUDO_MTGP32; // 0.00082s | 0.00064s (FOR FAST TESTS) + //const hiprandRngType_t type = HIPRAND_RNG_PSEUDO_XORWOW; // 0.049s | 0.0016s + //const hiprandRngType_t type = HIPRAND_RNG_PSEUDO_MRG32K3A; // 0.71s | 0.0012s (better but slower, especially in c++) + //const hiprandRngType_t type = HIPRAND_RNG_PSEUDO_MT19937; // 21s | 0.021s + //const hiprandRngType_t type = HIPRAND_RNG_PSEUDO_PHILOX4_32_10; // 0.024s | 0.00026s (used to segfault?) if( m_isOnDevice ) { - checkRocrand( rocrandCreateGenerator( &m_rnGen, type ) ); + checkHiprand( hiprandCreateGenerator( &m_rnGen, type ) ); } else { - checkRocrand( rocrandCreateGeneratorHost( &m_rnGen, type ) ); + checkHiprand( hiprandCreateGeneratorHost( &m_rnGen, type ) ); } - //checkRocrand( rocrandSetGeneratorOrdering( *&m_rnGen, ROCRAND_ORDERING_PSEUDO_LEGACY ) ); // fails with code=104 (see #429) - checkRocrand( rocrandSetGeneratorOrdering( *&m_rnGen, ROCRAND_ORDERING_PSEUDO_BEST ) ); - //checkRocrand( rocrandSetGeneratorOrdering( *&m_rnGen, ROCRAND_ORDERING_PSEUDO_DYNAMIC ) ); // fails with code=104 (see #429) - //checkRocrand( rocrandSetGeneratorOrdering( *&m_rnGen, ROCRAND_ORDERING_PSEUDO_SEEDED ) ); // fails with code=104 (see #429) + //checkHiprand( hiprandSetGeneratorOrdering( *&m_rnGen, HIPRAND_ORDERING_PSEUDO_LEGACY ) ); // fails with code=104 (see #429) + checkHiprand( hiprandSetGeneratorOrdering( *&m_rnGen, HIPRAND_ORDERING_PSEUDO_BEST ) ); + //checkHiprand( hiprandSetGeneratorOrdering( *&m_rnGen, HIPRAND_ORDERING_PSEUDO_DYNAMIC ) ); // fails with 
code=104 (see #429) + //checkHiprand( hiprandSetGeneratorOrdering( *&m_rnGen, HIPRAND_ORDERING_PSEUDO_SEEDED ) ); // fails with code=104 (see #429) } //-------------------------------------------------------------------------- void RocrandRandomNumberKernel::destroyGenerator() { - checkRocrand( rocrandDestroyGenerator( m_rnGen ) ); + checkHiprand( hiprandDestroyGenerator( m_rnGen ) ); } //-------------------------------------------------------------------------- @@ -107,9 +108,9 @@ namespace mg5amcCpu void RocrandRandomNumberKernel::generateRnarray() { #if defined MGONGPU_FPTYPE_DOUBLE - checkRocrand( rocrandGenerateUniformDouble( m_rnGen, m_rnarray.data(), m_rnarray.size() ) ); + checkHiprand( hiprandGenerateUniformDouble( m_rnGen, m_rnarray.data(), m_rnarray.size() ) ); #elif defined MGONGPU_FPTYPE_FLOAT - checkRocrand( rocrandGenerateUniform( m_rnGen, m_rnarray.data(), m_rnarray.size() ) ); + checkHiprand( hiprandGenerateUniform( m_rnGen, m_rnarray.data(), m_rnarray.size() ) ); #endif /* printf( "\nRocrandRandomNumberKernel::generateRnarray size = %d\n", (int)m_rnarray.size() ); From 79c9a6b03f4476da740bcd5774395235adf84705 Mon Sep 17 00:00:00 2001 From: Andrea Valassi Date: Tue, 30 Jan 2024 00:09:23 +0200 Subject: [PATCH 10/47] [rocrand] in gg_tt.mad cudacpp.mk, link also RocrandRandomNumberKernel.o to the relevant executables --- epochX/cudacpp/gg_tt.mad/SubProcesses/cudacpp.mk | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/epochX/cudacpp/gg_tt.mad/SubProcesses/cudacpp.mk b/epochX/cudacpp/gg_tt.mad/SubProcesses/cudacpp.mk index 1c2fbc4798..8c847687a1 100644 --- a/epochX/cudacpp/gg_tt.mad/SubProcesses/cudacpp.mk +++ b/epochX/cudacpp/gg_tt.mad/SubProcesses/cudacpp.mk @@ -719,8 +719,8 @@ endif # Target (and build rules): C++ and CUDA standalone executables $(cxx_main): LIBFLAGS += $(CXXLIBFLAGSRPATH) # avoid the need for LD_LIBRARY_PATH -$(cxx_main): $(BUILDDIR)/check_sa.o $(LIBDIR)/lib$(MG5AMC_CXXLIB).so $(cxx_objects_exe) 
$(BUILDDIR)/CurandRandomNumberKernel.o - $(CXX) -o $@ $(BUILDDIR)/check_sa.o $(OMPFLAGS) -ldl -pthread $(LIBFLAGS) -L$(LIBDIR) -l$(MG5AMC_CXXLIB) $(cxx_objects_exe) $(BUILDDIR)/CurandRandomNumberKernel.o $(RNDLIBFLAGS) +$(cxx_main): $(BUILDDIR)/check_sa.o $(LIBDIR)/lib$(MG5AMC_CXXLIB).so $(cxx_objects_exe) $(BUILDDIR)/CurandRandomNumberKernel.o $(BUILDDIR)/RocrandRandomNumberKernel.o + $(CXX) -o $@ $(BUILDDIR)/check_sa.o $(OMPFLAGS) -ldl -pthread $(LIBFLAGS) -L$(LIBDIR) -l$(MG5AMC_CXXLIB) $(cxx_objects_exe) $(BUILDDIR)/CurandRandomNumberKernel.o $(BUILDDIR)/RocrandRandomNumberKernel.o $(RNDLIBFLAGS) ifneq ($(GPUCC),) ifneq ($(shell $(CXX) --version | grep ^Intel),) @@ -730,8 +730,8 @@ else ifneq ($(shell $(CXX) --version | grep ^nvc++),) # support nvc++ #531 $(cu_main): LIBFLAGS += -L$(patsubst %bin/nvc++,%lib,$(subst ccache ,,$(CXX))) -lnvhpcatm -lnvcpumath -lnvc endif $(cu_main): LIBFLAGS += $(CULIBFLAGSRPATH) # avoid the need for LD_LIBRARY_PATH -$(cu_main): $(BUILDDIR)/gcheck_sa.o $(LIBDIR)/lib$(MG5AMC_CULIB).so $(cu_objects_exe) $(BUILDDIR)/gCurandRandomNumberKernel.o - $(GPUCC) -o $@ $(BUILDDIR)/gcheck_sa.o $(CUARCHFLAGS) $(LIBFLAGS) -L$(LIBDIR) -l$(MG5AMC_CULIB) $(cu_objects_exe) $(BUILDDIR)/gCurandRandomNumberKernel.o $(RNDLIBFLAGS) +$(cu_main): $(BUILDDIR)/gcheck_sa.o $(LIBDIR)/lib$(MG5AMC_CULIB).so $(cu_objects_exe) $(BUILDDIR)/gCurandRandomNumberKernel.o $(BUILDDIR)/gRocrandRandomNumberKernel.o + $(GPUCC) -o $@ $(BUILDDIR)/gcheck_sa.o $(CUARCHFLAGS) $(LIBFLAGS) -L$(LIBDIR) -l$(MG5AMC_CULIB) $(cu_objects_exe) $(BUILDDIR)/gCurandRandomNumberKernel.o $(BUILDDIR)/gRocrandRandomNumberKernel.o $(RNDLIBFLAGS) endif #------------------------------------------------------------------------------- From fdfb335c4a113a2474cc67aefa02fe917321f1d3 Mon Sep 17 00:00:00 2001 From: Andrea Valassi Date: Tue, 30 Jan 2024 00:10:58 +0200 Subject: [PATCH 11/47] [rocrand] in gg_tt.mad, add symlink of RocrandRandomNumberKernel.cc to gRocrandRandomNumberKernel.cu --- 
.../SubProcesses/P1_gg_ttx/gRocrandRandomNumberKernel.cu | 1 + 1 file changed, 1 insertion(+) create mode 120000 epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/gRocrandRandomNumberKernel.cu diff --git a/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/gRocrandRandomNumberKernel.cu b/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/gRocrandRandomNumberKernel.cu new file mode 120000 index 0000000000..ef5c82ffab --- /dev/null +++ b/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/gRocrandRandomNumberKernel.cu @@ -0,0 +1 @@ +RocrandRandomNumberKernel.cc \ No newline at end of file From 6ae85c8e95b6ada9b14f8d26d0f592c24912076a Mon Sep 17 00:00:00 2001 From: Andrea Valassi Date: Tue, 30 Jan 2024 00:26:18 +0200 Subject: [PATCH 12/47] [rocrand] in gg_tt.mad cudacpp.mk, add HIPINC when compiling RocrandRandomNumberKernel --- epochX/cudacpp/gg_tt.mad/SubProcesses/cudacpp.mk | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/epochX/cudacpp/gg_tt.mad/SubProcesses/cudacpp.mk b/epochX/cudacpp/gg_tt.mad/SubProcesses/cudacpp.mk index 8c847687a1..e036aaddd4 100644 --- a/epochX/cudacpp/gg_tt.mad/SubProcesses/cudacpp.mk +++ b/epochX/cudacpp/gg_tt.mad/SubProcesses/cudacpp.mk @@ -236,6 +236,7 @@ else override GPUCC= override USE_NVTX= override CUINC= + override HIPINC= endif @@ -643,6 +644,9 @@ $(BUILDDIR)/gRocrandRandomNumberKernel.o: CUFLAGS += $(RNDCXXFLAGS) ifeq ($(HASCURAND),hasCurand) # curand headers, #679 $(BUILDDIR)/CurandRandomNumberKernel.o: CXXFLAGS += $(CUINC) endif +ifeq ($(HASROCRAND),hasRocrand) # rocrand headers +$(BUILDDIR)/RocrandRandomNumberKernel.o: CXXFLAGS += $(HIPINC) +endif # Avoid "warning: builtin __has_trivial_... is deprecated; use __is_trivially_... 
instead" in GPUCC with icx2023 (#592) ifneq ($(shell $(CXX) --version | egrep '^(Intel)'),) From 465624e429424c77b7f708c8246e55d6b8f9c859 Mon Sep 17 00:00:00 2001 From: Andrea Valassi Date: Tue, 30 Jan 2024 00:32:28 +0200 Subject: [PATCH 13/47] [rocrand] in gg_tt.mad RocrandRandomNumberKernel.cc, do not set generator ordering (I could not find the syntax yet) --- .../SubProcesses/RocrandRandomNumberKernel.cc | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/epochX/cudacpp/gg_tt.mad/SubProcesses/RocrandRandomNumberKernel.cc b/epochX/cudacpp/gg_tt.mad/SubProcesses/RocrandRandomNumberKernel.cc index 6c380963f3..ca907064d3 100644 --- a/epochX/cudacpp/gg_tt.mad/SubProcesses/RocrandRandomNumberKernel.cc +++ b/epochX/cudacpp/gg_tt.mad/SubProcesses/RocrandRandomNumberKernel.cc @@ -90,10 +90,13 @@ namespace mg5amcCpu { checkHiprand( hiprandCreateGeneratorHost( &m_rnGen, type ) ); } - //checkHiprand( hiprandSetGeneratorOrdering( *&m_rnGen, HIPRAND_ORDERING_PSEUDO_LEGACY ) ); // fails with code=104 (see #429) - checkHiprand( hiprandSetGeneratorOrdering( *&m_rnGen, HIPRAND_ORDERING_PSEUDO_BEST ) ); - //checkHiprand( hiprandSetGeneratorOrdering( *&m_rnGen, HIPRAND_ORDERING_PSEUDO_DYNAMIC ) ); // fails with code=104 (see #429) - //checkHiprand( hiprandSetGeneratorOrdering( *&m_rnGen, HIPRAND_ORDERING_PSEUDO_SEEDED ) ); // fails with code=104 (see #429) + /* + // FIXME: implement hiprand/rocrand ordering... 
+ //checkHiprand( hiprandSetGeneratorOrdering( *&m_rnGen, ROCRAND_ORDERING_PSEUDO_LEGACY ) ); // fails in curand (see #429) + checkHiprand( hiprandSetGeneratorOrdering( *&m_rnGen, ROCRAND_ORDERING_PSEUDO_BEST ) ); + //checkHiprand( hiprandSetGeneratorOrdering( *&m_rnGen, ROCRAND_ORDERING_PSEUDO_DYNAMIC ) ); // fails in curand (see #429) + //checkHiprand( hiprandSetGeneratorOrdering( *&m_rnGen, ROCRAND_ORDERING_PSEUDO_SEEDED ) ); // fails in curand (see #429) + */ } //-------------------------------------------------------------------------- From 94124a1c33d4da32ed5a5f0592c48a920bbfb0a2 Mon Sep 17 00:00:00 2001 From: Andrea Valassi Date: Tue, 30 Jan 2024 00:35:05 +0200 Subject: [PATCH 14/47] [rocrand] in gg_tt.mad cudacpp.mk, add -lhiprand if HASROCRAND=hasRocrand --- epochX/cudacpp/gg_tt.mad/SubProcesses/cudacpp.mk | 2 ++ 1 file changed, 2 insertions(+) diff --git a/epochX/cudacpp/gg_tt.mad/SubProcesses/cudacpp.mk b/epochX/cudacpp/gg_tt.mad/SubProcesses/cudacpp.mk index e036aaddd4..92b8a2cb4d 100644 --- a/epochX/cudacpp/gg_tt.mad/SubProcesses/cudacpp.mk +++ b/epochX/cudacpp/gg_tt.mad/SubProcesses/cudacpp.mk @@ -509,6 +509,8 @@ endif # Set the RNDCXXFLAGS and RNDLIBFLAGS build flags appropriate to each HASROCRAND choice (example: "make HASROCRAND=hasNoRocrand") ifeq ($(HASROCRAND),hasNoRocrand) override RNDCXXFLAGS += -DMGONGPU_HAS_NO_ROCRAND +else ifeq ($(HASROCRAND),hasRocrand) + override RNDLIBFLAGS += -L$(HIP_HOME)/lib/ -lhiprand else ifneq ($(HASROCRAND),hasRocrand) $(error Unknown HASROCRAND='$(HASROCRAND)': only 'hasRocrand' and 'hasNoRocrand' are supported) endif From 79490542e013e033a9bb011ef779e120b439cb5f Mon Sep 17 00:00:00 2001 From: Andrea Valassi Date: Tue, 30 Jan 2024 00:50:18 +0200 Subject: [PATCH 15/47] [rocrand] in gg_tt.mad mgOnGpuConfig.h and RocrandRandomNumberKernel.cc, ensure __HIP_PLATFORM_NVIDIA__ is not defined and __HIP_PLATFORM_AMD__ is defined --- .../gg_tt.mad/SubProcesses/RocrandRandomNumberKernel.cc | 8 ++++++-- 
epochX/cudacpp/gg_tt.mad/src/mgOnGpuConfig.h | 4 ++++ 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/epochX/cudacpp/gg_tt.mad/SubProcesses/RocrandRandomNumberKernel.cc b/epochX/cudacpp/gg_tt.mad/SubProcesses/RocrandRandomNumberKernel.cc index ca907064d3..8ab1b32983 100644 --- a/epochX/cudacpp/gg_tt.mad/SubProcesses/RocrandRandomNumberKernel.cc +++ b/epochX/cudacpp/gg_tt.mad/SubProcesses/RocrandRandomNumberKernel.cc @@ -3,6 +3,8 @@ // Created by: A. Valassi (Jan 2024) for the MG5aMC CUDACPP plugin. // Further modified by: A. Valassi (2024) for the MG5aMC CUDACPP plugin. +#include "mgOnGpuConfig.h" + #include "GpuRuntime.h" #include "MemoryBuffers.h" #include "RandomNumberKernels.h" @@ -10,8 +12,10 @@ #include #ifndef MGONGPU_HAS_NO_ROCRAND /* clang-format off */ -//#include -#include "hiprand.h" +#ifndef __HIP_PLATFORM_AMD__ +#define __HIP_PLATFORM_AMD__ 1 // enable hiprand for AMD (rocrand) +#endif +#include #define checkHiprand( code ){ assertHiprand( code, __FILE__, __LINE__ ); } inline void assertHiprand( hiprandStatus_t code, const char *file, int line, bool abort = true ) { diff --git a/epochX/cudacpp/gg_tt.mad/src/mgOnGpuConfig.h b/epochX/cudacpp/gg_tt.mad/src/mgOnGpuConfig.h index 42ea924047..6a78d71c0a 100644 --- a/epochX/cudacpp/gg_tt.mad/src/mgOnGpuConfig.h +++ b/epochX/cudacpp/gg_tt.mad/src/mgOnGpuConfig.h @@ -20,6 +20,10 @@ #undef MGONGPUCPP_GPUIMPL #endif +// Make sure that __HIP_PLATFORM_NVIDIA__ is undefined +// (__HIP_PLATFORM_AMD__ is defined by hipcc or in RocrandRandomNumberKernel.cc) +#undef __HIP_PLATFORM_NVIDIA__ // disable hiprand for NVidia (curand) + // ** NB1 Throughputs (e.g. 
6.8E8) are events/sec for "./gcheck.exe -p 65536 128 12" // ** NB2 Baseline on b7g47n0004 fluctuates (probably depends on load on other VMs) From ff3afb0d1e839831f76c8010f71e28cddf31006d Mon Sep 17 00:00:00 2001 From: Andrea Valassi Date: Tue, 30 Jan 2024 00:59:00 +0200 Subject: [PATCH 16/47] [rocrand] in gg_tt.mad, fix check_sa.cc to allow RocrandDevice on AMD GPUs The status at this point is that - the code builds - gcheck.exe runs in rocranddevice, but produces nan MEs (1) - gcheck.exe and check.exe fail in rocrandhost (2) (1) nan MEs from rocranddevice Workflow summary = HIP:DBL+CXS:RORDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=8, zero=0) Complex type = STD::COMPLEX RanNumb memory layout = AOSOA[8] [HARDCODED FOR REPRODUCIBILITY] Momenta memory layout = AOSOA[4] Random number generation = ROCRND DEVICE (HIP code) ------------------------------------------------------------------------------- HelicityComb Good/Tot = 16/16 ------------------------------------------------------------------------------- NumberOfEntries = 1 TotalTime[Rnd+Rmb+ME] (123) = ( 1.329026e-02 ) sec TotalTime[Rambo+ME] (23) = ( 5.200556e-03 ) sec TotalTime[RndNumGen] (1) = ( 8.089702e-03 ) sec TotalTime[Rambo] (2) = ( 4.494608e-03 ) sec TotalTime[MatrixElems] (3) = ( 7.059480e-04 ) sec MeanTimeInMatrixElems = ( 7.059480e-04 ) sec [Min,Max]TimeInMatrixElems = [ 7.059480e-04 , 7.059480e-04 ] sec TotalTime[MECalcOnly] (3a) = ( 6.957380e-04 ) sec MeanTimeInMECalcOnly = ( 6.957380e-04 ) sec [Min,Max]TimeInMECalcOnly = [ 6.957380e-04 , 6.957380e-04 ] sec ------------------------------------------------------------------------------- TotalEventsComputed = 8 EvtsPerSec[Rnd+Rmb+ME](123) = ( 6.019447e+02 ) sec^-1 EvtsPerSec[Rmb+ME] (23) = ( 1.538297e+03 ) sec^-1 EvtsPerSec[MatrixElems] (3) = ( 1.133228e+04 ) sec^-1 EvtsPerSec[MECalcOnly] (3a) = ( 1.149858e+04 ) sec^-1 ******************************************************************************* NumMatrixElems(notAbnormal) = 0 
MeanMatrixElemValue = ( -nan +- -nan ) GeV^0 [Min,Max]MatrixElemValue = [ 1.797693e+308 , -1.797693e+308 ] GeV^0 StdDevMatrixElemValue = ( -nan ) GeV^0 MeanWeight = ( -nan +- -nan [Min,Max]Weight = [ 1.797693e+308 , -1.797693e+308 ] StdDevWeight = ( -nan ) (2) failures from rocrandhost HiprandAssert: gRocrandRandomNumberKernel.cu:95 code=1000 gcheck.exe: gRocrandRandomNumberKernel.cu:25: void assertHiprand(hiprandStatus_t, const char *, int, bool): Assertion `code == HIPRAND_STATUS_SUCCESS' failed. Aborted --- epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/check_sa.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/check_sa.cc b/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/check_sa.cc index 36cdaa27c1..ae8c9212d0 100644 --- a/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/check_sa.cc +++ b/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/check_sa.cc @@ -500,7 +500,7 @@ main( int argc, char** argv ) { #ifdef MGONGPU_HAS_NO_ROCRAND throw std::runtime_error( "INTERNAL ERROR! RocrandDevice is not supported because this application was built without Rocrand support" ); // INTERNAL ERROR (no path to this statement) -#elif defined __CUDACC__ +#elif defined __HIPCC__ const bool onDevice = true; prnk.reset( new RocrandRandomNumberKernel( devRndmom, onDevice ) ); #else From 3539ced7e488a618c31068e252cb038ffa6db9e7 Mon Sep 17 00:00:00 2001 From: Andrea Valassi Date: Tue, 30 Jan 2024 14:31:53 +0100 Subject: [PATCH 17/47] [rocrand] in gg_tt.mad check_sa.cc, BUG FIX that removes the nan's also on cuda gcheck.exe when using curand On LUMI HIP, this also fixes the nans in gcheck.exe using --rordev. However, the error in gcheck.exe using --rorhst does remain. The same error for RocrandHost happens in check.exe using the default --rorhst. 
--- .../cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/check_sa.cc | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/check_sa.cc b/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/check_sa.cc index ae8c9212d0..4e06c88737 100644 --- a/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/check_sa.cc +++ b/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/check_sa.cc @@ -292,7 +292,7 @@ main( int argc, char** argv ) std::cout << "WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost" << std::endl; rndgen = RandomNumberMode::CurandHost; #else - std::cout << "WARNING! RamboHost selected: cannot use CurandDevice , will use CommonRandom" << std::endl; + std::cout << "WARNING! RamboHost selected: cannot use CurandDevice, will use CommonRandom" << std::endl; rndgen = RandomNumberMode::CommonRandom; #endif } @@ -303,7 +303,7 @@ main( int argc, char** argv ) std::cout << "WARNING! RamboHost selected: cannot use RocrandDevice, will use RocrandHost" << std::endl; rndgen = RandomNumberMode::RocrandHost; #else - std::cout << "WARNING! RamboHost selected: cannot use RocrandDevice , will use CommonRandom" << std::endl; + std::cout << "WARNING! RamboHost selected: cannot use RocrandDevice, will use CommonRandom" << std::endl; rndgen = RandomNumberMode::CommonRandom; #endif } @@ -579,8 +579,9 @@ main( int argc, char** argv ) //std::cout << "Got random numbers" << std::endl; #ifdef MGONGPUCPP_GPUIMPL - if( ( rndgen != RandomNumberMode::CurandDevice && rmbsmp == RamboSamplingMode::RamboDevice ) || - ( rndgen != RandomNumberMode::RocrandDevice && rmbsmp == RamboSamplingMode::RamboDevice ) ) + if( rndgen != RandomNumberMode::CurandDevice && + rndgen != RandomNumberMode::RocrandDevice && + rmbsmp == RamboSamplingMode::RamboDevice ) { // --- 1c. 
Copy rndmom from host to device const std::string htodKey = "1c CpHTDrnd"; From fb8d6fe7cf7c2b351d579db9ebc8a6652f623a40 Mon Sep 17 00:00:00 2001 From: Andrea Valassi Date: Fri, 2 Feb 2024 13:51:58 +0200 Subject: [PATCH 18/47] [rocrand] in gg_tt.mad RocrandRandomNumberKernel.cc, clarify that hiprand host generators are not supported yet --- .../gg_tt.mad/SubProcesses/RandomNumberKernels.h | 2 +- .../SubProcesses/RocrandRandomNumberKernel.cc | 16 +++++++++------- 2 files changed, 10 insertions(+), 8 deletions(-) diff --git a/epochX/cudacpp/gg_tt.mad/SubProcesses/RandomNumberKernels.h b/epochX/cudacpp/gg_tt.mad/SubProcesses/RandomNumberKernels.h index d0680861e4..5f477f6139 100644 --- a/epochX/cudacpp/gg_tt.mad/SubProcesses/RandomNumberKernels.h +++ b/epochX/cudacpp/gg_tt.mad/SubProcesses/RandomNumberKernels.h @@ -182,7 +182,7 @@ namespace mg5amcCpu const bool m_isOnDevice; // The rocrand generator - // (NB: hipand.h defines typedef hiprandGenerator_t as a pointer to forward-defined 'struct hiprandGenerator_st') + // (NB: hiprand.h defines typedef hiprandGenerator_t as a pointer to forward-defined 'struct hiprandGenerator_st') hiprandGenerator_st* m_rnGen; }; diff --git a/epochX/cudacpp/gg_tt.mad/SubProcesses/RocrandRandomNumberKernel.cc b/epochX/cudacpp/gg_tt.mad/SubProcesses/RocrandRandomNumberKernel.cc index 8ab1b32983..3b0ce3c7b8 100644 --- a/epochX/cudacpp/gg_tt.mad/SubProcesses/RocrandRandomNumberKernel.cc +++ b/epochX/cudacpp/gg_tt.mad/SubProcesses/RocrandRandomNumberKernel.cc @@ -80,19 +80,21 @@ namespace mg5amcCpu void RocrandRandomNumberKernel::createGenerator() { - // [NB Timings are for GenRnGen host|device (cpp|cuda) generation of 256*32*1 events with nproc=1: rn(0) is host=0.0012s] - const hiprandRngType_t type = HIPRAND_RNG_PSEUDO_MTGP32; // 0.00082s | 0.00064s (FOR FAST TESTS) - //const hiprandRngType_t type = HIPRAND_RNG_PSEUDO_XORWOW; // 0.049s | 0.0016s - //const hiprandRngType_t type = HIPRAND_RNG_PSEUDO_MRG32K3A; // 0.71s | 0.0012s (better but 
slower, especially in c++) - //const hiprandRngType_t type = HIPRAND_RNG_PSEUDO_MT19937; // 21s | 0.021s - //const hiprandRngType_t type = HIPRAND_RNG_PSEUDO_PHILOX4_32_10; // 0.024s | 0.00026s (used to segfault?) + //const hiprandRngType_t type = HIPRAND_RNG_PSEUDO_DEFAULT; + //const hiprandRngType_t type = HIPRAND_RNG_PSEUDO_XORWOW; + //const hiprandRngType_t type = HIPRAND_RNG_PSEUDO_MRG32K3A; + const hiprandRngType_t type = HIPRAND_RNG_PSEUDO_MTGP32; // same as curand; not implemented yet (code=1000) in host code + //const hiprandRngType_t type = HIPRAND_RNG_PSEUDO_MT19937; + //const hiprandRngType_t type = HIPRAND_RNG_PSEUDO_PHILOX4_32_10; if( m_isOnDevice ) { checkHiprand( hiprandCreateGenerator( &m_rnGen, type ) ); } else { - checkHiprand( hiprandCreateGeneratorHost( &m_rnGen, type ) ); + // See https://github.com/ROCm/hipRAND/issues/76 + throw std::runtime_error( "RocrandRandomNumberKernel on host is not supported yet (hiprandCreateGeneratorHost is not implemented yet)" ); + //checkHiprand( hiprandCreateGeneratorHost( &m_rnGen, type ) ); // ALWAYS FAILS WITH CODE=1000 } /* // FIXME: implement hiprand/rocrand ordering... 
From ef359a96e32db8aae665c582257f18c30bc971c8 Mon Sep 17 00:00:00 2001 From: Andrea Valassi Date: Fri, 2 Feb 2024 17:00:11 +0200 Subject: [PATCH 19/47] [rocrand] in gg_tt.mad RocrandRandomNumberKernel.cc, clarify that hiprand ordering is not implemented yet --- .../SubProcesses/RocrandRandomNumberKernel.cc | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/epochX/cudacpp/gg_tt.mad/SubProcesses/RocrandRandomNumberKernel.cc b/epochX/cudacpp/gg_tt.mad/SubProcesses/RocrandRandomNumberKernel.cc index 3b0ce3c7b8..187f541420 100644 --- a/epochX/cudacpp/gg_tt.mad/SubProcesses/RocrandRandomNumberKernel.cc +++ b/epochX/cudacpp/gg_tt.mad/SubProcesses/RocrandRandomNumberKernel.cc @@ -96,12 +96,13 @@ namespace mg5amcCpu throw std::runtime_error( "RocrandRandomNumberKernel on host is not supported yet (hiprandCreateGeneratorHost is not implemented yet)" ); //checkHiprand( hiprandCreateGeneratorHost( &m_rnGen, type ) ); // ALWAYS FAILS WITH CODE=1000 } + // FIXME: hiprand ordering is not implemented yet + // See https://github.com/ROCm/hipRAND/issues/75 /* - // FIXME: implement hiprand/rocrand ordering... 
- //checkHiprand( hiprandSetGeneratorOrdering( *&m_rnGen, ROCRAND_ORDERING_PSEUDO_LEGACY ) ); // fails in curand (see #429) - checkHiprand( hiprandSetGeneratorOrdering( *&m_rnGen, ROCRAND_ORDERING_PSEUDO_BEST ) ); - //checkHiprand( hiprandSetGeneratorOrdering( *&m_rnGen, ROCRAND_ORDERING_PSEUDO_DYNAMIC ) ); // fails in curand (see #429) - //checkHiprand( hiprandSetGeneratorOrdering( *&m_rnGen, ROCRAND_ORDERING_PSEUDO_SEEDED ) ); // fails in curand (see #429) + //checkHiprand( hiprandSetGeneratorOrdering( *&m_rnGen, HIPRAND_ORDERING_PSEUDO_LEGACY ) ); + checkHiprand( hiprandSetGeneratorOrdering( *&m_rnGen, HIPRAND_ORDERING_PSEUDO_BEST ) ); + //checkHiprand( hiprandSetGeneratorOrdering( *&m_rnGen, HIPRAND_ORDERING_PSEUDO_DYNAMIC ) ); + //checkHiprand( hiprandSetGeneratorOrdering( *&m_rnGen, HIPRAND_ORDERING_PSEUDO_SEEDED ) ); */ } From bd7395dadb2db54e1eb4b68a59b2999236ee8e59 Mon Sep 17 00:00:00 2001 From: Andrea Valassi Date: Fri, 2 Feb 2024 17:07:13 +0200 Subject: [PATCH 20/47] [rocrand] in gg_tt.mad check_sa.cc, clarify that RocrandHost is not yet implemented --- .../gg_tt.mad/SubProcesses/P1_gg_ttx/check_sa.cc | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/check_sa.cc b/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/check_sa.cc index 4e06c88737..d948576cb6 100644 --- a/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/check_sa.cc +++ b/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/check_sa.cc @@ -224,7 +224,9 @@ main( int argc, char** argv ) #ifdef MGONGPU_HAS_NO_ROCRAND throw std::runtime_error( "RocrandHost is not supported because this application was built without Rocrand support" ); #else - rndgen = RandomNumberMode::RocrandHost; + // See https://github.com/ROCm/hipRAND/issues/76 + throw std::runtime_error( "RocrandRandomNumberKernel on host is not supported yet (hiprandCreateGeneratorHost is not implemented yet)" ); + //rndgen = RandomNumberMode::RocrandHost; #endif } else if( 
arg == "--common" ) @@ -300,8 +302,11 @@ main( int argc, char** argv ) if( rmbsmp == RamboSamplingMode::RamboHost && rndgen == RandomNumberMode::RocrandDevice ) { #if not defined MGONGPU_HAS_NO_ROCRAND - std::cout << "WARNING! RamboHost selected: cannot use RocrandDevice, will use RocrandHost" << std::endl; - rndgen = RandomNumberMode::RocrandHost; + // See https://github.com/ROCm/hipRAND/issues/76 + //std::cout << "WARNING! RamboHost selected: cannot use RocrandDevice, will use RocrandHost" << std::endl; + //rndgen = RandomNumberMode::RocrandHost; + std::cout << "WARNING! RamboHost selected: cannot use RocrandDevice, will use CommonRandom (as RocrandHost is not implemented yet)" << std::endl; + rndgen = RandomNumberMode::CommonRandom; #else std::cout << "WARNING! RamboHost selected: cannot use RocrandDevice, will use CommonRandom" << std::endl; rndgen = RandomNumberMode::CommonRandom; From 3b3b654fd2382715a9065d0d3e0afc5b64a1ba94 Mon Sep 17 00:00:00 2001 From: Andrea Valassi Date: Fri, 2 Feb 2024 17:25:31 +0200 Subject: [PATCH 21/47] [rocrand] in gg_tt.mad, rename rocrand as hiprand in all file names and in all variables inside all files --- ...Kernel.cc => HiprandRandomNumberKernel.cc} | 26 +++--- .../P1_gg_ttx/HiprandRandomNumberKernel.cc | 1 + .../P1_gg_ttx/RocrandRandomNumberKernel.cc | 1 - .../SubProcesses/P1_gg_ttx/check_sa.cc | 90 +++++++++---------- .../P1_gg_ttx/gHiprandRandomNumberKernel.cu | 1 + .../P1_gg_ttx/gRocrandRandomNumberKernel.cu | 1 - .../SubProcesses/RandomNumberKernels.h | 12 +-- .../cudacpp/gg_tt.mad/SubProcesses/cudacpp.mk | 54 +++++------ epochX/cudacpp/gg_tt.mad/src/cudacpp_src.mk | 4 +- epochX/cudacpp/gg_tt.mad/src/mgOnGpuConfig.h | 22 ++--- 10 files changed, 106 insertions(+), 106 deletions(-) rename epochX/cudacpp/gg_tt.mad/SubProcesses/{RocrandRandomNumberKernel.cc => HiprandRandomNumberKernel.cc} (85%) create mode 120000 epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/HiprandRandomNumberKernel.cc delete mode 120000 
epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/RocrandRandomNumberKernel.cc create mode 120000 epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/gHiprandRandomNumberKernel.cu delete mode 120000 epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/gRocrandRandomNumberKernel.cu diff --git a/epochX/cudacpp/gg_tt.mad/SubProcesses/RocrandRandomNumberKernel.cc b/epochX/cudacpp/gg_tt.mad/SubProcesses/HiprandRandomNumberKernel.cc similarity index 85% rename from epochX/cudacpp/gg_tt.mad/SubProcesses/RocrandRandomNumberKernel.cc rename to epochX/cudacpp/gg_tt.mad/SubProcesses/HiprandRandomNumberKernel.cc index 187f541420..2e4534f9d4 100644 --- a/epochX/cudacpp/gg_tt.mad/SubProcesses/RocrandRandomNumberKernel.cc +++ b/epochX/cudacpp/gg_tt.mad/SubProcesses/HiprandRandomNumberKernel.cc @@ -11,7 +11,7 @@ #include -#ifndef MGONGPU_HAS_NO_ROCRAND /* clang-format off */ +#ifndef MGONGPU_HAS_NO_HIPRAND /* clang-format off */ #ifndef __HIP_PLATFORM_AMD__ #define __HIP_PLATFORM_AMD__ 1 // enable hiprand for AMD (rocrand) #endif @@ -34,8 +34,8 @@ namespace mg5amcCpu #endif { //-------------------------------------------------------------------------- -#ifndef MGONGPU_HAS_NO_ROCRAND - RocrandRandomNumberKernel::RocrandRandomNumberKernel( BufferRndNumMomenta& rnarray, const bool onDevice ) +#ifndef MGONGPU_HAS_NO_HIPRAND + HiprandRandomNumberKernel::HiprandRandomNumberKernel( BufferRndNumMomenta& rnarray, const bool onDevice ) : RandomNumberKernelBase( rnarray ) , m_isOnDevice( onDevice ) { @@ -43,29 +43,29 @@ namespace mg5amcCpu { #ifdef MGONGPUCPP_GPUIMPL if( !m_rnarray.isOnDevice() ) - throw std::runtime_error( "RocrandRandomNumberKernel on device with a host random number array" ); + throw std::runtime_error( "HiprandRandomNumberKernel on device with a host random number array" ); #else - throw std::runtime_error( "RocrandRandomNumberKernel does not support HiprandDevice on CPU host" ); + throw std::runtime_error( "HiprandRandomNumberKernel does not support HiprandDevice on CPU host" ); 
#endif } else { if( m_rnarray.isOnDevice() ) - throw std::runtime_error( "RocrandRandomNumberKernel on host with a device random number array" ); + throw std::runtime_error( "HiprandRandomNumberKernel on host with a device random number array" ); } createGenerator(); } //-------------------------------------------------------------------------- - RocrandRandomNumberKernel::~RocrandRandomNumberKernel() + HiprandRandomNumberKernel::~HiprandRandomNumberKernel() { destroyGenerator(); } //-------------------------------------------------------------------------- - void RocrandRandomNumberKernel::seedGenerator( const unsigned int seed ) + void HiprandRandomNumberKernel::seedGenerator( const unsigned int seed ) { if( m_isOnDevice ) { @@ -78,7 +78,7 @@ namespace mg5amcCpu //-------------------------------------------------------------------------- - void RocrandRandomNumberKernel::createGenerator() + void HiprandRandomNumberKernel::createGenerator() { //const hiprandRngType_t type = HIPRAND_RNG_PSEUDO_DEFAULT; //const hiprandRngType_t type = HIPRAND_RNG_PSEUDO_XORWOW; @@ -93,7 +93,7 @@ namespace mg5amcCpu else { // See https://github.com/ROCm/hipRAND/issues/76 - throw std::runtime_error( "RocrandRandomNumberKernel on host is not supported yet (hiprandCreateGeneratorHost is not implemented yet)" ); + throw std::runtime_error( "HiprandRandomNumberKernel on host is not supported yet (hiprandCreateGeneratorHost is not implemented yet)" ); //checkHiprand( hiprandCreateGeneratorHost( &m_rnGen, type ) ); // ALWAYS FAILS WITH CODE=1000 } // FIXME: hiprand ordering is not implemented yet @@ -108,14 +108,14 @@ namespace mg5amcCpu //-------------------------------------------------------------------------- - void RocrandRandomNumberKernel::destroyGenerator() + void HiprandRandomNumberKernel::destroyGenerator() { checkHiprand( hiprandDestroyGenerator( m_rnGen ) ); } //-------------------------------------------------------------------------- - void 
RocrandRandomNumberKernel::generateRnarray() + void HiprandRandomNumberKernel::generateRnarray() { #if defined MGONGPU_FPTYPE_DOUBLE checkHiprand( hiprandGenerateUniformDouble( m_rnGen, m_rnarray.data(), m_rnarray.size() ) ); @@ -123,7 +123,7 @@ namespace mg5amcCpu checkHiprand( hiprandGenerateUniform( m_rnGen, m_rnarray.data(), m_rnarray.size() ) ); #endif /* - printf( "\nRocrandRandomNumberKernel::generateRnarray size = %d\n", (int)m_rnarray.size() ); + printf( "\nHiprandRandomNumberKernel::generateRnarray size = %d\n", (int)m_rnarray.size() ); fptype* data = m_rnarray.data(); #ifdef MGONGPUCPP_GPUIMPL if( m_rnarray.isOnDevice() ) diff --git a/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/HiprandRandomNumberKernel.cc b/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/HiprandRandomNumberKernel.cc new file mode 120000 index 0000000000..6691864f78 --- /dev/null +++ b/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/HiprandRandomNumberKernel.cc @@ -0,0 +1 @@ +../HiprandRandomNumberKernel.cc \ No newline at end of file diff --git a/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/RocrandRandomNumberKernel.cc b/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/RocrandRandomNumberKernel.cc deleted file mode 120000 index ab1b0c4ce1..0000000000 --- a/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/RocrandRandomNumberKernel.cc +++ /dev/null @@ -1 +0,0 @@ -../RocrandRandomNumberKernel.cc \ No newline at end of file diff --git a/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/check_sa.cc b/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/check_sa.cc index d948576cb6..ccad53b082 100644 --- a/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/check_sa.cc +++ b/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/check_sa.cc @@ -58,7 +58,7 @@ int usage( char* argv0, int ret = 1 ) { std::cout << "Usage: " << argv0 - << " [--verbose|-v] [--debug|-d] [--performance|-p] [--json|-j] [--curhst|--curdev|--rorhst|--rordev|--common] [--rmbhst|--rmbdev] [--bridge]" + << " [--verbose|-v] [--debug|-d] 
[--performance|-p] [--json|-j] [--curhst|--curdev|--hirhst|--hirdev|--common] [--rmbhst|--rmbdev] [--bridge]" << " [#gpuBlocksPerGrid #gpuThreadsPerBlock] #iterations" << std::endl; std::cout << std::endl; std::cout << "The number of events per iteration is #gpuBlocksPerGrid * #gpuThreadsPerBlock" << std::endl; @@ -133,8 +133,8 @@ main( int argc, char** argv ) CommonRandom = 0, CurandHost = -1, CurandDevice = 1, - RocrandHost = -2, - RocrandDevice = 2 + HiprandHost = -2, + HiprandDevice = 2 }; #if defined __CUDACC__ #ifndef MGONGPU_HAS_NO_CURAND @@ -143,18 +143,18 @@ main( int argc, char** argv ) RandomNumberMode rndgen = RandomNumberMode::CommonRandom; // default on NVidia GPU if build has no curand (PR #784 and #785) #endif #elif defined __HIPCC__ -#ifndef MGONGPU_HAS_NO_ROCRAND - RandomNumberMode rndgen = RandomNumberMode::RocrandDevice; // default on AMD GPU if build has rocrand +#ifndef MGONGPU_HAS_NO_HIPRAND + RandomNumberMode rndgen = RandomNumberMode::HiprandDevice; // default on AMD GPU if build has hiprand #else - RandomNumberMode rndgen = RandomNumberMode::CommonRandom; // default on AMD GPU if build has no rocrand + RandomNumberMode rndgen = RandomNumberMode::CommonRandom; // default on AMD GPU if build has no hiprand #endif #else #ifndef MGONGPU_HAS_NO_CURAND RandomNumberMode rndgen = RandomNumberMode::CurandHost; // default on CPU if build has curand -#elif not defined MGONGPU_HAS_NO_ROCRAND - RandomNumberMode rndgen = RandomNumberMode::RocrandDevice; // default on CPU if build has rocrand +#elif not defined MGONGPU_HAS_NO_HIPRAND + RandomNumberMode rndgen = RandomNumberMode::HiprandDevice; // default on CPU if build has hiprand #else - RandomNumberMode rndgen = RandomNumberMode::CommonRandom; // default on CPU if build has neither curand nor rocrand + RandomNumberMode rndgen = RandomNumberMode::CommonRandom; // default on CPU if build has neither curand nor hiprand #endif #endif // Rambo sampling mode (NB RamboHost implies CommonRandom or 
CurandHost!) @@ -209,24 +209,24 @@ main( int argc, char** argv ) rndgen = RandomNumberMode::CurandHost; #endif } - else if( arg == "--rordev" ) + else if( arg == "--hirdev" ) { #ifndef __HIPCC__ - throw std::runtime_error( "RocrandDevice is not supported on CPUs or non-AMD GPUs" ); -#elif defined MGONGPU_HAS_NO_ROCRAND - throw std::runtime_error( "RocrandDevice is not supported because this application was built without Rocrand support" ); + throw std::runtime_error( "HiprandDevice is not supported on CPUs or non-AMD GPUs" ); +#elif defined MGONGPU_HAS_NO_HIPRAND + throw std::runtime_error( "HiprandDevice is not supported because this application was built without Hiprand support" ); #else - rndgen = RandomNumberMode::RocrandDevice; + rndgen = RandomNumberMode::HiprandDevice; #endif } - else if( arg == "--rorhst" ) + else if( arg == "--hirhst" ) { -#ifdef MGONGPU_HAS_NO_ROCRAND - throw std::runtime_error( "RocrandHost is not supported because this application was built without Rocrand support" ); +#ifdef MGONGPU_HAS_NO_HIPRAND + throw std::runtime_error( "HiprandHost is not supported because this application was built without Hiprand support" ); #else // See https://github.com/ROCm/hipRAND/issues/76 - throw std::runtime_error( "RocrandRandomNumberKernel on host is not supported yet (hiprandCreateGeneratorHost is not implemented yet)" ); - //rndgen = RandomNumberMode::RocrandHost; + throw std::runtime_error( "HiprandRandomNumberKernel on host is not supported yet (hiprandCreateGeneratorHost is not implemented yet)" ); + //rndgen = RandomNumberMode::HiprandHost; #endif } else if( arg == "--common" ) @@ -299,16 +299,16 @@ main( int argc, char** argv ) #endif } - if( rmbsmp == RamboSamplingMode::RamboHost && rndgen == RandomNumberMode::RocrandDevice ) + if( rmbsmp == RamboSamplingMode::RamboHost && rndgen == RandomNumberMode::HiprandDevice ) { -#if not defined MGONGPU_HAS_NO_ROCRAND +#if not defined MGONGPU_HAS_NO_HIPRAND // See 
https://github.com/ROCm/hipRAND/issues/76 - //std::cout << "WARNING! RamboHost selected: cannot use RocrandDevice, will use RocrandHost" << std::endl; - //rndgen = RandomNumberMode::RocrandHost; - std::cout << "WARNING! RamboHost selected: cannot use RocrandDevice, will use CommonRandom (as RocrandHost is not implemented yet)" << std::endl; + //std::cout << "WARNING! RamboHost selected: cannot use HiprandDevice, will use HiprandHost" << std::endl; + //rndgen = RandomNumberMode::HiprandHost; + std::cout << "WARNING! RamboHost selected: cannot use HiprandDevice, will use CommonRandom (as HiprandHost is not implemented yet)" << std::endl; rndgen = RandomNumberMode::CommonRandom; #else - std::cout << "WARNING! RamboHost selected: cannot use RocrandDevice, will use CommonRandom" << std::endl; + std::cout << "WARNING! RamboHost selected: cannot use HiprandDevice, will use CommonRandom" << std::endl; rndgen = RandomNumberMode::CommonRandom; #endif } @@ -463,7 +463,7 @@ main( int argc, char** argv ) std::unique_ptr wavetimes( new double[niter] ); std::unique_ptr wv3atimes( new double[niter] ); - // --- 0c. Create curand, rocrand or common generator + // --- 0c. Create curand, hiprand or common generator const std::string cgenKey = "0c GenCreat"; timermap.start( cgenKey ); // Allocate the appropriate RandomNumberKernel @@ -492,24 +492,24 @@ main( int argc, char** argv ) throw std::logic_error( "INTERNAL ERROR! CurandDevice is not supported on CPUs or non-NVidia GPUs" ); // INTERNAL ERROR (no path to this statement) #endif } - else if( rndgen == RandomNumberMode::RocrandHost ) + else if( rndgen == RandomNumberMode::HiprandHost ) { -#ifdef MGONGPU_HAS_NO_ROCRAND - throw std::runtime_error( "INTERNAL ERROR! RocrandHost is not supported because this application was built without Rocrand support" ); // INTERNAL ERROR (no path to this statement) +#ifdef MGONGPU_HAS_NO_HIPRAND + throw std::runtime_error( "INTERNAL ERROR! 
HiprandHost is not supported because this application was built without Hiprand support" ); // INTERNAL ERROR (no path to this statement) #else const bool onDevice = false; - prnk.reset( new RocrandRandomNumberKernel( hstRndmom, onDevice ) ); + prnk.reset( new HiprandRandomNumberKernel( hstRndmom, onDevice ) ); #endif } - else if( rndgen == RandomNumberMode::RocrandDevice ) + else if( rndgen == RandomNumberMode::HiprandDevice ) { -#ifdef MGONGPU_HAS_NO_ROCRAND - throw std::runtime_error( "INTERNAL ERROR! RocrandDevice is not supported because this application was built without Rocrand support" ); // INTERNAL ERROR (no path to this statement) +#ifdef MGONGPU_HAS_NO_HIPRAND + throw std::runtime_error( "INTERNAL ERROR! HiprandDevice is not supported because this application was built without Hiprand support" ); // INTERNAL ERROR (no path to this statement) #elif defined __HIPCC__ const bool onDevice = true; - prnk.reset( new RocrandRandomNumberKernel( devRndmom, onDevice ) ); + prnk.reset( new HiprandRandomNumberKernel( devRndmom, onDevice ) ); #else - throw std::logic_error( "INTERNAL ERROR! RocrandDevice is not supported on CPUs or non-NVidia GPUs" ); // INTERNAL ERROR (no path to this statement) + throw std::logic_error( "INTERNAL ERROR! HiprandDevice is not supported on CPUs or non-NVidia GPUs" ); // INTERNAL ERROR (no path to this statement) #endif } else throw std::logic_error( "INTERNAL ERROR! Unknown rndgen value?" ); // INTERNAL ERROR (no path to this statement) @@ -566,7 +566,7 @@ main( int argc, char** argv ) // *** START THE OLD-STYLE TIMER FOR RANDOM GEN *** double genrtime = 0; - // --- 1a. Seed rnd generator (to get same results on host and device in curand/rocrand) + // --- 1a. Seed rnd generator (to get same results on host and device in curand/hiprand) // [NB This should not be necessary using the host API: "Generation functions // can be called multiple times on the same generator to generate successive // blocks of results. 
For pseudorandom generators, multiple calls to generation @@ -585,7 +585,7 @@ main( int argc, char** argv ) #ifdef MGONGPUCPP_GPUIMPL if( rndgen != RandomNumberMode::CurandDevice && - rndgen != RandomNumberMode::RocrandDevice && + rndgen != RandomNumberMode::HiprandDevice && rmbsmp == RamboSamplingMode::RamboDevice ) { // --- 1c. Copy rndmom from host to device @@ -832,10 +832,10 @@ main( int argc, char** argv ) rndgentxt = "CURAND HOST"; else if( rndgen == RandomNumberMode::CurandDevice ) rndgentxt = "CURAND DEVICE"; - else if( rndgen == RandomNumberMode::RocrandHost ) - rndgentxt = "ROCRND HOST"; - else if( rndgen == RandomNumberMode::RocrandDevice ) - rndgentxt = "ROCRND DEVICE"; + else if( rndgen == RandomNumberMode::HiprandHost ) + rndgentxt = "ROCRAND HOST"; + else if( rndgen == RandomNumberMode::HiprandDevice ) + rndgentxt = "ROCRAND DEVICE"; #ifdef __CUDACC__ rndgentxt += " (CUDA code)"; #elif defined __HIPCC__ @@ -897,10 +897,10 @@ main( int argc, char** argv ) wrkflwtxt += "CURHST+"; else if( rndgen == RandomNumberMode::CurandDevice ) wrkflwtxt += "CURDEV+"; - else if( rndgen == RandomNumberMode::RocrandHost ) - wrkflwtxt += "RORHST+"; - else if( rndgen == RandomNumberMode::RocrandDevice ) - wrkflwtxt += "RORDEV+"; + else if( rndgen == RandomNumberMode::HiprandHost ) + wrkflwtxt += "HIRHST+"; + else if( rndgen == RandomNumberMode::HiprandDevice ) + wrkflwtxt += "HIRDEV+"; else wrkflwtxt += "??????+"; // no path to this statement // -- HOST or DEVICE rambo sampling? 
diff --git a/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/gHiprandRandomNumberKernel.cu b/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/gHiprandRandomNumberKernel.cu new file mode 120000 index 0000000000..061ddfa5fb --- /dev/null +++ b/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/gHiprandRandomNumberKernel.cu @@ -0,0 +1 @@ +HiprandRandomNumberKernel.cc \ No newline at end of file diff --git a/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/gRocrandRandomNumberKernel.cu b/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/gRocrandRandomNumberKernel.cu deleted file mode 120000 index ef5c82ffab..0000000000 --- a/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/gRocrandRandomNumberKernel.cu +++ /dev/null @@ -1 +0,0 @@ -RocrandRandomNumberKernel.cc \ No newline at end of file diff --git a/epochX/cudacpp/gg_tt.mad/SubProcesses/RandomNumberKernels.h b/epochX/cudacpp/gg_tt.mad/SubProcesses/RandomNumberKernels.h index 5f477f6139..7ed728a26c 100644 --- a/epochX/cudacpp/gg_tt.mad/SubProcesses/RandomNumberKernels.h +++ b/epochX/cudacpp/gg_tt.mad/SubProcesses/RandomNumberKernels.h @@ -13,7 +13,7 @@ // Forward definition from curand.h (the full header is only needed in CurandRandomKernel.cc) struct curandGenerator_st; -// Forward definition from hiprand.h (the full header is only needed in RocrandRandomKernel.cc) +// Forward definition from hiprand.h (the full header is only needed in HiprandRandomKernel.cc) struct rocrand_generator_base_type; typedef rocrand_generator_base_type hiprandGenerator_st; @@ -148,16 +148,16 @@ namespace mg5amcCpu //-------------------------------------------------------------------------- - // A class encapsulating ROCRAND random number generation on a CPU host or on a GPU device - class RocrandRandomNumberKernel final : public RandomNumberKernelBase + // A class encapsulating HIPRAND random number generation on a CPU host or on a GPU device + class HiprandRandomNumberKernel final : public RandomNumberKernelBase { public: // Constructor from an 
existing output buffer - RocrandRandomNumberKernel( BufferRndNumMomenta& rnarray, const bool onDevice ); + HiprandRandomNumberKernel( BufferRndNumMomenta& rnarray, const bool onDevice ); // Destructor - ~RocrandRandomNumberKernel(); + ~HiprandRandomNumberKernel(); // Seed the random number generator void seedGenerator( const unsigned int seed ) override final; @@ -181,7 +181,7 @@ namespace mg5amcCpu // Is this a host or device kernel? const bool m_isOnDevice; - // The rocrand generator + // The hiprand generator // (NB: hiprand.h defines typedef hiprandGenerator_t as a pointer to forward-defined 'struct hiprandGenerator_st') hiprandGenerator_st* m_rnGen; }; diff --git a/epochX/cudacpp/gg_tt.mad/SubProcesses/cudacpp.mk b/epochX/cudacpp/gg_tt.mad/SubProcesses/cudacpp.mk index 92b8a2cb4d..045773b5c5 100644 --- a/epochX/cudacpp/gg_tt.mad/SubProcesses/cudacpp.mk +++ b/epochX/cudacpp/gg_tt.mad/SubProcesses/cudacpp.mk @@ -356,7 +356,7 @@ export OMPFLAGS #------------------------------------------------------------------------------- -#=== Configure defaults and check if user-defined choices exist for RNDGEN (legacy!), HASCURAND, HASROCRAND +#=== Configure defaults and check if user-defined choices exist for RNDGEN (legacy!), HASCURAND, HASHIPRAND # If the legacy RNDGEN exists, this take precedence over any HASCURAND choice (but a warning is printed out) ###$(info RNDGEN=$(RNDGEN)) @@ -383,22 +383,22 @@ ifeq ($(HASCURAND),) endif endif -# Set the default HASROCRAND (rocrand random number generator) choice, if no prior choice exists for HASROCRAND -# (NB: allow HASROCRAND=hasRocrand even if $(GPUCC) does not point to hipcc: assume HIP_HOME was defined correctly...) -ifeq ($(HASROCRAND),) +# Set the default HASHIPRAND (hiprand random number generator) choice, if no prior choice exists for HASHIPRAND +# (NB: allow HASHIPRAND=hasHiprand even if $(GPUCC) does not point to hipcc: assume HIP_HOME was defined correctly...) 
+ifeq ($(HASHIPRAND),) ifeq ($(GPUCC),) # CPU-only build - override HASROCRAND = hasNoRocrand + override HASHIPRAND = hasNoHiprand else ifeq ($(findstring hipcc,$(GPUCC)),hipcc) # AMD GPU build - override HASROCRAND = hasRocrand + override HASHIPRAND = hasHiprand else # non-AMD GPU build - override HASROCRAND = hasNoRocrand + override HASHIPRAND = hasNoHiprand endif endif -# Export HASCURAND, HASROCRAND so that it is not necessary to pass them to the src Makefile too +# Export HASCURAND, HASHIPRAND so that it is not necessary to pass them to the src Makefile too # (NB: these variables in cudacpp_src.mk are only used to define the build tag, they are NOT needed for RNDCXXFLAGS or RNDLIBFLAGS) export HASCURAND -export HASROCRAND +export HASHIPRAND #------------------------------------------------------------------------------- @@ -490,10 +490,10 @@ else ifneq ($(HRDCOD),0) endif -#=== Set the CUDA/HIP/C++ compiler and linker flags appropriate to user-defined choices of HASCURAND, HASROCRAND +#=== Set the CUDA/HIP/C++ compiler and linker flags appropriate to user-defined choices of HASCURAND, HASHIPRAND $(info HASCURAND=$(HASCURAND)) -$(info HASROCRAND=$(HASROCRAND)) +$(info HASHIPRAND=$(HASHIPRAND)) override RNDCXXFLAGS= override RNDLIBFLAGS= @@ -506,17 +506,17 @@ else $(error Unknown HASCURAND='$(HASCURAND)': only 'hasCurand' and 'hasNoCurand' are supported) endif -# Set the RNDCXXFLAGS and RNDLIBFLAGS build flags appropriate to each HASROCRAND choice (example: "make HASROCRAND=hasNoRocrand") -ifeq ($(HASROCRAND),hasNoRocrand) - override RNDCXXFLAGS += -DMGONGPU_HAS_NO_ROCRAND -else ifeq ($(HASROCRAND),hasRocrand) +# Set the RNDCXXFLAGS and RNDLIBFLAGS build flags appropriate to each HASHIPRAND choice (example: "make HASHIPRAND=hasNoHiprand") +ifeq ($(HASHIPRAND),hasNoHiprand) + override RNDCXXFLAGS += -DMGONGPU_HAS_NO_HIPRAND +else ifeq ($(HASHIPRAND),hasHiprand) override RNDLIBFLAGS += -L$(HIP_HOME)/lib/ -lhiprand -else ifneq ($(HASROCRAND),hasRocrand) - $(error 
Unknown HASROCRAND='$(HASROCRAND)': only 'hasRocrand' and 'hasNoRocrand' are supported) +else ifneq ($(HASHIPRAND),hasHiprand) + $(error Unknown HASHIPRAND='$(HASHIPRAND)': only 'hasHiprand' and 'hasNoHiprand' are supported) endif #$(info RNDCXXFLAGS=$(RNDCXXFLAGS)) -#$(info HASROCRAND=$(HASROCRAND)) +#$(info HASHIPRAND=$(HASHIPRAND)) #------------------------------------------------------------------------------- @@ -528,7 +528,7 @@ override DIRTAG = $(AVX)_$(FPTYPE)_inl$(HELINL)_hrd$(HRDCOD) # Build lockfile "full" tag (defines full specification of build options that cannot be intermixed) # (Rationale: avoid mixing of CUDA and no-CUDA environment builds with different random number generators) -override TAG = $(AVX)_$(FPTYPE)_inl$(HELINL)_hrd$(HRDCOD)_$(HASCURAND)_$(HASROCRAND) +override TAG = $(AVX)_$(FPTYPE)_inl$(HELINL)_hrd$(HRDCOD)_$(HASCURAND)_$(HASHIPRAND) # Build directory: current directory by default, or build.$(DIRTAG) if USEBUILDDIR==1 ifeq ($(USEBUILDDIR),1) @@ -641,13 +641,13 @@ $(BUILDDIR)/check_sa.o: CXXFLAGS += $(RNDCXXFLAGS) $(BUILDDIR)/gcheck_sa.o: CUFLAGS += $(RNDCXXFLAGS) $(BUILDDIR)/CurandRandomNumberKernel.o: CXXFLAGS += $(RNDCXXFLAGS) $(BUILDDIR)/gCurandRandomNumberKernel.o: CUFLAGS += $(RNDCXXFLAGS) -$(BUILDDIR)/RocrandRandomNumberKernel.o: CXXFLAGS += $(RNDCXXFLAGS) -$(BUILDDIR)/gRocrandRandomNumberKernel.o: CUFLAGS += $(RNDCXXFLAGS) +$(BUILDDIR)/HiprandRandomNumberKernel.o: CXXFLAGS += $(RNDCXXFLAGS) +$(BUILDDIR)/gHiprandRandomNumberKernel.o: CUFLAGS += $(RNDCXXFLAGS) ifeq ($(HASCURAND),hasCurand) # curand headers, #679 $(BUILDDIR)/CurandRandomNumberKernel.o: CXXFLAGS += $(CUINC) endif -ifeq ($(HASROCRAND),hasRocrand) # rocrand headers -$(BUILDDIR)/RocrandRandomNumberKernel.o: CXXFLAGS += $(HIPINC) +ifeq ($(HASHIPRAND),hasHiprand) # hiprand headers +$(BUILDDIR)/HiprandRandomNumberKernel.o: CXXFLAGS += $(HIPINC) endif # Avoid "warning: builtin __has_trivial_... is deprecated; use __is_trivially_... 
instead" in GPUCC with icx2023 (#592) @@ -725,8 +725,8 @@ endif # Target (and build rules): C++ and CUDA standalone executables $(cxx_main): LIBFLAGS += $(CXXLIBFLAGSRPATH) # avoid the need for LD_LIBRARY_PATH -$(cxx_main): $(BUILDDIR)/check_sa.o $(LIBDIR)/lib$(MG5AMC_CXXLIB).so $(cxx_objects_exe) $(BUILDDIR)/CurandRandomNumberKernel.o $(BUILDDIR)/RocrandRandomNumberKernel.o - $(CXX) -o $@ $(BUILDDIR)/check_sa.o $(OMPFLAGS) -ldl -pthread $(LIBFLAGS) -L$(LIBDIR) -l$(MG5AMC_CXXLIB) $(cxx_objects_exe) $(BUILDDIR)/CurandRandomNumberKernel.o $(BUILDDIR)/RocrandRandomNumberKernel.o $(RNDLIBFLAGS) +$(cxx_main): $(BUILDDIR)/check_sa.o $(LIBDIR)/lib$(MG5AMC_CXXLIB).so $(cxx_objects_exe) $(BUILDDIR)/CurandRandomNumberKernel.o $(BUILDDIR)/HiprandRandomNumberKernel.o + $(CXX) -o $@ $(BUILDDIR)/check_sa.o $(OMPFLAGS) -ldl -pthread $(LIBFLAGS) -L$(LIBDIR) -l$(MG5AMC_CXXLIB) $(cxx_objects_exe) $(BUILDDIR)/CurandRandomNumberKernel.o $(BUILDDIR)/HiprandRandomNumberKernel.o $(RNDLIBFLAGS) ifneq ($(GPUCC),) ifneq ($(shell $(CXX) --version | grep ^Intel),) @@ -736,8 +736,8 @@ else ifneq ($(shell $(CXX) --version | grep ^nvc++),) # support nvc++ #531 $(cu_main): LIBFLAGS += -L$(patsubst %bin/nvc++,%lib,$(subst ccache ,,$(CXX))) -lnvhpcatm -lnvcpumath -lnvc endif $(cu_main): LIBFLAGS += $(CULIBFLAGSRPATH) # avoid the need for LD_LIBRARY_PATH -$(cu_main): $(BUILDDIR)/gcheck_sa.o $(LIBDIR)/lib$(MG5AMC_CULIB).so $(cu_objects_exe) $(BUILDDIR)/gCurandRandomNumberKernel.o $(BUILDDIR)/gRocrandRandomNumberKernel.o - $(GPUCC) -o $@ $(BUILDDIR)/gcheck_sa.o $(CUARCHFLAGS) $(LIBFLAGS) -L$(LIBDIR) -l$(MG5AMC_CULIB) $(cu_objects_exe) $(BUILDDIR)/gCurandRandomNumberKernel.o $(BUILDDIR)/gRocrandRandomNumberKernel.o $(RNDLIBFLAGS) +$(cu_main): $(BUILDDIR)/gcheck_sa.o $(LIBDIR)/lib$(MG5AMC_CULIB).so $(cu_objects_exe) $(BUILDDIR)/gCurandRandomNumberKernel.o $(BUILDDIR)/gHiprandRandomNumberKernel.o + $(GPUCC) -o $@ $(BUILDDIR)/gcheck_sa.o $(CUARCHFLAGS) $(LIBFLAGS) -L$(LIBDIR) -l$(MG5AMC_CULIB) 
$(cu_objects_exe) $(BUILDDIR)/gCurandRandomNumberKernel.o $(BUILDDIR)/gHiprandRandomNumberKernel.o $(RNDLIBFLAGS) endif #------------------------------------------------------------------------------- diff --git a/epochX/cudacpp/gg_tt.mad/src/cudacpp_src.mk b/epochX/cudacpp/gg_tt.mad/src/cudacpp_src.mk index 99fd00c01b..05c479d395 100644 --- a/epochX/cudacpp/gg_tt.mad/src/cudacpp_src.mk +++ b/epochX/cudacpp/gg_tt.mad/src/cudacpp_src.mk @@ -87,7 +87,7 @@ endif #------------------------------------------------------------------------------- #=== Set the CUDA/C++ compiler flags appropriate to user-defined choices of AVX, FPTYPE, HELINL, HRDCOD (exported from cudacpp.mk) -#=== (NB the RNDCXXFLAGS and RNDLIBFLAGS appropriate to user-defined choices of HASCURAND and HASROCRAND have been exported from cudacpp.mk) +#=== (NB the RNDCXXFLAGS and RNDLIBFLAGS appropriate to user-defined choices of HASCURAND and HASHIPRAND have been exported from cudacpp.mk) # Set the build flags appropriate to OMPFLAGS ###$(info OMPFLAGS=$(OMPFLAGS)) @@ -186,7 +186,7 @@ override DIRTAG = $(AVX)_$(FPTYPE)_inl$(HELINL)_hrd$(HRDCOD) # Build lockfile "full" tag (defines full specification of build options that cannot be intermixed) # (Rationale: avoid mixing of CUDA and no-CUDA environment builds with different random number generators) -override TAG = $(AVX)_$(FPTYPE)_inl$(HELINL)_hrd$(HRDCOD)_$(HASCURAND)_$(HASROCRAND) +override TAG = $(AVX)_$(FPTYPE)_inl$(HELINL)_hrd$(HRDCOD)_$(HASCURAND)_$(HASHIPRAND) # Build directory: current directory by default, or build.$(DIRTAG) if USEBUILDDIR==1 ###$(info Current directory is $(shell pwd)) diff --git a/epochX/cudacpp/gg_tt.mad/src/mgOnGpuConfig.h b/epochX/cudacpp/gg_tt.mad/src/mgOnGpuConfig.h index 6a78d71c0a..d64d025629 100644 --- a/epochX/cudacpp/gg_tt.mad/src/mgOnGpuConfig.h +++ b/epochX/cudacpp/gg_tt.mad/src/mgOnGpuConfig.h @@ -21,14 +21,14 @@ #endif // Make sure that __HIP_PLATFORM_NVIDIA__ is undefined -// (__HIP_PLATFORM_AMD__ is defined by hipcc 
or in RocrandRandomNumberKernel.cc) +// (__HIP_PLATFORM_AMD__ is defined by hipcc or in HiprandRandomNumberKernel.cc) #undef __HIP_PLATFORM_NVIDIA__ // disable hiprand for NVidia (curand) // ** NB1 Throughputs (e.g. 6.8E8) are events/sec for "./gcheck.exe -p 65536 128 12" // ** NB2 Baseline on b7g47n0004 fluctuates (probably depends on load on other VMs) // Choose if curand is supported for generating random numbers -// For HIP, by default, do not allow curand to be used (rocrand or common random numbers will be used instead) +// For HIP, by default, do not allow curand to be used (hiprand or common random numbers will be used instead) // For both CUDA and C++, by default, do not inline, but allow this macro to be set from outside with e.g. -DMGONGPU_HAS_NO_CURAND // (there exist CUDA installations, e.g. using the HPC package, which do not include curand - see PR #784 and #785) #if defined __HIPCC__ @@ -43,19 +43,19 @@ //#endif #endif -// Choose if rocrand is supported for generating random numbers -// For CUDA, by default, do not allow rocrand to be used (curand or common random numbers will be used instead) -// For both HIP and C++, by default, do not inline, but allow this macro to be set from outside with e.g. -DMGONGPU_HAS_NO_ROCRAND -// (there may exist HIP installations which do not include rocrand?) +// Choose if hiprand is supported for generating random numbers +// For CUDA, by default, do not allow hiprand to be used (curand or common random numbers will be used instead) +// For both HIP and C++, by default, do not inline, but allow this macro to be set from outside with e.g. -DMGONGPU_HAS_NO_HIPRAND +// (there may exist HIP installations which do not include hiprand?) 
#if defined __CUDACC__ -#define MGONGPU_HAS_NO_ROCRAND 1 +#define MGONGPU_HAS_NO_HIPRAND 1 #else //#ifdef __HIPCC__ -//#undef MGONGPU_HAS_NO_ROCRAND // default -////#define MGONGPU_HAS_NO_ROCRAND 1 +//#undef MGONGPU_HAS_NO_HIPRAND // default +////#define MGONGPU_HAS_NO_HIPRAND 1 //#else -//#undef MGONGPU_HAS_NO_ROCRAND // default -////#define MGONGPU_HAS_NO_ROCRAND 1 +//#undef MGONGPU_HAS_NO_HIPRAND // default +////#define MGONGPU_HAS_NO_HIPRAND 1 //#endif #endif From 8941a64f8e0e0f326b34104a7ab2128ee56c9e83 Mon Sep 17 00:00:00 2001 From: Andrea Valassi Date: Tue, 30 Jan 2024 01:15:05 +0100 Subject: [PATCH 22/47] [jt774] in tput scripts, add -rorhst and -hip flags --- epochX/cudacpp/tput/allTees.sh | 11 ++++++++--- epochX/cudacpp/tput/teeThroughputX.sh | 2 ++ epochX/cudacpp/tput/throughputX.sh | 3 +++ 3 files changed, 13 insertions(+), 3 deletions(-) diff --git a/epochX/cudacpp/tput/allTees.sh b/epochX/cudacpp/tput/allTees.sh index 4d1599e547..9be4f5d4fc 100755 --- a/epochX/cudacpp/tput/allTees.sh +++ b/epochX/cudacpp/tput/allTees.sh @@ -12,6 +12,7 @@ suff=".mad" # Parse command line arguments ggttggg=-ggttggg +rndhst=-curhst while [ "$1" != "" ]; do if [ "$1" == "-short" ]; then # Short (no ggttggg) or long version? @@ -30,8 +31,12 @@ while [ "$1" != "" ]; do # Only build all tests instead of building and running them? opts+=" -makeonly" shift + elif [ "$1" == "-hip" ]; then + # Random numbers use rocrand instead of curand? + rndhst=-rorhst + shift else - echo "Usage: $0 [-short] [-e] [-sa] [-makeonly]" + echo "Usage: $0 [-short] [-e] [-sa] [-makeonly] [-hip]" exit 1 fi done @@ -70,8 +75,8 @@ cmd="./tput/teeThroughputX.sh -eemumu -ggtt -ggttgg -flt -rmbhst ${opts}" $cmd; status=$? 
ended4="$cmd\nENDED(4) AT $(date) [Status=$status]" -# (72/78) Two extra logs (double/float x hrd0 x inl0 + curhst) only in three of the six processes (no rebuild needed) -cmd="./tput/teeThroughputX.sh -eemumu -ggtt -ggttgg -flt -curhst ${opts}" +# (72/78) Two extra logs (double/float x hrd0 x inl0 + rndhst) only in three of the six processes (no rebuild needed) +cmd="./tput/teeThroughputX.sh -eemumu -ggtt -ggttgg -flt ${rndhst} ${opts}" $cmd; status=$? ended5="$cmd\nENDED(5) AT $(date) [Status=$status]" diff --git a/epochX/cudacpp/tput/teeThroughputX.sh b/epochX/cudacpp/tput/teeThroughputX.sh index bd478452ac..de0a1e912a 100755 --- a/epochX/cudacpp/tput/teeThroughputX.sh +++ b/epochX/cudacpp/tput/teeThroughputX.sh @@ -93,6 +93,8 @@ for arg in $*; do rndgen=$arg elif [ "$arg" == "-curhst" ]; then rndgen=$arg + elif [ "$arg" == "-rorhst" ]; then + rndgen=$arg elif [ "$arg" == "-rmbhst" ]; then rmbsmp=$arg elif [ "$arg" == "-bridge" ]; then diff --git a/epochX/cudacpp/tput/throughputX.sh b/epochX/cudacpp/tput/throughputX.sh index 1e5b427b1f..503d060237 100755 --- a/epochX/cudacpp/tput/throughputX.sh +++ b/epochX/cudacpp/tput/throughputX.sh @@ -187,6 +187,9 @@ while [ "$1" != "" ]; do elif [ "$1" == "-curhst" ]; then rndgen=" -${1}" shift + elif [ "$1" == "-rorhst" ]; then + rndgen=" -${1}" + shift elif [ "$1" == "-rmbhst" ]; then rmbsmp=" -${1}" shift From a16495b4f0a3ef537ab4be6662a3f0c700107094 Mon Sep 17 00:00:00 2001 From: Andrea Valassi Date: Fri, 2 Feb 2024 17:28:03 +0200 Subject: [PATCH 23/47] [rocrand] in tput scripts, rename -rorhst as -hirhst (hiprand host instead of rocrand host) --- epochX/cudacpp/tput/allTees.sh | 4 ++-- epochX/cudacpp/tput/throughputX.sh | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/epochX/cudacpp/tput/allTees.sh b/epochX/cudacpp/tput/allTees.sh index 9be4f5d4fc..14ae997d81 100755 --- a/epochX/cudacpp/tput/allTees.sh +++ b/epochX/cudacpp/tput/allTees.sh @@ -32,8 +32,8 @@ while [ "$1" != "" ]; do opts+=" -makeonly" 
shift elif [ "$1" == "-hip" ]; then - # Random numbers use rocrand instead of curand? - rndhst=-rorhst + # Random numbers use hiprand instead of curand? + rndhst=-hirhst shift else echo "Usage: $0 [-short] [-e] [-sa] [-makeonly] [-hip]" diff --git a/epochX/cudacpp/tput/throughputX.sh b/epochX/cudacpp/tput/throughputX.sh index 503d060237..efb282fc58 100755 --- a/epochX/cudacpp/tput/throughputX.sh +++ b/epochX/cudacpp/tput/throughputX.sh @@ -187,7 +187,7 @@ while [ "$1" != "" ]; do elif [ "$1" == "-curhst" ]; then rndgen=" -${1}" shift - elif [ "$1" == "-rorhst" ]; then + elif [ "$1" == "-hirhst" ]; then rndgen=" -${1}" shift elif [ "$1" == "-rmbhst" ]; then From 755020dc744c92368931363c1855b0da42be1d7e Mon Sep 17 00:00:00 2001 From: Andrea Valassi Date: Fri, 2 Feb 2024 17:30:12 +0200 Subject: [PATCH 24/47] [rocrand] in tput scripts, use common random for HIP instead of hiprand host, since the latter is not yet implemented --- epochX/cudacpp/tput/allTees.sh | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/epochX/cudacpp/tput/allTees.sh b/epochX/cudacpp/tput/allTees.sh index 14ae997d81..4df3f34210 100755 --- a/epochX/cudacpp/tput/allTees.sh +++ b/epochX/cudacpp/tput/allTees.sh @@ -32,8 +32,11 @@ while [ "$1" != "" ]; do opts+=" -makeonly" shift elif [ "$1" == "-hip" ]; then - # Random numbers use hiprand instead of curand? - rndhst=-hirhst + #### Random numbers use hiprand instead of curand? + ###rndhst=-hirhst + # See https://github.com/ROCm/hipRAND/issues/76 + # Random numbers use common (not hiprand) instead of curand? 
+ rndhst=-common shift else echo "Usage: $0 [-short] [-e] [-sa] [-makeonly] [-hip]" From a5156063417f60bebc2880c36aa3f60a622fecdf Mon Sep 17 00:00:00 2001 From: Andrea Valassi Date: Fri, 2 Feb 2024 17:04:48 +0100 Subject: [PATCH 25/47] [rocrand] fix clang formatting for itscrd80 in CODEGEN runTest.cc --- .../madgraph/iolibs/template_files/gpu/runTest.cc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/runTest.cc b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/runTest.cc index de327f2321..7f248d29a4 100644 --- a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/runTest.cc +++ b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/runTest.cc @@ -238,7 +238,7 @@ struct CUDATest : public CUDA_CPU_TestBase return MemoryAccessMatrixElements::ieventAccessConst( hstMatrixElements.data(), ievt ); } }; -#endif +#endif /* clang-format off */ // Use two levels of macros to force stringification at the right level // (see https://gcc.gnu.org/onlinedocs/gcc-3.0.1/cpp_3.html#SEC17 and https://stackoverflow.com/a/3419392) @@ -260,4 +260,4 @@ INSTANTIATE_TEST_SUITE_P( prefix, \ MG_INSTANTIATE_TEST_SUITE_GPU( XTESTID_GPU( MG_EPOCH_PROCESS_ID ), MadgraphTest ); #else MG_INSTANTIATE_TEST_SUITE_CPU( XTESTID_CPU( MG_EPOCH_PROCESS_ID ), MadgraphTest ); -#endif +#endif /* clang-format on */ From 7a29e44146821684ebeb239c3baf3bd80d2f89f4 Mon Sep 17 00:00:00 2001 From: Andrea Valassi Date: Fri, 2 Feb 2024 17:06:43 +0100 Subject: [PATCH 26/47] [rocrand] in CODEGEN, backport from gg_tt.mad the changes adding hiprand support (clarify however that hiprand host and hiprand ordering are not yet implemented) --- .../gpu/CurandRandomNumberKernel.cc | 1 + .../gpu/HiprandRandomNumberKernel.cc | 145 ++++++++++++++++++ .../template_files/gpu/RandomNumberKernels.h | 60 ++++++-- 
.../iolibs/template_files/gpu/check_sa.cc | 111 ++++++++++++-- .../iolibs/template_files/gpu/cudacpp.mk | 142 +++++++++++------ .../iolibs/template_files/gpu/cudacpp_src.mk | 17 +- .../iolibs/template_files/gpu/mgOnGpuConfig.h | 22 ++- .../CUDACPP_SA_OUTPUT/model_handling.py | 1 + .../PLUGIN/CUDACPP_SA_OUTPUT/output.py | 7 +- 9 files changed, 418 insertions(+), 88 deletions(-) create mode 100644 epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/HiprandRandomNumberKernel.cc diff --git a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/CurandRandomNumberKernel.cc b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/CurandRandomNumberKernel.cc index 08a16f6f2c..c160c5e06b 100644 --- a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/CurandRandomNumberKernel.cc +++ b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/CurandRandomNumberKernel.cc @@ -10,6 +10,7 @@ #include #ifndef MGONGPU_HAS_NO_CURAND /* clang-format off */ +// NB This must come AFTER mgOnGpuConfig.h which contains our definition of __global__ when MGONGPUCPP_GPUIMPL is not defined #include "curand.h" #define checkCurand( code ){ assertCurand( code, __FILE__, __LINE__ ); } inline void assertCurand( curandStatus_t code, const char *file, int line, bool abort = true ) diff --git a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/HiprandRandomNumberKernel.cc b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/HiprandRandomNumberKernel.cc new file mode 100644 index 0000000000..2e4534f9d4 --- /dev/null +++ b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/HiprandRandomNumberKernel.cc @@ -0,0 +1,145 @@ +// Copyright (C) 2020-2024 CERN and UCLouvain. +// Licensed under the GNU Lesser General Public License (version 3 or later). +// Created by: A. 
Valassi (Jan 2024) for the MG5aMC CUDACPP plugin. +// Further modified by: A. Valassi (2024) for the MG5aMC CUDACPP plugin. + +#include "mgOnGpuConfig.h" + +#include "GpuRuntime.h" +#include "MemoryBuffers.h" +#include "RandomNumberKernels.h" + +#include + +#ifndef MGONGPU_HAS_NO_HIPRAND /* clang-format off */ +#ifndef __HIP_PLATFORM_AMD__ +#define __HIP_PLATFORM_AMD__ 1 // enable hiprand for AMD (rocrand) +#endif +#include +#define checkHiprand( code ){ assertHiprand( code, __FILE__, __LINE__ ); } +inline void assertHiprand( hiprandStatus_t code, const char *file, int line, bool abort = true ) +{ + if ( code != HIPRAND_STATUS_SUCCESS ) + { + printf( "HiprandAssert: %s:%d code=%d\n", file, line, code ); + if ( abort ) assert( code == HIPRAND_STATUS_SUCCESS ); + } +} +#endif /* clang-format on */ + +#ifdef MGONGPUCPP_GPUIMPL +namespace mg5amcGpu +#else +namespace mg5amcCpu +#endif +{ + //-------------------------------------------------------------------------- +#ifndef MGONGPU_HAS_NO_HIPRAND + HiprandRandomNumberKernel::HiprandRandomNumberKernel( BufferRndNumMomenta& rnarray, const bool onDevice ) + : RandomNumberKernelBase( rnarray ) + , m_isOnDevice( onDevice ) + { + if( m_isOnDevice ) + { +#ifdef MGONGPUCPP_GPUIMPL + if( !m_rnarray.isOnDevice() ) + throw std::runtime_error( "HiprandRandomNumberKernel on device with a host random number array" ); +#else + throw std::runtime_error( "HiprandRandomNumberKernel does not support HiprandDevice on CPU host" ); +#endif + } + else + { + if( m_rnarray.isOnDevice() ) + throw std::runtime_error( "HiprandRandomNumberKernel on host with a device random number array" ); + } + createGenerator(); + } + + //-------------------------------------------------------------------------- + + HiprandRandomNumberKernel::~HiprandRandomNumberKernel() + { + destroyGenerator(); + } + + //-------------------------------------------------------------------------- + + void HiprandRandomNumberKernel::seedGenerator( const unsigned int seed ) + { + 
if( m_isOnDevice ) + { + destroyGenerator(); // workaround for #429 + createGenerator(); // workaround for #429 + } + //printf( "seedGenerator: seed %d\n", seed ); + checkHiprand( hiprandSetPseudoRandomGeneratorSeed( m_rnGen, seed ) ); + } + + //-------------------------------------------------------------------------- + + void HiprandRandomNumberKernel::createGenerator() + { + //const hiprandRngType_t type = HIPRAND_RNG_PSEUDO_DEFAULT; + //const hiprandRngType_t type = HIPRAND_RNG_PSEUDO_XORWOW; + //const hiprandRngType_t type = HIPRAND_RNG_PSEUDO_MRG32K3A; + const hiprandRngType_t type = HIPRAND_RNG_PSEUDO_MTGP32; // same as curand; not implemented yet (code=1000) in host code + //const hiprandRngType_t type = HIPRAND_RNG_PSEUDO_MT19937; + //const hiprandRngType_t type = HIPRAND_RNG_PSEUDO_PHILOX4_32_10; + if( m_isOnDevice ) + { + checkHiprand( hiprandCreateGenerator( &m_rnGen, type ) ); + } + else + { + // See https://github.com/ROCm/hipRAND/issues/76 + throw std::runtime_error( "HiprandRandomNumberKernel on host is not supported yet (hiprandCreateGeneratorHost is not implemented yet)" ); + //checkHiprand( hiprandCreateGeneratorHost( &m_rnGen, type ) ); // ALWAYS FAILS WITH CODE=1000 + } + // FIXME: hiprand ordering is not implemented yet + // See https://github.com/ROCm/hipRAND/issues/75 + /* + //checkHiprand( hiprandSetGeneratorOrdering( *&m_rnGen, HIPRAND_ORDERING_PSEUDO_LEGACY ) ); + checkHiprand( hiprandSetGeneratorOrdering( *&m_rnGen, HIPRAND_ORDERING_PSEUDO_BEST ) ); + //checkHiprand( hiprandSetGeneratorOrdering( *&m_rnGen, HIPRAND_ORDERING_PSEUDO_DYNAMIC ) ); + //checkHiprand( hiprandSetGeneratorOrdering( *&m_rnGen, HIPRAND_ORDERING_PSEUDO_SEEDED ) ); + */ + } + + //-------------------------------------------------------------------------- + + void HiprandRandomNumberKernel::destroyGenerator() + { + checkHiprand( hiprandDestroyGenerator( m_rnGen ) ); + } + + //-------------------------------------------------------------------------- + + void 
HiprandRandomNumberKernel::generateRnarray() + { +#if defined MGONGPU_FPTYPE_DOUBLE + checkHiprand( hiprandGenerateUniformDouble( m_rnGen, m_rnarray.data(), m_rnarray.size() ) ); +#elif defined MGONGPU_FPTYPE_FLOAT + checkHiprand( hiprandGenerateUniform( m_rnGen, m_rnarray.data(), m_rnarray.size() ) ); +#endif + /* + printf( "\nHiprandRandomNumberKernel::generateRnarray size = %d\n", (int)m_rnarray.size() ); + fptype* data = m_rnarray.data(); +#ifdef MGONGPUCPP_GPUIMPL + if( m_rnarray.isOnDevice() ) + { + data = new fptype[m_rnarray.size()](); + checkCuda( cudaMemcpy( data, m_rnarray.data(), m_rnarray.bytes(), cudaMemcpyDeviceToHost ) ); + } +#endif + for( int i = 0; i < ( (int)m_rnarray.size() / 4 ); i++ ) + printf( "[%4d] %f %f %f %f\n", i * 4, data[i * 4], data[i * 4 + 2], data[i * 4 + 2], data[i * 4 + 3] ); +#ifdef MGONGPUCPP_GPUIMPL + if( m_rnarray.isOnDevice() ) delete[] data; +#endif + */ + } + + //-------------------------------------------------------------------------- +#endif +} diff --git a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/RandomNumberKernels.h b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/RandomNumberKernels.h index 21d63beeac..7ed728a26c 100644 --- a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/RandomNumberKernels.h +++ b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/RandomNumberKernels.h @@ -1,21 +1,22 @@ -// Copyright (C) 2020-2023 CERN and UCLouvain. +// Copyright (C) 2020-2024 CERN and UCLouvain. // Licensed under the GNU Lesser General Public License (version 3 or later). // Created by: A. Valassi (Dec 2021) for the MG5aMC CUDACPP plugin. -// Further modified by: J. Teig, A. Valassi (2021-2023) for the MG5aMC CUDACPP plugin. +// Further modified by: J. Teig, A. Valassi (2021-2024) for the MG5aMC CUDACPP plugin. 
#ifndef RANDOMNUMBERKERNELS_H #define RANDOMNUMBERKERNELS_H 1 #include "mgOnGpuConfig.h" -// NB This must come AFTER mgOnGpuConfig.h which contains our definition of __global__ when MGONGPUCPP_GPUIMPL is not defined -#ifndef MGONGPU_HAS_NO_CURAND -//#include "curand.h" -struct curandGenerator_st; // forward definition from curand.h -#endif - #include "MemoryBuffers.h" +// Forward definition from curand.h (the full header is only needed in CurandRandomKernel.cc) +struct curandGenerator_st; + +// Forward definition from hiprand.h (the full header is only needed in HiprandRandomKernel.cc) +struct rocrand_generator_base_type; +typedef rocrand_generator_base_type hiprandGenerator_st; + #ifdef MGONGPUCPP_GPUIMPL namespace mg5amcGpu #else @@ -107,7 +108,6 @@ namespace mg5amcCpu //-------------------------------------------------------------------------- -#ifndef MGONGPU_HAS_NO_CURAND // A class encapsulating CURAND random number generation on a CPU host or on a GPU device class CurandRandomNumberKernel final : public RandomNumberKernelBase { @@ -142,11 +142,49 @@ namespace mg5amcCpu const bool m_isOnDevice; // The curand generator - // (NB: curand.h defines typedef generator_t as a pointer to forward-defined 'struct curandGenerator_st') + // (NB: curand.h defines typedef curandGenerator_t as a pointer to forward-defined 'struct curandGenerator_st') curandGenerator_st* m_rnGen; }; -#endif + //-------------------------------------------------------------------------- + + // A class encapsulating HIPRAND random number generation on a CPU host or on a GPU device + class HiprandRandomNumberKernel final : public RandomNumberKernelBase + { + public: + + // Constructor from an existing output buffer + HiprandRandomNumberKernel( BufferRndNumMomenta& rnarray, const bool onDevice ); + + // Destructor + ~HiprandRandomNumberKernel(); + + // Seed the random number generator + void seedGenerator( const unsigned int seed ) override final; + + // Generate the random number array + void 
generateRnarray() override final; + + // Is this a host or device kernel? + bool isOnDevice() const override final { return m_isOnDevice; } + + private: + + // Create the generator (workaround for #429: do this in every seedGenerator call rather than only in the ctor) + void createGenerator(); + + // Destroy the generator (workaround for #429: do this in every seedGenerator call rather than only in the ctor) + void destroyGenerator(); + + private: + + // Is this a host or device kernel? + const bool m_isOnDevice; + + // The hiprand generator + // (NB: hiprand.h defines typedef hiprandGenerator_t as a pointer to forward-defined 'struct hiprandGenerator_st') + hiprandGenerator_st* m_rnGen; + }; //-------------------------------------------------------------------------- } diff --git a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/check_sa.cc b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/check_sa.cc index aab490dc5b..ccad53b082 100644 --- a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/check_sa.cc +++ b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/check_sa.cc @@ -1,10 +1,10 @@ // Copyright (C) 2010 The MadGraph5_aMC@NLO development team and contributors. // Created by: J. Alwall (Oct 2010) for the MG5aMC CPP backend. //========================================================================== -// Copyright (C) 2020-2023 CERN and UCLouvain. +// Copyright (C) 2020-2024 CERN and UCLouvain. // Licensed under the GNU Lesser General Public License (version 3 or later). // Modified by: O. Mattelaer (Nov 2020) for the MG5aMC CUDACPP plugin. -// Further modified by: S. Hageboeck, O. Mattelaer, S. Roiser, J. Teig, A. Valassi (2020-2023) for the MG5aMC CUDACPP plugin. +// Further modified by: S. Hageboeck, O. Mattelaer, S. Roiser, J. Teig, A. Valassi (2020-2024) for the MG5aMC CUDACPP plugin. 
//========================================================================== #include "mgOnGpuConfig.h" @@ -58,7 +58,7 @@ int usage( char* argv0, int ret = 1 ) { std::cout << "Usage: " << argv0 - << " [--verbose|-v] [--debug|-d] [--performance|-p] [--json|-j] [--curhst|--curdev|--common] [--rmbhst|--rmbdev] [--bridge]" + << " [--verbose|-v] [--debug|-d] [--performance|-p] [--json|-j] [--curhst|--curdev|--hirhst|--hirdev|--common] [--rmbhst|--rmbdev] [--bridge]" << " [#gpuBlocksPerGrid #gpuThreadsPerBlock] #iterations" << std::endl; std::cout << std::endl; std::cout << "The number of events per iteration is #gpuBlocksPerGrid * #gpuThreadsPerBlock" << std::endl; @@ -131,17 +131,31 @@ main( int argc, char** argv ) enum class RandomNumberMode { CommonRandom = 0, - CurandHost = 1, - CurandDevice = 2 + CurandHost = -1, + CurandDevice = 1, + HiprandHost = -2, + HiprandDevice = 2 }; -#ifdef MGONGPU_HAS_NO_CURAND - RandomNumberMode rndgen = RandomNumberMode::CommonRandom; // this is the only supported mode if build has no curand (PR #784 and #785) -#elif defined __HIPCC__ -#error Internal error: MGONGPU_HAS_NO_CURAND should have been set for __HIPCC__ // default on AMD GPUs should be common random -#elif defined __CUDACC__ +#if defined __CUDACC__ +#ifndef MGONGPU_HAS_NO_CURAND RandomNumberMode rndgen = RandomNumberMode::CurandDevice; // default on NVidia GPU if build has curand #else + RandomNumberMode rndgen = RandomNumberMode::CommonRandom; // default on NVidia GPU if build has no curand (PR #784 and #785) +#endif +#elif defined __HIPCC__ +#ifndef MGONGPU_HAS_NO_HIPRAND + RandomNumberMode rndgen = RandomNumberMode::HiprandDevice; // default on AMD GPU if build has hiprand +#else + RandomNumberMode rndgen = RandomNumberMode::CommonRandom; // default on AMD GPU if build has no hiprand +#endif +#else +#ifndef MGONGPU_HAS_NO_CURAND RandomNumberMode rndgen = RandomNumberMode::CurandHost; // default on CPU if build has curand +#elif not defined MGONGPU_HAS_NO_HIPRAND + 
RandomNumberMode rndgen = RandomNumberMode::HiprandDevice; // default on CPU if build has hiprand +#else + RandomNumberMode rndgen = RandomNumberMode::CommonRandom; // default on CPU if build has neither curand nor hiprand +#endif #endif // Rambo sampling mode (NB RamboHost implies CommonRandom or CurandHost!) enum class RamboSamplingMode @@ -193,6 +207,26 @@ main( int argc, char** argv ) throw std::runtime_error( "CurandHost is not supported because this application was built without Curand support" ); #else rndgen = RandomNumberMode::CurandHost; +#endif + } + else if( arg == "--hirdev" ) + { +#ifndef __HIPCC__ + throw std::runtime_error( "HiprandDevice is not supported on CPUs or non-AMD GPUs" ); +#elif defined MGONGPU_HAS_NO_HIPRAND + throw std::runtime_error( "HiprandDevice is not supported because this application was built without Hiprand support" ); +#else + rndgen = RandomNumberMode::HiprandDevice; +#endif + } + else if( arg == "--hirhst" ) + { +#ifdef MGONGPU_HAS_NO_HIPRAND + throw std::runtime_error( "HiprandHost is not supported because this application was built without Hiprand support" ); +#else + // See https://github.com/ROCm/hipRAND/issues/76 + throw std::runtime_error( "HiprandRandomNumberKernel on host is not supported yet (hiprandCreateGeneratorHost is not implemented yet)" ); + //rndgen = RandomNumberMode::HiprandHost; #endif } else if( arg == "--common" ) @@ -265,6 +299,20 @@ main( int argc, char** argv ) #endif } + if( rmbsmp == RamboSamplingMode::RamboHost && rndgen == RandomNumberMode::HiprandDevice ) + { +#if not defined MGONGPU_HAS_NO_HIPRAND + // See https://github.com/ROCm/hipRAND/issues/76 + //std::cout << "WARNING! RamboHost selected: cannot use HiprandDevice, will use HiprandHost" << std::endl; + //rndgen = RandomNumberMode::HiprandHost; + std::cout << "WARNING! 
RamboHost selected: cannot use HiprandDevice, will use CommonRandom (as HiprandHost is not implemented yet)" << std::endl; + rndgen = RandomNumberMode::CommonRandom; +#else + std::cout << "WARNING! RamboHost selected: cannot use HiprandDevice, will use CommonRandom" << std::endl; + rndgen = RandomNumberMode::CommonRandom; +#endif + } + constexpr int neppM = MemoryAccessMomenta::neppM; // AOSOA layout constexpr int neppR = MemoryAccessRandomNumbers::neppR; // AOSOA layout @@ -415,7 +463,7 @@ main( int argc, char** argv ) std::unique_ptr wavetimes( new double[niter] ); std::unique_ptr wv3atimes( new double[niter] ); - // --- 0c. Create curand or common generator + // --- 0c. Create curand, hiprand or common generator const std::string cgenKey = "0c GenCreat"; timermap.start( cgenKey ); // Allocate the appropriate RandomNumberKernel @@ -433,7 +481,7 @@ main( int argc, char** argv ) prnk.reset( new CurandRandomNumberKernel( hstRndmom, onDevice ) ); #endif } - else + else if( rndgen == RandomNumberMode::CurandDevice ) { #ifdef MGONGPU_HAS_NO_CURAND throw std::runtime_error( "INTERNAL ERROR! CurandDevice is not supported because this application was built without Curand support" ); // INTERNAL ERROR (no path to this statement) @@ -444,7 +492,28 @@ main( int argc, char** argv ) throw std::logic_error( "INTERNAL ERROR! CurandDevice is not supported on CPUs or non-NVidia GPUs" ); // INTERNAL ERROR (no path to this statement) #endif } - + else if( rndgen == RandomNumberMode::HiprandHost ) + { +#ifdef MGONGPU_HAS_NO_HIPRAND + throw std::runtime_error( "INTERNAL ERROR! HiprandHost is not supported because this application was built without Hiprand support" ); // INTERNAL ERROR (no path to this statement) +#else + const bool onDevice = false; + prnk.reset( new HiprandRandomNumberKernel( hstRndmom, onDevice ) ); +#endif + } + else if( rndgen == RandomNumberMode::HiprandDevice ) + { +#ifdef MGONGPU_HAS_NO_HIPRAND + throw std::runtime_error( "INTERNAL ERROR! 
HiprandDevice is not supported because this application was built without Hiprand support" ); // INTERNAL ERROR (no path to this statement) +#elif defined __HIPCC__ + const bool onDevice = true; + prnk.reset( new HiprandRandomNumberKernel( devRndmom, onDevice ) ); +#else + throw std::logic_error( "INTERNAL ERROR! HiprandDevice is not supported on CPUs or non-NVidia GPUs" ); // INTERNAL ERROR (no path to this statement) +#endif + } + else throw std::logic_error( "INTERNAL ERROR! Unknown rndgen value?" ); // INTERNAL ERROR (no path to this statement) + // --- 0c. Create rambo sampling kernel [keep this in 0c for the moment] std::unique_ptr prsk; if( rmbsmp == RamboSamplingMode::RamboHost ) @@ -497,7 +566,7 @@ main( int argc, char** argv ) // *** START THE OLD-STYLE TIMER FOR RANDOM GEN *** double genrtime = 0; - // --- 1a. Seed rnd generator (to get same results on host and device in curand) + // --- 1a. Seed rnd generator (to get same results on host and device in curand/hiprand) // [NB This should not be necessary using the host API: "Generation functions // can be called multiple times on the same generator to generate successive // blocks of results. For pseudorandom generators, multiple calls to generation @@ -515,7 +584,9 @@ main( int argc, char** argv ) //std::cout << "Got random numbers" << std::endl; #ifdef MGONGPUCPP_GPUIMPL - if( rndgen != RandomNumberMode::CurandDevice && rmbsmp == RamboSamplingMode::RamboDevice ) + if( rndgen != RandomNumberMode::CurandDevice && + rndgen != RandomNumberMode::HiprandDevice && + rmbsmp == RamboSamplingMode::RamboDevice ) { // --- 1c. 
Copy rndmom from host to device const std::string htodKey = "1c CpHTDrnd"; @@ -761,6 +832,10 @@ main( int argc, char** argv ) rndgentxt = "CURAND HOST"; else if( rndgen == RandomNumberMode::CurandDevice ) rndgentxt = "CURAND DEVICE"; + else if( rndgen == RandomNumberMode::HiprandHost ) + rndgentxt = "ROCRAND HOST"; + else if( rndgen == RandomNumberMode::HiprandDevice ) + rndgentxt = "ROCRAND DEVICE"; #ifdef __CUDACC__ rndgentxt += " (CUDA code)"; #elif defined __HIPCC__ @@ -822,6 +897,10 @@ main( int argc, char** argv ) wrkflwtxt += "CURHST+"; else if( rndgen == RandomNumberMode::CurandDevice ) wrkflwtxt += "CURDEV+"; + else if( rndgen == RandomNumberMode::HiprandHost ) + wrkflwtxt += "HIRHST+"; + else if( rndgen == RandomNumberMode::HiprandDevice ) + wrkflwtxt += "HIRDEV+"; else wrkflwtxt += "??????+"; // no path to this statement // -- HOST or DEVICE rambo sampling? @@ -1095,7 +1174,7 @@ main( int argc, char** argv ) #ifdef MGONGPUCPP_GPUIMPL //<< "\"Wavefunction GPU memory\": " << "\"LOCAL\"," << std::endl #endif - << "\"Curand generation\": " + << "\"Random generation\": " << "\"" << rndgentxt << "\"," << std::endl; double minelem = hstStats.minME; diff --git a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/cudacpp.mk b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/cudacpp.mk index 4900a659b2..07c076f443 100644 --- a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/cudacpp.mk +++ b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/cudacpp.mk @@ -173,11 +173,6 @@ ifneq ($(wildcard $(CUDA_HOME)/bin/nvcc),) comma:=, CUARCHFLAGS = $(foreach arch,$(subst $(comma), ,$(MADGRAPH_CUDA_ARCHITECTURE)),-gencode arch=compute_$(arch),code=compute_$(arch) -gencode arch=compute_$(arch),code=sm_$(arch)) CUINC = -I$(CUDA_HOME)/include/ - ifeq ($(RNDGEN),hasNoCurand) - CURANDLIBFLAGS= - else - CURANDLIBFLAGS = -L$(CUDA_HOME)/lib64/ -lcurand # 
NB: -lcuda is not needed here! - endif CUOPTFLAGS = -lineinfo ###GPUFLAGS = $(OPTFLAGS) $(CUOPTFLAGS) $(INCFLAGS) $(CUINC) $(USE_NVTX) $(CUARCHFLAGS) -use_fast_math GPUFLAGS = $(foreach opt, $(OPTFLAGS), -Xcompiler $(opt)) $(CUOPTFLAGS) $(INCFLAGS) $(CUINC) $(USE_NVTX) $(CUARCHFLAGS) -use_fast_math @@ -241,7 +236,7 @@ else override GPUCC= override USE_NVTX= override CUINC= - override CURANDLIBFLAGS= + override HIPINC= endif @@ -291,7 +286,7 @@ endif #------------------------------------------------------------------------------- -#=== Configure defaults and check if user-defined choices exist for OMPFLAGS, AVX, FPTYPE, HELINL, HRDCOD, RNDGEN +#=== Configure defaults and check if user-defined choices exist for OMPFLAGS, AVX, FPTYPE, HELINL, HRDCOD # Set the default OMPFLAGS choice ifneq ($(findstring hipcc,$(GPUCC)),) @@ -352,29 +347,62 @@ ifeq ($(HRDCOD),) override HRDCOD = 0 endif -# Set the default RNDGEN (random number generator) choice -ifeq ($(RNDGEN),) - ifeq ($(GPUCC),) - override RNDGEN = hasNoCurand - # Edgecase for HIP compilation - else ifeq ($(findstring hipcc,$(GPUCC)),hipcc) - override RNDGEN = hasNoCurand - else ifeq ($(RNDGEN),) - override RNDGEN = hasCurand - endif -endif - -# Export AVX, FPTYPE, HELINL, HRDCOD, RNDGEN, OMPFLAGS so that it is not necessary to pass them to the src Makefile too +# Export AVX, FPTYPE, HELINL, HRDCOD, OMPFLAGS so that it is not necessary to pass them to the src Makefile too export AVX export FPTYPE export HELINL export HRDCOD -export RNDGEN export OMPFLAGS #------------------------------------------------------------------------------- -#=== Set the CUDA/HIP/C++ compiler flags appropriate to user-defined choices of AVX, FPTYPE, HELINL, HRDCOD, RNDGEN +#=== Configure defaults and check if user-defined choices exist for RNDGEN (legacy!), HASCURAND, HASHIPRAND + +# If the legacy RNDGEN exists, this take precedence over any HASCURAND choice (but a warning is printed out) +###$(info RNDGEN=$(RNDGEN)) +ifneq ($(RNDGEN),) + 
$(warning Environment variable RNDGEN is no longer supported, please use HASCURAND instead!) + ifeq ($(RNDGEN),hasCurand) + override HASCURAND = $(RNDGEN) + else ifeq ($(RNDGEN),hasNoCurand) + override HASCURAND = $(RNDGEN) + else ifneq ($(RNDGEN),hasNoCurand) + $(error Unknown RNDGEN='$(RNDGEN)': only 'hasCurand' and 'hasNoCurand' are supported - but use HASCURAND instead!) + endif +endif + +# Set the default HASCURAND (curand random number generator) choice, if no prior choice exists for HASCURAND +# (NB: allow HASCURAND=hasCurand even if $(GPUCC) does not point to nvcc: assume CUDA_HOME was defined correctly...) +ifeq ($(HASCURAND),) + ifeq ($(GPUCC),) # CPU-only build + override HASCURAND = hasNoCurand + else ifeq ($(findstring nvcc,$(GPUCC)),nvcc) # Nvidia GPU build + override HASCURAND = hasCurand + else # non-Nvidia GPU build + override HASCURAND = hasNoCurand + endif +endif + +# Set the default HASHIPRAND (hiprand random number generator) choice, if no prior choice exists for HASHIPRAND +# (NB: allow HASHIPRAND=hasHiprand even if $(GPUCC) does not point to hipcc: assume HIP_HOME was defined correctly...) 
+ifeq ($(HASHIPRAND),) + ifeq ($(GPUCC),) # CPU-only build + override HASHIPRAND = hasNoHiprand + else ifeq ($(findstring hipcc,$(GPUCC)),hipcc) # AMD GPU build + override HASHIPRAND = hasHiprand + else # non-AMD GPU build + override HASHIPRAND = hasNoHiprand + endif +endif + +# Export HASCURAND, HASHIPRAND so that it is not necessary to pass them to the src Makefile too +# (NB: these variables in cudacpp_src.mk are only used to define the build tag, they are NOT needed for RNDCXXFLAGS or RNDLIBFLAGS) +export HASCURAND +export HASHIPRAND + +#------------------------------------------------------------------------------- + +#=== Set the CUDA/HIP/C++ compiler flags appropriate to user-defined choices of AVX, FPTYPE, HELINL, HRDCOD # Set the build flags appropriate to OMPFLAGS $(info OMPFLAGS=$(OMPFLAGS)) @@ -432,13 +460,13 @@ CXXFLAGS+= $(AVXFLAGS) $(info FPTYPE=$(FPTYPE)) ifeq ($(FPTYPE),d) CXXFLAGS += -DMGONGPU_FPTYPE_DOUBLE -DMGONGPU_FPTYPE2_DOUBLE - GPUFLAGS += -DMGONGPU_FPTYPE_DOUBLE -DMGONGPU_FPTYPE2_DOUBLE + GPUFLAGS += -DMGONGPU_FPTYPE_DOUBLE -DMGONGPU_FPTYPE2_DOUBLE else ifeq ($(FPTYPE),f) CXXFLAGS += -DMGONGPU_FPTYPE_FLOAT -DMGONGPU_FPTYPE2_FLOAT - GPUFLAGS += -DMGONGPU_FPTYPE_FLOAT -DMGONGPU_FPTYPE2_FLOAT + GPUFLAGS += -DMGONGPU_FPTYPE_FLOAT -DMGONGPU_FPTYPE2_FLOAT else ifeq ($(FPTYPE),m) CXXFLAGS += -DMGONGPU_FPTYPE_DOUBLE -DMGONGPU_FPTYPE2_FLOAT - GPUFLAGS += -DMGONGPU_FPTYPE_DOUBLE -DMGONGPU_FPTYPE2_FLOAT + GPUFLAGS += -DMGONGPU_FPTYPE_DOUBLE -DMGONGPU_FPTYPE2_FLOAT else $(error Unknown FPTYPE='$(FPTYPE)': only 'd', 'f' and 'm' are supported) endif @@ -447,7 +475,7 @@ endif $(info HELINL=$(HELINL)) ifeq ($(HELINL),1) CXXFLAGS += -DMGONGPU_INLINE_HELAMPS - GPUFLAGS += -DMGONGPU_INLINE_HELAMPS + GPUFLAGS += -DMGONGPU_INLINE_HELAMPS else ifneq ($(HELINL),0) $(error Unknown HELINL='$(HELINL)': only '0' and '1' are supported) endif @@ -456,21 +484,40 @@ endif $(info HRDCOD=$(HRDCOD)) ifeq ($(HRDCOD),1) CXXFLAGS += -DMGONGPU_HARDCODE_PARAM - GPUFLAGS += 
-DMGONGPU_HARDCODE_PARAM + GPUFLAGS += -DMGONGPU_HARDCODE_PARAM else ifneq ($(HRDCOD),0) $(error Unknown HRDCOD='$(HRDCOD)': only '0' and '1' are supported) endif -# Set the build flags appropriate to each RNDGEN choice (example: "make RNDGEN=hasNoCurand") -$(info RNDGEN=$(RNDGEN)) -ifeq ($(RNDGEN),hasNoCurand) - override CXXFLAGSCURAND = -DMGONGPU_HAS_NO_CURAND -else ifeq ($(RNDGEN),hasCurand) - override CXXFLAGSCURAND = + +#=== Set the CUDA/HIP/C++ compiler and linker flags appropriate to user-defined choices of HASCURAND, HASHIPRAND + +$(info HASCURAND=$(HASCURAND)) +$(info HASHIPRAND=$(HASHIPRAND)) +override RNDCXXFLAGS= +override RNDLIBFLAGS= + +# Set the RNDCXXFLAGS and RNDLIBFLAGS build flags appropriate to each HASCURAND choice (example: "make HASCURAND=hasNoCurand") +ifeq ($(HASCURAND),hasNoCurand) + override RNDCXXFLAGS += -DMGONGPU_HAS_NO_CURAND +else ifeq ($(HASCURAND),hasCurand) + override RNDLIBFLAGS += -L$(CUDA_HOME)/lib64/ -lcurand # NB: -lcuda is not needed here! else - $(error Unknown RNDGEN='$(RNDGEN)': only 'hasCurand' and 'hasNoCurand' are supported) + $(error Unknown HASCURAND='$(HASCURAND)': only 'hasCurand' and 'hasNoCurand' are supported) +endif + +# Set the RNDCXXFLAGS and RNDLIBFLAGS build flags appropriate to each HASHIPRAND choice (example: "make HASHIPRAND=hasNoHiprand") +ifeq ($(HASHIPRAND),hasNoHiprand) + override RNDCXXFLAGS += -DMGONGPU_HAS_NO_HIPRAND +else ifeq ($(HASHIPRAND),hasHiprand) + override RNDLIBFLAGS += -L$(HIP_HOME)/lib/ -lhiprand +else ifneq ($(HASHIPRAND),hasHiprand) + $(error Unknown HASHIPRAND='$(HASHIPRAND)': only 'hasHiprand' and 'hasNoHiprand' are supported) endif +#$(info RNDCXXFLAGS=$(RNDCXXFLAGS)) +#$(info HASHIPRAND=$(HASHIPRAND)) + #------------------------------------------------------------------------------- #=== Configure build directories and build lockfiles === @@ -481,7 +528,7 @@ override DIRTAG = $(AVX)_$(FPTYPE)_inl$(HELINL)_hrd$(HRDCOD) # Build lockfile "full" tag (defines full specification of 
build options that cannot be intermixed) # (Rationale: avoid mixing of CUDA and no-CUDA environment builds with different random number generators) -override TAG = $(AVX)_$(FPTYPE)_inl$(HELINL)_hrd$(HRDCOD)_$(RNDGEN) +override TAG = $(AVX)_$(FPTYPE)_inl$(HELINL)_hrd$(HRDCOD)_$(HASCURAND)_$(HASHIPRAND) # Build directory: current directory by default, or build.$(DIRTAG) if USEBUILDDIR==1 ifeq ($(USEBUILDDIR),1) @@ -589,14 +636,19 @@ endif $(BUILDDIR)/check_sa.o: CXXFLAGS += $(USE_NVTX) $(CUINC) $(BUILDDIR)/gcheck_sa.o: CXXFLAGS += $(USE_NVTX) $(CUINC) -# Apply special build flags only to check_sa and CurandRandomNumberKernel (curand headers, #679) -$(BUILDDIR)/check_sa.o: CXXFLAGS += $(CXXFLAGSCURAND) -$(BUILDDIR)/gcheck_sa.o: CUFLAGS += $(CXXFLAGSCURAND) -$(BUILDDIR)/CurandRandomNumberKernel.o: CXXFLAGS += $(CXXFLAGSCURAND) -$(BUILDDIR)/gCurandRandomNumberKernel.o: CUFLAGS += $(CXXFLAGSCURAND) -ifeq ($(RNDGEN),hasCurand) +# Apply special build flags only to check_sa and (Cu|Hip)randRandomNumberKernel +$(BUILDDIR)/check_sa.o: CXXFLAGS += $(RNDCXXFLAGS) +$(BUILDDIR)/gcheck_sa.o: CUFLAGS += $(RNDCXXFLAGS) +$(BUILDDIR)/CurandRandomNumberKernel.o: CXXFLAGS += $(RNDCXXFLAGS) +$(BUILDDIR)/gCurandRandomNumberKernel.o: CUFLAGS += $(RNDCXXFLAGS) +$(BUILDDIR)/HiprandRandomNumberKernel.o: CXXFLAGS += $(RNDCXXFLAGS) +$(BUILDDIR)/gHiprandRandomNumberKernel.o: CUFLAGS += $(RNDCXXFLAGS) +ifeq ($(HASCURAND),hasCurand) # curand headers, #679 $(BUILDDIR)/CurandRandomNumberKernel.o: CXXFLAGS += $(CUINC) endif +ifeq ($(HASHIPRAND),hasHiprand) # hiprand headers +$(BUILDDIR)/HiprandRandomNumberKernel.o: CXXFLAGS += $(HIPINC) +endif # Avoid "warning: builtin __has_trivial_... is deprecated; use __is_trivially_... 
instead" in GPUCC with icx2023 (#592) ifneq ($(shell $(CXX) --version | egrep '^(Intel)'),) @@ -673,8 +725,8 @@ endif # Target (and build rules): C++ and CUDA standalone executables $(cxx_main): LIBFLAGS += $(CXXLIBFLAGSRPATH) # avoid the need for LD_LIBRARY_PATH -$(cxx_main): $(BUILDDIR)/check_sa.o $(LIBDIR)/lib$(MG5AMC_CXXLIB).so $(cxx_objects_exe) $(BUILDDIR)/CurandRandomNumberKernel.o - $(CXX) -o $@ $(BUILDDIR)/check_sa.o $(OMPFLAGS) -ldl -pthread $(LIBFLAGS) -L$(LIBDIR) -l$(MG5AMC_CXXLIB) $(cxx_objects_exe) $(BUILDDIR)/CurandRandomNumberKernel.o $(CURANDLIBFLAGS) +$(cxx_main): $(BUILDDIR)/check_sa.o $(LIBDIR)/lib$(MG5AMC_CXXLIB).so $(cxx_objects_exe) $(BUILDDIR)/CurandRandomNumberKernel.o $(BUILDDIR)/HiprandRandomNumberKernel.o + $(CXX) -o $@ $(BUILDDIR)/check_sa.o $(OMPFLAGS) -ldl -pthread $(LIBFLAGS) -L$(LIBDIR) -l$(MG5AMC_CXXLIB) $(cxx_objects_exe) $(BUILDDIR)/CurandRandomNumberKernel.o $(BUILDDIR)/HiprandRandomNumberKernel.o $(RNDLIBFLAGS) ifneq ($(GPUCC),) ifneq ($(shell $(CXX) --version | grep ^Intel),) @@ -684,8 +736,8 @@ else ifneq ($(shell $(CXX) --version | grep ^nvc++),) # support nvc++ #531 $(cu_main): LIBFLAGS += -L$(patsubst %%bin/nvc++,%%lib,$(subst ccache ,,$(CXX))) -lnvhpcatm -lnvcpumath -lnvc endif $(cu_main): LIBFLAGS += $(CULIBFLAGSRPATH) # avoid the need for LD_LIBRARY_PATH -$(cu_main): $(BUILDDIR)/gcheck_sa.o $(LIBDIR)/lib$(MG5AMC_CULIB).so $(cu_objects_exe) $(BUILDDIR)/gCurandRandomNumberKernel.o - $(GPUCC) -o $@ $(BUILDDIR)/gcheck_sa.o $(CUARCHFLAGS) $(LIBFLAGS) -L$(LIBDIR) -l$(MG5AMC_CULIB) $(cu_objects_exe) $(BUILDDIR)/gCurandRandomNumberKernel.o $(CURANDLIBFLAGS) +$(cu_main): $(BUILDDIR)/gcheck_sa.o $(LIBDIR)/lib$(MG5AMC_CULIB).so $(cu_objects_exe) $(BUILDDIR)/gCurandRandomNumberKernel.o $(BUILDDIR)/gHiprandRandomNumberKernel.o + $(GPUCC) -o $@ $(BUILDDIR)/gcheck_sa.o $(CUARCHFLAGS) $(LIBFLAGS) -L$(LIBDIR) -l$(MG5AMC_CULIB) $(cu_objects_exe) $(BUILDDIR)/gCurandRandomNumberKernel.o $(BUILDDIR)/gHiprandRandomNumberKernel.o 
$(RNDLIBFLAGS) endif #------------------------------------------------------------------------------- diff --git a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/cudacpp_src.mk b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/cudacpp_src.mk index 2c084615d9..9360d9ce0d 100644 --- a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/cudacpp_src.mk +++ b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/cudacpp_src.mk @@ -45,13 +45,13 @@ endif #------------------------------------------------------------------------------- -#=== Configure the CUDA compiler (note: GPUCC is already exported including ccache) +#=== Configure the CUDA compiler (note: GPUCC has been exported from cudacpp.mk including ccache) ###$(info GPUCC=$(GPUCC)) #------------------------------------------------------------------------------- -#=== Configure ccache for C++ builds (note: GPUCC is already exported including ccache) +#=== Configure ccache for C++ builds (note: GPUCC has been exported from cudacpp.mk including ccache) # Enable ccache if USECCACHE=1 ifeq ($(USECCACHE)$(shell echo $(CXX) | grep ccache),1) @@ -86,7 +86,8 @@ endif #------------------------------------------------------------------------------- -#=== Set the CUDA/C++ compiler flags appropriate to user-defined choices of AVX, FPTYPE, HELINL, HRDCOD, RNDGEN +#=== Set the CUDA/C++ compiler flags appropriate to user-defined choices of AVX, FPTYPE, HELINL, HRDCOD (exported from cudacpp.mk) +#=== (NB the RNDCXXFLAGS and RNDLIBFLAGS appropriate to user-defined choices of HASCURAND and HASHIPRAND have been exported from cudacpp.mk) # Set the build flags appropriate to OMPFLAGS ###$(info OMPFLAGS=$(OMPFLAGS)) @@ -175,14 +176,6 @@ else ifneq ($(HRDCOD),0) $(error Unknown HRDCOD='$(HRDCOD)': only '0' and '1' are supported) endif -# Set the build flags appropriate to each RNDGEN choice (example: "make 
RNDGEN=hasNoCurand") -###$(info RNDGEN=$(RNDGEN)) -ifeq ($(RNDGEN),hasNoCurand) - CXXFLAGS += -DMGONGPU_HAS_NO_CURAND -else ifneq ($(RNDGEN),hasCurand) - $(error Unknown RNDGEN='$(RNDGEN)': only 'hasCurand' and 'hasNoCurand' are supported) -endif - #------------------------------------------------------------------------------- #=== Configure build directories and build lockfiles === @@ -193,7 +186,7 @@ override DIRTAG = $(AVX)_$(FPTYPE)_inl$(HELINL)_hrd$(HRDCOD) # Build lockfile "full" tag (defines full specification of build options that cannot be intermixed) # (Rationale: avoid mixing of CUDA and no-CUDA environment builds with different random number generators) -override TAG = $(AVX)_$(FPTYPE)_inl$(HELINL)_hrd$(HRDCOD)_$(RNDGEN) +override TAG = $(AVX)_$(FPTYPE)_inl$(HELINL)_hrd$(HRDCOD)_$(HASCURAND)_$(HASHIPRAND) # Build directory: current directory by default, or build.$(DIRTAG) if USEBUILDDIR==1 ###$(info Current directory is $(shell pwd)) diff --git a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/mgOnGpuConfig.h b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/mgOnGpuConfig.h index 989b3f0eea..74f9d90b61 100644 --- a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/mgOnGpuConfig.h +++ b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/mgOnGpuConfig.h @@ -20,11 +20,15 @@ #undef MGONGPUCPP_GPUIMPL #endif +// Make sure that __HIP_PLATFORM_NVIDIA__ is undefined +// (__HIP_PLATFORM_AMD__ is defined by hipcc or in HiprandRandomNumberKernel.cc) +#undef __HIP_PLATFORM_NVIDIA__ // disable hiprand for NVidia (curand) + // ** NB1 Throughputs (e.g. 
6.8E8) are events/sec for "./gcheck.exe -p 65536 128 12" // ** NB2 Baseline on b7g47n0004 fluctuates (probably depends on load on other VMs) // Choose if curand is supported for generating random numbers -// For HIP, by default, do not use curand (common random numbers will be used instead) +// For HIP, by default, do not allow curand to be used (hiprand or common random numbers will be used instead) // For both CUDA and C++, by default, do not inline, but allow this macro to be set from outside with e.g. -DMGONGPU_HAS_NO_CURAND // (there exist CUDA installations, e.g. using the HPC package, which do not include curand - see PR #784 and #785) #if defined __HIPCC__ @@ -39,6 +43,22 @@ //#endif #endif +// Choose if hiprand is supported for generating random numbers +// For CUDA, by default, do not allow hiprand to be used (curand or common random numbers will be used instead) +// For both HIP and C++, by default, do not inline, but allow this macro to be set from outside with e.g. -DMGONGPU_HAS_NO_HIPRAND +// (there may exist HIP installations which do not include hiprand?) +#if defined __CUDACC__ +#define MGONGPU_HAS_NO_HIPRAND 1 +#else +//#ifdef __HIPCC__ +//#undef MGONGPU_HAS_NO_HIPRAND // default +////#define MGONGPU_HAS_NO_HIPRAND 1 +//#else +//#undef MGONGPU_HAS_NO_HIPRAND // default +////#define MGONGPU_HAS_NO_HIPRAND 1 +//#endif +#endif + // Choose floating point precision (for everything but color algebra #537) // If one of these macros has been set from outside with e.g. 
-DMGONGPU_FPTYPE_FLOAT, nothing happens (issue #167) #if not defined MGONGPU_FPTYPE_DOUBLE and not defined MGONGPU_FPTYPE_FLOAT diff --git a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/model_handling.py b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/model_handling.py index 3e0ebe545f..01280ae463 100644 --- a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/model_handling.py +++ b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/model_handling.py @@ -1347,6 +1347,7 @@ def generate_process_files(self): files.ln(pjoin(self.path, 'RamboSamplingKernels.cc'), self.path, 'gRamboSamplingKernels.cu') files.ln(pjoin(self.path, 'CommonRandomNumberKernel.cc'), self.path, 'gCommonRandomNumberKernel.cu') files.ln(pjoin(self.path, 'CurandRandomNumberKernel.cc'), self.path, 'gCurandRandomNumberKernel.cu') + files.ln(pjoin(self.path, 'HiprandRandomNumberKernel.cc'), self.path, 'gHiprandRandomNumberKernel.cu') files.ln(pjoin(self.path, 'BridgeKernels.cc'), self.path, 'gBridgeKernels.cu') # NB: symlink of cudacpp.mk to makefile is overwritten by madevent makefile if this exists (#480) # NB: this relies on the assumption that cudacpp code is generated before madevent code diff --git a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/output.py b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/output.py index c89295c01f..f2ed8897b3 100644 --- a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/output.py +++ b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/output.py @@ -101,8 +101,8 @@ class PLUGIN_ProcessExporter(PLUGIN_export_cpp.ProcessExporterGPU): s+'gpu/CrossSectionKernels.cc', s+'gpu/CrossSectionKernels.h', s+'gpu/MatrixElementKernels.cc', s+'gpu/MatrixElementKernels.h', s+'gpu/RamboSamplingKernels.cc', s+'gpu/RamboSamplingKernels.h', - s+'gpu/RandomNumberKernels.h', - s+'gpu/CommonRandomNumberKernel.cc', s+'gpu/CurandRandomNumberKernel.cc', + s+'gpu/RandomNumberKernels.h', s+'gpu/CommonRandomNumberKernel.cc', + s+'gpu/CurandRandomNumberKernel.cc', 
s+'gpu/HiprandRandomNumberKernel.cc', s+'gpu/Bridge.h', s+'gpu/BridgeKernels.cc', s+'gpu/BridgeKernels.h', s+'gpu/fbridge.cc', s+'gpu/fbridge.inc', s+'gpu/fsampler.cc', s+'gpu/fsampler.inc', s+'gpu/MadgraphTest.h', s+'gpu/runTest.cc', @@ -122,7 +122,8 @@ class PLUGIN_ProcessExporter(PLUGIN_export_cpp.ProcessExporterGPU): 'CrossSectionKernels.cc', 'CrossSectionKernels.h', 'MatrixElementKernels.cc', 'MatrixElementKernels.h', 'RamboSamplingKernels.cc', 'RamboSamplingKernels.h', - 'RandomNumberKernels.h', 'CommonRandomNumberKernel.cc', 'CurandRandomNumberKernel.cc', + 'RandomNumberKernels.h', 'CommonRandomNumberKernel.cc', + 'CurandRandomNumberKernel.cc', 'HiprandRandomNumberKernel.cc', 'Bridge.h', 'BridgeKernels.cc', 'BridgeKernels.h', 'fbridge.cc', 'fbridge.inc', 'fsampler.cc', 'fsampler.inc', 'MadgraphTest.h', 'runTest.cc', From 987da65f1b0c3db4dd3bd798e154d8aa599d7df4 Mon Sep 17 00:00:00 2001 From: Andrea Valassi Date: Fri, 2 Feb 2024 17:18:13 +0100 Subject: [PATCH 27/47] [rocrand] in CODEGEN check_sa.cc, fix clang formatting --- .../madgraph/iolibs/template_files/gpu/check_sa.cc | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/check_sa.cc b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/check_sa.cc index ccad53b082..c296f451c2 100644 --- a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/check_sa.cc +++ b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/madgraph/iolibs/template_files/gpu/check_sa.cc @@ -166,7 +166,7 @@ main( int argc, char** argv ) #ifdef MGONGPUCPP_GPUIMPL RamboSamplingMode rmbsmp = RamboSamplingMode::RamboDevice; // default on GPU #else - RamboSamplingMode rmbsmp = RamboSamplingMode::RamboHost; // default on CPU + RamboSamplingMode rmbsmp = RamboSamplingMode::RamboHost; // default on CPU #endif // Bridge emulation mode (NB Bridge implies RamboHost!) 
bool bridge = false; @@ -489,7 +489,7 @@ main( int argc, char** argv ) const bool onDevice = true; prnk.reset( new CurandRandomNumberKernel( devRndmom, onDevice ) ); #else - throw std::logic_error( "INTERNAL ERROR! CurandDevice is not supported on CPUs or non-NVidia GPUs" ); // INTERNAL ERROR (no path to this statement) + throw std::logic_error( "INTERNAL ERROR! CurandDevice is not supported on CPUs or non-NVidia GPUs" ); // INTERNAL ERROR (no path to this statement) #endif } else if( rndgen == RandomNumberMode::HiprandHost ) @@ -512,8 +512,9 @@ main( int argc, char** argv ) throw std::logic_error( "INTERNAL ERROR! HiprandDevice is not supported on CPUs or non-NVidia GPUs" ); // INTERNAL ERROR (no path to this statement) #endif } - else throw std::logic_error( "INTERNAL ERROR! Unknown rndgen value?" ); // INTERNAL ERROR (no path to this statement) - + else + throw std::logic_error( "INTERNAL ERROR! Unknown rndgen value?" ); // INTERNAL ERROR (no path to this statement) + // --- 0c. Create rambo sampling kernel [keep this in 0c for the moment] std::unique_ptr prsk; if( rmbsmp == RamboSamplingMode::RamboHost ) @@ -863,7 +864,7 @@ main( int argc, char** argv ) wrkflwtxt += "FLT+"; #else wrkflwtxt += "???+"; // no path to this statement -#endif /* clang-format on */ +#endif // -- CUCOMPLEX or THRUST or STD complex numbers? #ifdef __CUDACC__ #if defined MGONGPU_CUCXTYPE_CUCOMPLEX @@ -888,7 +889,7 @@ main( int argc, char** argv ) wrkflwtxt += "CXS:"; #else wrkflwtxt += "???:"; // no path to this statement -#endif +#endif /* clang-format on */ #endif // -- COMMON or CURAND HOST or CURAND DEVICE random numbers? 
if( rndgen == RandomNumberMode::CommonRandom ) From 21c4eea86a1ae9203d080a599038d6afcf84677b Mon Sep 17 00:00:00 2001 From: Andrea Valassi Date: Fri, 2 Feb 2024 17:22:27 +0100 Subject: [PATCH 28/47] [rocrand] regenerate gg_tt.mad - all ok except for additional formatting changes --- .../gg_tt.mad/CODEGEN_mad_gg_tt_log.txt | 24 +++++++++---------- .../SubProcesses/P1_gg_ttx/check_sa.cc | 13 +++++----- .../cudacpp/gg_tt.mad/SubProcesses/runTest.cc | 4 ++-- 3 files changed, 21 insertions(+), 20 deletions(-) diff --git a/epochX/cudacpp/gg_tt.mad/CODEGEN_mad_gg_tt_log.txt b/epochX/cudacpp/gg_tt.mad/CODEGEN_mad_gg_tt_log.txt index 9e8e783b82..c5a5956db3 100644 --- a/epochX/cudacpp/gg_tt.mad/CODEGEN_mad_gg_tt_log.txt +++ b/epochX/cudacpp/gg_tt.mad/CODEGEN_mad_gg_tt_log.txt @@ -62,7 +62,7 @@ generate g g > t t~ No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.0057849884033203125  +DEBUG: model prefixing takes 0.005265712738037109  INFO: Restrict model sm with file models/sm/restrict_default.dat . 
DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -162,10 +162,10 @@ Load PLUGIN.CUDACPP_OUTPUT Addition matrix-element will be done with PLUGIN: CUDACPP_OUTPUT Output will be done with PLUGIN: CUDACPP_OUTPUT DEBUG: cformat =  standalone_cudacpp [export_cpp.py at line 3071]  -DEBUG: Entering PLUGIN_ProcessExporter.__init__ (initialise the exporter) [output.py at line 160]  +DEBUG: Entering PLUGIN_ProcessExporter.__init__ (initialise the exporter) [output.py at line 161]  INFO: initialize a new directory: CODEGEN_mad_gg_tt INFO: remove old information in CODEGEN_mad_gg_tt -DEBUG: Entering PLUGIN_ProcessExporter.copy_template (initialise the directory) [output.py at line 165]  +DEBUG: Entering PLUGIN_ProcessExporter.copy_template (initialise the directory) [output.py at line 166]  WARNING: File exists /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt  INFO: Creating subdirectories in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt WARNING: File exists /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt/Cards  @@ -175,7 +175,7 @@ INFO: Generating Helas calls for process: g g > t t~ WEIGHTED<=2 @1 INFO: Processing color information for process: g g > t t~ @1 INFO: Creating files in directory P1_gg_ttx DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1058]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6262]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6262]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -191,16 +191,16 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. 
INFO: Generating Feynman diagrams for Process: g g > t t~ WEIGHTED<=2 @1 INFO: Finding symmetric diagrams for subprocess group gg_ttx Generated helas calls for 1 subprocesses (3 diagrams) in 0.006 s -Wrote files for 10 helas calls in 0.104 s +Wrote files for 10 helas calls in 0.100 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 set of routines with options: P0 ALOHA: aloha creates FFV1 routines -ALOHA: aloha creates 2 routines in 0.148 s -DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 202]  +ALOHA: aloha creates 2 routines in 0.136 s +DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 203]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 set of routines with options: P0 ALOHA: aloha creates FFV1 routines -ALOHA: aloha creates 4 routines in 0.135 s +ALOHA: aloha creates 4 routines in 0.123 s VVV1 FFV1 FFV1 @@ -230,16 +230,16 @@ DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt/S patching file auto_dsig1.f patching file driver.f patching file matrix1.f -DEBUG: p.returncode =  0 [output.py at line 237]  +DEBUG: p.returncode =  0 [output.py at line 238]  Output to directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt done. Type "launch" to generate events from this process, or see /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt/README Run "open index.html" to see more information about this process. 
quit -real 0m1.738s -user 0m1.518s -sys 0m0.218s +real 0m1.709s +user 0m1.447s +sys 0m0.254s Code generation completed in 2 seconds ************************************************************ * * diff --git a/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/check_sa.cc b/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/check_sa.cc index ccad53b082..c296f451c2 100644 --- a/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/check_sa.cc +++ b/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/check_sa.cc @@ -166,7 +166,7 @@ main( int argc, char** argv ) #ifdef MGONGPUCPP_GPUIMPL RamboSamplingMode rmbsmp = RamboSamplingMode::RamboDevice; // default on GPU #else - RamboSamplingMode rmbsmp = RamboSamplingMode::RamboHost; // default on CPU + RamboSamplingMode rmbsmp = RamboSamplingMode::RamboHost; // default on CPU #endif // Bridge emulation mode (NB Bridge implies RamboHost!) bool bridge = false; @@ -489,7 +489,7 @@ main( int argc, char** argv ) const bool onDevice = true; prnk.reset( new CurandRandomNumberKernel( devRndmom, onDevice ) ); #else - throw std::logic_error( "INTERNAL ERROR! CurandDevice is not supported on CPUs or non-NVidia GPUs" ); // INTERNAL ERROR (no path to this statement) + throw std::logic_error( "INTERNAL ERROR! CurandDevice is not supported on CPUs or non-NVidia GPUs" ); // INTERNAL ERROR (no path to this statement) #endif } else if( rndgen == RandomNumberMode::HiprandHost ) @@ -512,8 +512,9 @@ main( int argc, char** argv ) throw std::logic_error( "INTERNAL ERROR! HiprandDevice is not supported on CPUs or non-NVidia GPUs" ); // INTERNAL ERROR (no path to this statement) #endif } - else throw std::logic_error( "INTERNAL ERROR! Unknown rndgen value?" ); // INTERNAL ERROR (no path to this statement) - + else + throw std::logic_error( "INTERNAL ERROR! Unknown rndgen value?" ); // INTERNAL ERROR (no path to this statement) + // --- 0c. 
Create rambo sampling kernel [keep this in 0c for the moment] std::unique_ptr prsk; if( rmbsmp == RamboSamplingMode::RamboHost ) @@ -863,7 +864,7 @@ main( int argc, char** argv ) wrkflwtxt += "FLT+"; #else wrkflwtxt += "???+"; // no path to this statement -#endif /* clang-format on */ +#endif // -- CUCOMPLEX or THRUST or STD complex numbers? #ifdef __CUDACC__ #if defined MGONGPU_CUCXTYPE_CUCOMPLEX @@ -888,7 +889,7 @@ main( int argc, char** argv ) wrkflwtxt += "CXS:"; #else wrkflwtxt += "???:"; // no path to this statement -#endif +#endif /* clang-format on */ #endif // -- COMMON or CURAND HOST or CURAND DEVICE random numbers? if( rndgen == RandomNumberMode::CommonRandom ) diff --git a/epochX/cudacpp/gg_tt.mad/SubProcesses/runTest.cc b/epochX/cudacpp/gg_tt.mad/SubProcesses/runTest.cc index de327f2321..7f248d29a4 100644 --- a/epochX/cudacpp/gg_tt.mad/SubProcesses/runTest.cc +++ b/epochX/cudacpp/gg_tt.mad/SubProcesses/runTest.cc @@ -238,7 +238,7 @@ struct CUDATest : public CUDA_CPU_TestBase return MemoryAccessMatrixElements::ieventAccessConst( hstMatrixElements.data(), ievt ); } }; -#endif +#endif /* clang-format off */ // Use two levels of macros to force stringification at the right level // (see https://gcc.gnu.org/onlinedocs/gcc-3.0.1/cpp_3.html#SEC17 and https://stackoverflow.com/a/3419392) @@ -260,4 +260,4 @@ INSTANTIATE_TEST_SUITE_P( prefix, \ MG_INSTANTIATE_TEST_SUITE_GPU( XTESTID_GPU( MG_EPOCH_PROCESS_ID ), MadgraphTest ); #else MG_INSTANTIATE_TEST_SUITE_CPU( XTESTID_CPU( MG_EPOCH_PROCESS_ID ), MadgraphTest ); -#endif +#endif /* clang-format on */ From 5f7cf129408fe5edef7be151ab066d5065e55803 Mon Sep 17 00:00:00 2001 From: Andrea Valassi Date: Fri, 2 Feb 2024 17:28:04 +0100 Subject: [PATCH 29/47] [rocrand] generate gg_tt.mad --- .../gg_tt.mad/CODEGEN_mad_gg_tt_log.txt | 42 +-- .../cudacpp/gg_tt.mad/Cards/proc_card_mg5.dat | 2 +- epochX/cudacpp/gg_tt.mad/MGMEVersion.txt | 2 +- epochX/cudacpp/gg_tt.mad/Source/run_card.inc | 266 +++++++------- 
.../gg_tt.mad/SubProcesses/MGVersion.txt | 2 +- .../SubProcesses/P1_gg_ttx/CPPProcess.cc | 2 +- .../SubProcesses/P1_gg_ttx/CPPProcess.h | 2 +- .../SubProcesses/P1_gg_ttx/auto_dsig.f | 2 +- .../SubProcesses/P1_gg_ttx/auto_dsig1.f | 4 +- .../SubProcesses/P1_gg_ttx/check_sa.cc | 13 +- .../SubProcesses/P1_gg_ttx/matrix1.f | 4 +- epochX/cudacpp/gg_tt.mad/SubProcesses/myamp.f | 10 +- .../cudacpp/gg_tt.mad/SubProcesses/refine.sh | 4 +- .../cudacpp/gg_tt.mad/SubProcesses/runTest.cc | 4 +- .../cudacpp/gg_tt.mad/SubProcesses/setcuts.f | 26 +- .../cudacpp/gg_tt.mad/bin/internal/banner.py | 8 +- .../cudacpp/gg_tt.mad/bin/internal/cluster.py | 3 +- .../bin/internal/common_run_interface.py | 65 +++- .../gg_tt.mad/bin/internal/gen_ximprove.py | 6 +- .../gg_tt.mad/bin/internal/hel_recycle.py | 4 +- .../gg_tt.mad/bin/internal/launch_plugin.py | 2 +- .../gg_tt.mad/bin/internal/lhe_parser.py | 13 + epochX/cudacpp/gg_tt.mad/bin/internal/misc.py | 45 ++- .../gg_tt.mad/bin/internal/shower_card.py | 340 +++++++++++++----- .../gg_tt.mad/bin/internal/systematics.py | 4 +- epochX/cudacpp/gg_tt.mad/src/HelAmps_sm.h | 2 +- epochX/cudacpp/gg_tt.mad/src/Parameters_sm.cc | 2 +- epochX/cudacpp/gg_tt.mad/src/Parameters_sm.h | 2 +- 28 files changed, 575 insertions(+), 306 deletions(-) diff --git a/epochX/cudacpp/gg_tt.mad/CODEGEN_mad_gg_tt_log.txt b/epochX/cudacpp/gg_tt.mad/CODEGEN_mad_gg_tt_log.txt index f6226e7392..082ddbf799 100644 --- a/epochX/cudacpp/gg_tt.mad/CODEGEN_mad_gg_tt_log.txt +++ b/epochX/cudacpp/gg_tt.mad/CODEGEN_mad_gg_tt_log.txt @@ -14,7 +14,7 @@ Running MG5 in debug mode * * * * * * * * * * * * -* VERSION 3.5.2_lo_vect 2023-11-08 * +* VERSION 3.5.3_lo_vect 2023-12-23 * * * * WARNING: UNKNOWN DEVELOPMENT VERSION. 
* * WARNING: DO NOT USE FOR PRODUCTION * @@ -62,7 +62,7 @@ generate g g > t t~ No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.005857229232788086  +DEBUG: model prefixing takes 0.005126237869262695  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -155,17 +155,19 @@ INFO: Please specify coupling orders to bypass this step. INFO: Trying coupling order WEIGHTED<=2: WEIGTHED IS QCD+2*QED INFO: Trying process: g g > t t~ WEIGHTED<=2 @1 INFO: Process has 3 diagrams -1 processes with 3 diagrams generated in 0.009 s +1 processes with 3 diagrams generated in 0.008 s Total: 1 processes with 3 diagrams output madevent ../TMPOUT/CODEGEN_mad_gg_tt --hel_recycling=False --vector_size=32 --me_exporter=standalone_cudacpp Load PLUGIN.CUDACPP_OUTPUT +Plugin PLUGIN.CUDACPP_OUTPUT has marked as NOT being validated with this version: 3.5.3_lo_vect. 
+It has been validated for the last time with version: 3.5.2 Addition matrix-element will be done with PLUGIN: CUDACPP_OUTPUT Output will be done with PLUGIN: CUDACPP_OUTPUT DEBUG: cformat =  standalone_cudacpp [export_cpp.py at line 3071]  -DEBUG: Entering PLUGIN_ProcessExporter.__init__ (initialise the exporter) [output.py at line 160]  +DEBUG: Entering PLUGIN_ProcessExporter.__init__ (initialise the exporter) [output.py at line 161]  INFO: initialize a new directory: CODEGEN_mad_gg_tt INFO: remove old information in CODEGEN_mad_gg_tt -DEBUG: Entering PLUGIN_ProcessExporter.copy_template (initialise the directory) [output.py at line 165]  +DEBUG: Entering PLUGIN_ProcessExporter.copy_template (initialise the directory) [output.py at line 166]  WARNING: File exists /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt  INFO: Creating subdirectories in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt WARNING: File exists /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt/Cards  @@ -175,7 +177,7 @@ INFO: Generating Helas calls for process: g g > t t~ WEIGHTED<=2 @1 INFO: Processing color information for process: g g > t t~ @1 INFO: Creating files in directory P1_gg_ttx DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1058]  -DEBUG: process_exporter_cpp =  [export_v4.py at line 6262]  +DEBUG: process_exporter_cpp =  [export_v4.py at line 6261]  INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc @@ -184,23 +186,23 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. 
DEBUG: config_map =  [1, 2, 3] [export_cpp.py at line 711]  DEBUG: subproc_number =  0 [export_cpp.py at line 712]  DEBUG: Done [export_cpp.py at line 713]  -DEBUG: vector, subproc_group,self.opt['vector_size'] =  False True 32 [export_v4.py at line 1872]  -DEBUG: vector, subproc_group,self.opt['vector_size'] =  False True 32 [export_v4.py at line 1872]  -DEBUG: vector, subproc_group,self.opt['vector_size'] =  32 True 32 [export_v4.py at line 1872]  -DEBUG: vector, subproc_group,self.opt['vector_size'] =  32 True 32 [export_v4.py at line 1872]  +DEBUG: vector, subproc_group,self.opt['vector_size'] =  False True 32 [export_v4.py at line 1871]  +DEBUG: vector, subproc_group,self.opt['vector_size'] =  False True 32 [export_v4.py at line 1871]  +DEBUG: vector, subproc_group,self.opt['vector_size'] =  32 True 32 [export_v4.py at line 1871]  +DEBUG: vector, subproc_group,self.opt['vector_size'] =  32 True 32 [export_v4.py at line 1871]  INFO: Generating Feynman diagrams for Process: g g > t t~ WEIGHTED<=2 @1 INFO: Finding symmetric diagrams for subprocess group gg_ttx Generated helas calls for 1 subprocesses (3 diagrams) in 0.006 s -Wrote files for 10 helas calls in 0.108 s +Wrote files for 10 helas calls in 0.100 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 set of routines with options: P0 ALOHA: aloha creates FFV1 routines -ALOHA: aloha creates 2 routines in 0.154 s -DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 202]  +ALOHA: aloha creates 2 routines in 0.135 s +DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 203]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 set of routines with options: P0 ALOHA: aloha creates FFV1 routines -ALOHA: aloha creates 4 routines in 0.139 s +ALOHA: aloha creates 4 routines in 0.123 s VVV1 FFV1 FFV1 @@ -230,16 +232,16 @@ DEBUG: cd 
/data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt/S patching file auto_dsig1.f patching file driver.f patching file matrix1.f -DEBUG: p.returncode =  0 [output.py at line 237]  +DEBUG: p.returncode =  0 [output.py at line 238]  Output to directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt done. Type "launch" to generate events from this process, or see /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt/README Run "open index.html" to see more information about this process. quit -real 0m1.820s -user 0m1.561s -sys 0m0.253s +real 0m2.040s +user 0m1.710s +sys 0m0.290s Code generation completed in 2 seconds ************************************************************ * * @@ -253,7 +255,7 @@ Code generation completed in 2 seconds * * * * * * * * * * * * -* VERSION 3.5.2_lo_vect * +* VERSION 3.5.3_lo_vect * * * * The MadGraph5_aMC@NLO Development Team - Find us at * * https://server06.fynu.ucl.ac.be/projects/madgraph * @@ -283,7 +285,7 @@ launch in debug mode * * * * * * * * * * * * -* VERSION 3.5.2_lo_vect * +* VERSION 3.5.3_lo_vect * * * * The MadGraph5_aMC@NLO Development Team - Find us at * * https://server06.fynu.ucl.ac.be/projects/madgraph * diff --git a/epochX/cudacpp/gg_tt.mad/Cards/proc_card_mg5.dat b/epochX/cudacpp/gg_tt.mad/Cards/proc_card_mg5.dat index cf111e2e6d..9973b6a3db 100644 --- a/epochX/cudacpp/gg_tt.mad/Cards/proc_card_mg5.dat +++ b/epochX/cudacpp/gg_tt.mad/Cards/proc_card_mg5.dat @@ -8,7 +8,7 @@ #* * * * #* * #* * -#* VERSION 3.5.2_lo_vect 2023-11-08 * +#* VERSION 3.5.3_lo_vect 2023-12-23 * #* * #* WARNING: UNKNOWN DEVELOPMENT VERSION. 
* #* WARNING: DO NOT USE FOR PRODUCTION * diff --git a/epochX/cudacpp/gg_tt.mad/MGMEVersion.txt b/epochX/cudacpp/gg_tt.mad/MGMEVersion.txt index 85c67c3554..9d3a5c0ba0 100644 --- a/epochX/cudacpp/gg_tt.mad/MGMEVersion.txt +++ b/epochX/cudacpp/gg_tt.mad/MGMEVersion.txt @@ -1 +1 @@ -3.5.2_lo_vect \ No newline at end of file +3.5.3_lo_vect \ No newline at end of file diff --git a/epochX/cudacpp/gg_tt.mad/Source/run_card.inc b/epochX/cudacpp/gg_tt.mad/Source/run_card.inc index 57b2b878dc..67af0f2051 100644 --- a/epochX/cudacpp/gg_tt.mad/Source/run_card.inc +++ b/epochX/cudacpp/gg_tt.mad/Source/run_card.inc @@ -8,13 +8,13 @@ LPP(2) = 1 - EBEAM(1) = 6.5000000000D+03 + EBEAM(1) = 6.500000000000000D+03 - EBEAM(2) = 6.5000000000D+03 + EBEAM(2) = 6.500000000000000D+03 - PB1 = 0.0000000000D+00 + PB1 = 0.000000000000000D+00 - PB2 = 0.0000000000D+00 + PB2 = 0.000000000000000D+00 NB_PROTON(1) = 1 @@ -24,9 +24,9 @@ NB_NEUTRON(2) = 0 - MASS_ION(1) = -1.0000000000D+00 + MASS_ION(1) = -1.000000000000000D+00 - MASS_ION(2) = -1.0000000000D+00 + MASS_ION(2) = -1.000000000000000D+00 PDLABEL = 'nn23lo1' @@ -44,21 +44,21 @@ FIXED_EXTRA_SCALE = .FALSE. - SCALE = 9.1188000000D+01 + SCALE = 9.118800000000000D+01 - SF1 = 9.1188000000D+01 + SF1 = 9.118800000000000D+01 - SF2 = 9.1188000000D+01 + SF2 = 9.118800000000000D+01 - MUE_REF_FIXED = 9.1188000000D+01 + MUE_REF_FIXED = 9.118800000000000D+01 DYNAMICAL_SCALE_CHOICE = -1 - MUE_OVER_REF = 1.0000000000D+00 + MUE_OVER_REF = 1.000000000000000D+00 IEVO_EVA = 0 - SCALEFACT = 1.0000000000D+00 + SCALEFACT = 1.000000000000000D+00 ICKKW = 0 @@ -66,7 +66,7 @@ KTSCHEME = 1 - ALPSFACT = 1.0000000000D+00 + ALPSFACT = 1.000000000000000D+00 CHCLUSTER = .FALSE. @@ -76,217 +76,217 @@ CLUSINFO = .TRUE. - LHE_VERSION = 3.0000000000D+00 + LHE_VERSION = 3.000000000000000D+00 FRAME_ID = 6 AUTO_PTJ_MJJ = .TRUE. - BWCUTOFF = 1.5000000000D+01 + BWCUTOFF = 1.500000000000000D+01 CUT_DECAYS = .FALSE. 
- DSQRT_SHAT = 0.0000000000D+00 + DSQRT_SHAT = 0.000000000000000D+00 - PTJ = 2.0000000000D+01 + PTJ = 2.000000000000000D+01 - PTB = 0.0000000000D+00 + PTB = 0.000000000000000D+00 - PTA = 1.0000000000D+01 + PTA = 1.000000000000000D+01 - PTL = 1.0000000000D+01 + PTL = 1.000000000000000D+01 - MISSET = 0.0000000000D+00 + MISSET = 0.000000000000000D+00 - PTHEAVY = 0.0000000000D+00 + PTHEAVY = 0.000000000000000D+00 - PTJMAX = -1.0000000000D+00 + PTJMAX = -1.000000000000000D+00 - PTBMAX = -1.0000000000D+00 + PTBMAX = -1.000000000000000D+00 - PTAMAX = -1.0000000000D+00 + PTAMAX = -1.000000000000000D+00 - PTLMAX = -1.0000000000D+00 + PTLMAX = -1.000000000000000D+00 - MISSETMAX = -1.0000000000D+00 + MISSETMAX = -1.000000000000000D+00 - EJ = 0.0000000000D+00 + EJ = 0.000000000000000D+00 - EB = 0.0000000000D+00 + EB = 0.000000000000000D+00 - EA = 0.0000000000D+00 + EA = 0.000000000000000D+00 - EL = 0.0000000000D+00 + EL = 0.000000000000000D+00 - EJMAX = -1.0000000000D+00 + EJMAX = -1.000000000000000D+00 - EBMAX = -1.0000000000D+00 + EBMAX = -1.000000000000000D+00 - EAMAX = -1.0000000000D+00 + EAMAX = -1.000000000000000D+00 - ELMAX = -1.0000000000D+00 + ELMAX = -1.000000000000000D+00 - ETAJ = 5.0000000000D+00 + ETAJ = 5.000000000000000D+00 - ETAB = -1.0000000000D+00 + ETAB = -1.000000000000000D+00 - ETAA = 2.5000000000D+00 + ETAA = 2.500000000000000D+00 - ETAL = 2.5000000000D+00 + ETAL = 2.500000000000000D+00 - ETAJMIN = 0.0000000000D+00 + ETAJMIN = 0.000000000000000D+00 - ETABMIN = 0.0000000000D+00 + ETABMIN = 0.000000000000000D+00 - ETAAMIN = 0.0000000000D+00 + ETAAMIN = 0.000000000000000D+00 - ETALMIN = 0.0000000000D+00 + ETALMIN = 0.000000000000000D+00 - DRJJ = 4.0000000000D-01 + DRJJ = 4.000000000000000D-01 - DRBB = 0.0000000000D+00 + DRBB = 0.000000000000000D+00 - DRLL = 4.0000000000D-01 + DRLL = 4.000000000000000D-01 - DRAA = 4.0000000000D-01 + DRAA = 4.000000000000000D-01 - DRBJ = 0.0000000000D+00 + DRBJ = 0.000000000000000D+00 - DRAJ = 4.0000000000D-01 + DRAJ = 
4.000000000000000D-01 - DRJL = 4.0000000000D-01 + DRJL = 4.000000000000000D-01 - DRAB = 0.0000000000D+00 + DRAB = 0.000000000000000D+00 - DRBL = 0.0000000000D+00 + DRBL = 0.000000000000000D+00 - DRAL = 4.0000000000D-01 + DRAL = 4.000000000000000D-01 - DRJJMAX = -1.0000000000D+00 + DRJJMAX = -1.000000000000000D+00 - DRBBMAX = -1.0000000000D+00 + DRBBMAX = -1.000000000000000D+00 - DRLLMAX = -1.0000000000D+00 + DRLLMAX = -1.000000000000000D+00 - DRAAMAX = -1.0000000000D+00 + DRAAMAX = -1.000000000000000D+00 - DRBJMAX = -1.0000000000D+00 + DRBJMAX = -1.000000000000000D+00 - DRAJMAX = -1.0000000000D+00 + DRAJMAX = -1.000000000000000D+00 - DRJLMAX = -1.0000000000D+00 + DRJLMAX = -1.000000000000000D+00 - DRABMAX = -1.0000000000D+00 + DRABMAX = -1.000000000000000D+00 - DRBLMAX = -1.0000000000D+00 + DRBLMAX = -1.000000000000000D+00 - DRALMAX = -1.0000000000D+00 + DRALMAX = -1.000000000000000D+00 - MMJJ = 0.0000000000D+00 + MMJJ = 0.000000000000000D+00 - MMBB = 0.0000000000D+00 + MMBB = 0.000000000000000D+00 - MMAA = 0.0000000000D+00 + MMAA = 0.000000000000000D+00 - MMLL = 0.0000000000D+00 + MMLL = 0.000000000000000D+00 - MMJJMAX = -1.0000000000D+00 + MMJJMAX = -1.000000000000000D+00 - MMBBMAX = -1.0000000000D+00 + MMBBMAX = -1.000000000000000D+00 - MMAAMAX = -1.0000000000D+00 + MMAAMAX = -1.000000000000000D+00 - MMLLMAX = -1.0000000000D+00 + MMLLMAX = -1.000000000000000D+00 - MMNL = 0.0000000000D+00 + MMNL = 0.000000000000000D+00 - MMNLMAX = -1.0000000000D+00 + MMNLMAX = -1.000000000000000D+00 - PTLLMIN = 0.0000000000D+00 + PTLLMIN = 0.000000000000000D+00 - PTLLMAX = -1.0000000000D+00 + PTLLMAX = -1.000000000000000D+00 - XPTJ = 0.0000000000D+00 + XPTJ = 0.000000000000000D+00 - XPTB = 0.0000000000D+00 + XPTB = 0.000000000000000D+00 - XPTA = 0.0000000000D+00 + XPTA = 0.000000000000000D+00 - XPTL = 0.0000000000D+00 + XPTL = 0.000000000000000D+00 - PTJ1MIN = 0.0000000000D+00 + PTJ1MIN = 0.000000000000000D+00 - PTJ1MAX = -1.0000000000D+00 + PTJ1MAX = -1.000000000000000D+00 - 
PTJ2MIN = 0.0000000000D+00 + PTJ2MIN = 0.000000000000000D+00 - PTJ2MAX = -1.0000000000D+00 + PTJ2MAX = -1.000000000000000D+00 - PTJ3MIN = 0.0000000000D+00 + PTJ3MIN = 0.000000000000000D+00 - PTJ3MAX = -1.0000000000D+00 + PTJ3MAX = -1.000000000000000D+00 - PTJ4MIN = 0.0000000000D+00 + PTJ4MIN = 0.000000000000000D+00 - PTJ4MAX = -1.0000000000D+00 + PTJ4MAX = -1.000000000000000D+00 CUTUSE = 0 - PTL1MIN = 0.0000000000D+00 + PTL1MIN = 0.000000000000000D+00 - PTL1MAX = -1.0000000000D+00 + PTL1MAX = -1.000000000000000D+00 - PTL2MIN = 0.0000000000D+00 + PTL2MIN = 0.000000000000000D+00 - PTL2MAX = -1.0000000000D+00 + PTL2MAX = -1.000000000000000D+00 - PTL3MIN = 0.0000000000D+00 + PTL3MIN = 0.000000000000000D+00 - PTL3MAX = -1.0000000000D+00 + PTL3MAX = -1.000000000000000D+00 - PTL4MIN = 0.0000000000D+00 + PTL4MIN = 0.000000000000000D+00 - PTL4MAX = -1.0000000000D+00 + PTL4MAX = -1.000000000000000D+00 - HTJMIN = 0.0000000000D+00 + HTJMIN = 0.000000000000000D+00 - HTJMAX = -1.0000000000D+00 + HTJMAX = -1.000000000000000D+00 - IHTMIN = 0.0000000000D+00 + IHTMIN = 0.000000000000000D+00 - IHTMAX = -1.0000000000D+00 + IHTMAX = -1.000000000000000D+00 - HT2MIN = 0.0000000000D+00 + HT2MIN = 0.000000000000000D+00 - HT3MIN = 0.0000000000D+00 + HT3MIN = 0.000000000000000D+00 - HT4MIN = 0.0000000000D+00 + HT4MIN = 0.000000000000000D+00 - HT2MAX = -1.0000000000D+00 + HT2MAX = -1.000000000000000D+00 - HT3MAX = -1.0000000000D+00 + HT3MAX = -1.000000000000000D+00 - HT4MAX = -1.0000000000D+00 + HT4MAX = -1.000000000000000D+00 - PTGMIN = 0.0000000000D+00 + PTGMIN = 0.000000000000000D+00 - R0GAMMA = 4.0000000000D-01 + R0GAMMA = 4.000000000000000D-01 - XN = 1.0000000000D+00 + XN = 1.000000000000000D+00 - EPSGAMMA = 1.0000000000D+00 + EPSGAMMA = 1.000000000000000D+00 ISOEM = .TRUE. 
- XETAMIN = 0.0000000000D+00 + XETAMIN = 0.000000000000000D+00 - DELTAETA = 0.0000000000D+00 + DELTAETA = 0.000000000000000D+00 - KT_DURHAM = -1.0000000000D+00 + KT_DURHAM = -1.000000000000000D+00 - D_PARAMETER = 4.0000000000D-01 + D_PARAMETER = 4.000000000000000D-01 - PT_LUND = -1.0000000000D+00 + PT_LUND = -1.000000000000000D+00 PDGS_FOR_MERGING_CUT(0) = 7 @@ -306,7 +306,7 @@ MAXJETFLAVOR = 4 - XQCUT = 0.0000000000D+00 + XQCUT = 0.000000000000000D+00 USE_SYST = .TRUE. @@ -316,15 +316,15 @@ MC_GROUPED_SUBPROC = .TRUE. - XMTC = 0.0000000000D+00 + XMTC = 0.000000000000000D+00 - D = 1.0000000000D+00 + D = 1.000000000000000D+00 ISSGRIDFILE = '' - TMIN_FOR_CHANNEL = -1.0000000000D+00 + TMIN_FOR_CHANNEL = -1.000000000000000D+00 - SMALL_WIDTH_TREATMENT = 1.0000000000D-06 + SMALL_WIDTH_TREATMENT = 1.000000000000000D-06 SDE_STRAT = 1 @@ -332,33 +332,33 @@ PDG_CUT(1) = 0 - PTMIN4PDG(0) = 1.0000000000D+00 + PTMIN4PDG(0) = 1.000000000000000D+00 - PTMIN4PDG(1) = 0.0000000000D+00 + PTMIN4PDG(1) = 0.000000000000000D+00 - PTMAX4PDG(0) = 1.0000000000D+00 + PTMAX4PDG(0) = 1.000000000000000D+00 - PTMAX4PDG(1) = -1.0000000000D+00 + PTMAX4PDG(1) = -1.000000000000000D+00 - EMIN4PDG(0) = 1.0000000000D+00 + EMIN4PDG(0) = 1.000000000000000D+00 - EMIN4PDG(1) = 0.0000000000D+00 + EMIN4PDG(1) = 0.000000000000000D+00 - EMAX4PDG(0) = 1.0000000000D+00 + EMAX4PDG(0) = 1.000000000000000D+00 - EMAX4PDG(1) = -1.0000000000D+00 + EMAX4PDG(1) = -1.000000000000000D+00 - ETAMIN4PDG(0) = 1.0000000000D+00 + ETAMIN4PDG(0) = 1.000000000000000D+00 - ETAMIN4PDG(1) = 0.0000000000D+00 + ETAMIN4PDG(1) = 0.000000000000000D+00 - ETAMAX4PDG(0) = 1.0000000000D+00 + ETAMAX4PDG(0) = 1.000000000000000D+00 - ETAMAX4PDG(1) = -1.0000000000D+00 + ETAMAX4PDG(1) = -1.000000000000000D+00 - MXXMIN4PDG(0) = 1.0000000000D+00 + MXXMIN4PDG(0) = 1.000000000000000D+00 - MXXMIN4PDG(1) = 0.0000000000D+00 + MXXMIN4PDG(1) = 0.000000000000000D+00 MXXPART_ANTIPART(1) = .FALSE. 
diff --git a/epochX/cudacpp/gg_tt.mad/SubProcesses/MGVersion.txt b/epochX/cudacpp/gg_tt.mad/SubProcesses/MGVersion.txt index 85c67c3554..9d3a5c0ba0 100644 --- a/epochX/cudacpp/gg_tt.mad/SubProcesses/MGVersion.txt +++ b/epochX/cudacpp/gg_tt.mad/SubProcesses/MGVersion.txt @@ -1 +1 @@ -3.5.2_lo_vect \ No newline at end of file +3.5.3_lo_vect \ No newline at end of file diff --git a/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/CPPProcess.cc b/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/CPPProcess.cc index dbaa56b35c..09a2a7b6fb 100644 --- a/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/CPPProcess.cc +++ b/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/CPPProcess.cc @@ -7,7 +7,7 @@ // Further modified by: S. Hageboeck, O. Mattelaer, S. Roiser, J. Teig, A. Valassi, Z. Wettersten (2020-2023) for the MG5aMC CUDACPP plugin. //========================================================================== // This file has been automatically generated for CUDA/C++ standalone by -// MadGraph5_aMC@NLO v. 3.5.2_lo_vect, 2023-11-08 +// MadGraph5_aMC@NLO v. 3.5.3_lo_vect, 2023-12-23 // By the MadGraph5_aMC@NLO Development Team // Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch //========================================================================== diff --git a/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/CPPProcess.h b/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/CPPProcess.h index 4a88a07226..aee80d0295 100644 --- a/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/CPPProcess.h +++ b/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/CPPProcess.h @@ -7,7 +7,7 @@ // Further modified by: O. Mattelaer, S. Roiser, J. Teig, A. Valassi (2020-2023) for the MG5aMC CUDACPP plugin. //========================================================================== // This file has been automatically generated for CUDA/C++ standalone by -// MadGraph5_aMC@NLO v. 3.5.2_lo_vect, 2023-11-08 +// MadGraph5_aMC@NLO v. 
3.5.3_lo_vect, 2023-12-23 // By the MadGraph5_aMC@NLO Development Team // Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch //========================================================================== diff --git a/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/auto_dsig.f b/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/auto_dsig.f index d80d770784..7bff4b9455 100644 --- a/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/auto_dsig.f +++ b/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/auto_dsig.f @@ -359,7 +359,7 @@ SUBROUTINE DSIG_VEC(ALL_P,ALL_WGT,ALL_XBK,ALL_Q2FACT,ALL_CM_RAP DOUBLE PRECISION FUNCTION DSIG(PP,WGT,IMODE) C **************************************************** C -C Generated by MadGraph5_aMC@NLO v. 3.5.2_lo_vect, 2023-11-08 +C Generated by MadGraph5_aMC@NLO v. 3.5.3_lo_vect, 2023-12-23 C By the MadGraph5_aMC@NLO Development Team C Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch C diff --git a/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/auto_dsig1.f b/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/auto_dsig1.f index 9346ee4c6a..c4acdbe58d 100644 --- a/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/auto_dsig1.f +++ b/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/auto_dsig1.f @@ -1,7 +1,7 @@ DOUBLE PRECISION FUNCTION DSIG1(PP,WGT,IMODE) C **************************************************** C -C Generated by MadGraph5_aMC@NLO v. 3.5.2_lo_vect, 2023-11-08 +C Generated by MadGraph5_aMC@NLO v. 3.5.3_lo_vect, 2023-12-23 C By the MadGraph5_aMC@NLO Development Team C Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch C @@ -216,7 +216,7 @@ DOUBLE PRECISION FUNCTION DSIG1_VEC(ALL_PP, ALL_XBK, ALL_Q2FACT, $ ALL_CM_RAP, ALL_WGT, IMODE, ALL_OUT, VECSIZE_USED) C **************************************************** C -C Generated by MadGraph5_aMC@NLO v. 3.5.2_lo_vect, 2023-11-08 +C Generated by MadGraph5_aMC@NLO v. 
3.5.3_lo_vect, 2023-12-23 C By the MadGraph5_aMC@NLO Development Team C Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch C diff --git a/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/check_sa.cc b/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/check_sa.cc index 9ce91fce4b..e086ae12d9 100644 --- a/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/check_sa.cc +++ b/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/check_sa.cc @@ -166,7 +166,7 @@ main( int argc, char** argv ) #ifdef MGONGPUCPP_GPUIMPL RamboSamplingMode rmbsmp = RamboSamplingMode::RamboDevice; // default on GPU #else - RamboSamplingMode rmbsmp = RamboSamplingMode::RamboHost; // default on CPU + RamboSamplingMode rmbsmp = RamboSamplingMode::RamboHost; // default on CPU #endif // Bridge emulation mode (NB Bridge implies RamboHost!) bool bridge = false; @@ -489,7 +489,7 @@ main( int argc, char** argv ) const bool onDevice = true; prnk.reset( new CurandRandomNumberKernel( devRndmom, onDevice ) ); #else - throw std::logic_error( "INTERNAL ERROR! CurandDevice is not supported on CPUs or non-NVidia GPUs" ); // INTERNAL ERROR (no path to this statement) + throw std::logic_error( "INTERNAL ERROR! CurandDevice is not supported on CPUs or non-NVidia GPUs" ); // INTERNAL ERROR (no path to this statement) #endif } else if( rndgen == RandomNumberMode::HiprandHost ) @@ -512,8 +512,9 @@ main( int argc, char** argv ) throw std::logic_error( "INTERNAL ERROR! HiprandDevice is not supported on CPUs or non-NVidia GPUs" ); // INTERNAL ERROR (no path to this statement) #endif } - else throw std::logic_error( "INTERNAL ERROR! Unknown rndgen value?" ); // INTERNAL ERROR (no path to this statement) - + else + throw std::logic_error( "INTERNAL ERROR! Unknown rndgen value?" ); // INTERNAL ERROR (no path to this statement) + // --- 0c. 
Create rambo sampling kernel [keep this in 0c for the moment] std::unique_ptr prsk; if( rmbsmp == RamboSamplingMode::RamboHost ) @@ -863,7 +864,7 @@ main( int argc, char** argv ) wrkflwtxt += "FLT+"; #else wrkflwtxt += "???+"; // no path to this statement -#endif /* clang-format on */ +#endif // -- CUCOMPLEX or THRUST or STD or CXSIMPLE complex numbers? #ifdef __CUDACC__ #if defined MGONGPU_CUCXTYPE_CUCOMPLEX @@ -888,7 +889,7 @@ main( int argc, char** argv ) wrkflwtxt += "CXS:"; #else wrkflwtxt += "???:"; // no path to this statement -#endif +#endif /* clang-format on */ #endif // -- COMMON or CURAND HOST or CURAND DEVICE random numbers? if( rndgen == RandomNumberMode::CommonRandom ) diff --git a/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/matrix1.f b/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/matrix1.f index 0c2ce6ec40..27fbe7302c 100644 --- a/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/matrix1.f +++ b/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/matrix1.f @@ -1,7 +1,7 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, $ ICOL) C -C Generated by MadGraph5_aMC@NLO v. 3.5.2_lo_vect, 2023-11-08 +C Generated by MadGraph5_aMC@NLO v. 3.5.3_lo_vect, 2023-12-23 C By the MadGraph5_aMC@NLO Development Team C Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch C @@ -301,7 +301,7 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, REAL*8 FUNCTION MATRIX1(P,NHEL,IC, IHEL,AMP2, JAMP2, IVEC) C -C Generated by MadGraph5_aMC@NLO v. 3.5.2_lo_vect, 2023-11-08 +C Generated by MadGraph5_aMC@NLO v. 
3.5.3_lo_vect, 2023-12-23 C By the MadGraph5_aMC@NLO Development Team C Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch C diff --git a/epochX/cudacpp/gg_tt.mad/SubProcesses/myamp.f b/epochX/cudacpp/gg_tt.mad/SubProcesses/myamp.f index d26e7d0fa2..9e5f8d44dd 100644 --- a/epochX/cudacpp/gg_tt.mad/SubProcesses/myamp.f +++ b/epochX/cudacpp/gg_tt.mad/SubProcesses/myamp.f @@ -473,7 +473,9 @@ subroutine set_peaks endif if (xo.eq.0d0) xo=MIN(10d0/stot, stot/50d0, 0.5) c if (prwidth_tmp(i, iconfig) .eq. 0d0.or.iden_part(i).gt.0) then - call setgrid(-i,xo,a,1) + if (tsgn .ne. 1d0.or.i .ne. -(nexternal-(nincoming+1))) then !s channel for shat + call setgrid(-i,xo,a,1) + endif c else c write(*,*) 'Using flat grid for BW',i,nbw, c $ prmass(i,iconfig) @@ -551,7 +553,11 @@ subroutine set_peaks swidth(i) = xo spole(i)= -2.0d0 ! 1/s pole write(*,*) "Transforming s_hat 1/s ",i,xo, smin, stot - else + else if(smin/stot.gt.spole(i)+bwcutoff*max(swidth(i), spole(i)*small_width_treatment)) then + swidth(i) = smin/stot + spole(i) = -2d0 + write(*,*) "Transforming s_hat 1/s ",i,xo, smin, stot + else write(*,*) "Transforming s_hat BW ",spole(i), max(swidth(i), spole(i)*small_width_treatment) endif endif diff --git a/epochX/cudacpp/gg_tt.mad/SubProcesses/refine.sh b/epochX/cudacpp/gg_tt.mad/SubProcesses/refine.sh index 3e6dab022a..afb9b99ad1 100644 --- a/epochX/cudacpp/gg_tt.mad/SubProcesses/refine.sh +++ b/epochX/cudacpp/gg_tt.mad/SubProcesses/refine.sh @@ -57,7 +57,7 @@ j=%(directory)s for((try=1;try<=16;try+=1)); do if [ "$keeplog" = true ] ; then - %(Ppath)s/madevent 2>&1 >> $k &1 >> $k &1 >> log.txt &1 >> log.txt