SC-SGS · timniederhausen · Apr 14, 2025 · Apr 14, 2025 · Apr 15, 2025 · Apr 15, 2025
diff --git a/.github/workflows/lint.yml b/.github/workflows/lint.yml
@@ -3,7 +3,6 @@ name: Code linting
 on:
   push:
     branches:
-      - main
   pull_request:
 
 jobs:

diff --git a/CMakeLists.txt b/CMakeLists.txt
@@ -20,11 +20,15 @@ cmake_dependent_option(GPRAT_ENABLE_TESTS "Build unit and integration tests"
                        ${PROJECT_IS_TOP_LEVEL} "GPRAT_BUILD_CORE" OFF)
 cmake_dependent_option(GPRAT_ENABLE_MKL "Enable support for Intel oneMKL"
                        ${PROJECT_IS_TOP_LEVEL} "GPRAT_BUILD_CORE" OFF)
+option(GPRAT_ENABLE_BENCHMARK_CACHE_EVICTIONS
+       "Evict data from caches before running BLAS operations" ON)
 
 option(GPRAT_ENABLE_FORMAT_TARGETS "Enable clang-format / cmake-format targets"
        ${PROJECT_IS_TOP_LEVEL})
 
 if(GPRAT_ENABLE_FORMAT_TARGETS)
+  set(CMAKE_FORMAT_EXCLUDE "^external_ports/")
+
   find_package(format QUIET)
   if(NOT format_FOUND)
     include(FetchContent)

diff --git a/CMakePresets.json b/CMakePresets.json
@@ -21,6 +21,21 @@
         "deprecated": true
       }
     },
+    {
+      "name": "vcpkg",
+      "hidden": true,
+      "cacheVariables": {
+        "CMAKE_TOOLCHAIN_FILE": "$env{VCPKG_ROOT}/scripts/buildsystems/vcpkg.cmake",
+        "X_VCPKG_APPLOCAL_DEPS_INSTALL": "ON"
+      }
+    },
+    {
+      "name": "vcpkg-win64-static",
+      "hidden": true,
+      "cacheVariables": {
+        "VCPKG_TARGET_TRIPLET": "x64-windows-static-md-release"
+      }
+    },
     {
       "name": "cppcheck",
       "hidden": true,
@@ -67,7 +82,7 @@
       "description": "Note that all the flags after /W4 are required for MSVC to conform to the language standard",
       "hidden": true,
       "cacheVariables": {
-        "CMAKE_CXX_FLAGS": "/sdl /guard:cf /utf-8 /diagnostics:caret /w14165 /w44242 /w44254 /w44263 /w34265 /w34287 /w44296 /w44365 /w44388 /w44464 /w14545 /w14546 /w14547 /w14549 /w14555 /w34619 /w34640 /w24826 /w14905 /w14906 /w14928 /w45038 /W4 /permissive- /volatile:iso /Zc:inline /Zc:preprocessor /Zc:enumTypes /Zc:lambda /Zc:__cplusplus /Zc:externConstexpr /Zc:throwingNew /EHsc",
+        "CMAKE_CXX_FLAGS": "/sdl /guard:cf /utf-8 /diagnostics:caret /w14165 /w44242 /w44254 /w44263 /w34265 /w34287 /w44296 /w44365 /w44388 /w44464 /w14545 /w14546 /w14547 /w14549 /w14555 /w34619 /w34640 /w24826 /w14905 /w14906 /w14928 /w45038 /W4 /permissive- /volatile:iso /Zc:inline /Zc:preprocessor /Zc:enumTypes /Zc:lambda /Zc:__cplusplus /Zc:externConstexpr /Zc:throwingNew /EHsc /D_CRT_SECURE_NO_WARNINGS",
         "CMAKE_EXE_LINKER_FLAGS": "/machine:x64 /guard:cf",
         "CMAKE_SHARED_LINKER_FLAGS": "/machine:x64 /guard:cf"
       }
@@ -146,7 +161,7 @@
     },
     {
       "name": "ci-windows",
-      "inherits": ["ci-build", "ci-win64", "ci-multi-config"]
+      "inherits": ["ci-build", "ci-win64", "ci-multi-config", "vcpkg", "vcpkg-win64-static"]
     },
     {
       "name": "ci-ubuntu-24.04",

diff --git a/README.md b/README.md
@@ -9,7 +9,7 @@ code.
 
 ## Dependencies
 
-GPRat depends on [HPX](https://hpx-docs.stellar-group.org/latest/html/index.html) for asynchronous task-based parallelization. 
+GPRat depends on [HPX](https://hpx-docs.stellar-group.org/latest/html/index.html) for asynchronous task-based parallelization.
 Furthermore, for CPU-only BLAS computation GPRat requires [OpenBLAS](http://www.openmathlib.org/OpenBLAS/) or [MKL](https://www.intel.com/content/www/us/en/developer/tools/oneapi/onemkl.html).
 A [CUDA](https://developer.nvidia.com/cuda-toolkit) installation is required for GPU-only BLAS computations.
 
@@ -20,6 +20,9 @@ A script to install and setup spack for `GPRat` is provided in [`spack-repo`](sp
 Spack environment configurations and setup scripts for CPU and GPU use are provided in
 [`spack-repo/environments`](spack-repo/environments).
 
+Since Spack is not available on Windows, we also support dependency installation using vcpkg.
+For now, vcpkg builds are only tested on Windows.
+
 ## How To Compile
 
 GPRat makes use of [CMake presets][1] to simplify the process of configuring the project.
@@ -35,6 +38,7 @@ ctest --preset=dev-linux
 As a developer, you may create a `CMakeUserPresets.json` file at the root of the project that contains additional
 presets local to your machine.
 In addition to the build configuration `dev-linux`, there are `release-linux`, `dev-linux-gpu`, and `release-linux-gpu`.
+For Windows, we have similar presets called `dev-windows` and `release-windows`.
 The configurations suffixed with `-gpu` build the library with CUDA.
 
 GPRat can be built with or without Python bindings.

diff --git a/bindings/CMakeLists.txt b/bindings/CMakeLists.txt
@@ -1,5 +1,5 @@
 # try finding pybind11
-set(GPRat_pybind11_VERSION 2.10.3)
+set(GPRat_pybind11_VERSION 2.13.6)
 find_package(pybind11 ${GPRat_pybind11_VERSION} QUIET)
 if(pybind11_FOUND)
   message(STATUS "Found package pybind11.")

diff --git a/bindings/gprat_py.cpp b/bindings/gprat_py.cpp
@@ -1,4 +1,5 @@
-#include "gprat_c.hpp"
+#include "gprat/gprat.hpp"
+
 #include <pybind11/pybind11.h>
 #include <pybind11/stl.h>
 
@@ -31,19 +32,19 @@ void init_gprat(py::module &m)
     // Set hyperparameters to default values in `AdamParams` class, unless
     // specified. Python object has full access to each hyperparameter and a
     // string representation `__repr__`.
-    py::class_<gprat_hyper::AdamParams>(m, "AdamParams")
+    py::class_<gprat::AdamParams>(m, "AdamParams")
         .def(py::init<double, double, double, double, int>(),
              py::arg("learning_rate") = 0.001,
              py::arg("beta1") = 0.9,
              py::arg("beta2") = 0.999,
              py::arg("epsilon") = 1e-8,
              py::arg("opt_iter") = 0)
-        .def_readwrite("learning_rate", &gprat_hyper::AdamParams::learning_rate)
-        .def_readwrite("beta1", &gprat_hyper::AdamParams::beta1)
-        .def_readwrite("beta2", &gprat_hyper::AdamParams::beta2)
-        .def_readwrite("epsilon", &gprat_hyper::AdamParams::epsilon)
-        .def_readwrite("opt_iter", &gprat_hyper::AdamParams::opt_iter)
-        .def("__repr__", &gprat_hyper::AdamParams::repr);
+        .def_readwrite("learning_rate", &gprat::AdamParams::learning_rate)
+        .def_readwrite("beta1", &gprat::AdamParams::beta1)
+        .def_readwrite("beta2", &gprat::AdamParams::beta2)
+        .def_readwrite("epsilon", &gprat::AdamParams::epsilon)
+        .def_readwrite("opt_iter", &gprat::AdamParams::opt_iter)
+        .def("__repr__", &gprat::AdamParams::repr);
 
     // Initializes Gaussian Process with `GP` class. Sets default parameters for
     // squared exponential kernel, number of regressors and trainable, unless

diff --git a/bindings/utils_py.cpp b/bindings/utils_py.cpp
@@ -1,5 +1,6 @@
-#include "target.hpp"
-#include "utils_c.hpp"
+#include "gprat/target.hpp"
+#include "gprat/utils.hpp"
+
 #include <pybind11/pybind11.h>
 #include <pybind11/stl.h>
 
@@ -32,7 +33,7 @@ void start_hpx_wrapper(std::vector<std::string> args, std::size_t n_cores)
     }
     argv.push_back(nullptr);
     int argc = static_cast<int>(args.size());
-    utils::start_hpx_runtime(argc, argv.data());
+    gprat::start_hpx_runtime(argc, argv.data());
 }
 
 /**
@@ -43,7 +44,7 @@ void start_hpx_wrapper(std::vector<std::string> args, std::size_t n_cores)
 void init_utils(py::module &m)
 {
     m.def("compute_train_tiles",
-          &utils::compute_train_tiles,
+          &gprat::compute_train_tiles,
           py::arg("n_samples"),
           py::arg("n_tile_size"),
           R"pbdoc(
@@ -58,7 +59,7 @@ void init_utils(py::module &m)
           )pbdoc");
 
     m.def("compute_train_tile_size",
-          &utils::compute_train_tile_size,
+          &gprat::compute_train_tile_size,
           py::arg("n_samples"),
           py::arg("n_tiles"),
           R"pbdoc(
@@ -73,7 +74,7 @@ void init_utils(py::module &m)
           )pbdoc");
 
     m.def("compute_test_tiles",
-          &utils::compute_test_tiles,
+          &gprat::compute_test_tiles,
           py::arg("m_samples"),
           py::arg("n_tiles"),
           py::arg("n_tile_size"),
@@ -90,19 +91,19 @@ void init_utils(py::module &m)
           )pbdoc");
 
     m.def("print_vector",
-          &utils::print_vector,
+          &gprat::print_vector,
           py::arg("vec"),
           py::arg("start") = 0,
           py::arg("end") = -1,
           py::arg("separator") = " ",
           "Print elements of a vector with optional start, end, and separator parameters");
 
     m.def("start_hpx", &start_hpx_wrapper, py::arg("args"), py::arg("n_cores"));  // Using the wrapper function
-    m.def("resume_hpx", &utils::resume_hpx_runtime);
-    m.def("suspend_hpx", &utils::suspend_hpx_runtime);
-    m.def("stop_hpx", &utils::stop_hpx_runtime);
+    m.def("resume_hpx", &gprat::resume_hpx_runtime);
+    m.def("suspend_hpx", &gprat::suspend_hpx_runtime);
+    m.def("stop_hpx", &gprat::stop_hpx_runtime);
 
-    m.def("compiled_with_cuda", &utils::compiled_with_cuda, "Check if the code was compiled with CUDA support");
+    m.def("compiled_with_cuda", &gprat::compiled_with_cuda, "Check if the code was compiled with CUDA support");
 
     m.def("print_available_gpus", &gprat::print_available_gpus, "Print available GPUs with their properties");
     m.def("gpu_count", &gprat::gpu_count, "Return the number of available GPUs");

diff --git a/core/CMakeLists.txt b/core/CMakeLists.txt
@@ -1,18 +1,20 @@
+# Option for GPU support with CUDA, cuSolver, cuBLAS
+option(GPRAT_WITH_CUDA "Enable GPU support with CUDA, cuSolver, cuBLAS" OFF)
+
 if(GPRAT_WITH_CUDA)
+  set(CMAKE_CUDA_STANDARD 20)
+  set(CMAKE_CUDA_EXTENSIONS OFF)
   enable_language(CUDA)
 endif()
 
-# Option for GPU support with CUDA, cuSolver, cuBLAS
-option(GPRAT_WITH_CUDA "Enable GPU support with CUDA, cuSolver, cuBLAS" OFF)
-# Pass variable to C++ code
-add_compile_definitions(GPRAT_WITH_CUDA=$<BOOL:${GPRAT_WITH_CUDA}>)
-
 set(SOURCE_FILES
-    src/gprat_c.cpp
-    src/utils_c.cpp
+    src/gprat.cpp
+    src/utils.cpp
+    src/performance_counters.cpp
     src/target.cpp
-    src/gp_kernels.cpp
-    src/gp_hyperparameters.cpp
+    src/tile_data.cpp
+    src/kernels.cpp
+    src/hyperparameters.cpp
     src/cpu/gp_functions.cpp
     src/cpu/gp_algorithms.cpp
     src/cpu/gp_uncertainty.cpp
@@ -54,7 +56,10 @@ target_sources(gprat_core PRIVATE ${header_files})
 target_link_libraries(gprat_core PUBLIC HPX::hpx)
 
 if(GPRAT_WITH_CUDA)
+  find_package(CUDAToolkit MODULE REQUIRED)
   target_link_libraries(gprat_core PUBLIC CUDA::cusolver CUDA::cublas)
+  # Flag not working for CLANG CUDA
+  target_compile_features(gprat_core PUBLIC cuda_std_${CMAKE_CUDA_STANDARD})
 endif()
 
 # Include directories
@@ -66,16 +71,19 @@ if(GPRAT_ENABLE_MKL)
   # Link Intel oneMKL
   target_link_libraries(gprat_core PUBLIC MKL::mkl_intel_lp64 MKL::mkl_core
                                           MKL::MKL MKL::mkl_sequential)
+  target_compile_definitions(gprat_core PUBLIC GPRAT_ENABLE_MKL)
 else()
   # Link OpenBLAS
   target_link_libraries(gprat_core PUBLIC ${OpenBLAS_LIB})
 endif()
 
-if(GPRAT_ENABLE_MKL)
-  target_compile_definitions(gprat_core PUBLIC GPRAT_ENABLE_MKL)
+target_compile_definitions(gprat_core
+                           PUBLIC GPRAT_WITH_CUDA=$<BOOL:${GPRAT_WITH_CUDA}>)
+if(GPRAT_ENABLE_BENCHMARK_CACHE_EVICTIONS)
+  target_compile_definitions(gprat_core
+                             PUBLIC GPRAT_ENABLE_BENCHMARK_CACHE_EVICTIONS)
 endif()
-
-target_compile_features(gprat_core PUBLIC cxx_std_17)
+target_compile_features(gprat_core PUBLIC cxx_std_20)
 
 set_property(TARGET gprat_core PROPERTY POSITION_INDEPENDENT_CODE ON)
-Original file line number
+Diff line change
@@ Expand Up / @@ -3,7 +3,6 @@ name: Code linting @@
     on:
       push:
         branches:
-          - main
       pull_request:
     jobs:
@@ Expand Down @@