Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
23 commits
Select commit Hold shift + click to select a range
f75c30f
fix(test): Link to Boost::boost for all installed header-only libs
timniederhausen Apr 14, 2025
f88e807
chore(core): Make code more portable
timniederhausen Apr 14, 2025
6a68745
feat: Add vcpkg as an alternative to spack (mainly for Windows)
timniederhausen Apr 15, 2025
50809bb
chore(vcpkg): Import vanilla intel-mkl port
timniederhausen Apr 15, 2025
6b7be05
chore(vcpkg): Switch MKL build to lp64 + sequential
timniederhausen Apr 15, 2025
8f935a2
fix: Exclude vcpkg *.cmake files from formatting
timniederhausen Apr 15, 2025
5976c4d
refactor!(core): Move include files into gprat/ subdirectory
timniederhausen Jul 11, 2025
0017764
refactor!(core): Move everything into the gprat namespace
timniederhausen Jul 14, 2025
67ce1fc
refactor!(core): Remove unnecessary prefixes/suffixes from filenames
timniederhausen Jul 20, 2025
21b69e6
fix(ci): Always enable lint workflows
timniederhausen Jul 20, 2025
42412be
feat(core): Support serializing AdamParams and SEKParams
timniederhausen Jul 22, 2025
5ce8c1a
feat!(core): Introduce const_tile_data + mutable_tile_data
timniederhausen Jul 20, 2025
1977eea
feat(core): Add simple-to-use per-function performance counters
timniederhausen Aug 10, 2025
928b269
feat(core): Use NUMA-aware allocator for tile data
timniederhausen Jul 25, 2025
3df5ebd
chore(core): Consistently use std::size_t
timniederhausen Aug 11, 2025
07998a1
feat(examples): Add command-line arguments for all algorithm parameters
timniederhausen Apr 26, 2025
986bf4f
fix(examples): Don't try to write results outside of the target direc…
timniederhausen May 6, 2025
80a880a
feat(core): Track function invocation count as well
timniederhausen Sep 20, 2025
e2b0700
refactor!(core): Add scheduler type and make algorithms use it
timniederhausen Sep 20, 2025
48a8a76
chore: Upgrade dependencies
timniederhausen Aug 10, 2025
bbf35bc
chore(core): Fix issues with CUDA / nvcc under Windows
timniederhausen Nov 9, 2025
ae13f9d
feat(core): Add optional cache eviction before BLAS operation
timniederhausen Nov 9, 2025
068da77
chore: Add some minimal docs on Windows support
timniederhausen Nov 23, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 0 additions & 1 deletion .github/workflows/lint.yml
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@ name: Code linting
on:
push:
branches:
- main
pull_request:

jobs:
Expand Down
4 changes: 4 additions & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -20,11 +20,15 @@ cmake_dependent_option(GPRAT_ENABLE_TESTS "Build unit and integration tests"
${PROJECT_IS_TOP_LEVEL} "GPRAT_BUILD_CORE" OFF)
cmake_dependent_option(GPRAT_ENABLE_MKL "Enable support for Intel oneMKL"
${PROJECT_IS_TOP_LEVEL} "GPRAT_BUILD_CORE" OFF)
option(GPRAT_ENABLE_BENCHMARK_CACHE_EVICTIONS
"Evict data from caches before running BLAS operations" ON)

option(GPRAT_ENABLE_FORMAT_TARGETS "Enable clang-format / cmake-format targets"
${PROJECT_IS_TOP_LEVEL})

if(GPRAT_ENABLE_FORMAT_TARGETS)
set(CMAKE_FORMAT_EXCLUDE "^external_ports/")

find_package(format QUIET)
if(NOT format_FOUND)
include(FetchContent)
Expand Down
19 changes: 17 additions & 2 deletions CMakePresets.json
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,21 @@
"deprecated": true
}
},
{
"name": "vcpkg",
"hidden": true,
"cacheVariables": {
"CMAKE_TOOLCHAIN_FILE": "$env{VCPKG_ROOT}/scripts/buildsystems/vcpkg.cmake",
"X_VCPKG_APPLOCAL_DEPS_INSTALL": "ON"
}
},
{
"name": "vcpkg-win64-static",
"hidden": true,
"cacheVariables": {
"VCPKG_TARGET_TRIPLET": "x64-windows-static-md-release"
}
},
{
"name": "cppcheck",
"hidden": true,
Expand Down Expand Up @@ -67,7 +82,7 @@
"description": "Note that all the flags after /W4 are required for MSVC to conform to the language standard",
"hidden": true,
"cacheVariables": {
"CMAKE_CXX_FLAGS": "/sdl /guard:cf /utf-8 /diagnostics:caret /w14165 /w44242 /w44254 /w44263 /w34265 /w34287 /w44296 /w44365 /w44388 /w44464 /w14545 /w14546 /w14547 /w14549 /w14555 /w34619 /w34640 /w24826 /w14905 /w14906 /w14928 /w45038 /W4 /permissive- /volatile:iso /Zc:inline /Zc:preprocessor /Zc:enumTypes /Zc:lambda /Zc:__cplusplus /Zc:externConstexpr /Zc:throwingNew /EHsc",
"CMAKE_CXX_FLAGS": "/sdl /guard:cf /utf-8 /diagnostics:caret /w14165 /w44242 /w44254 /w44263 /w34265 /w34287 /w44296 /w44365 /w44388 /w44464 /w14545 /w14546 /w14547 /w14549 /w14555 /w34619 /w34640 /w24826 /w14905 /w14906 /w14928 /w45038 /W4 /permissive- /volatile:iso /Zc:inline /Zc:preprocessor /Zc:enumTypes /Zc:lambda /Zc:__cplusplus /Zc:externConstexpr /Zc:throwingNew /EHsc /D_CRT_SECURE_NO_WARNINGS",
"CMAKE_EXE_LINKER_FLAGS": "/machine:x64 /guard:cf",
"CMAKE_SHARED_LINKER_FLAGS": "/machine:x64 /guard:cf"
}
Expand Down Expand Up @@ -146,7 +161,7 @@
},
{
"name": "ci-windows",
"inherits": ["ci-build", "ci-win64", "ci-multi-config"]
"inherits": ["ci-build", "ci-win64", "ci-multi-config", "vcpkg", "vcpkg-win64-static"]
},
{
"name": "ci-ubuntu-24.04",
Expand Down
6 changes: 5 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ code.

## Dependencies

GPRat depends on [HPX](https://hpx-docs.stellar-group.org/latest/html/index.html) for asynchronous task-based parallelization.
GPRat depends on [HPX](https://hpx-docs.stellar-group.org/latest/html/index.html) for asynchronous task-based parallelization.
Furthermore, for CPU-only BLAS computation GPRat requires [OpenBLAS](http://www.openmathlib.org/OpenBLAS/) or [MKL](https://www.intel.com/content/www/us/en/developer/tools/oneapi/onemkl.html).
A [CUDA](https://developer.nvidia.com/cuda-toolkit) installation is required for GPU-only BLAS computations.

Expand All @@ -20,6 +20,9 @@ A script to install and setup spack for `GPRat` is provided in [`spack-repo`](sp
Spack environment configurations and setup scripts for CPU and GPU use are provided in
[`spack-repo/environments`](spack-repo/environments).

Since Spack is not available on Windows, we also support dependency installation using vcpkg.
For now, vcpkg builds are only tested on Windows.

## How To Compile

GPRat makes use of [CMake presets][1] to simplify the process of configuring the project.
Expand All @@ -35,6 +38,7 @@ ctest --preset=dev-linux
As a developer, you may create a `CMakeUserPresets.json` file at the root of the project that contains additional
presets local to your machine.
In addition to the build configuration `dev-linux`, there are `release-linux`, `dev-linux-gpu`, and `release-linux-gpu`.
For Windows, we have similar presets called `dev-windows` and `release-windows`.
The configurations suffixed with `-gpu` build the library with CUDA.

GPRat can be built with or without Python bindings.
Expand Down
2 changes: 1 addition & 1 deletion bindings/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# try finding pybind11
set(GPRat_pybind11_VERSION 2.10.3)
set(GPRat_pybind11_VERSION 2.13.6)
find_package(pybind11 ${GPRat_pybind11_VERSION} QUIET)
if(pybind11_FOUND)
message(STATUS "Found package pybind11.")
Expand Down
17 changes: 9 additions & 8 deletions bindings/gprat_py.cpp
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
#include "gprat_c.hpp"
#include "gprat/gprat.hpp"

#include <pybind11/pybind11.h>
#include <pybind11/stl.h>

Expand Down Expand Up @@ -31,19 +32,19 @@ void init_gprat(py::module &m)
// Set hyperparameters to default values in `AdamParams` class, unless
// specified. Python object has full access to each hyperparameter and a
// string representation `__repr__`.
py::class_<gprat_hyper::AdamParams>(m, "AdamParams")
py::class_<gprat::AdamParams>(m, "AdamParams")
.def(py::init<double, double, double, double, int>(),
py::arg("learning_rate") = 0.001,
py::arg("beta1") = 0.9,
py::arg("beta2") = 0.999,
py::arg("epsilon") = 1e-8,
py::arg("opt_iter") = 0)
.def_readwrite("learning_rate", &gprat_hyper::AdamParams::learning_rate)
.def_readwrite("beta1", &gprat_hyper::AdamParams::beta1)
.def_readwrite("beta2", &gprat_hyper::AdamParams::beta2)
.def_readwrite("epsilon", &gprat_hyper::AdamParams::epsilon)
.def_readwrite("opt_iter", &gprat_hyper::AdamParams::opt_iter)
.def("__repr__", &gprat_hyper::AdamParams::repr);
.def_readwrite("learning_rate", &gprat::AdamParams::learning_rate)
.def_readwrite("beta1", &gprat::AdamParams::beta1)
.def_readwrite("beta2", &gprat::AdamParams::beta2)
.def_readwrite("epsilon", &gprat::AdamParams::epsilon)
.def_readwrite("opt_iter", &gprat::AdamParams::opt_iter)
.def("__repr__", &gprat::AdamParams::repr);

// Initializes Gaussian Process with `GP` class. Sets default parameters for
// squared exponential kernel, number of regressors and trainable, unless
Expand Down
23 changes: 12 additions & 11 deletions bindings/utils_py.cpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
#include "target.hpp"
#include "utils_c.hpp"
#include "gprat/target.hpp"
#include "gprat/utils.hpp"

#include <pybind11/pybind11.h>
#include <pybind11/stl.h>

Expand Down Expand Up @@ -32,7 +33,7 @@ void start_hpx_wrapper(std::vector<std::string> args, std::size_t n_cores)
}
argv.push_back(nullptr);
int argc = static_cast<int>(args.size());
utils::start_hpx_runtime(argc, argv.data());
gprat::start_hpx_runtime(argc, argv.data());
}

/**
Expand All @@ -43,7 +44,7 @@ void start_hpx_wrapper(std::vector<std::string> args, std::size_t n_cores)
void init_utils(py::module &m)
{
m.def("compute_train_tiles",
&utils::compute_train_tiles,
&gprat::compute_train_tiles,
py::arg("n_samples"),
py::arg("n_tile_size"),
R"pbdoc(
Expand All @@ -58,7 +59,7 @@ void init_utils(py::module &m)
)pbdoc");

m.def("compute_train_tile_size",
&utils::compute_train_tile_size,
&gprat::compute_train_tile_size,
py::arg("n_samples"),
py::arg("n_tiles"),
R"pbdoc(
Expand All @@ -73,7 +74,7 @@ void init_utils(py::module &m)
)pbdoc");

m.def("compute_test_tiles",
&utils::compute_test_tiles,
&gprat::compute_test_tiles,
py::arg("m_samples"),
py::arg("n_tiles"),
py::arg("n_tile_size"),
Expand All @@ -90,19 +91,19 @@ void init_utils(py::module &m)
)pbdoc");

m.def("print_vector",
&utils::print_vector,
&gprat::print_vector,
py::arg("vec"),
py::arg("start") = 0,
py::arg("end") = -1,
py::arg("separator") = " ",
"Print elements of a vector with optional start, end, and separator parameters");

m.def("start_hpx", &start_hpx_wrapper, py::arg("args"), py::arg("n_cores")); // Using the wrapper function
m.def("resume_hpx", &utils::resume_hpx_runtime);
m.def("suspend_hpx", &utils::suspend_hpx_runtime);
m.def("stop_hpx", &utils::stop_hpx_runtime);
m.def("resume_hpx", &gprat::resume_hpx_runtime);
m.def("suspend_hpx", &gprat::suspend_hpx_runtime);
m.def("stop_hpx", &gprat::stop_hpx_runtime);

m.def("compiled_with_cuda", &utils::compiled_with_cuda, "Check if the code was compiled with CUDA support");
m.def("compiled_with_cuda", &gprat::compiled_with_cuda, "Check if the code was compiled with CUDA support");

m.def("print_available_gpus", &gprat::print_available_gpus, "Print available GPUs with their properties");
m.def("gpu_count", &gprat::gpu_count, "Return the number of available GPUs");
Expand Down
34 changes: 21 additions & 13 deletions core/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,18 +1,20 @@
# Option for GPU support with CUDA, cuSolver, cuBLAS
option(GPRAT_WITH_CUDA "Enable GPU support with CUDA, cuSolver, cuBLAS" OFF)

if(GPRAT_WITH_CUDA)
set(CMAKE_CUDA_STANDARD 20)
set(CMAKE_CUDA_EXTENSIONS OFF)
enable_language(CUDA)
endif()

# Option for GPU support with CUDA, cuSolver, cuBLAS
option(GPRAT_WITH_CUDA "Enable GPU support with CUDA, cuSolver, cuBLAS" OFF)
# Pass variable to C++ code
add_compile_definitions(GPRAT_WITH_CUDA=$<BOOL:${GPRAT_WITH_CUDA}>)

set(SOURCE_FILES
src/gprat_c.cpp
src/utils_c.cpp
src/gprat.cpp
src/utils.cpp
src/performance_counters.cpp
src/target.cpp
src/gp_kernels.cpp
src/gp_hyperparameters.cpp
src/tile_data.cpp
src/kernels.cpp
src/hyperparameters.cpp
src/cpu/gp_functions.cpp
src/cpu/gp_algorithms.cpp
src/cpu/gp_uncertainty.cpp
Expand Down Expand Up @@ -54,7 +56,10 @@ target_sources(gprat_core PRIVATE ${header_files})
target_link_libraries(gprat_core PUBLIC HPX::hpx)

if(GPRAT_WITH_CUDA)
find_package(CUDAToolkit MODULE REQUIRED)
target_link_libraries(gprat_core PUBLIC CUDA::cusolver CUDA::cublas)
# Flag not working for CLANG CUDA
target_compile_features(gprat_core PUBLIC cuda_std_${CMAKE_CUDA_STANDARD})
endif()

# Include directories
Expand All @@ -66,16 +71,19 @@ if(GPRAT_ENABLE_MKL)
# Link Intel oneMKL
target_link_libraries(gprat_core PUBLIC MKL::mkl_intel_lp64 MKL::mkl_core
MKL::MKL MKL::mkl_sequential)
target_compile_definitions(gprat_core PUBLIC GPRAT_ENABLE_MKL)
else()
# Link OpenBLAS
target_link_libraries(gprat_core PUBLIC ${OpenBLAS_LIB})
endif()

if(GPRAT_ENABLE_MKL)
target_compile_definitions(gprat_core PUBLIC GPRAT_ENABLE_MKL)
target_compile_definitions(gprat_core
PUBLIC GPRAT_WITH_CUDA=$<BOOL:${GPRAT_WITH_CUDA}>)
if(GPRAT_ENABLE_BENCHMARK_CACHE_EVICTIONS)
target_compile_definitions(gprat_core
PUBLIC GPRAT_ENABLE_BENCHMARK_CACHE_EVICTIONS)
endif()

target_compile_features(gprat_core PUBLIC cxx_std_17)
target_compile_features(gprat_core PUBLIC cxx_std_20)

set_property(TARGET gprat_core PROPERTY POSITION_INDEPENDENT_CODE ON)

Expand Down
Loading